Skip to contents

expression() is an analysis function that computes the protein summary statistics for a given tidyproteomics data object.

Usage

expression(
  data = NULL,
  ...,
  .pairs = NULL,
  .method = stats::t.test,
  .p.adjust = "BH"
)

Arguments

data

tidyproteomics data object

...

two sample comparison e.g. experimental/control

.method

a two-distribution test function returning a p_value for the null hypothesis. Example functions include t.test, wilcox.test, stats::ks.test, additionally, the string "limma" can be used to select from the limma package to compute an empirical Bayesian estimation which performs better with non-linear distributions and uneven replicate balance between samples.

.p.adjust

a stats::p.adjust string for multiple test correction, default is 'BH' (Benjamini & Hochberg, 1995)

Value

a tibble

Examples

library(dplyr, warn.conflicts = FALSE)
library(tidyproteomics)

# simple t.test expression analysis
hela_proteins %>%
   expression(knockdown/control) %>%
   export_analysis(knockdown/control, .analysis = "expression")
#>  .. expression::t_test testing knockdown / control
#>  .. expression::t_test testing knockdown / control [3.1s]
#> 
#> # A tibble: 5,187 × 25
#>    protein imputed     n average_expression proportional_expression foldchange
#>    <chr>     <dbl> <int>              <dbl>                   <dbl>      <dbl>
#>  1 Q5SVJ8    0.333     6            755886.              0.00000293      52.1 
#>  2 Q15011    0         6           4919332.              0.0000190       48.6 
#>  3 F6SA91    0.167     6            879218.              0.00000340      39.6 
#>  4 Q9NXV2    0         6           3103100.              0.0000120       19.6 
#>  5 Q13751    0         6           1072517.              0.00000415      11.9 
#>  6 O00308    0         6            733272.              0.00000284      10.1 
#>  7 Q9BY50    0         6           2049680.              0.00000793       9.75
#>  8 Q8TBM8    0.333     6           3374207.              0.0000131        8.72
#>  9 Q5JUW8    0         6           1745373.              0.00000676       7.98
#> 10 Q5T9L3    0.167     6            961926.              0.00000372       7.97
#> # ℹ 5,177 more rows
#> # ℹ 19 more variables: log2_foldchange <dbl>, p_value <dbl>, adj_p_value <dbl>,
#> #   normalization <chr>, abundance_control_1 <dbl>, abundance_control_2 <dbl>,
#> #   abundance_control_3 <dbl>, abundance_knockdown_1 <dbl>,
#> #   abundance_knockdown_2 <dbl>, abundance_knockdown_3 <dbl>,
#> #   description <chr>, biological_process <chr>, cellular_component <chr>,
#> #   molecular_function <chr>, gene_id_entrez <chr>, gene_name <chr>, …

# a wilcox.test expression analysis
hela_proteins %>%
   expression(knockdown/control, .method = stats::wilcox.test) %>%
   export_analysis(knockdown/control, .analysis = "expression")
#>  .. expression::wilcox_test testing knockdown / control
#>  .. expression::wilcox_test testing knockdown / control [3s]
#> 
#> # A tibble: 5,187 × 25
#>    protein imputed     n average_expression proportional_expression foldchange
#>    <chr>     <dbl> <int>              <dbl>                   <dbl>      <dbl>
#>  1 Q5SVJ8    0.333     6            755886.              0.00000293      52.1 
#>  2 Q15011    0         6           4919332.              0.0000190       48.6 
#>  3 F6SA91    0.167     6            879218.              0.00000340      39.6 
#>  4 Q9NXV2    0         6           3103100.              0.0000120       19.6 
#>  5 Q13751    0         6           1072517.              0.00000415      11.9 
#>  6 O00308    0         6            733272.              0.00000284      10.1 
#>  7 Q9BY50    0         6           2049680.              0.00000793       9.75
#>  8 Q8TBM8    0.333     6           3374207.              0.0000131        8.72
#>  9 Q5JUW8    0         6           1745373.              0.00000676       7.98
#> 10 Q5T9L3    0.167     6            961926.              0.00000372       7.97
#> # ℹ 5,177 more rows
#> # ℹ 19 more variables: log2_foldchange <dbl>, p_value <dbl>, adj_p_value <dbl>,
#> #   normalization <chr>, abundance_control_1 <dbl>, abundance_control_2 <dbl>,
#> #   abundance_control_3 <dbl>, abundance_knockdown_1 <dbl>,
#> #   abundance_knockdown_2 <dbl>, abundance_knockdown_3 <dbl>,
#> #   description <chr>, biological_process <chr>, cellular_component <chr>,
#> #   molecular_function <chr>, gene_id_entrez <chr>, gene_name <chr>, …

# a one-tailed wilcox.test expression analysis
wilcoxon_less <- function(x, y) {
   stats::wilcox.test(x, y, alternative = "less")
}
hela_proteins <- hela_proteins %>%
   expression(knockdown/control, .method = stats::wilcox.test)
#>  .. expression::wilcox_test testing knockdown / control
#>  .. expression::wilcox_test testing knockdown / control [2.9s]
#> 

hela_proteins %>% export_analysis(knockdown/control, .analysis = "expression")
#> # A tibble: 5,187 × 25
#>    protein imputed     n average_expression proportional_expression foldchange
#>    <chr>     <dbl> <int>              <dbl>                   <dbl>      <dbl>
#>  1 Q5SVJ8    0.333     6            755886.              0.00000293      52.1 
#>  2 Q15011    0         6           4919332.              0.0000190       48.6 
#>  3 F6SA91    0.167     6            879218.              0.00000340      39.6 
#>  4 Q9NXV2    0         6           3103100.              0.0000120       19.6 
#>  5 Q13751    0         6           1072517.              0.00000415      11.9 
#>  6 O00308    0         6            733272.              0.00000284      10.1 
#>  7 Q9BY50    0         6           2049680.              0.00000793       9.75
#>  8 Q8TBM8    0.333     6           3374207.              0.0000131        8.72
#>  9 Q5JUW8    0         6           1745373.              0.00000676       7.98
#> 10 Q5T9L3    0.167     6            961926.              0.00000372       7.97
#> # ℹ 5,177 more rows
#> # ℹ 19 more variables: log2_foldchange <dbl>, p_value <dbl>, adj_p_value <dbl>,
#> #   normalization <chr>, abundance_control_1 <dbl>, abundance_control_2 <dbl>,
#> #   abundance_control_3 <dbl>, abundance_knockdown_1 <dbl>,
#> #   abundance_knockdown_2 <dbl>, abundance_knockdown_3 <dbl>,
#> #   description <chr>, biological_process <chr>, cellular_component <chr>,
#> #   molecular_function <chr>, gene_id_entrez <chr>, gene_name <chr>, …

# Note: the userdefined function is preserved in the operations tracking
hela_proteins %>% operations()
#>  Data Transformations
#>   • Data files (p97KD_HCT116_proteins.xlsx) were imported as proteins from
#>   ProteomeDiscoverer
#>   • Analysis: expression difference wilcox_test knockdown/control, p.adjust =
#>   BH

# limma expression analysis
hela_proteins %>%
   expression(knockdown/control, .method = "limma") %>%
   export_analysis(knockdown/control, .analysis = "expression")
#>  .. expression::limma testing knockdown / control
#> ! expression::limma removed 159 proteins with completely missing values
#>  .. expression::limma testing knockdown / control

#>  .. expression::limma testing knockdown / control [421ms]
#> 
#> # A tibble: 5,187 × 27
#>    protein imputed     n log2_foldchange foldchange average_expression
#>    <chr>     <dbl> <int>           <dbl>      <dbl>              <dbl>
#>  1 Q15011    0         6            5.48      44.6            2460624.
#>  2 F6SA91    0.167     6            5.31      39.6             272410.
#>  3 Q5SVJ8    0.333     6            3.99      15.9             214100.
#>  4 Q13751    0         6            3.66      12.6             734867.
#>  5 Q9NXV2    0         6            3.63      12.4            1554000.
#>  6 O00308    0         6            3.34      10.1             392333.
#>  7 Q9BY50    0         6            3.09       8.51           1484051.
#>  8 Q5JUW8    0         6            3.05       8.28           1039329.
#>  9 Q5T9L3    0.167     6            2.99       7.97            605468.
#> 10 O95833    0.167     6            2.88       7.37           2160177.
#> # ℹ 5,177 more rows
#> # ℹ 21 more variables: proportional_expression <dbl>, p_value <dbl>,
#> #   adj_p_value <dbl>, limma_t_statistic <dbl>, limma_B_statistic <dbl>,
#> #   normalization <chr>, abundance_control_1 <dbl>, abundance_control_2 <dbl>,
#> #   abundance_control_3 <dbl>, abundance_knockdown_1 <dbl>,
#> #   abundance_knockdown_2 <dbl>, abundance_knockdown_3 <dbl>,
#> #   description <chr>, biological_process <chr>, cellular_component <chr>, …

# using the .pairs argument when multiple comparisons are needed
comps <- list(c("control","knockdown"),
            c("knockdown","control"))

hela_proteins %>%
   expression(.pairs = comps)
#> Using the supplied 2 sample pairs ...
#>  .. expression::t_test testing control / knockdown
#>  .. expression::t_test testing control / knockdown [3s]
#> 
#>  .. expression::t_test testing knockdown / control
#>  .. expression::t_test testing knockdown / control [3s]
#> 
#> 
#> ── Quantitative Proteomics Data Object ──
#> 
#> Origin          ProteomeDiscoverer 
#>                 proteins (11.96 MB) 
#> Composition     6 files 
#>                 2 samples (control, knockdown) 
#> Quantitation    7055 proteins 
#>                 4 log10 dynamic range 
#>                 28.8% missing values 
#>  *imputed        
#> Accounting      (4) num_peptides num_psms num_unique_peptides imputed 
#> Annotations     (9) description biological_process cellular_component molecular_function
#>                 gene_id_entrez gene_name wiki_pathway reactome_pathway
#>                 gene_id_ensemble 
#> Analyses        (2) 
#>                 knockdown/control -> expression  
#>                 control/knockdown -> expression  
#>