Summarize the data
expression.Rd
expression()
is an analysis function that computes the protein summary
statistics for a given tidyproteomics data object.
Usage
expression(
data = NULL,
...,
.pairs = NULL,
.method = stats::t.test,
.p.adjust = "BH"
)
Arguments
- data
tidyproteomics data object
- ...
two sample comparison e.g. experimental/control
- .method
a two-distribution test function returning a p_value for the null hypothesis. Example functions include t.test, wilcox.test, stats::ks.test, additionally, the string "limma" can be used to select from the limma package to compute an empirical Bayesian estimation which performs better with non-linear distributions and uneven replicate balance between samples.
- .p.adjust
a stats::p.adjust string for multiple test correction, default is 'BH' (Benjamini & Hochberg, 1995)
Examples
library(dplyr, warn.conflicts = FALSE)
library(tidyproteomics)
# simple t.test expression analysis
hela_proteins %>%
expression(knockdown/control) %>%
export_analysis(knockdown/control, .analysis = "expression")
#> ℹ .. expression::t_test testing knockdown / control
#> ✔ .. expression::t_test testing knockdown / control [3.1s]
#>
#> # A tibble: 5,187 × 25
#> protein imputed n average_expression proportional_expression foldchange
#> <chr> <dbl> <int> <dbl> <dbl> <dbl>
#> 1 Q5SVJ8 0.333 6 755886. 0.00000293 52.1
#> 2 Q15011 0 6 4919332. 0.0000190 48.6
#> 3 F6SA91 0.167 6 879218. 0.00000340 39.6
#> 4 Q9NXV2 0 6 3103100. 0.0000120 19.6
#> 5 Q13751 0 6 1072517. 0.00000415 11.9
#> 6 O00308 0 6 733272. 0.00000284 10.1
#> 7 Q9BY50 0 6 2049680. 0.00000793 9.75
#> 8 Q8TBM8 0.333 6 3374207. 0.0000131 8.72
#> 9 Q5JUW8 0 6 1745373. 0.00000676 7.98
#> 10 Q5T9L3 0.167 6 961926. 0.00000372 7.97
#> # ℹ 5,177 more rows
#> # ℹ 19 more variables: log2_foldchange <dbl>, p_value <dbl>, adj_p_value <dbl>,
#> # normalization <chr>, abundance_control_1 <dbl>, abundance_control_2 <dbl>,
#> # abundance_control_3 <dbl>, abundance_knockdown_1 <dbl>,
#> # abundance_knockdown_2 <dbl>, abundance_knockdown_3 <dbl>,
#> # description <chr>, biological_process <chr>, cellular_component <chr>,
#> # molecular_function <chr>, gene_id_entrez <chr>, gene_name <chr>, …
# a wilcox.test expression analysis
hela_proteins %>%
expression(knockdown/control, .method = stats::wilcox.test) %>%
export_analysis(knockdown/control, .analysis = "expression")
#> ℹ .. expression::wilcox_test testing knockdown / control
#> ✔ .. expression::wilcox_test testing knockdown / control [3s]
#>
#> # A tibble: 5,187 × 25
#> protein imputed n average_expression proportional_expression foldchange
#> <chr> <dbl> <int> <dbl> <dbl> <dbl>
#> 1 Q5SVJ8 0.333 6 755886. 0.00000293 52.1
#> 2 Q15011 0 6 4919332. 0.0000190 48.6
#> 3 F6SA91 0.167 6 879218. 0.00000340 39.6
#> 4 Q9NXV2 0 6 3103100. 0.0000120 19.6
#> 5 Q13751 0 6 1072517. 0.00000415 11.9
#> 6 O00308 0 6 733272. 0.00000284 10.1
#> 7 Q9BY50 0 6 2049680. 0.00000793 9.75
#> 8 Q8TBM8 0.333 6 3374207. 0.0000131 8.72
#> 9 Q5JUW8 0 6 1745373. 0.00000676 7.98
#> 10 Q5T9L3 0.167 6 961926. 0.00000372 7.97
#> # ℹ 5,177 more rows
#> # ℹ 19 more variables: log2_foldchange <dbl>, p_value <dbl>, adj_p_value <dbl>,
#> # normalization <chr>, abundance_control_1 <dbl>, abundance_control_2 <dbl>,
#> # abundance_control_3 <dbl>, abundance_knockdown_1 <dbl>,
#> # abundance_knockdown_2 <dbl>, abundance_knockdown_3 <dbl>,
#> # description <chr>, biological_process <chr>, cellular_component <chr>,
#> # molecular_function <chr>, gene_id_entrez <chr>, gene_name <chr>, …
# a one-tailed wilcox.test expression analysis
wilcoxon_less <- function(x, y) {
stats::wilcox.test(x, y, alternative = "less")
}
hela_proteins <- hela_proteins %>%
expression(knockdown/control, .method = stats::wilcox.test)
#> ℹ .. expression::wilcox_test testing knockdown / control
#> ✔ .. expression::wilcox_test testing knockdown / control [2.9s]
#>
hela_proteins %>% export_analysis(knockdown/control, .analysis = "expression")
#> # A tibble: 5,187 × 25
#> protein imputed n average_expression proportional_expression foldchange
#> <chr> <dbl> <int> <dbl> <dbl> <dbl>
#> 1 Q5SVJ8 0.333 6 755886. 0.00000293 52.1
#> 2 Q15011 0 6 4919332. 0.0000190 48.6
#> 3 F6SA91 0.167 6 879218. 0.00000340 39.6
#> 4 Q9NXV2 0 6 3103100. 0.0000120 19.6
#> 5 Q13751 0 6 1072517. 0.00000415 11.9
#> 6 O00308 0 6 733272. 0.00000284 10.1
#> 7 Q9BY50 0 6 2049680. 0.00000793 9.75
#> 8 Q8TBM8 0.333 6 3374207. 0.0000131 8.72
#> 9 Q5JUW8 0 6 1745373. 0.00000676 7.98
#> 10 Q5T9L3 0.167 6 961926. 0.00000372 7.97
#> # ℹ 5,177 more rows
#> # ℹ 19 more variables: log2_foldchange <dbl>, p_value <dbl>, adj_p_value <dbl>,
#> # normalization <chr>, abundance_control_1 <dbl>, abundance_control_2 <dbl>,
#> # abundance_control_3 <dbl>, abundance_knockdown_1 <dbl>,
#> # abundance_knockdown_2 <dbl>, abundance_knockdown_3 <dbl>,
#> # description <chr>, biological_process <chr>, cellular_component <chr>,
#> # molecular_function <chr>, gene_id_entrez <chr>, gene_name <chr>, …
# Note: the userdefined function is preserved in the operations tracking
hela_proteins %>% operations()
#> ℹ Data Transformations
#> • Data files (p97KD_HCT116_proteins.xlsx) were imported as proteins from
#> ProteomeDiscoverer
#> • Analysis: expression difference wilcox_test knockdown/control, p.adjust =
#> BH
# limma expression analysis
hela_proteins %>%
expression(knockdown/control, .method = "limma") %>%
export_analysis(knockdown/control, .analysis = "expression")
#> ℹ .. expression::limma testing knockdown / control
#> ! expression::limma removed 159 proteins with completely missing values
#> ℹ .. expression::limma testing knockdown / control
#> ✔ .. expression::limma testing knockdown / control [421ms]
#>
#> # A tibble: 5,187 × 27
#> protein imputed n log2_foldchange foldchange average_expression
#> <chr> <dbl> <int> <dbl> <dbl> <dbl>
#> 1 Q15011 0 6 5.48 44.6 2460624.
#> 2 F6SA91 0.167 6 5.31 39.6 272410.
#> 3 Q5SVJ8 0.333 6 3.99 15.9 214100.
#> 4 Q13751 0 6 3.66 12.6 734867.
#> 5 Q9NXV2 0 6 3.63 12.4 1554000.
#> 6 O00308 0 6 3.34 10.1 392333.
#> 7 Q9BY50 0 6 3.09 8.51 1484051.
#> 8 Q5JUW8 0 6 3.05 8.28 1039329.
#> 9 Q5T9L3 0.167 6 2.99 7.97 605468.
#> 10 O95833 0.167 6 2.88 7.37 2160177.
#> # ℹ 5,177 more rows
#> # ℹ 21 more variables: proportional_expression <dbl>, p_value <dbl>,
#> # adj_p_value <dbl>, limma_t_statistic <dbl>, limma_B_statistic <dbl>,
#> # normalization <chr>, abundance_control_1 <dbl>, abundance_control_2 <dbl>,
#> # abundance_control_3 <dbl>, abundance_knockdown_1 <dbl>,
#> # abundance_knockdown_2 <dbl>, abundance_knockdown_3 <dbl>,
#> # description <chr>, biological_process <chr>, cellular_component <chr>, …
# using the .pairs argument when multiple comparisons are needed
comps <- list(c("control","knockdown"),
c("knockdown","control"))
hela_proteins %>%
expression(.pairs = comps)
#> Using the supplied 2 sample pairs ...
#> ℹ .. expression::t_test testing control / knockdown
#> ✔ .. expression::t_test testing control / knockdown [3s]
#>
#> ℹ .. expression::t_test testing knockdown / control
#> ✔ .. expression::t_test testing knockdown / control [3s]
#>
#>
#> ── Quantitative Proteomics Data Object ──
#>
#> Origin ProteomeDiscoverer
#> proteins (11.96 MB)
#> Composition 6 files
#> 2 samples (control, knockdown)
#> Quantitation 7055 proteins
#> 4 log10 dynamic range
#> 28.8% missing values
#> *imputed
#> Accounting (4) num_peptides num_psms num_unique_peptides imputed
#> Annotations (9) description biological_process cellular_component molecular_function
#> gene_id_entrez gene_name wiki_pathway reactome_pathway
#> gene_id_ensemble
#> Analyses (2)
#> knockdown/control -> expression
#> control/knockdown -> expression
#>