Summarize the data
expression.Rd
expression()
is an analysis function that computes the protein summary
statistics for a given tidyproteomics data object.
Usage
expression(
data = NULL,
...,
.pairs = NULL,
.method = stats::t.test,
.p.adjust = "BH"
)
Arguments
- data
tidyproteomics data object
- ...
two sample comparison e.g. experimental/control
- .method
a two-distribution test function returning a p_value for the null hypothesis. Example functions include t.test, wilcox.test, stats::ks.test, additionally, the string "limma" can be used to select from the limma package to compute an empirical Bayesian estimation which performs better with non-linear distributions and uneven replicate balance between samples.
- .p.adjust
a stats::p.adjust string for multiple test correction, default is 'BH' (Benjamini & Hochberg, 1995)
Examples
library(dplyr, warn.conflicts = FALSE)
library(tidyproteomics)
# simple t.test expression analysis
hela_proteins %>%
expression(knockdown/control) %>%
export_analysis(knockdown/control, .analysis = "expression")
#> ℹ .. expression::t_test testing knockdown / control
#> ✔ .. expression::t_test testing knockdown / control [3.4s]
#>
#> # A tibble: 5,187 × 27
#> protein imputed n imputed_control imputed_knockdown average_expression
#> <chr> <dbl> <int> <dbl> <dbl> <dbl>
#> 1 Q5SVJ8 0.333 6 0.667 0 755886.
#> 2 Q15011 0 6 0 0 4919332.
#> 3 F6SA91 0.167 6 0.333 0 879218.
#> 4 Q9NXV2 0 6 0 0 3103100.
#> 5 Q13751 0 6 0 0 1072517.
#> 6 O00308 0 6 0 0 733272.
#> 7 Q9BY50 0 6 0 0 2049680.
#> 8 Q8TBM8 0.333 6 0.667 0 3374207.
#> 9 Q5JUW8 0 6 0 0 1745373.
#> 10 Q5T9L3 0.167 6 0.333 0 961926.
#> # ℹ 5,177 more rows
#> # ℹ 21 more variables: proportional_expression <dbl>, foldchange <dbl>,
#> # log2_foldchange <dbl>, p_value <dbl>, adj_p_value <dbl>,
#> # normalization <chr>, abundance_control_1 <dbl>, abundance_control_2 <dbl>,
#> # abundance_control_3 <dbl>, abundance_knockdown_1 <dbl>,
#> # abundance_knockdown_2 <dbl>, abundance_knockdown_3 <dbl>,
#> # description <chr>, biological_process <chr>, cellular_component <chr>, …
# a wilcox.test expression analysis
hela_proteins %>%
expression(knockdown/control, .method = stats::wilcox.test) %>%
export_analysis(knockdown/control, .analysis = "expression")
#> ℹ .. expression::wilcox_test testing knockdown / control
#> ✔ .. expression::wilcox_test testing knockdown / control [3.3s]
#>
#> # A tibble: 5,187 × 27
#> protein imputed n imputed_control imputed_knockdown average_expression
#> <chr> <dbl> <int> <dbl> <dbl> <dbl>
#> 1 Q5SVJ8 0.333 6 0.667 0 755886.
#> 2 Q15011 0 6 0 0 4919332.
#> 3 F6SA91 0.167 6 0.333 0 879218.
#> 4 Q9NXV2 0 6 0 0 3103100.
#> 5 Q13751 0 6 0 0 1072517.
#> 6 O00308 0 6 0 0 733272.
#> 7 Q9BY50 0 6 0 0 2049680.
#> 8 Q8TBM8 0.333 6 0.667 0 3374207.
#> 9 Q5JUW8 0 6 0 0 1745373.
#> 10 Q5T9L3 0.167 6 0.333 0 961926.
#> # ℹ 5,177 more rows
#> # ℹ 21 more variables: proportional_expression <dbl>, foldchange <dbl>,
#> # log2_foldchange <dbl>, p_value <dbl>, adj_p_value <dbl>,
#> # normalization <chr>, abundance_control_1 <dbl>, abundance_control_2 <dbl>,
#> # abundance_control_3 <dbl>, abundance_knockdown_1 <dbl>,
#> # abundance_knockdown_2 <dbl>, abundance_knockdown_3 <dbl>,
#> # description <chr>, biological_process <chr>, cellular_component <chr>, …
# a one-tailed wilcox.test expression analysis
wilcoxon_less <- function(x, y) {
stats::wilcox.test(x, y, alternative = "less")
}
hela_proteins <- hela_proteins %>%
expression(knockdown/control, .method = stats::wilcox.test)
#> ℹ .. expression::wilcox_test testing knockdown / control
#> ✔ .. expression::wilcox_test testing knockdown / control [3.1s]
#>
hela_proteins %>% export_analysis(knockdown/control, .analysis = "expression")
#> # A tibble: 5,187 × 27
#> protein imputed n imputed_control imputed_knockdown average_expression
#> <chr> <dbl> <int> <dbl> <dbl> <dbl>
#> 1 Q5SVJ8 0.333 6 0.667 0 755886.
#> 2 Q15011 0 6 0 0 4919332.
#> 3 F6SA91 0.167 6 0.333 0 879218.
#> 4 Q9NXV2 0 6 0 0 3103100.
#> 5 Q13751 0 6 0 0 1072517.
#> 6 O00308 0 6 0 0 733272.
#> 7 Q9BY50 0 6 0 0 2049680.
#> 8 Q8TBM8 0.333 6 0.667 0 3374207.
#> 9 Q5JUW8 0 6 0 0 1745373.
#> 10 Q5T9L3 0.167 6 0.333 0 961926.
#> # ℹ 5,177 more rows
#> # ℹ 21 more variables: proportional_expression <dbl>, foldchange <dbl>,
#> # log2_foldchange <dbl>, p_value <dbl>, adj_p_value <dbl>,
#> # normalization <chr>, abundance_control_1 <dbl>, abundance_control_2 <dbl>,
#> # abundance_control_3 <dbl>, abundance_knockdown_1 <dbl>,
#> # abundance_knockdown_2 <dbl>, abundance_knockdown_3 <dbl>,
#> # description <chr>, biological_process <chr>, cellular_component <chr>, …
# Note: the userdefined function is preserved in the operations tracking
hela_proteins %>% operations()
#> ℹ Data Transformations
#> • Data files (p97KD_HCT116_proteins.xlsx) were imported as proteins from
#> ProteomeDiscoverer
#> • Analysis: expression difference wilcox_test knockdown/control, p.adjust =
#> BH
# limma expression analysis
hela_proteins %>%
expression(knockdown/control, .method = "limma") %>%
export_analysis(knockdown/control, .analysis = "expression")
#> ℹ .. expression::limma testing knockdown / control
#> ! expression::limma removed 159 proteins with completely missing values
#> ℹ .. expression::limma testing knockdown / control
#> ✔ .. expression::limma testing knockdown / control [598ms]
#>
#> # A tibble: 5,187 × 29
#> protein imputed n imputed_control imputed_knockdown log2_foldchange
#> <chr> <dbl> <int> <dbl> <dbl> <dbl>
#> 1 Q15011 0 6 0 0 5.48
#> 2 F6SA91 0.167 6 0.333 0 5.31
#> 3 Q5SVJ8 0.333 6 0.667 0 3.99
#> 4 Q13751 0 6 0 0 3.66
#> 5 Q9NXV2 0 6 0 0 3.63
#> 6 O00308 0 6 0 0 3.34
#> 7 Q9BY50 0 6 0 0 3.09
#> 8 Q5JUW8 0 6 0 0 3.05
#> 9 Q5T9L3 0.167 6 0.333 0 2.99
#> 10 O95833 0.167 6 0.333 0 2.88
#> # ℹ 5,177 more rows
#> # ℹ 23 more variables: foldchange <dbl>, average_expression <dbl>,
#> # proportional_expression <dbl>, p_value <dbl>, adj_p_value <dbl>,
#> # limma_t_statistic <dbl>, limma_B_statistic <dbl>, normalization <chr>,
#> # abundance_control_1 <dbl>, abundance_control_2 <dbl>,
#> # abundance_control_3 <dbl>, abundance_knockdown_1 <dbl>,
#> # abundance_knockdown_2 <dbl>, abundance_knockdown_3 <dbl>, …
# using the .pairs argument when multiple comparisons are needed
comps <- list(c("control","knockdown"),
c("knockdown","control"))
hela_proteins %>%
expression(.pairs = comps)
#> Expression Analysis - using the supplied 2 sample pairs ...
#> ℹ .. expression::t_test testing control / knockdown
#> ✔ .. expression::t_test testing control / knockdown [3.4s]
#>
#> ℹ .. expression::t_test testing knockdown / control
#> ✔ .. expression::t_test testing knockdown / control [3.4s]
#>
#>
#> ── Quantitative Proteomics Data Object ──
#>
#> Origin ProteomeDiscoverer
#> proteins (12.13 MB)
#> Composition 6 files
#> 2 samples (control, knockdown)
#> Quantitation 7055 proteins
#> 4 log10 dynamic range
#> 28.8% missing values
#> *imputed
#> Accounting (4) num_peptides num_psms num_unique_peptides imputed
#> Annotations (9) description biological_process cellular_component molecular_function
#> gene_id_entrez gene_name wiki_pathway reactome_pathway
#> gene_id_ensemble
#> Analyses (2)
#> knockdown/control -> expression
#> control/knockdown -> expression
#>