Publication Demo • tidyproteomics

The following is a demonstration workflow for generating the figures from the publication.

library(tidyverse)
library(tidyproteomics)

Import

# Starting data for the workflow below.
# NOTE(review): hela_proteins is presumably an example data set shipped with
# tidyproteomics; confirm it contains the 'kndw' and 'ctrl' samples used later.
rdata <- hela_proteins

Summary Plots

# Simple summary-stat figures, each requested with destination = "png".
# The steps run one at a time; every call receives the object produced by
# the previous one, and the final result is kept in rdata.
rdata <- rdata %>% plot_counts(destination = "png")
rdata <- rdata %>% plot_quantrank(destination = "png")
rdata <- rdata %>% plot_venn(destination = "png")
rdata <- rdata %>% plot_euler(destination = "png")

Summary Stats

# Reporting stage: save a per-sample summary table, then a report on the
# entries flagged as contamination ("CRAP").
rdata <- rdata %>%
  summary(
    "sample",
    destination = "save"
  ) %>%
  summary(
    contamination = "CRAP",
    destination = "save"
  )

# Filtering stage: keep only the entries whose description does not start
# with "CRAP", i.e. drop the contamination reported above.
rdata <- rdata %>%
  subset(!description %like% "^CRAP")

Normalization and Imputation

# Normalize, impute, then draw the diagnostic figures in one chain.
# Every step must end in %>% so the data keeps flowing; without the pipe
# after impute() the plotting calls would run with no data input.
rdata <- rdata %>%
  # normalize via several methods, best method will be automatically selected
  normalize(c("median", "linear", "limma", "randomforest")) %>%
  # impute with a minimum value (this is a knock-out)
  impute(base::min) %>%
  # plot visualizations comparing normalization methods
  plot_normalization(destination = "png") %>%
  plot_variation_cv(destination = "png") %>%
  plot_variation_pca(destination = "png") %>%
  plot_dynamic_range(destination = "png") %>%
  # plot visualizations of unbiased clustering
  plot_heatmap(destination = "png") %>%
  plot_pca(destination = "png")

Expression Analysis

# Expression analysis for the ratio kndw/ctrl
# (experiment: kndw, control: ctrl).
rdata <- rdata %>%
  expression(kndw/ctrl)

# Figures for the same comparison; the volcano plot uses the "p_value"
# column as its significance measure.
rdata <- rdata %>%
  plot_volcano(
    kndw/ctrl,
    destination = "png",
    significance_column = "p_value"
  ) %>%
  plot_proportion(
    kndw/ctrl,
    destination = "png"
  )

Enrichment Analysis

# Enrichment of GO terms, computed from the kndw/ctrl expression results,
# once per term set.
rdata <- rdata %>%
  enrichment(
    kndw/ctrl,
    .terms = "biological_process"
  ) %>%
  enrichment(
    kndw/ctrl,
    .terms = "cellular_component"
  ) %>%
  enrichment(
    kndw/ctrl,
    .terms = "molecular_function"
  )

# One enrichment figure per term set, in the same order as above.
rdata <- rdata %>%
  plot_enrichment(
    kndw/ctrl,
    .terms = "biological_process",
    destination = "png"
  ) %>%
  plot_enrichment(
    kndw/ctrl,
    .terms = "cellular_component",
    destination = "png"
  ) %>%
  plot_enrichment(
    kndw/ctrl,
    .terms = "molecular_function",
    destination = "png"
  )

Advanced

Plot Quantitation-Rank with Log2 Cutoff

# SUPPLEMENTAL FIGURES
# plot an alternate quantitative ranking, filtered on the
# "log2_foldchange" values with a cutoff of 5
rdata %>%
  plot_quantrank(display_filter = "log2_foldchange",
                 display_cutoff = 5)
# no plot object is passed, so ggsave() writes the most recent plot;
# `height` is spelled out rather than relying on partial matching of `h`
ggsave("plot_proteins_quantitation_rank_filtered.png",
       width = 6, height = 4)

Plot Comparison of Two Expressions

# Start over from a fresh import so the comparison below works on data
# that has not been imputed.
rdata <- path_to_package_data("p97KD_HCT116") %>%
  import("ProteomeDiscoverer", "proteins")

# Relabel the samples to the names used in the expression calls:
# "ctl" becomes "ctrl" and "p97" becomes "kndw".
rdata <- rdata %>%
  reassign("sample", "ctl", "ctrl") %>%
  reassign("sample", "p97", "kndw")

# Expression analysis of kndw/ctrl with stats::t.test as the statistical
# comparison; the resulting table is exported into tbl_expression_ttest.
tbl_expression_ttest <- rdata %>%
  expression(
    kndw/ctrl,
    .method = stats::t.test
  ) %>%
  export_analysis(
    kndw/ctrl,
    .analysis = "expression"
  )

# Expression analysis of kndw/ctrl with the "limma" statistical method;
# the resulting table is exported into tbl_expression_limma.
tbl_expression_limma <- rdata %>%
  expression(
    kndw/ctrl,
    .method = "limma"
  ) %>%
  export_analysis(
    kndw/ctrl,
    .analysis = "expression"
  )

# plot the two expression tables to compare similarities between methods
# The x label names the method behind the first table (stats::t.test) and
# the y label the method behind the second (limma).
# NOTE(review): the title says "Hela" while the data was imported from
# "p97KD_HCT116"; confirm the intended cell line.
plot_compexp(tbl_expression_ttest,
             tbl_expression_limma,
             labels_column = "gene_name",
             log2fc_min = 1, significance_column = "p_value") +
  ggplot2::labs(x = "(log2 FC) t-test",
                y = "(log2 FC) Empirical Bayes (limma)",
                title = "Hela p97 KD ~ Ctrl")

# Save the most recent plot; `height` is spelled out rather than relying on
# partial matching of `h`.
# NOTE(review): the file name says "enrichment" although the preceding figure
# compares expression results; confirm the intended name before renaming.
ggsave("plot_enrichment_comparison.png",
       width = 6, height = 4)