vignettes/articles/volcano_plots.Rmd
volcano_plots.Rmd
This article contains generates volcano plots (Fig 2A; Extended Data Fig. 2A–D). By default, the chunks are not set to evaluate, since the volcano plots exceed the maximum dimensions. Run each chunk in this article individually (do not knit) to generate and save the volcano plots.
library(MotrpacRatTraining6moWATData)
library(MotrpacRatTraining6moWAT)
library(dplyr)
library(tidyr)
library(tibble)
library(purrr)
library(latex2exp)
library(ggplot2)
library(ggrepel)
library(scales)
# List of all differential analysis results
all_DA <- list(TRNSCRPT = TRNSCRPT_DA,
PROT = PROT_DA,
PHOSPHO = PHOSPHO_DA,
METAB = METAB_DA)
all_DA$PHOSPHO$MvF_SED <- all_DA$PHOSPHO$MvF_SED %>%
mutate(site = paste0(gene_symbol, "-", site))
## Updates required for volcano plots
update_dea_res <- function(x) {
x <- x %>%
dplyr::select(feature, contrast, P.Value, adj.P.Val, logFC, feature,
any_of(c("site", "gene_symbol",
"entrez_gene", "name_in_figures"))) %>%
mutate(log10_pval = -log10(adj.P.Val),
sign_logFC = as.character(sign(logFC)),
sign_logFC = ifelse(adj.P.Val < 0.05, sign_logFC, "NS"),
sign_logFC = factor(sign_logFC,
levels = c("-1", "1", "NS"),
labels = c("dn", "up", "NS"))) %>%
arrange(contrast, sign_logFC)
# Count number of features by sign(logFC)
x_sub <- x %>%
group_by(contrast, sign_logFC) %>%
summarise(n = n()) %>%
mutate(label = paste(n, sign_logFC)) %>%
group_by(contrast) %>%
summarise(label = paste(label, collapse = ", "))
x <- left_join(x, x_sub, by = "contrast")
return(x)
}
# Function for label nudge
nudge_volcano_labels <- function(x, offset = 0.4) {
y <- x
len_y <- length(y)
increment <- offset / 8
if (len_y > 1) {
for (i in 2:len_y) {
while (y[i] <= y[i - 1] + offset) {
y[i] <- y[i] + increment
}
}
}
y <- y - x
return(y)
}
## Volcano plots for combined MvF SED results --------------------
MvF_volcano <- map(.x = all_DA, pluck, "MvF_SED") %>%
map(~ update_dea_res(.x) %>%
mutate(across(starts_with("entrez_gene"), as.character))
) %>%
enframe(name = "ome") %>%
unnest(value) %>%
mutate(ome = paste0(ome, "\n", label),
ome = factor(ome, levels = unique(ome))) %>%
plot_volcano(colors = c("#ff6eff", "#5555ff", "grey")) +
facet_wrap(~ ome, nrow = 1) +
scale_y_continuous(limits = c(0, 30),
sec.axis = sec_axis(trans = ~ 10 ^ (-.),
breaks = 0.05),
expand = expansion(mult = 0.01)) +
labs(x = TeX("$log_2$(fold-change): Male - Female"),
y = TeX("$-log_{10}$(BH-adjusted p-value)")) +
theme(strip.text = element_text(margin = margin(0, 0, 5, 0, "pt")))
# Add feature labels
point_labels <- MvF_volcano$data %>%
filter(adj.P.Val < 0.05) %>%
group_by(ome, contrast, sign_logFC) %>%
slice_min(order_by = adj.P.Val, n = 5) %>%
rbind(filter(MvF_volcano$data,
adj.P.Val < 0.05,
gene_symbol %in% c("Adipoq", "Slc2a4"))) %>%
mutate(SiteID = ifelse(grepl("^PHOSPHO", ome),
paste0(gene_symbol, "-", site),
NA),
feature_label = case_when(
!is.na(site) ~ site,
!is.na(gene_symbol) ~ gene_symbol,
!is.na(entrez_gene) ~ entrez_gene,
!is.na(name_in_figures) ~ name_in_figures,
TRUE ~ feature)) %>%
# Remove certain features from the plot
filter(!(feature_label %in% c("AABR07039356.2", "Prickle2") &
grepl("^TRNSCRPT", ome)),
!(feature_label %in% c("Stn1", "Ccar2") &
grepl("^PROT", ome))) %>%
dplyr::select(ome, contrast, sign_logFC, feature, feature_label,
adj.P.Val, P.Value, logFC) %>%
mutate(feature_label = case_when(
feature_label == "TG 58:34*" ~ "TG 58:4",
feature_label == "PE P-38:5 or PE O-38:6" ~ "PE O-38:6",
feature_label == "Hydroxydodecanoic acid" ~ "FA 12:0-OH",
feature_label == "Hydroxydecanoic acid" ~ "FA 10:0-OH",
TRUE ~ feature_label),
log10_pval = -log10(adj.P.Val)) %>%
arrange(contrast, log10_pval, -P.Value) %>%
group_by(contrast, sign_logFC, ome) %>%
mutate(nudge_y = nudge_volcano_labels(x = log10_pval, offset = 2.2),
nudge_x = sign(logFC) * c(0.5, 1.05) * max(abs(logFC), 3) +
2 * sign(logFC)) %>%
ungroup()
MvF_volcano <- MvF_volcano +
geom_label_repel(aes(x = logFC, y = -log10(adj.P.Val),
label = feature_label),
data = point_labels,
force = 0,
size = 5 / .pt,
fill = alpha("white", 0.7),
label.padding = 0.12,
label.size = 0.2,
segment.size = 0.3,
box.padding = 0.15,
min.segment.length = unit(0, "pt"),
nudge_x = point_labels$nudge_x,
nudge_y = point_labels$nudge_y,
max.overlaps = Inf)
MvF_volcano
ggsave(file.path("..", "..", "plots", "volcano_MvF_SED_all_omes.pdf"),
MvF_volcano, height = 2.3, width = 6.5, units = "in",
dpi = 400, bg = "white")
## Create volcano plots for each ome
omes <- c("TRNSCRPT", "PROT", "PHOSPHO", "METAB")
features <- c("Transcripts", "Proteins", "Phosphosites", "Metabolites")
for (i in seq_along(omes)) {
# Get DEA results
features <- features[i]
ome <- omes[i]
message(ome)
dea_res <- map(all_DA[[i]], .f = update_dea_res)
## Trained vs SED ------------------------------------------------------------
message("Timewise")
train_res <- dea_res$trained_vs_SED %>%
mutate(contrast = factor(
contrast, levels = unique(contrast),
labels = TeX(
sprintf("(%dW - SED)$_{%s}$",
rep(2 ^ (0:3), times = 2),
rep(c("F", "M"), each = 4))
)
))
switch(ome,
TRNSCRPT = {
features_to_label = data.frame(
feature_label = c(
# Female
## 1W - SED
"Ace", "Smpd3", "Aamdc", "Camk2b", "Acly", "Fasn", "Grb14",
"Olah", "Hmgcs2", "Ca12",
## 2W - SED
"Fasn", "Acly", "Grb14", "Olah", "Hmgcs2", "Ca12",
## 4W - SED
"Fasn", "Aqp3", "Crabp2", "Aacs", "Acly", "Grb14", "Olah",
"Hmgcs2", "Ca12",
## 8W - SED
"Crabp2", "Fads2", "Acaca", "Fasn", "Acaca", "Endou", "Orm1",
"Acly", "Grb14", "Olah", "Hmgcs2", "Ca12",
# Male
## 1W - SED
"Acot1", "Adcy3", "Hif1a", "Steap4", "Fasn", "Acly", "Grb14",
"Olah", "Hmgcs2", "Ca12",
## 2W - SED
"Kdr", "Elovl6", "Pdgfrb", "Hif1a", "Fasn", "Acly", "Grb14",
"Olah", "Hmgcs2", "Ca12",
## 4W - SED
"Kcnj13", "Ctsz", "Nkap", "Fasn", "Acly", "Grb14", "Olah",
"Hmgcs2", "Ca12",
## 8W - SED
"RT1-A1", "Ctsz", "Atp11a", "Lpcat1", "Fasn", "Acly", "Grb14",
"Olah", "Hmgcs2", "Ca12"),
contrast = rep(levels(train_res$contrast),
c(10, 6, 9, 12,
10, 10, 9, 10))) %>%
mutate(contrast = factor(contrast,
levels = levels(train_res$contrast)))
},
PROT = {
features_to_label <- data.frame(
feature_label = c(
# Female
## 1W - SED
"Camk2b", "Ace", "Maip", "Atp6v0d1", "Immt", "Slc25a11",
"Slc25a3", "Slc25a15", "Tmed2", "Sec61a1", "Lrpprc", "Slc25a15",
"Grb14", "Orm1",
## 2W - SED
"Camk2b", "Ace", "Lrg1", "Maip", "Atp6v0d1", "Immt", "Slc25a11",
"Slc25a3", "Slc25a15", "Tmed2", "Sec61a1", "Lrpprc", "Slc25a15",
"Grb14", "Orm1",
## 4W - SED
"Lifr", "Orm1", "Ltbp2", "Il1r2", "Maip", "Atp6v0d1", "Immt",
"Slc25a11", "Slc25a3", "Slc25a15", "Tmed2", "Sec61a1", "Lrpprc",
"Slc25a15", "Grb14",
## 8W - SED
"Lifr", "Endou", "Arg1", "Orm1", "Tgfbr3", "Hspa1b", "Maip",
"Atp6v0d1", "Immt", "Slc25a11", "Slc25a3", "Slc25a15", "Tmed2",
"Sec61a1", "Lrpprc", "Slc25a15", "Grb14",
# Male
## 1W - SED
"Tie1", "Hdac4", "Fth1", "Maip", "Immt", "Slc25a11", "Slc25a3",
"Slc25a15", "Lrpprc", "Slc25a15", "Grb14", "Orm1",
## 2W - SED
"Fth1", "Cuta", "Hsd17b12", "Glyctk", "Hdac4", "Maip", "Immt",
"Slc25a11", "Slc25a3", "Slc25a15", "Lrpprc", "Slc25a15", "Grb14",
"Orm1",
## 4W - SED
"Fth1", "Hebp2", "Crabp1", "Slc27a1", "Maoa", "Rbp1", "Hsd17b12",
"Akr1d1", "Nos1", "Maip", "Immt", "Slc25a11", "Slc25a3",
"Slc25a15", "Lrpprc", "Slc25a15", "Grb14", "Orm1",
## 8W - SED
"Hacl1", "Hsph1", "Hspa1b", "Lep", "Sparc", "Maip", "Immt",
"Slc25a11", "Slc25a3", "Slc25a15", "Lrpprc", "Slc25a15",
"Grb14", "Orm1"),
contrast = rep(levels(train_res$contrast),
c(14, 15, 15, 17,
12, 14, 18, 14))) %>%
mutate(contrast = factor(contrast,
levels = levels(train_res$contrast)))
},
METAB = {
metab_features <- train_res %>%
filter(adj.P.Val < 0.05) %>%
dplyr::select(name_in_figures, P.Value, adj.P.Val, contrast) %>%
distinct() %>%
group_by(name_in_figures) %>%
filter(all(levels(train_res$contrast)[1:4] %in% contrast) |
all(levels(train_res$contrast)[5:8] %in% contrast)) %>%
group_by(name_in_figures, contrast) %>%
slice_min(P.Value, n = 1, with_ties = FALSE) %>%
mutate(sex_female = grepl("F", contrast)) %>%
group_by(name_in_figures, sex_female) %>%
filter(n() == 4) %>%
group_by(name_in_figures, sex_female) %>%
summarise(mean_padj = mean(-log10(adj.P.Val))) %>%
group_by(sex_female) %>%
slice_max(mean_padj, n = 5) %>%
pull(name_in_figures) %>%
unique()
features_to_label <- train_res %>%
filter(adj.P.Val < 0.05) %>%
filter(name_in_figures %in% metab_features) %>%
dplyr::select(feature_label = name_in_figures, contrast) %>%
distinct() %>%
mutate(feature_label = sub(" or P[EC] ", "\\|\\|", feature_label))
},
PHOSPHO = {
features_to_label <- train_res %>%
mutate(feature_label = paste0(gene_symbol, "-", site)) %>%
filter(feature_label %in%
c("Pde4a-S140", "Pde4b-S118", "Ankrd2-S317",
"Lrrfip1-S257", "Lrrfip1-S232", "Lrrfip1-S85",
"Camk2b-T398", "Htt-S621", "Klc1-S521;S524",
"Uhrf1bp1l-S418", "Bnip3-T66", "Ulf1-S458")) %>%
filter(adj.P.Val < 0.05) %>%
dplyr::select(-feature) %>%
distinct() %>%
group_by(contrast, feature_label) %>%
slice_min(order_by = P.Value, n = 1, with_ties = FALSE) %>%
dplyr::select(feature_label, contrast)
}
)
train_labels <- train_res %>%
filter(adj.P.Val < 0.05) %>%
add_column(site = NA,
name_in_figures = NA,
gene_symbol = NA,
.name_repair = "unique") %>%
mutate(name_in_figures = sub(" or P[EC] ", "\\|\\|", name_in_figures),
feature_label = case_when(
!is.na(site) ~ paste(gene_symbol, site, sep = "-"),
!is.na(name_in_figures) ~ name_in_figures,
TRUE ~ gene_symbol
)) %>%
dplyr::select(contrast, feature_label, adj.P.Val,
log10_pval, P.Value, logFC) %>%
distinct() %>%
inner_join(features_to_label,
by = c("contrast", "feature_label")) %>%
group_by(contrast, feature_label) %>%
slice_min(order_by = adj.P.Val, n = 1) %>%
ungroup() %>%
arrange(contrast, log10_pval, -P.Value) %>%
group_by(contrast, sign(logFC)) %>%
mutate(nudge_y = nudge_volcano_labels(
x = log10_pval,
offset = 0.4 + 0.1 * (ome == "TRNSCRPT") - 0.05 * (ome == "PHOSPHO")
) - 0.25 * (n() == 1),
nudge_x = sign(logFC) *
rescale(rank(abs(logFC)),
to = c(0.6, 1.5) * max(abs(logFC), 3))) %>%
ungroup() %>%
group_by(contrast) %>%
mutate(nudge_x = nudge_x - (ome == "METAB") * 1 * min(abs(logFC)))
## Volcano plots
v_train <- plot_volcano(train_res) +
facet_wrap(~ contrast, nrow = 1,
scales = "free_x",
labeller = label_parsed) +
labs(x = TeX("$log_2$(fold-change)"),
y = TeX("$-log_{10}$(BH-adjusted p-value)"))
# Add annotations for number of up, down, or NS features
v_train <- v_train +
geom_label(data = distinct(train_res, contrast, label),
aes(label = label, x = -Inf, y = Inf),
size = 5 / .pt,
label.size = NA,
label.padding = unit(4, "pt"),
fill = alpha("white", 0.5),
# label.r = unit(1.5, "pt"),
hjust = 0.05, vjust = 0) +
coord_cartesian(clip = "off")
# Set axis limits
if (ome != "TRNSCRPT") {
v_train <- v_train +
scale_x_continuous(breaks = seq(-8, 8, 2),
expand = expansion(mult = 0.05,
add = 0.5))
}
if (ome == "METAB") {
v_train <- v_train +
scale_y_continuous(limits = c(0, 6),
expand = expansion(mult = 1e-2),
sec.axis = sec_axis(trans = ~ 10 ^ (-.),
breaks = 0.05))
}
v_train <- v_train +
geom_label_repel(aes(x = logFC, y = log10_pval,
label = feature_label),
data = train_labels,
force = 0 + 0.5 * (ome == "METAB"),
direction = "x",
size = 5 / .pt, # (5 - 0.7 * (ome == "METAB")) / .pt,
fill = alpha("white", 0.65),
label.padding = 0.10,
label.size = 0.16,
segment.size = 0.3,
box.padding = 0.13,
min.segment.length = unit(0, "pt"),
nudge_x = train_labels$nudge_x,
nudge_y = train_labels$nudge_y +
case_when(ome == "TRNSCRPT" ~ 0.6, # 0.4
ome == "PHOSPHO" ~ 0.6, # 0.4
ome == "PROT" ~ 0.6, # 0.5
ome == "METAB" ~ 0.8), # 0.6
seed = 0,
max.overlaps = Inf) +
theme(panel.grid.major = element_blank())
v_train # display plot
ggsave(file.path("..", "..", "plots",
sprintf("volcano_%s_trained_vs_SED.pdf", ome)),
v_train, height = 55, width = 180, units = "mm", family = "ArialMT")
}
sessionInfo()
#> R version 4.4.0 (2024-04-24)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 22.04.4 LTS
#>
#> Matrix products: default
#> BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so; LAPACK version 3.10.0
#>
#> locale:
#> [1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
#> [4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
#> [7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
#> [10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
#>
#> time zone: UTC
#> tzcode source: system (glibc)
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] scales_1.3.0 ggrepel_0.9.5
#> [3] ggplot2_3.5.1 latex2exp_0.9.6
#> [5] purrr_1.0.2 tibble_3.2.1
#> [7] tidyr_1.3.1 dplyr_1.1.4
#> [9] MotrpacRatTraining6moWAT_1.0.1 Biobase_2.64.0
#> [11] BiocGenerics_0.50.0 MotrpacRatTraining6moWATData_2.0.0
#>
#> loaded via a namespace (and not attached):
#> [1] RColorBrewer_1.1-3 rstudioapi_0.16.0 jsonlite_1.8.8
#> [4] shape_1.4.6.1 magrittr_2.0.3 ggbeeswarm_0.7.2
#> [7] rmarkdown_2.26 GlobalOptions_0.1.2 fs_1.6.4
#> [10] zlibbioc_1.50.0 ragg_1.3.0 vctrs_0.6.5
#> [13] memoise_2.0.1 base64enc_0.1-3 rstatix_0.7.2
#> [16] htmltools_0.5.8.1 dynamicTreeCut_1.63-1 curl_5.2.1
#> [19] broom_1.0.5 Formula_1.2-5 sass_0.4.9
#> [22] bslib_0.7.0 htmlwidgets_1.6.4 desc_1.4.3
#> [25] impute_1.78.0 cachem_1.0.8 lifecycle_1.0.4
#> [28] iterators_1.0.14 pkgconfig_2.0.3 Matrix_1.7-0
#> [31] R6_2.5.1 fastmap_1.1.1 GenomeInfoDbData_1.2.12
#> [34] clue_0.3-65 digest_0.6.35 colorspace_2.1-0
#> [37] patchwork_1.2.0 AnnotationDbi_1.65.2 S4Vectors_0.42.0
#> [40] textshaping_0.3.7 Hmisc_5.1-2 RSQLite_2.3.6
#> [43] ggpubr_0.6.0 filelock_1.0.3 fansi_1.0.6
#> [46] httr_1.4.7 abind_1.4-5 compiler_4.4.0
#> [49] withr_3.0.0 bit64_4.0.5 doParallel_1.0.17
#> [52] htmlTable_2.4.2 backports_1.4.1 BiocParallel_1.38.0
#> [55] carData_3.0-5 DBI_1.2.2 ggsignif_0.6.4
#> [58] rjson_0.2.21 tools_4.4.0 vipor_0.4.7
#> [61] foreign_0.8-86 beeswarm_0.4.0 msigdbr_7.5.1
#> [64] nnet_7.3-19 glue_1.7.0 grid_4.4.0
#> [67] checkmate_2.3.1 cluster_2.1.6 fgsea_1.30.0
#> [70] generics_0.1.3 gtable_0.3.5 preprocessCore_1.66.0
#> [73] data.table_1.15.4 WGCNA_1.72-5 car_3.1-2
#> [76] utf8_1.2.4 XVector_0.44.0 foreach_1.5.2
#> [79] pillar_1.9.0 stringr_1.5.1 babelgene_22.9
#> [82] limma_3.60.0 circlize_0.4.16 splines_4.4.0
#> [85] BiocFileCache_2.12.0 lattice_0.22-6 survival_3.5-8
#> [88] bit_4.0.5 tidyselect_1.2.1 GO.db_3.19.1
#> [91] ComplexHeatmap_2.20.0 locfit_1.5-9.9 Biostrings_2.72.0
#> [94] knitr_1.46 gridExtra_2.3 IRanges_2.38.0
#> [97] edgeR_4.2.0 stats4_4.4.0 xfun_0.43
#> [100] statmod_1.5.0 matrixStats_1.3.0 stringi_1.8.3
#> [103] UCSC.utils_1.0.0 yaml_2.3.8 evaluate_0.23
#> [106] codetools_0.2-20 cli_3.6.2 ontologyIndex_2.12
#> [109] rpart_4.1.23 systemfonts_1.0.6 munsell_0.5.1
#> [112] jquerylib_0.1.4 Rcpp_1.0.12 GenomeInfoDb_1.40.0
#> [115] dbplyr_2.5.0 png_0.1-8 fastcluster_1.2.6
#> [118] parallel_4.4.0 pkgdown_2.0.9 blob_1.2.4
#> [121] crayon_1.5.2 GetoptLong_1.0.5 rlang_1.1.3
#> [124] cowplot_1.1.3 fastmatch_1.1-4 KEGGREST_1.44.0