vignettes/PHOSPHO_KSEA.Rmd
PHOSPHO_KSEA.Rmd
library(MotrpacRatTraining6moWATData)
library(MotrpacRatTraining6moWAT)
library(dplyr)
library(tidyr)
library(tibble)
library(purrr)
library(stringr)
library(data.table)
library(fgsea)
# Reformat DEA results
human_res <- PHOSPHO_DA %>%
map(function(res_i) {
filter(res_i, !is.na(human_uniprot)) %>%
mutate(num_sites = str_count(human_site, ";") + 1) %>%
separate_rows(human_site) %>% # single-site-level data
mutate(human_feature = paste0(human_uniprot, "_", human_site)) %>%
dplyr::select(contrast, human_feature, logFC, P.Value, num_sites) %>%
distinct()
})
map(human_res, head)
#> $trained_vs_SED
#> # A tibble: 6 × 5
#> contrast human_feature logFC P.Value num_sites
#> <fct> <chr> <dbl> <dbl> <dbl>
#> 1 F_1W - F_SED Q9UHL9_S448 -0.105 0.669 1
#> 2 F_1W - F_SED P02545_S12 0.189 0.344 1
#> 3 F_1W - F_SED P02545_S143 0.157 0.448 1
#> 4 F_1W - F_SED P02545_S153 -0.0221 0.937 1
#> 5 F_1W - F_SED P02545_S17 0.0838 0.607 1
#> 6 F_1W - F_SED P02545_S18 -0.0247 0.834 1
#>
#> $MvF_SED
#> # A tibble: 6 × 5
#> contrast human_feature logFC P.Value num_sites
#> <fct> <chr> <dbl> <dbl> <dbl>
#> 1 M_SED - F_SED Q9UHL9_S448 -0.123 0.657 1
#> 2 M_SED - F_SED P02545_S12 0.193 0.378 1
#> 3 M_SED - F_SED P02545_S143 -0.789 0.00131 1
#> 4 M_SED - F_SED P02545_S153 -0.0890 0.800 1
#> 5 M_SED - F_SED P02545_S17 -0.366 0.0401 1
#> 6 M_SED - F_SED P02545_S18 -0.161 0.216 1
#>
#> $MvF_exercise_response
#> # A tibble: 6 × 5
#> contrast human_feature logFC P.Value num_sites
#> <fct> <chr> <dbl> <dbl> <dbl>
#> 1 (M_1W - M_SED) - (F_1W - F_SED) Q9UHL9_S448 0.289 0.474 1
#> 2 (M_1W - M_SED) - (F_1W - F_SED) P02545_S12 -0.339 0.264 1
#> 3 (M_1W - M_SED) - (F_1W - F_SED) P02545_S143 0.130 0.689 1
#> 4 (M_1W - M_SED) - (F_1W - F_SED) P02545_S153 -0.355 0.431 1
#> 5 (M_1W - M_SED) - (F_1W - F_SED) P02545_S17 -0.0562 0.817 1
#> 6 (M_1W - M_SED) - (F_1W - F_SED) P02545_S18 -0.0172 0.923 1
# List of substrate sites by kinase (379 kinases before filtering)
KS_sets <- PSP_KINASE_SUBSTRATE %>%
transmute(kinase = GENE,
substrate = paste0(SUB_ACC_ID, "_", SUB_MOD_RSD)) %>%
# Filter to what is in the DEA results
filter(substrate %in% human_res$MvF_SED$human_feature) %>%
group_by(kinase) %>%
summarise(substrate = list(substrate)) %>%
deframe()
head(KS_sets)
#> $AAK1
#> [1] "Q96CW1_T156"
#>
#> $ABL1
#> [1] "P49023_Y118" "P46109_Y207" "Q03135_Y14" "Q14498_Y95" "P16333_Y105"
#> [6] "P46108_Y251" "P52566_Y130"
#>
#> $AKT1
#> [1] "O60343_S318" "Q2PPJ7_T715" "P19634_S703" "Q8TDN4_T415" "P62136_T320"
#> [6] "P98177_S262" "P08727_S35" "Q9Y3M2_S20" "P53396_S455" "O60825_S466"
#> [11] "Q07352_S92" "Q14315_S2233" "P98177_T32" "Q13131_S496" "P04150_S134"
#> [16] "P14618_S97" "Q53EL6_S457" "P26373_S106" "Q96F86_S161" "Q99623_S91"
#> [21] "Q9UBP6_S27" "P49815_S981" "Q12778_S256" "P53396_T447" "O60343_S570"
#> [26] "P46937_S127" "P45985_S80" "Q96QB1_S766" "Q16875_S461" "Q12778_T24"
#> [31] "P26358_S143" "Q92934_S99" "O60825_S483" "Q9BZQ8_S602" "P09651_S199"
#> [36] "P42858_S419" "P49840_S21" "P04792_S82" "O15151_S367" "Q7Z5H3_S16"
#> [41] "P02545_S404" "Q3V6T2_S1417" "Q13045_S436" "P19634_S796" "Q9UN36_T348"
#> [46] "P29474_S1177" "P49815_S939" "P49841_S9" "Q9UN36_S332" "Q9H6Z4_S126"
#> [51] "Q96B36_T246" "Q5XUX0_S33" "O43524_T32" "O60343_S341" "P98177_S197"
#> [56] "Q12778_S319" "P15056_S365" "P62753_S236" "Q9P0V3_S246" "O00429_S616"
#> [61] "P54253_S775" "Q9BRS8_S451" "P14618_S202" "P21333_S2152" "O60343_T642"
#> [66] "P08670_S39" "P20749_S41" "O60343_S588" "P46527_S10" "Q96QB1_S1004"
#> [71] "Q9UNN5_S582" "Q2PPJ7_S486" "O60331_S555" "P35222_S552" "P02545_S301"
#> [76] "Q9Y4P1_S34" "Q8WX93_S1118" "Q9Y2I7_S307" "Q7Z6J0_S304" "O43524_S253"
#> [81] "P05787_S432" "Q7Z589_S209" "Q15121_S116"
#>
#> $AKT2
#> [1] "Q9Y3M2_S20" "Q14315_S2233" "Q96F86_S161" "Q04656_S1466" "Q99623_S91"
#> [6] "Q9Y4H2_S306" "O60331_S555" "P35222_S552" "Q9Y4H2_S577" "Q7Z6J0_S304"
#>
#> $AKT3
#> [1] "Q9UKV8_S387" "P15056_S365" "Q7Z589_S209"
#>
#> $ALK
#> [1] "P40763_Y705" "P50552_Y39" "Q06124_Y542"
# How many substrates are in PSP?
table(unique(human_res$MvF_SED$human_feature) %in% unlist(KS_sets))
#>
#> FALSE TRUE
#> 18019 1118
# FALSE TRUE
# 18019 1118
Only about 5.8% of the substrates in the phosphoproteomics data are in PhosphoSitePlus.
# How many will pass the size filter?
KS_sets <- KS_sets[lengths(KS_sets) >= 3]
length(KS_sets) # 121 kinases
#> [1] 121
# Removing small kinase sets drops some substrate sites
table(unique(human_res$MvF_SED$human_feature) %in% unlist(KS_sets))
#>
#> FALSE TRUE
#> 18063 1074
# FALSE TRUE
# 18063 1074
## Conversion vectors for KSEA leadingEdge
# Uniprot to gene symbol sites
human_uniprot_to_symbol <- PSP_KINASE_SUBSTRATE %>%
transmute(across(c(SUB_ACC_ID, SUB_GENE),
~ paste0(.x, "_", SUB_MOD_RSD))) %>%
distinct() %>%
deframe()
head(human_uniprot_to_symbol)
#> P05198_S52 P05198_S49 Q9UQL6_S259 P18433-2_S204 P10415_S70
#> "EIF2S1_S52" "EIF2S1_S49" "HDAC5_S259" "PTPRA_S204" "BCL2_S70"
#> P61978_S302
#> "HNRNPK_S302"
# Map human to rat sites (single-site level)
human_to_rat <- fData(PHOSPHO_EXP) %>%
filter(!is.na(human_uniprot)) %>%
separate_rows(site, human_site, sep = ";") %>%
transmute(human = paste0(human_uniprot, "_", human_site),
rat = paste0(gene_symbol, "_", site)) %>%
distinct() %>%
deframe()
head(human_to_rat)
#> Q9UHL9_S448 P02545_S12 P02545_S143 P02545_S153 P02545_S17
#> "Gtf2ird1_S448" "Lmna_S12" "Lmna_S143" "Lmna_S153" "Lmna_S17"
#> P02545_S18
#> "Lmna_S18"
# KSEA
PHOSPHO_KSEA <- map(human_res, function(res_i)
{
rank_list <- rank_genes(res_i, genes = "human_feature")
map(names(rank_list), function(contr_i) {
set.seed(0)
fgseaMultilevel(pathways = KS_sets,
stats = rank_list[[contr_i]],
minSize = 3,
nproc = 1, nPermSimple = 10000) %>%
mutate(contrast = contr_i)
}) %>%
rbindlist() %>%
mutate(padj = p.adjust(pval, method = "BH"),
contrast = factor(contrast, levels = unique(contrast)),
gs_subcat = "kinase",
leadingEdge_symbol = map(.x = leadingEdge,
.f = ~ human_uniprot_to_symbol[.x]),
leadingEdge_rno = map(.x = leadingEdge,
.f = ~ human_to_rat[.x])) %>%
dplyr::rename(kinase = pathway) %>%
relocate(contrast, .before = leadingEdge) %>%
dplyr::select(-gs_subcat)
})
# Save
usethis::use_data(PHOSPHO_KSEA, internal = FALSE,
overwrite = TRUE, version = 3, compress = "bzip2")
sessionInfo()
#> R version 4.4.0 (2024-04-24)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 22.04.4 LTS
#>
#> Matrix products: default
#> BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so; LAPACK version 3.10.0
#>
#> locale:
#> [1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
#> [4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
#> [7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
#> [10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
#>
#> time zone: UTC
#> tzcode source: system (glibc)
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] fgsea_1.30.0 data.table_1.15.4
#> [3] stringr_1.5.1 purrr_1.0.2
#> [5] tibble_3.2.1 tidyr_1.3.1
#> [7] dplyr_1.1.4 MotrpacRatTraining6moWAT_1.0.1
#> [9] Biobase_2.64.0 BiocGenerics_0.50.0
#> [11] MotrpacRatTraining6moWATData_2.0.0
#>
#> loaded via a namespace (and not attached):
#> [1] RColorBrewer_1.1-3 rstudioapi_0.16.0 jsonlite_1.8.8
#> [4] shape_1.4.6.1 magrittr_2.0.3 ggbeeswarm_0.7.2
#> [7] rmarkdown_2.26 GlobalOptions_0.1.2 fs_1.6.4
#> [10] zlibbioc_1.50.0 ragg_1.3.0 vctrs_0.6.5
#> [13] memoise_2.0.1 base64enc_0.1-3 rstatix_0.7.2
#> [16] htmltools_0.5.8.1 dynamicTreeCut_1.63-1 curl_5.2.1
#> [19] broom_1.0.5 Formula_1.2-5 sass_0.4.9
#> [22] bslib_0.7.0 htmlwidgets_1.6.4 desc_1.4.3
#> [25] impute_1.78.0 cachem_1.0.8 lifecycle_1.0.4
#> [28] iterators_1.0.14 pkgconfig_2.0.3 Matrix_1.7-0
#> [31] R6_2.5.1 fastmap_1.1.1 GenomeInfoDbData_1.2.12
#> [34] clue_0.3-65 digest_0.6.35 colorspace_2.1-0
#> [37] patchwork_1.2.0 AnnotationDbi_1.65.2 S4Vectors_0.42.0
#> [40] textshaping_0.3.7 Hmisc_5.1-2 RSQLite_2.3.6
#> [43] ggpubr_0.6.0 filelock_1.0.3 latex2exp_0.9.6
#> [46] fansi_1.0.6 httr_1.4.7 abind_1.4-5
#> [49] compiler_4.4.0 withr_3.0.0 bit64_4.0.5
#> [52] doParallel_1.0.17 htmlTable_2.4.2 backports_1.4.1
#> [55] BiocParallel_1.38.0 carData_3.0-5 DBI_1.2.2
#> [58] ggsignif_0.6.4 rjson_0.2.21 tools_4.4.0
#> [61] vipor_0.4.7 foreign_0.8-86 beeswarm_0.4.0
#> [64] msigdbr_7.5.1 nnet_7.3-19 glue_1.7.0
#> [67] grid_4.4.0 checkmate_2.3.1 cluster_2.1.6
#> [70] generics_0.1.3 gtable_0.3.5 preprocessCore_1.66.0
#> [73] WGCNA_1.72-5 car_3.1-2 utf8_1.2.4
#> [76] XVector_0.44.0 foreach_1.5.2 pillar_1.9.0
#> [79] babelgene_22.9 limma_3.60.0 circlize_0.4.16
#> [82] splines_4.4.0 BiocFileCache_2.12.0 lattice_0.22-6
#> [85] survival_3.5-8 bit_4.0.5 tidyselect_1.2.1
#> [88] GO.db_3.19.1 ComplexHeatmap_2.20.0 locfit_1.5-9.9
#> [91] Biostrings_2.72.0 knitr_1.46 gridExtra_2.3
#> [94] IRanges_2.38.0 edgeR_4.2.0 stats4_4.4.0
#> [97] xfun_0.43 statmod_1.5.0 matrixStats_1.3.0
#> [100] stringi_1.8.3 UCSC.utils_1.0.0 yaml_2.3.8
#> [103] evaluate_0.23 codetools_0.2-20 cli_3.6.2
#> [106] ontologyIndex_2.12 rpart_4.1.23 systemfonts_1.0.6
#> [109] munsell_0.5.1 jquerylib_0.1.4 Rcpp_1.0.12
#> [112] GenomeInfoDb_1.40.0 dbplyr_2.5.0 png_0.1-8
#> [115] fastcluster_1.2.6 parallel_4.4.0 pkgdown_2.0.9
#> [118] ggplot2_3.5.1 blob_1.2.4 scales_1.3.0
#> [121] crayon_1.5.2 GetoptLong_1.0.5 rlang_1.1.3
#> [124] cowplot_1.1.3 fastmatch_1.1-4 KEGGREST_1.44.0