Skip to contents

Load feature annotation

Usage

load_feature_annotation(assay, scratchdir = ".")

Arguments

assay

character, assay abbreviation, one of MotrpacRatTraining6moData::ASSAY_ABBREV

scratchdir

character, path to a local directory into which data are downloaded from Google Cloud Storage. Defaults to the current working directory ("."). Not used if assay is "TRNSCRPT", "IMMUNO", or "METAB".

Value

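data frame with one row per unique combination of feature and metadata; a feature may therefore appear in more than one row when its metadata differ (for example, across tissues)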

Examples

head(load_feature_annotation("UBIQ", scratchdir="/tmp"))
#>       protein_id                 redundant_ids is_contaminant peptide_score
#> 1 XP_017456475.1 XP_017447817.1|XP_017456475.1          FALSE    0.04957858
#> 2 XP_017456475.1 XP_017456475.1|XP_017447817.1          FALSE    0.05035247
#> 3 XP_017456475.1 XP_017456475.1|XP_017447817.1          FALSE    0.03248863
#> 4 XP_017456475.1 XP_017456475.1|XP_017447817.1          FALSE    0.04591368
#> 5 XP_017456475.1 XP_017447817.1|XP_017456475.1          FALSE    0.06269592
#> 6 XP_017456475.1 XP_017447817.1|XP_017456475.1          FALSE    0.03202049
#>                  sequence     organism_name               ptm_id
#> 1           LMIPAVTkANSGR RATTUS NORVEGICUS  XP_017456475.1_K72k
#> 2           VTGIPTPVVkFYR RATTUS NORVEGICUS XP_017456475.1_K136k
#> 3     ATSTADLLVQGEEVVPAkK RATTUS NORVEGICUS XP_017456475.1_K202k
#> 4          TkTIVSTAQISETR RATTUS NORVEGICUS XP_017456475.1_K205k
#> 5               kIEAHFDAR RATTUS NORVEGICUS XP_017456475.1_K224k
#> 6 SIATVEMVIDGATGQLPHkTPPR RATTUS NORVEGICUS XP_017456475.1_K251k
#>                                    ptm_peptide confident_score confident_site
#> 1            XP_017456475.1_K72k-LMIPAVTkANSGR          99.000           TRUE
#> 2           XP_017456475.1_K136k-VTGIPTPVVkFYR          99.000           TRUE
#> 3     XP_017456475.1_K202k-ATSTADLLVQGEEVVPAkK           1.636           TRUE
#> 4          XP_017456475.1_K205k-TkTIVSTAQISETR          99.000           TRUE
#> 5               XP_017456475.1_K224k-kIEAHFDAR          99.000           TRUE
#> 6 XP_017456475.1_K251k-SIATVEMVIDGATGQLPHkTPPR          99.000           TRUE
#>   tissue assay
#> 1  HEART  UBIQ
#> 2  HEART  UBIQ
#> 3  HEART  UBIQ
#> 4  HEART  UBIQ
#> 5  HEART  UBIQ
#> 6  HEART  UBIQ
head(load_feature_annotation("TRNSCRPT", scratchdir="/tmp"))
#>          seqname  source feature start   end score strand frame
#> 1 AABR07022258.1 ensembl    gene   405   848     .      -     .
#> 2 AABR07022620.1 ensembl    gene   122   427     .      -     .
#> 3 AABR07022926.1 ensembl    gene    18    85     .      +     .
#> 4 AABR07024031.1 ensembl    gene 18673 58717     .      -     .
#> 5 AABR07024032.1 ensembl    gene 17425 17528     .      -     .
#> 6 AABR07024040.1 ensembl    gene  4281  5213     .      +     .
#>              gene_id gene_version      gene_name gene_source   gene_biotype
#> 1 ENSRNOG00000055633            1   LOC100910067     ensembl protein_coding
#> 2 ENSRNOG00000058846            1 AABR07022620.1     ensembl protein_coding
#> 3 ENSRNOG00000055717            1 AABR07022926.1     ensembl          miRNA
#> 4 ENSRNOG00000017648            7         Vom2r8     ensembl protein_coding
#> 5 ENSRNOG00000056404            1        RF00026     ensembl          snRNA
#> 6 ENSRNOG00000061350            1   LOC103690271     ensembl protein_coding
head(load_feature_annotation("METAB"))
#> Using FEATURE_TO_GENE_FILT and METAB_FEATURE_ID_MAP
#>          feature_ID kegg_id          dataset metabolite_refmet
#> 1 1-Methylhistidine  C01152   metab-t-amines 1-Methylhistidine
#> 2 1-methyladenosine  C02494 metab-u-hilicpos 1-Methyladenosine
#> 3 1-methyladenosine  C02494 metab-u-hilicpos 1-Methyladenosine
#> 4 1-methyladenosine  C02494 metab-u-hilicpos 1-Methyladenosine
#> 5 1-methyladenosine  C02494 metab-u-hilicpos 1-Methyladenosine
#> 6 1-methyladenosine  C02494 metab-u-hilicpos 1-Methyladenosine
#>   feature_ID_sample_data     feature_ID_da feature_ID_metareg  dataset_metareg
#> 1      1-Methylhistidine 1-Methylhistidine  1-Methylhistidine   metab-t-amines
#> 2      1-methyladenosine 1-methyladenosine  1-methyladenosine metab-u-hilicpos
#> 3      1-methyladenosine 1-methyladenosine  1-methyladenosine metab-u-hilicpos
#> 4      1-methyladenosine 1-methyladenosine  1-methyladenosine metab-u-hilicpos
#> 5      1-methyladenosine 1-methyladenosine  1-methyladenosine metab-u-hilicpos
#> 6      1-methyladenosine 1-methyladenosine  1-methyladenosine metab-u-hilicpos
#>     rt       mz neutral_mass    formula
#> 1 4.10 340.6000     169.0851  C7H11N3O2
#> 2 7.63 282.1189     281.1124 C11H15N5O4
#> 3 7.66 282.1191     281.1124 C11H15N5O4
#> 4 7.63 282.1191     281.1124 C11H15N5O4
#> 5 7.64 282.1192     281.1124 C11H15N5O4
#> 6 7.67 282.1190     281.1124 C11H15N5O4
#>                                                    tissue
#> 1 BAT,HEART,HIPPOC,KIDNEY,LIVER,LUNG,PLASMA,SKM-GN,WAT-SC
#> 2                                            ADRNL,VENACV
#> 3                                                     BAT
#> 4                                                   COLON
#> 5                                            CORTEX,OVARY
#> 6                                                   HEART
head(load_feature_annotation("IMMUNO"))
#> Using FEATURE_TO_GENE_FILT and IMMUNO_NORM_DATA_FLAT
#>    feature_ID                   dataset entrez_gene rgd_gene gene_symbol
#> 1        ACTH             rat-pituitary      282839   628649        Mc2r
#> 2 ADIPONECTIN               ADIPONECTIN      246253   628748      Adipoq
#> 3      AMYLIN             rat-metabolic       24476     2854        Iapp
#> 4        BDNF rat-myokine,rat-pituitary       24225     2202        Bdnf
#> 5   C Peptide             rat-metabolic       24505     2915        Ins1
#> 6         EGF             rat-mag27plex       25313     2542         Egf
#>    old_gene_symbol       ensembl_gene
#> 1             <NA> ENSRNOG00000016681
#> 2 Acdc;Acrp30;Adid ENSRNOG00000001821
#> 3              DAP ENSRNOG00000012417
#> 4        MGC105254 ENSRNOG00000047466
#> 5             <NA> ENSRNOG00000012052
#> 6     LOC103691699 ENSRNOG00000053979