Filter edge sets to largest trajectories
Source:R/bayesian_graphical_clustering.R
filter_edge_sets_by_trajectories.Rd
Keep the edges of the top trajectories of an edge set of a graphical solution.
Usage
filter_edge_sets_by_trajectories(
edge_sets = MotrpacRatTraining6moData::GRAPH_COMPONENTS$edge_sets,
topk = 5,
min_path_size = 5
)
Arguments
- edge_sets
A named list of string vectors. The name of an edge is `node_id---node_id`.
Edges with no analytes have a NULL set (a set of size zero, but they are still represented). Node ids are `[timepoints]_F[x]_M[y]`, where `x` and `y` represent the up/down state in each sex.
`GRAPH_COMPONENTS$edge_sets` by default.
- topk
A number. The maximal number of full trajectories to include in the new solution.
- min_path_size
An integer specifying the minimal path size to be considered.
Value
A named list of edge sets. All possible edges in our 9 x 4 grid will appear in the solution. Edges that are removed will have no features/analytes in their entry.
Examples
if (FALSE) { # \dontrun{
### Example 1: Simulate data with a single cluster
zcolnames = c(
paste("female",c("1w","2w","4w","8w"),sep="_"),
paste("male",c("1w","2w","4w","8w"),sep="_")
)
zscores = matrix(rnorm(80000),ncol=8,dimnames = list(1:10000,zcolnames))
# now add a cluster with a strong signal and rerun
zscores[1:500,1:4] = zscores[1:500,1:4] + 5
# run the clustering solution wrapper
clustering_sol = bayesian_graphical_clustering(zscores)
# extract the top full trajectories in the data
# these should be the clusters with at least 10 features
min_cluster_size=10
get_trajectory_sizes_from_edge_sets(clustering_sol$edge_sets, min_size = min_cluster_size)
# extract the edges of the top two full trajectories
# this step "cleans" the edge sets by removing edges of trajectories with very few features
top2traj_edge_sets = filter_edge_sets_by_trajectories(clustering_sol$edge_sets,
topk = 2,
min_path_size = 10)
# examine the new edge set sizes, excluded edges should have zero size
sapply(top2traj_edge_sets,length)
# for comparison examine the edge sets of the Bayesian clustering solution:
sapply(clustering_sol$edge_sets,length)
} # }
### Example 2: Use published data
# Get edges corresponding to 5 largest trajectories in the liver
tissue_edge_sets = limit_sets_by_regex(MotrpacRatTraining6moData::GRAPH_COMPONENTS$edge_sets,
"LIVER")
res = filter_edge_sets_by_trajectories(tissue_edge_sets)
lapply(res[1:10], head)
#> $`0w---1w_F-1_M-1`
#> character(0)
#>
#> $`0w---1w_F-1_M0`
#> character(0)
#>
#> $`0w---1w_F-1_M1`
#> character(0)
#>
#> $`0w---1w_F0_M-1`
#> character(0)
#>
#> $`0w---1w_F0_M0`
#> [1] "ACETYL;LIVER;NP_001004250.1_K85k" "ACETYL;LIVER;NP_001005875.1_K52k"
#> [3] "ACETYL;LIVER;NP_001006996.1_K136k" "ACETYL;LIVER;NP_001007667.1_K213k"
#> [5] "ACETYL;LIVER;NP_001007804.1_K126k" "ACETYL;LIVER;NP_001009600.1_K134k"
#>
#> $`0w---1w_F0_M1`
#> [1] "ACETYL;LIVER;AP_004896.1_K54k" "ACETYL;LIVER;NP_001004085.1_K395k"
#> [3] "ACETYL;LIVER;NP_001004258.1_K350k" "ACETYL;LIVER;NP_001005550.1_K311k"
#> [5] "ACETYL;LIVER;NP_001006967.1_K158k" "ACETYL;LIVER;NP_001006971.1_K158k"
#>
#> $`0w---1w_F1_M-1`
#> character(0)
#>
#> $`0w---1w_F1_M0`
#> character(0)
#>
#> $`0w---1w_F1_M1`
#> [1] "ACETYL;LIVER;NP_001013175.1_K397k" "ACETYL;LIVER;NP_001014181.2_K32k"
#> [3] "ACETYL;LIVER;NP_001094009.1_K125k" "ACETYL;LIVER;NP_001099389.1_K85k"
#> [5] "ACETYL;LIVER;NP_001101226.1_K384k" "ACETYL;LIVER;NP_001101972.2_K531k"
#>
#> $`1w_F-1_M-1---2w_F-1_M-1`
#> character(0)
#>