Filter edge sets to largest trajectories — filter_edge_sets_by_trajectories

Keep only the edges that belong to the largest trajectories in the edge sets of a graphical solution.

Usage

filter_edge_sets_by_trajectories(
  edge_sets = MotrpacRatTraining6moData::GRAPH_COMPONENTS$edge_sets,
  topk = 5,
  min_path_size = 5
)

Arguments

edge_sets: A named list of string vectors. The name of each edge is node_id---node_id. Edges with no analytes have a NULL set (a set of size zero) but are still represented. Node ids are [timepoints]_F[x]_M[y], where x and y represent the up/down state in each sex. GRAPH_COMPONENTS$edge_sets by default.
topk: A number. The maximal number of full trajectories to include in the new solution.
min_path_size: An integer specifying the minimal path size to be considered.

Value

A named list of edge sets. All possible edges in our 9 x 4 grid will appear in the solution. Edges that are removed will have no features/analytes in their entry.

Examples

if (FALSE) { # \dontrun{
### Example 1: Simulate data with a single cluster
zcolnames = c(
  paste("female",c("1w","2w","4w","8w"),sep="_"),
  paste("male",c("1w","2w","4w","8w"),sep="_")
)
zscores = matrix(rnorm(80000),ncol=8,dimnames = list(1:10000,zcolnames))
# now add a cluster with a strong signal and rerun
zscores[1:500,1:4] = zscores[1:500,1:4] + 5

# run the clustering solution wrapper
clustering_sol = bayesian_graphical_clustering(zscores)

# extract the top full trajectories in the data
# these should be the clusters with at least 10 features
min_cluster_size=10
get_trajectory_sizes_from_edge_sets(clustering_sol$edge_sets, min_size = min_cluster_size)

# extract the edges of the top two full trajectories
# this step "cleans" the edge sets by removing edges of trajectories with very few features
top2traj_edge_sets = filter_edge_sets_by_trajectories(clustering_sol$edge_sets,
                                                      topk = 2,
                                                      min_path_size = 10)
# examine the new edge set sizes, excluded edges should have zero size
sapply(top2traj_edge_sets,length)
# for comparison examine the edge sets of the Bayesian clustering solution:
sapply(clustering_sol$edge_sets,length)
} # }

### Example 2: Use published data
# Get edges corresponding to 5 largest trajectories in the liver
tissue_edge_sets = limit_sets_by_regex(MotrpacRatTraining6moData::GRAPH_COMPONENTS$edge_sets,
                                       "LIVER")
res = filter_edge_sets_by_trajectories(tissue_edge_sets)
lapply(res[1:10], head)
#> $`0w---1w_F-1_M-1`
#> character(0)
#> 
#> $`0w---1w_F-1_M0`
#> character(0)
#> 
#> $`0w---1w_F-1_M1`
#> character(0)
#> 
#> $`0w---1w_F0_M-1`
#> character(0)
#> 
#> $`0w---1w_F0_M0`
#> [1] "ACETYL;LIVER;NP_001004250.1_K85k"  "ACETYL;LIVER;NP_001005875.1_K52k" 
#> [3] "ACETYL;LIVER;NP_001006996.1_K136k" "ACETYL;LIVER;NP_001007667.1_K213k"
#> [5] "ACETYL;LIVER;NP_001007804.1_K126k" "ACETYL;LIVER;NP_001009600.1_K134k"
#> 
#> $`0w---1w_F0_M1`
#> [1] "ACETYL;LIVER;AP_004896.1_K54k"     "ACETYL;LIVER;NP_001004085.1_K395k"
#> [3] "ACETYL;LIVER;NP_001004258.1_K350k" "ACETYL;LIVER;NP_001005550.1_K311k"
#> [5] "ACETYL;LIVER;NP_001006967.1_K158k" "ACETYL;LIVER;NP_001006971.1_K158k"
#> 
#> $`0w---1w_F1_M-1`
#> character(0)
#> 
#> $`0w---1w_F1_M0`
#> character(0)
#> 
#> $`0w---1w_F1_M1`
#> [1] "ACETYL;LIVER;NP_001013175.1_K397k" "ACETYL;LIVER;NP_001014181.2_K32k" 
#> [3] "ACETYL;LIVER;NP_001094009.1_K125k" "ACETYL;LIVER;NP_001099389.1_K85k" 
#> [5] "ACETYL;LIVER;NP_001101226.1_K384k" "ACETYL;LIVER;NP_001101972.2_K531k"
#> 
#> $`1w_F-1_M-1---2w_F-1_M-1`
#> character(0)
#>