# Supplementary table 1: a two-sheet container, filled in below.
# Both slots start out as NULL placeholders.
supp_table_1 <- list(
  Column_descriptions = NULL,
  Results = NULL
)
# Results sheet of supplementary table 1.
# Cluster-level significance statistics from the in-house leafcutter run are
# joined to intron-level annotation (second element of `intron_list`), giving
# one row per intron within each tested cluster and comparison.
supp_table_1[[2]] <-
  leafcutter_list$`in-house`$cluster_significance %>%
  dplyr::select(comparison, cluster, status, loglr, df, p, p.adjust) %>%
  dplyr::inner_join(
    intron_list[[2]] %>%
      dplyr::select(
        comparison, cluster_id,
        chr = seqnames, start, end, strand, logef,
        deltapsi, Control, PD, PDD, DLB,
        gene_name_junc, gene_id_junc, junc_in_ref, junc_cat
      ) %>%
      # Expand the gene name/id columns in parallel. `cols` is named
      # explicitly because passing several unnamed columns to unnest() is the
      # deprecated pre-1.0 tidyr interface.
      tidyr::unnest(cols = c(gene_name_junc, gene_id_junc)) %>%
      dplyr::group_by(
        comparison, cluster_id, chr, start, end, strand,
        logef, deltapsi, Control, PD, PDD, DLB,
        junc_in_ref, junc_cat
      ) %>%
      # Collapse the unnested gene names/ids back to one comma-separated
      # string per intron.
      dplyr::summarise(
        gene_name_junc = toString(gene_name_junc),
        gene_id_junc = toString(gene_id_junc)
      ) %>%
      # summarise() only peels off the last grouping variable; drop the rest
      # so the table handed to the join is not grouped.
      dplyr::ungroup(),
    by = c("comparison", "cluster" = "cluster_id")
  ) %>%
  # Order comparisons by the levels given in `fct_disease` (defined elsewhere)
  # so that the final sort follows that order rather than alphabetical order.
  dplyr::mutate(
    comparison = forcats::fct_relevel(comparison, fct_disease)
  ) %>%
  dplyr::arrange(comparison, p.adjust)
# Column-description sheet of supplementary table 1.
# Descriptions are positional: entry i documents column i of the results
# sheet, so the two vectors must stay in the same order.
supp_table_1[[1]] <-
  local({
    results_columns <- colnames(supp_table_1[[2]])
    column_notes <- c(
      "Pairwise comparison in question. Results extracted with the name group[1]_vs_group[2], where group[1] is the baseline/reference value and group[2] is in reference to the baseline value.",
      "Cluster ID, assigned by Leafcutter during intron clustering",
      "Whether this cluster was a) successfully tested b) not tested for some reason (e.g. too many introns) c) there was an error during testing (this is rare)",
      "Log likelihood ratio between the null model (no difference between the groups) and alternative (there is a difference)",
      "Degrees of freedom, equal to the number of introns in the cluster minus one",
      "The resulting unadjusted p-value under the asymptotic Chi-squared distribution",
      "FDR-adjusted p-value",
      "Chromosome on which intron is located",
      "Start co-ordinate of the intron",
      "End co-ordinate of the intron. To ensure optimal mapping to reference annotation this is the equivalent of the intron end outputted by Leafcutter - 1 bp",
      "Strand on which intron is located",
      "Log effect size (as fitted by LeafCutter)",
      "Delta percent-spliced-in (PSI), calculated by taking the difference between the PSI of group[2] - group[1]",
      "Fitted usage proportion in the control group in the relevant comparison",
      "Fitted usage proportion in the PD group in the relevant comparison",
      "Fitted usage proportion in the PDD group in the relevant comparison",
      "Fitted usage proportion in the DLB group in the relevant comparison",
      "Logical value designating whether junction and the intron it represents is present in reference annotation",
      "Junction category assigned",
      "HGNC symbol of the gene the junction overlaps",
      "Ensembl ID of the gene the junction overlaps"
    )
    tibble(
      `Column name` = results_columns,
      Description = column_notes
    )
  })
# Supplementary table 2: three-sheet container (descriptions plus two result
# sheets), filled in below. All slots start out as NULL placeholders.
supp_table_2 <- list(
  Column_descriptions = NULL,
  DS = NULL,
  DS_replication = NULL
)
# Result sheets of supplementary table 2 (slots 2 and 3).
# Every element of `ewce` is tidied the same way: annotated with `ct_class`,
# reduced to the reported columns (with publication-friendly names) and sorted.
# NOTE(review): presumably `ewce` holds exactly two data frames, one per
# slot - confirm against where `ewce` is built.
supp_table_2[2:3] <-
  lapply(ewce, function(ewce_results) {
    # No `by` is given, so the join uses whatever columns the two tables
    # share. NOTE(review): confirm the shared key(s) are the intended ones.
    annotated <- dplyr::inner_join(ewce_results, ct_class)

    reported <- dplyr::select(
      annotated,
      comparison_type, GeneSet,
      specificity_matrix = Study, cell_type = class,
      p, fold_change, sd_from_mean, FDR = FDR.p
    )

    dplyr::arrange(reported, comparison_type, GeneSet, specificity_matrix)
  })
# Column-description sheet of supplementary table 2.
# Column names are read from the first result sheet (slot 2); descriptions are
# positional, so entry i documents column i of that sheet.
supp_table_2[[1]] <-
  local({
    sheet_columns <- colnames(supp_table_2[[2]])
    sheet_notes <- c(
      "Whether pairwise comparisons involve comparisons of diseased individuals to control individuals (Ref: control) or to other diseased individuals (Ref: disease)",
      "Set of genes run in EWCE analysis",
      "Disease group used to generate specificity values for EWCE analysis",
      "Cell type",
      "Probability of cellular enrichment",
      "Expression in the target gene list divided by the mean level of expression in the bootstrap samples",
      "The distance (in standard deviations) of the target list from the mean of the bootstrap sample",
      "FDR-adjusted p-value"
    )
    tibble(
      `Column name` = sheet_columns,
      Description = sheet_notes
    )
  })
# Supplementary table 3: a two-sheet container, filled in below.
# Both slots start out as NULL placeholders.
supp_table_3 <- list(
  Column_descriptions = NULL,
  Results = NULL
)
# Results sheet of supplementary table 3.
# Columns of `cluster_compare_reduced` are picked and renamed for publication,
# then rows are sorted so that, within each parent term, child terms run from
# the highest to the lowest child-to-parent similarity score.
supp_table_3[[2]] <-
  dplyr::arrange(
    dplyr::select(
      cluster_compare_reduced,
      dataset, comparison,
      n_input = n_genes, go_type,
      # `parent_id` becomes `parent_term`, while the original `parent_term`
      # (the term's text) becomes `parent_description`.
      parent_term = parent_id, parent_description = parent_term,
      child_to_parent_sim_score = parent_sim_score,
      child_term = go_id, child_description = Description,
      GeneRatio, term_size, overlap = Count, pvalue,
      FDR = p.adjust, overlapping_genes = geneID
    ),
    dataset, comparison, go_type, parent_term,
    -child_to_parent_sim_score
  )
# Column-description sheet of supplementary table 3.
# Descriptions are positional: entry i documents column i of the results
# sheet, so the two vectors must stay in the same order.
supp_table_3[[1]] <-
  tibble(
    `Column name` = colnames(supp_table_3[[2]]),
    Description = c(
      "Dataset on which differential splicing was performed i.e. our own data or from the replication dataset from recount2",
      "Set of differentially spliced genes run in pathway analysis",
      "Size of input gene list",
      "Domain of Gene Ontology from which child and parent term are derived (BP = biological process; CC = cellular component; MF = molecular function)",
      "ID of the parent GO term",
      "Description of the parent GO term",
      "Child to parent semantic similarity score",
      "ID of the enriched GO term",
      "Description of the enriched GO term",
      "Proportion of genes in the input list that are annotated to the GO term (i.e. intersection/input list)",
      "The number of genes in the GO term",
      "Intersection size",
      "P-value of enrichment using a hypergeometric test",
      "FDR-corrected p-value",
      # The input gene sets here are differentially spliced genes (see the
      # first two descriptions), so this entry uses the same wording.
      "The gene IDs found overlapping between the tested set of differentially spliced genes and the child GO term"
    )
  )