Aims: to determine overlap of junctions and clusters derived from our own data and SRP058181
source(here::here("R", "file_paths.R"))
source(here::here("R", "convert_leafcutter.R"))
clu <-
setNames(
list(
fread(
file.path(
path_to_results,
"leafcutter/intron_clustering/tissue_polyA_test_diseasegroups_perind_numers.counts.gz"
)
),
fread(
file.path(
path_to_results,
"SRP058181/leafcutter/intron_clustering/test_diseasegroups_perind_numers.counts.gz"
)
)
),
c("own", "SRP058181")
) %>%
lapply(., function(x){
x %>%
dplyr::rename(intron = V1) %>%
convert_leafcutter(leafcutter_results = .,
use_strand = TRUE) %>%
dasper::annotate_junc_ref(junc_metadata = .,
gtf = path_to_ref_gtf)
})
saveRDS(
clu$own,
file =
file.path(
path_to_results,
"leafcutter/intron_clustering/tissue_polyA_all_clusters_gtfannotated.Rds"
)
)
saveRDS(
clu$SRP058181,
file =
file.path(
path_to_results,
"SRP058181/leafcutter/intron_clustering/SRP058181_all_clusters_gtfannotated.Rds"
)
)
# Import annotated clusters
clu <-
setNames(
list(
readRDS(
file.path(
path_to_results,
"leafcutter/intron_clustering/tissue_polyA_all_clusters_gtfannotated.Rds"
)
),
readRDS(
file.path(
path_to_results,
"SRP058181/leafcutter/intron_clustering/SRP058181_all_clusters_gtfannotated.Rds"
)
)
),
c("own", "SRP058181")
)
# Check that majority of clu are annotated
clu %>%
lapply(., function(x){
x %>%
as.data.frame() %>%
dplyr::group_by(junc_cat) %>%
dplyr::summarise(n = n()) %>%
dplyr::ungroup() %>%
dplyr::mutate(prop = n/sum(n)) %>%
dplyr::arrange(-prop)
})
## $own
## # A tibble: 7 x 3
## junc_cat n prop
## <chr> <int> <dbl>
## 1 annotated 96622 0.634
## 2 novel_acceptor 18186 0.119
## 3 novel_donor 12927 0.0849
## 4 none 10988 0.0721
## 5 novel_exon_skip 9255 0.0608
## 6 ambig_gene 2903 0.0191
## 7 novel_combo 1417 0.00930
##
## $SRP058181
## # A tibble: 7 x 3
## junc_cat n prop
## <chr> <int> <dbl>
## 1 annotated 87133 0.676
## 2 novel_acceptor 13479 0.105
## 3 novel_donor 11717 0.0910
## 4 novel_exon_skip 7924 0.0615
## 5 none 4820 0.0374
## 6 ambig_gene 2544 0.0198
## 7 novel_combo 1183 0.00918
Final analysis included in paper: cluster-level overlaps using differentially spliced clusters and the "exact match" definition (most stringent, but also makes the most sense)
# Find junctions that overlap
overlapped_junctions <-
GenomicRanges::findOverlaps(query = clu$SRP058181,
subject = clu$own,
ignore.strand = F,
type = "equal")
# Absolute numbers
print(str_c("Total number of junctions found in own data: ", length(clu$own)))
## [1] "Total number of junctions found in own data: 152298"
print(str_c("Total number of junctions found in SRP058181: ", length(clu$SRP058181)))
## [1] "Total number of junctions found in SRP058181: 128800"
print(str_c("Number of intersecting junctions: ", c(overlapped_junctions %>% subjectHits() %>% unique() %>% length())))
## [1] "Number of intersecting junctions: 107703"
# Percentage overlap from smaller SRP058181 dataset into larger
print(str_c("Percentage overlap from smaller SRP058181 dataset into larger: ",
c((overlapped_junctions %>% queryHits() %>% unique() %>% length())/(clu$SRP058181 %>% length()) * 100)))
## [1] "Percentage overlap from smaller SRP058181 dataset into larger: 83.6203416149068"
# Percentage overlap from larger own data into smaller SRP058181
print(str_c("Percentage overlap from larger own data into smaller SRP058181: ",
c((overlapped_junctions %>% subjectHits() %>% unique() %>% length())/(clu$own %>% length()) * 100)))
## [1] "Percentage overlap from larger own data into smaller SRP058181: 70.7185911830753"
plot <-
clu$SRP058181[queryHits(overlapped_junctions)] %>%
as.data.frame() %>%
dplyr::group_by(junc_cat) %>%
dplyr::summarise(n = n(),
prop_overlap = n/length(clu$SRP058181)) %>%
dplyr::mutate(overlap = "SRP058181 into own data") %>%
dplyr::bind_rows(clu$own[subjectHits(overlapped_junctions)] %>%
as.data.frame() %>%
dplyr::group_by(junc_cat) %>%
dplyr::summarise(n = n(),
prop_overlap = n/length(clu$own)) %>%
dplyr::mutate(overlap = "Own data into SRP058181")) %>%
dplyr::filter(junc_cat != "ambig_gene") %>%
dplyr::mutate(junc_cat = junc_cat %>% factor(levels = c("annotated", "novel_acceptor", "novel_donor", "novel_exon_skip", "novel_combo", "none"))) %>%
# ggplot(aes(x = junc_cat, y = prop_overlap, fill = junc_cat, alpha = overlap)) +
ggplot(aes(x = junc_cat, y = prop_overlap, alpha = overlap)) +
geom_col(position = position_dodge2(preserve = "single", padding = 0), colour = "black") +
# facet_wrap(vars(overlap)) +
labs(x = "", y = "Proportion of overlapping junctions") +
# scale_fill_manual(values = c(pal_npg(palette = c("nrc"), alpha = 1)(10)[c(4,2,1,3)], "#808080")) +
scale_alpha_manual(name = "Direction of overlap",
values = c(1, 0.5)) +
guides(fill = FALSE,
alpha=guide_legend(nrow=2,byrow=TRUE)) +
theme_rhr +
theme(legend.position = "bottom")
plot
Figure 4.1: Overlap of junctions within Leafcutter-defined intron clusters from our own dataset into the recount dataset, SRP058181, and vice-versa.
# ggsave(plot,
# filename = "/home/rreynolds/projects/Aim2_PDsequencing_wd/figures/Departmental/SRP058181_junction_overlap.tiff",
# device = "tiff",
# height = 100,
# width = 100,
# dpi = 200,
# units = "mm")
# Import leafcutter lists
leafcutter_list <-
setNames(
list(
readRDS(
file.path(
path_to_results,
"leafcutter/diff_splicing/allcomparisons_leafcutter_ds_SexRINAoD.Rds"
)
),
readRDS(
file.path(
path_to_results,
"SRP058181/leafcutter/diff_splicing/allcomparisons_leafcutter_ds_RINAoD.Rds"
)
)
),
c("own", "SRP058181")
)
# Import differentially spliced clusters (no dPSI filter, yet)
ds <-
leafcutter_list %>%
lapply(., function(x){
x$significant_clusters_0.05_filter %>%
dplyr::mutate(chr = chr %>%
str_replace(., "chr", ""),
cluster_id = str_c(chr, ":", cluster)) %>%
dplyr::select(comparison, chr, cluster, cluster_id, everything()) %>%
# Filtering for only those comparisons that are comparable between the two datasets
# I.e. Control_vs_PD, Control_vs_PDD, PD_vs_PDD
dplyr::filter(comparison %in% c("Control_vs_PD", "Control_vs_PDD", "PD_vs_PDD"))
})
# Filter by differentially spliced clusters
ds_clu <-
setNames(list(clu$own[elementMetadata(clu$own)[, "cluster_id"] %in%
c(ds$own$cluster_id %>% unique())],
clu$SRP058181[elementMetadata(clu$SRP058181)[, "cluster_id"] %in%
c(ds$SRP058181$cluster_id %>% unique())]),
c("own", "SRP058181"))
# Find junctions that overlap
overlapped_junctions <-
GenomicRanges::findOverlaps(query = ds_clu$SRP058181,
subject = ds_clu$own,
ignore.strand = F,
type = "equal")
# Absolute numbers
print(str_c("Total number of junctions within ds clusters in own data: ", length(ds_clu$own)))
## [1] "Total number of junctions within ds clusters in own data: 15876"
print(str_c("Total number of junctions within ds clusters in SRP058181: ", length(ds_clu$SRP058181)))
## [1] "Total number of junctions within ds clusters in SRP058181: 2992"
print(str_c("Number of intersecting junctions: ", c(overlapped_junctions %>% subjectHits() %>% unique() %>% length())))
## [1] "Number of intersecting junctions: 726"
# Percentage overlaps
print(str_c("Percentage overlap of junctions within differentially spliced cluster from SRP058181 into own data: ",
c((overlapped_junctions %>% queryHits() %>% unique() %>% length())/(ds_clu$SRP058181 %>% length()) * 100)))
## [1] "Percentage overlap of junctions within differentially spliced cluster from SRP058181 into own data: 24.2647058823529"
print(str_c("Percentage overlap of junctions within differentially spliced cluster from own data into smaller SRP058181: ",
c((overlapped_junctions %>% subjectHits() %>% unique() %>% length())/(ds_clu$own %>% length()) * 100)))
## [1] "Percentage overlap of junctions within differentially spliced cluster from own data into smaller SRP058181: 4.572940287226"
ds_clu$SRP058181[queryHits(overlapped_junctions)] %>%
as.data.frame() %>%
dplyr::group_by(junc_cat) %>%
dplyr::summarise(n = n(),
prop_overlap = n/length(ds_clu$SRP058181)) %>%
dplyr::mutate(overlap = "SRP058181 into own data") %>%
dplyr::bind_rows(ds_clu$own[subjectHits(overlapped_junctions)] %>%
as.data.frame() %>%
dplyr::group_by(junc_cat) %>%
dplyr::summarise(n = n(),
prop_overlap = n/length(ds_clu$own)) %>%
dplyr::mutate(overlap = "Own data into SRP058181")) %>%
dplyr::filter(junc_cat != "ambig_gene") %>%
dplyr::mutate(junc_cat = junc_cat %>% factor(levels = c("annotated", "novel_acceptor", "novel_donor", "novel_exon_skip", "novel_combo", "none"))) %>%
# ggplot(aes(x = junc_cat, y = prop_overlap, fill = junc_cat, alpha = overlap)) +
ggplot(aes(x = junc_cat, y = prop_overlap, alpha = overlap)) +
geom_col(position = position_dodge2(preserve = "single", padding = 0), colour = "black") +
# facet_wrap(vars(overlap)) +
labs(x = "", y = "Proportion of overlapping junctions") +
# scale_fill_manual(values = c(pal_npg(palette = c("nrc"), alpha = 1)(10)[c(4,2,1,3)], "#808080")) +
scale_alpha_manual(name = "Direction of overlap",
values = c(1, 0.5)) +
guides(fill = FALSE,
alpha=guide_legend(nrow=2,byrow=TRUE)) +
theme_rhr +
theme(legend.position = "bottom")
Figure 5.1: Overlap of junctions within differentially spliced intron clusters from our own dataset with junctions within differentially spliced intron clusters from the recount dataset, SRP058181, and vice-versa.
# Number of genes
ds_clu$own[subjectHits(overlapped_junctions)] %>%
as.data.frame() %>%
.[["gene_name_junc"]] %>%
unlist() %>%
unique() %>%
length()
## [1] 104
# Gene names
ds_clu$own[subjectHits(overlapped_junctions)] %>%
as.data.frame() %>%
.[["gene_name_junc"]] %>%
unlist() %>%
unique() %>%
sort()
## [1] "ABCD4" "ACAA1" "ACBD4" "ACSBG1" "ACTR8"
## [6] "ACYP2" "ADAM15" "AGRN" "AKAP8L" "AL139260.1"
## [11] "ANK2" "AP3S1" "ARMC10" "ART3" "ASPH"
## [16] "BCAS1" "BRSK2" "C1orf61" "CAMK2B" "CAPN10"
## [21] "CDK5RAP1" "CEP290" "CERS4" "CHKA" "CIRBP"
## [26] "CLK4" "COMMD4" "CSAD" "CSPG5" "DAZAP1"
## [31] "DHX30" "DOCK10" "DPH7" "DST" "EIF2D"
## [36] "ELOVL2" "ENY2" "EPB41L2" "EPB41L3" "ETNPPL"
## [41] "FAM168A" "FASTK" "FBRSL1" "FBXO7" "FYN"
## [46] "GDPD5" "GGT5" "GPBP1" "IL17RB" "ILK"
## [51] "IQSEC1" "KMT2E" "LIMCH1" "LUC7L3" "MARK2"
## [56] "MBP" "METTL26" "MOK" "MRNIP" "MTX2"
## [61] "MVD" "MYL6" "MYO6" "NPL" "NSFL1C"
## [66] "PCM1" "PCNX2" "PICALM" "PIP5K1C" "PPIE"
## [71] "PPP1R3F" "PREPL" "PRMT7" "PRPF38B" "PTPN12"
## [76] "R3HDM1" "R3HDM2" "RBM3" "RBM39" "RHOT1"
## [81] "RPAIN" "SBDSP1" "SEPT4" "SEPT8" "SERPINH1"
## [86] "SFXN5" "SHTN1" "SLC25A23" "SLC25A27" "SLC30A3"
## [91] "SMARCD3" "SUZ12P1" "SYNGAP1" "TAZ" "TESPA1"
## [96] "TIAL1" "TJP1" "TMEM144" "TMEM161B-AS1" "TMEM165"
## [101] "TPD52L1" "TRIM33" "TSPOAP1" "UQCRB"
# Read in PD genes
PD_genes <-
setNames(vector(mode = "list", length = 2),
c("mendelian", "sporadic"))
PD_genes$mendelian <-
read_delim(
file.path(
path_to_raw_data,
"misc/PD_risk_genes/20191128_GE_ParkinsonDiseaseComplexParkinsonism_greengenes.tsv"
),
delim = "\t"
) %>%
dplyr::rename(Gene = `Gene Symbol`)
PD_genes$sporadic <-
read_excel(
file.path(
path_to_raw_data,
"misc/PD_risk_genes/TableS6_Complete_summary_statistics_for_QTL_Mendelian_randomization.xlsx"
)
) %>%
dplyr::filter(`Pass Bonferroni` == "pass")
# Check for overlaps
ds_clu$own[subjectHits(overlapped_junctions)] %>%
as.data.frame() %>%
dplyr::filter(gene_name_junc %in% unique(c(PD_genes$mendelian$Gene, PD_genes$sporadic$Gene))) %>%
.[["gene_name_junc"]] %>%
unlist() %>%
unique() %>%
sort()
## [1] "FBRSL1" "FBXO7"
# Construct contingency table of differential splicing
# Extract cluster_significance from corresponding leafcutter lists
clu_significance <-
leafcutter_list%>%
lapply(., function(x){
x$cluster_significance %>%
dplyr::filter(comparison %in% c("Control_vs_PD", "Control_vs_PDD", "PD_vs_PDD"),
!str_detect(genes, ",")) %>%
dplyr::mutate(cluster_id = str_replace(cluster, "chr", "")) %>%
dplyr::select(comparison, cluster_id, everything(), -cluster)
})
cont_table <- setNames(vector(mode = "list", length = length(clu)),
names(clu))
for(i in 1:length(clu)){
cont_table[[i]] <- mcols(clu[[i]])[c("cluster_id", "gene_name_junc")] %>%
as_tibble() %>%
dplyr::filter(cluster_id %in% c(clu_significance[[names(clu[i])]] %>%
dplyr::filter(status == "Success") %>%
.[["cluster_id"]] %>%
unique())) %>%
tidyr::unnest(gene_name_junc) %>%
dplyr::distinct(cluster_id, gene_name_junc) %>%
dplyr::group_by(cluster_id) %>%
dplyr::filter(n() <= 1) %>%
dplyr::mutate(ds = case_when(cluster_id %in% c(clu_significance[[names(clu[i])]] %>%
dplyr::filter(p.adjust < 0.05) %>%
.[["cluster_id"]] %>%
unique()) ~ "yes",
TRUE~ "no"),
PD_gene = case_when(gene_name_junc %in%
unique(c(PD_genes$mendelian$Gene, PD_genes$sporadic$Gene)) ~ "yes",
TRUE ~ "no")) %>%
with(., table(ds, PD_gene, dnn = c("Differentially spliced?", "Overlapping PD gene?")))
}
cont_table
## $own
## Overlapping PD gene?
## Differentially spliced? no yes
## no 29336 398
## yes 2867 50
##
## $SRP058181
## Overlapping PD gene?
## Differentially spliced? no yes
## no 23551 342
## yes 521 6
# Fisher's exact test
cont_table %>%
lapply(., function(x) x %>% fisher.test())
## $own
##
## Fisher's Exact Test for Count Data
##
## data: .
## p-value = 0.09563
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.9357161 1.7329065
## sample estimates:
## odds ratio
## 1.285453
##
##
## $SRP058181
##
## Fisher's Exact Test for Count Data
##
## data: .
## p-value = 0.7112
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.2876699 1.7558886
## sample estimates:
## odds ratio
## 0.7930489
ds_overlap <- ds$own %>%
dplyr::filter(cluster_id %in% c(ds_clu$own[subjectHits(overlapped_junctions)] %>%
as.data.frame() %>%
.[["cluster_id"]] %>%
unique())) %>%
dplyr::select(chr, start, end, comparison, cluster_id, loglr, df, Control, PD, PDD, deltapsi, direction_of_effect) %>%
dplyr::full_join(ds$SRP058181 %>%
dplyr::filter(cluster_id %in% c(ds_clu$SRP058181[queryHits(overlapped_junctions)] %>%
as.data.frame() %>%
.[["cluster_id"]] %>%
unique())) %>%
dplyr::select(chr, start, end, comparison, cluster_id, loglr, df, Control, PD, PDD, deltapsi, direction_of_effect),
by = c("comparison", "chr", "start", "end"),
suffix = c(".own", ".SRP058181")) %>%
dplyr::mutate(same_direction_of_effect = case_when(direction_of_effect.own == direction_of_effect.SRP058181 ~ TRUE,
direction_of_effect.own != direction_of_effect.SRP058181 ~ FALSE,
TRUE ~ NA)) %>%
dplyr::filter(!is.na(same_direction_of_effect))
ds_overlap %>%
DT::datatable(rownames = FALSE,
options = list(scrollX = TRUE),
class = 'white-space: nowrap')
ds_overlap %>%
dplyr::group_by(comparison) %>%
dplyr::summarise(n = n(),
same_direction_of_effect = sum(same_direction_of_effect)) %>%
dplyr::mutate(prop = (same_direction_of_effect/n) * 100)
ds_overlap %>%
dplyr::filter(same_direction_of_effect == TRUE) %>%
dplyr::select(chr, start, end, contains("comparison"), contains("deltapsi")) %>%
tidyr::gather(key = dataset, value = dPSI, deltapsi.own:deltapsi.SRP058181) %>%
dplyr::mutate(dataset = str_replace(dataset, ".*\\.", "")) %>%
ggplot(aes(x = abs(dPSI), fill = dataset)) +
geom_density(alpha = 0.5) +
labs(x = expression("Absolute "*Delta*"PSI")) +
theme_rhr
Figure 5.2: Density plot of absolute delta PSI values for those junctions that (i) overlap between our own data and SRP05181, (ii) are within clusters that are differentially spliced in both datasets across the same comparisons, and (iii) share the same direction of effect.
a <- ds_overlap %>%
ggplot(aes(x = deltapsi.own, y = deltapsi.SRP058181)) +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm") +
labs(x = expression(Delta*"PSI in own data"), y = expression(Delta*"PSI in SRP058181")) +
facet_zoom(xlim = c(-0.025, 0.025), ylim = c(-0.025, 0.025), zoom.size = 1) +
theme_rhr
b <- ds_overlap %>%
dplyr::filter(same_direction_of_effect == TRUE) %>%
ggplot(aes(x = deltapsi.own, y = deltapsi.SRP058181)) +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm") +
labs(x = expression(Delta*"PSI in own data"), y = expression(Delta*"PSI in SRP058181")) +
facet_zoom(xlim = c(-0.025, 0.025), ylim = c(-0.025, 0.025), zoom.size = 1) +
theme_rhr
ggarrange(a, b,
ncol = 1,
labels = c("a", "b"))
Figure 5.3: Scatterplot of absolute delta PSI values for those junctions that (a) overlap between our own data and SRP0518181 and are within clusters that are differentially spliced in both datasets across the same comparisons and (b) share the same direction of effect.
# Correlation of junctions within ds clusters from same comparison
ds_overlap %>%
cor.test(~ deltapsi.own + deltapsi.SRP058181, data = ., method = "spearman")
##
## Spearman's rank correlation rho
##
## data: deltapsi.own and deltapsi.SRP058181
## S = 5366852, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.5863913
# Correlation of junctions within ds clusters from same comparison and with same direction of effect
ds_overlap %>%
dplyr::filter(same_direction_of_effect == TRUE) %>%
cor.test(~ deltapsi.own + deltapsi.SRP058181, data = ., method = "spearman")
##
## Spearman's rank correlation rho
##
## data: deltapsi.own and deltapsi.SRP058181
## S = 291220, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.9283554
This section is simply looking to see that junctions found within differentially spliced clusters from one dataset, also validate across in the other dataset (without needing to be differentially spliced in the other, too).
# Find junctions that overlap
overlapped_junctions <-
GenomicRanges::findOverlaps(query = clu$own,
subject = ds_clu$SRP058181,
ignore.strand = F,
type = "equal")
# Absolute numbers
print(str_c("Total number of junctions within ds clusters in SRP058181: ", length(ds_clu$SRP058181)))
## [1] "Total number of junctions within ds clusters in SRP058181: 2992"
print(str_c("Number of intersecting junctions: ", c(overlapped_junctions %>% subjectHits() %>% unique() %>% length())))
## [1] "Number of intersecting junctions: 2678"
# Percentage overlaps
print(str_c("Percentage overlap of junctions within differentially spliced clusters from SRP058181 into own data: ",
c((overlapped_junctions %>% subjectHits() %>% unique() %>% length())/(ds_clu$SRP058181 %>% length()) * 100)))
## [1] "Percentage overlap of junctions within differentially spliced clusters from SRP058181 into own data: 89.5053475935829"
ds_clu$SRP058181[subjectHits(overlapped_junctions)] %>%
as.data.frame() %>%
dplyr::group_by(junc_cat) %>%
dplyr::summarise(n = n(),
prop_overlap = n/length(ds_clu$SRP058181)) %>%
dplyr::filter(junc_cat != "ambig_gene") %>%
dplyr::mutate(junc_cat = junc_cat %>% factor(levels = c("annotated", "novel_acceptor", "novel_donor", "novel_exon_skip", "novel_combo", "none"))) %>%
# ggplot(aes(x = junc_cat, y = prop_overlap, fill = junc_cat)) +
ggplot(aes(x = junc_cat, y = prop_overlap)) +
geom_col(position = position_dodge2(preserve = "single", padding = 0), colour = "black") +
# facet_wrap(vars(overlap)) +
labs(x = "", y = "Proportion of overlapping junctions") +
# scale_fill_manual(values = c(pal_npg(palette = c("nrc"), alpha = 1)(10)[c(4,2,1,3)], "#808080")) +
guides(fill = FALSE,
alpha=guide_legend(nrow=2,byrow=TRUE)) +
theme_rhr +
theme(legend.position = "bottom")
Figure 5.4: Overlap of junctions found within differentially spliced clusters from SRP058181 into own data.
# Find junctions that overlap
overlapped_junctions <-
GenomicRanges::findOverlaps(query = clu$SRP058181,
subject = ds_clu$own,
ignore.strand = F,
type = "equal")
# Absolute numbers
print(str_c("Total number of junctions within ds clusters in own data: ", length(ds_clu$own)))
## [1] "Total number of junctions within ds clusters in own data: 15876"
print(str_c("Number of intersecting junctions: ", c(overlapped_junctions %>% subjectHits() %>% unique() %>% length())))
## [1] "Number of intersecting junctions: 12493"
# Percentage overlaps
print(str_c("Percentage overlap of junctions within differentially spliced cluster from own data into smaller SRP058181: ",
c((overlapped_junctions %>% subjectHits() %>% unique() %>% length())/(ds_clu$own %>% length()) * 100)))
## [1] "Percentage overlap of junctions within differentially spliced cluster from own data into smaller SRP058181: 78.6911060720584"
ds_clu$own[subjectHits(overlapped_junctions)] %>%
as.data.frame() %>%
dplyr::group_by(junc_cat) %>%
dplyr::summarise(n = n(),
prop_overlap = n/length(ds_clu$own)) %>%
dplyr::filter(junc_cat != "ambig_gene") %>%
dplyr::mutate(junc_cat = junc_cat %>% factor(levels = c("annotated", "novel_acceptor", "novel_donor", "novel_exon_skip", "novel_combo", "none"))) %>%
# ggplot(aes(x = junc_cat, y = prop_overlap, fill = junc_cat)) +
ggplot(aes(x = junc_cat, y = prop_overlap)) +
geom_col(position = position_dodge2(preserve = "single", padding = 0), colour = "black") +
# facet_wrap(vars(overlap)) +
labs(x = "", y = "Proportion of overlapping junctions") +
scale_fill_manual(values = c(pal_npg(palette = c("nrc"), alpha = 1)(10)[c(4,2,1,3)], "#808080")) +
guides(fill = FALSE,
alpha=guide_legend(nrow=2,byrow=TRUE)) +
theme_rhr +
theme(legend.position = "bottom")
Figure 5.5: Overlap of junctions found within differentially spliced clusters from our own dataset into the recount dataset, SRP058181.
# Import leafcutter lists
leafcutter_list <-
setNames(
list(
readRDS(
file.path(
path_to_results,
"leafcutter/diff_splicing_PCaxes/allcomparisons_leafcutter_ds_PCaxes.Rds")
),
readRDS(
file.path(
path_to_results,
"SRP058181/leafcutter/diff_splicing_celltype/allcomparisons_leafcutter_ds_celltype.Rds"
)
)
),
c("own", "SRP058181")
)
# Import differentially spliced clusters (no dPSI filter, yet)
ds <-
leafcutter_list %>%
lapply(., function(x){
x$significant_clusters_0.05_filter %>%
dplyr::mutate(chr = chr %>%
str_replace(., "chr", ""),
cluster_id = str_c(chr, ":", cluster)) %>%
dplyr::select(comparison, chr, cluster, cluster_id, everything()) %>%
# Filtering for only those comparisons that are comparable between the two datasets
# I.e. Control_vs_PD, Control_vs_PDD, PD_vs_PDD
dplyr::filter(comparison %in% c("Control_vs_PD", "Control_vs_PDD", "PD_vs_PDD"))
})
# Filter by differentially spliced clusters
ds_clu <-
setNames(list(clu$own[elementMetadata(clu$own)[, "cluster_id"] %in%
c(ds$own$cluster_id %>% unique())],
clu$SRP058181[elementMetadata(clu$SRP058181)[, "cluster_id"] %in%
c(ds$SRP058181$cluster_id %>% unique())]),
c("own", "SRP058181"))
# Find junctions that overlap
overlapped_junctions <-
GenomicRanges::findOverlaps(query = ds_clu$SRP058181,
subject = ds_clu$own,
ignore.strand = F,
type = "equal")
# Absolute numbers
print(str_c("Total number of junctions within ds clusters in own data: ", length(ds_clu$own)))
## [1] "Total number of junctions within ds clusters in own data: 12755"
print(str_c("Total number of junctions within ds clusters in SRP058181: ", length(ds_clu$SRP058181)))
## [1] "Total number of junctions within ds clusters in SRP058181: 6793"
print(str_c("Number of intersecting junctions: ", c(overlapped_junctions %>% subjectHits() %>% unique() %>% length())))
## [1] "Number of intersecting junctions: 868"
# Percentage overlaps
print(str_c("Percentage overlap of junctions within differentially spliced cluster from SRP058181 into own data: ",
c((overlapped_junctions %>% queryHits() %>% unique() %>% length())/(ds_clu$SRP058181 %>% length()) * 100)))
## [1] "Percentage overlap of junctions within differentially spliced cluster from SRP058181 into own data: 12.7778595613131"
print(str_c("Percentage overlap of junctions within differentially spliced cluster from own data into smaller SRP058181: ",
c((overlapped_junctions %>% subjectHits() %>% unique() %>% length())/(ds_clu$own %>% length()) * 100)))
## [1] "Percentage overlap of junctions within differentially spliced cluster from own data into smaller SRP058181: 6.80517444139553"
ds_clu$SRP058181[queryHits(overlapped_junctions)] %>%
as.data.frame() %>%
dplyr::group_by(junc_cat) %>%
dplyr::summarise(n = n(),
prop_overlap = n/length(ds_clu$SRP058181)) %>%
dplyr::mutate(overlap = "SRP058181 into own data") %>%
dplyr::bind_rows(ds_clu$own[subjectHits(overlapped_junctions)] %>%
as.data.frame() %>%
dplyr::group_by(junc_cat) %>%
dplyr::summarise(n = n(),
prop_overlap = n/length(ds_clu$own)) %>%
dplyr::mutate(overlap = "Own data into SRP058181")) %>%
dplyr::filter(junc_cat != "ambig_gene") %>%
dplyr::mutate(junc_cat = junc_cat %>% factor(levels = c("annotated", "novel_acceptor", "novel_donor", "novel_exon_skip", "novel_combo", "none"))) %>%
# ggplot(aes(x = junc_cat, y = prop_overlap, fill = junc_cat, alpha = overlap)) +
ggplot(aes(x = junc_cat, y = prop_overlap, alpha = overlap)) +
geom_col(position = position_dodge2(preserve = "single", padding = 0), colour = "black") +
# facet_wrap(vars(overlap)) +
labs(x = "", y = "Proportion of overlapping junctions") +
# scale_fill_manual(values = c(pal_npg(palette = c("nrc"), alpha = 1)(10)[c(4,2,1,3)], "#808080")) +
scale_alpha_manual(name = "Direction of overlap",
values = c(1, 0.5)) +
guides(fill = FALSE,
alpha=guide_legend(nrow=2,byrow=TRUE)) +
theme_rhr +
theme(legend.position = "bottom")
Figure 5.6: Overlap of junctions within differentially spliced intron clusters from our own dataset with junctions within differentially spliced intron clusters from the recount dataset, SRP058181, and vice-versa.
# Number of genes
ds_clu$own[subjectHits(overlapped_junctions)] %>%
as.data.frame() %>%
.[["gene_name_junc"]] %>%
unlist() %>%
unique() %>%
length()
## [1] 156
# Gene names
ds_clu$own[subjectHits(overlapped_junctions)] %>%
as.data.frame() %>%
.[["gene_name_junc"]] %>%
unlist() %>%
unique() %>%
sort()
## [1] "ABHD11" "AC002470.2" "ACTR8" "ADRA1B" "AIFM3"
## [6] "AKAP8" "AKAP8L" "AMPD2" "APIP" "ARAP1"
## [11] "ARAP2" "ARHGAP21" "ARHGAP9" "ARHGEF1" "ASIC3"
## [16] "ASPA" "ASPDH" "ASPH" "ATP1A2" "BCAS1"
## [21] "BRWD1" "BTAF1" "C1orf61" "C20orf27" "CAPN2"
## [26] "CARMIL3" "CARNS1" "CAST" "CCDC74A" "CCNL1"
## [31] "CD22" "CDC37" "CDK18" "CLIP2" "CLK4"
## [36] "CNOT3" "COQ9" "CPT1C" "CREBBP" "CSDE1"
## [41] "CTSZ" "CYP2J2" "DBI" "DBNDD1" "DCAF16"
## [46] "DLGAP1-AS4" "DMXL1" "DPH7" "DPY19L2P1" "DRAM2"
## [51] "DTX3" "EIF2D" "ERCC1" "F3" "FAAH"
## [56] "FAM13B" "FAM221A" "FBXO7" "FLAD1" "GABBR1"
## [61] "GGT5" "GORASP1" "HAUS2" "HHATL" "IFT140"
## [66] "IQSEC2" "IZUMO4" "KIAA1755" "L3MBTL2" "LINC00299"
## [71] "LMO3" "LPIN1" "LUC7L3" "MAST1" "MBNL2"
## [76] "MFSD12" "MIA3" "MORN5" "MRPL43" "MRPL51"
## [81] "MSTO1" "MTDH" "MTMR12" "MVD" "NAALAD2"
## [86] "NAP1L4" "NCALD" "NDRG2" "NDUFA6-DT" "NDUFAF6"
## [91] "NELFA" "NETO1" "NSUN5P2" "ODF2" "OGFOD3"
## [96] "OSBPL1A" "P4HA2" "PCYT2" "PIN4" "PLCH2"
## [101] "PLEKHA5" "PLXNB1" "POMT2" "POT1" "PRKAG1"
## [106] "PRKAG2" "PRR14" "PRRC2B" "PRRC2C" "PSMD13"
## [111] "PTPRZ1" "RANGAP1" "RAPGEF1" "RASGRF1" "RASSF8-AS1"
## [116] "RBM25" "RBMX" "RERGL" "RFC5" "RGS20"
## [121] "RHOJ" "RNF212" "ROCK2" "ROGDI" "RPAIN"
## [126] "RPUSD3" "RUFY3" "SCMH1" "SEZ6L2" "SHC4"
## [131] "SLCO1C1" "SNCB" "SNHG29" "SNHG8" "SNX32"
## [136] "SON" "SPATS2L" "SPINDOC" "SPRYD7" "SPSB3"
## [141] "SRCIN1" "SRPK2" "STMN4" "STYXL1" "TMEM167A"
## [146] "TMEM176B" "TMEM63A" "TMEM98" "TSPAN17" "UBA3"
## [151] "UIMC1" "URB1" "VEZT" "XPO1" "ZNF530"
## [156] "ZNF76"
# Check for overlaps
ds_clu$own[subjectHits(overlapped_junctions)] %>%
as.data.frame() %>%
dplyr::filter(gene_name_junc %in% unique(c(PD_genes$mendelian$Gene, PD_genes$sporadic$Gene))) %>%
.[["gene_name_junc"]] %>%
unlist() %>%
unique() %>%
sort()
## [1] "DCAF16" "FBXO7"
# Construct contingency table of differential splicing
# Extract cluster_significance from corresponding leafcutter lists
clu_significance <-
leafcutter_list%>%
lapply(., function(x){
x$cluster_significance %>%
dplyr::filter(comparison %in% c("Control_vs_PD", "Control_vs_PDD", "PD_vs_PDD"),
!str_detect(genes, ",")) %>%
dplyr::mutate(cluster_id = str_replace(cluster, "chr", "")) %>%
dplyr::select(comparison, cluster_id, everything(), -cluster)
})
cont_table <- setNames(vector(mode = "list", length = length(clu)),
names(clu))
for(i in 1:length(clu)){
cont_table[[i]] <- mcols(clu[[i]])[c("cluster_id", "gene_name_junc")] %>%
as_tibble() %>%
dplyr::filter(cluster_id %in% c(clu_significance[[names(clu[i])]] %>%
dplyr::filter(status == "Success") %>%
.[["cluster_id"]] %>%
unique())) %>%
tidyr::unnest(gene_name_junc) %>%
dplyr::distinct(cluster_id, gene_name_junc) %>%
dplyr::group_by(cluster_id) %>%
dplyr::filter(n() <= 1) %>%
dplyr::mutate(ds = case_when(cluster_id %in% c(clu_significance[[names(clu[i])]] %>%
dplyr::filter(p.adjust < 0.05) %>%
.[["cluster_id"]] %>%
unique()) ~ "yes",
TRUE~ "no"),
PD_gene = case_when(gene_name_junc %in%
unique(c(PD_genes$mendelian$Gene, PD_genes$sporadic$Gene)) ~ "yes",
TRUE ~ "no")) %>%
with(., table(ds, PD_gene, dnn = c("Differentially spliced?", "Overlapping PD gene?")))
}
cont_table
## $own
## Overlapping PD gene?
## Differentially spliced? no yes
## no 29746 404
## yes 2457 44
##
## $SRP058181
## Overlapping PD gene?
## Differentially spliced? no yes
## no 22548 327
## yes 1524 21
# Fisher's exact test
cont_table %>%
lapply(., function(x) x %>% fisher.test())
## $own
##
## Fisher's Exact Test for Count Data
##
## data: .
## p-value = 0.08873
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.9405449 1.8082126
## sample estimates:
## odds ratio
## 1.318531
##
##
## $SRP058181
##
## Fisher's Exact Test for Count Data
##
## data: .
## p-value = 0.9118
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.5784634 1.4824421
## sample estimates:
## odds ratio
## 0.9501564
ds_overlap <- ds$own %>%
dplyr::filter(cluster_id %in% c(ds_clu$own[subjectHits(overlapped_junctions)] %>%
as.data.frame() %>%
.[["cluster_id"]] %>%
unique())) %>%
dplyr::select(chr, start, end, comparison, cluster_id, loglr, df, Control, PD, PDD, deltapsi, direction_of_effect) %>%
dplyr::full_join(ds$SRP058181 %>%
dplyr::filter(cluster_id %in% c(ds_clu$SRP058181[queryHits(overlapped_junctions)] %>%
as.data.frame() %>%
.[["cluster_id"]] %>%
unique())) %>%
dplyr::select(chr, start, end, comparison, cluster_id, loglr, df, Control, PD, PDD, deltapsi, direction_of_effect),
by = c("comparison", "chr", "start", "end"),
suffix = c(".own", ".SRP058181")) %>%
dplyr::mutate(same_direction_of_effect = case_when(direction_of_effect.own == direction_of_effect.SRP058181 ~ TRUE,
direction_of_effect.own != direction_of_effect.SRP058181 ~ FALSE,
TRUE ~ NA)) %>%
dplyr::filter(!is.na(same_direction_of_effect))
ds_overlap %>%
DT::datatable(rownames = FALSE,
options = list(scrollX = TRUE),
class = 'white-space: nowrap')
ds_overlap %>%
dplyr::group_by(comparison) %>%
dplyr::summarise(n = n(),
same_direction_of_effect = sum(same_direction_of_effect)) %>%
dplyr::mutate(prop = (same_direction_of_effect/n) * 100)
ds_overlap %>%
dplyr::filter(same_direction_of_effect == TRUE) %>%
dplyr::select(chr, start, end, contains("comparison"), contains("deltapsi")) %>%
tidyr::gather(key = dataset, value = dPSI, deltapsi.own:deltapsi.SRP058181) %>%
dplyr::mutate(dataset = str_replace(dataset, ".*\\.", "")) %>%
ggplot(aes(x = abs(dPSI), fill = dataset)) +
geom_density(alpha = 0.5) +
labs(x = expression("Absolute "*Delta*"PSI")) +
theme_rhr
Figure 5.7: Density plot of absolute delta PSI values for those junctions that (i) overlap between our own data and SRP0518181, (ii) are within clusters that are differentially spliced in both datasets across the same comparisons, and (iii) share the same direction of effect.
a <- ds_overlap %>%
ggplot(aes(x = deltapsi.own, y = deltapsi.SRP058181)) +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm") +
labs(x = expression(Delta*"PSI in own data"), y = expression(Delta*"PSI in SRP058181")) +
facet_zoom(xlim = c(-0.025, 0.025), ylim = c(-0.025, 0.025), zoom.size = 1) +
theme_rhr
b <- ds_overlap %>%
dplyr::filter(same_direction_of_effect == TRUE) %>%
ggplot(aes(x = deltapsi.own, y = deltapsi.SRP058181)) +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm") +
labs(x = expression(Delta*"PSI in own data"), y = expression(Delta*"PSI in SRP058181")) +
facet_zoom(xlim = c(-0.025, 0.025), ylim = c(-0.025, 0.025), zoom.size = 1) +
theme_rhr
ggarrange(a, b,
ncol = 1,
labels = c("a", "b"))
Figure 5.8: Scatterplot of absolute delta PSI values for those junctions that (a) overlap between our own data and SRP0518181 and are within clusters that are differentially spliced in both datasets across the same comparisons and (b) share the same direction of effect.
# Correlation of junctions within ds clusters from same comparison
ds_overlap %>%
cor.test(~ deltapsi.own + deltapsi.SRP058181, data = ., method = "spearman")
##
## Spearman's rank correlation rho
##
## data: deltapsi.own and deltapsi.SRP058181
## S = 4254074, p-value = 0.02033
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.1381891
# Correlation of junctions within ds clusters from same comparison and with same direction of effect
ds_overlap %>%
dplyr::filter(same_direction_of_effect == TRUE) %>%
cor.test(~ deltapsi.own + deltapsi.SRP058181, data = ., method = "spearman")
##
## Spearman's rank correlation rho
##
## data: deltapsi.own and deltapsi.SRP058181
## S = 24930, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.9112306
This section is simply looking to see that junctions found within differentially spliced clusters from one dataset, also validate across in the other dataset (without needing to be differentially spliced in the other, too).
# Find junctions that overlap
overlapped_junctions <-
GenomicRanges::findOverlaps(query = clu$own,
subject = ds_clu$SRP058181,
ignore.strand = F,
type = "equal")
# Absolute numbers
print(str_c("Total number of junctions within ds clusters in SRP058181: ", length(ds_clu$SRP058181)))
## [1] "Total number of junctions within ds clusters in SRP058181: 6793"
print(str_c("Number of intersecting junctions: ", c(overlapped_junctions %>% subjectHits() %>% unique() %>% length())))
## [1] "Number of intersecting junctions: 5952"
# Percentage overlaps
print(str_c("Percentage overlap of junctions within differentially spliced clusters from SRP058181 into own data: ",
c((overlapped_junctions %>% subjectHits() %>% unique() %>% length())/(ds_clu$SRP058181 %>% length()) * 100)))
## [1] "Percentage overlap of junctions within differentially spliced clusters from SRP058181 into own data: 87.6196084204328"
ds_clu$SRP058181[subjectHits(overlapped_junctions)] %>%
as.data.frame() %>%
dplyr::group_by(junc_cat) %>%
dplyr::summarise(n = n(),
prop_overlap = n/length(ds_clu$SRP058181)) %>%
dplyr::filter(junc_cat != "ambig_gene") %>%
dplyr::mutate(junc_cat = junc_cat %>% factor(levels = c("annotated", "novel_acceptor", "novel_donor", "novel_exon_skip", "novel_combo", "none"))) %>%
# ggplot(aes(x = junc_cat, y = prop_overlap, fill = junc_cat)) +
ggplot(aes(x = junc_cat, y = prop_overlap)) +
geom_col(position = position_dodge2(preserve = "single", padding = 0), colour = "black") +
# facet_wrap(vars(overlap)) +
labs(x = "", y = "Proportion of overlapping junctions") +
# scale_fill_manual(values = c(pal_npg(palette = c("nrc"), alpha = 1)(10)[c(4,2,1,3)], "#808080")) +
guides(fill = FALSE,
alpha=guide_legend(nrow=2,byrow=TRUE)) +
theme_rhr +
theme(legend.position = "bottom")
Figure 5.9: Overlap of junctions found within differentially spliced clusters from SRP058181 into own data.
# Find junctions that overlap
overlapped_junctions <-
GenomicRanges::findOverlaps(query = clu$SRP058181,
subject = ds_clu$own,
ignore.strand = F,
type = "equal")
# Absolute numbers
print(str_c("Total number of junctions within ds clusters in own data: ", length(ds_clu$own)))
## [1] "Total number of junctions within ds clusters in own data: 12755"
print(str_c("Number of intersecting junctions: ", c(overlapped_junctions %>% subjectHits() %>% unique() %>% length())))
## [1] "Number of intersecting junctions: 9993"
# Percentage overlap of junctions within differentially spliced cluster from own data into smaller SRP058181
print(str_c("Percentage overlap of junctions within differentially spliced cluster from own data into smaller SRP058181: ",
c((overlapped_junctions %>% subjectHits() %>% unique() %>% length())/(ds_clu$own %>% length()) * 100)))
## [1] "Percentage overlap of junctions within differentially spliced cluster from own data into smaller SRP058181: 78.3457467659741"
ds_clu$own[subjectHits(overlapped_junctions)] %>%
as.data.frame() %>%
dplyr::group_by(junc_cat) %>%
dplyr::summarise(n = n(),
prop_overlap = n/length(ds_clu$own)) %>%
dplyr::filter(junc_cat != "ambig_gene") %>%
dplyr::mutate(junc_cat = junc_cat %>% factor(levels = c("annotated", "novel_acceptor", "novel_donor", "novel_exon_skip", "novel_combo", "none"))) %>%
# ggplot(aes(x = junc_cat, y = prop_overlap, fill = junc_cat)) +
ggplot(aes(x = junc_cat, y = prop_overlap)) +
geom_col(position = position_dodge2(preserve = "single", padding = 0), colour = "black") +
# facet_wrap(vars(overlap)) +
labs(x = "", y = "Proportion of overlapping junctions") +
scale_fill_manual(values = c(pal_npg(palette = c("nrc"), alpha = 1)(10)[c(4,2,1,3)], "#808080")) +
guides(fill = FALSE,
alpha=guide_legend(nrow=2,byrow=TRUE)) +
theme_rhr +
theme(legend.position = "bottom")
Figure 5.10: Overlap of junctions found within differentially spliced clusters from our own dataset into the recount dataset, SRP058181.
# Find overlapping junctions/clusters
overlapped_junctions <-
GenomicRanges::findOverlaps(query = clu$SRP058181,
subject = clu$own,
ignore.strand = F,
type = "equal")
# Filter by cluster names from overlap
clu_overlap <- clu$own[subjectHits(overlapped_junctions), "cluster_id"] %>%
as_tibble() %>%
dplyr::inner_join(clu$SRP058181[queryHits(overlapped_junctions), "cluster_id"] %>%
as_tibble(),
by = c("seqnames", "start", "end", "strand"),
suffix = c(".own", ".SRP058181"))
# Remove clusters from own that overlap more than one cluster in SRP058181
# I.e. ensure no ambiguous cluster mapping
ambig_clu <- clu_overlap %>%
dplyr::distinct(cluster_id.own, cluster_id.SRP058181) %>%
dplyr::filter(!is.na(cluster_id.SRP058181)) %>%
dplyr::group_by(cluster_id.own) %>%
dplyr::filter(n() > 1)
# Extract non-ambiguous overlapping clusters
overlapping_clusters <- clu_overlap %>%
dplyr::filter(!cluster_id.own %in% unique(ambig_clu$cluster_id.own)) %>%
.[["cluster_id.own"]] %>%
unique()
# Absolute numbers
print(str_c("Number of clusters in own data: ", length(clu$own$cluster_id %>% unique())))
## [1] "Number of clusters in own data: 43544"
print(str_c("Number of clusters in SRP058181: ", length(clu$SRP058181$cluster_id %>% unique())))
## [1] "Number of clusters in SRP058181: 37021"
print(str_c("Number of overlapping clusters between both datasets: ", length(overlapping_clusters)))
## [1] "Number of overlapping clusters between both datasets: 31825"
# Use intron usage, as unlike clu (leafcutter cluster definitions), this will only include successfully tested clusters
# "Successful" test means cluster passed necessary minimum filters for differential splicing (i.e. min_samples_per_intron, min_samples_per_group, min_coverage)
intron_usage <-
leafcutter_list %>%
lapply(., function(x){
x$intron_usage %>%
dplyr::filter(comparison %in% c("Control_vs_PD", "Control_vs_PDD", "PD_vs_PDD")) %>%
tidyr::separate(col = intron, into = c("chr", "start", "end", "cluster"), sep = ":") %>%
tidyr::separate(col = cluster, into = c("clu", "id", "strand"), sep = "_", remove = FALSE) %>%
dplyr::mutate(chr = str_replace(chr, "chr", ""),
cluster_id = str_c(chr, ":", cluster))
})
# Loop through clusters to determine proportion overlap
# Run in parallel
cl <- makeCluster(20)
# Register clusters
registerDoParallel(cl)
getDoParWorkers()
clu_prop_overlap <- foreach(i = 1:length(overlapping_clusters),
.verbose = TRUE,
.packages = c("tidyverse", "stringr")) %dopar% {
# Determine number of junctions in own cluster
n_junctions.own <- intron_usage$own %>%
dplyr::filter(cluster_id == overlapping_clusters[i]) %>%
dplyr::distinct(chr, start, end, strand) %>%
nrow()
# Determine number of junctions in overlap
n_overlap <- clu_overlap %>%
dplyr::filter(cluster_id.own == overlapping_clusters[i],
!is.na(cluster_id.SRP058181)) %>%
nrow()
# Determine number of junctions in SRP058181 cluster
cluster_id.SRP058181 <- clu_overlap %>%
dplyr::filter(cluster_id.own == overlapping_clusters[i],
!is.na(cluster_id.SRP058181)) %>%
.[["cluster_id.SRP058181"]] %>%
unique()
n_junctions.SRP058181 <- intron_usage$SRP058181 %>%
dplyr::filter(cluster_id == cluster_id.SRP058181) %>%
dplyr::distinct(chr, start, end, strand) %>%
nrow()
df <- tibble(cluster_id.own = overlapping_clusters[i],
n_junctions.own = n_junctions.own,
cluster_id.SRP058181 = cluster_id.SRP058181,
n_junctions.SRP058181 = n_junctions.SRP058181,
n_overlap = n_overlap,
prop_overlap.own = (n_overlap/n_junctions.own)*100,
prop_overlap.SRP058181 = (n_overlap/n_junctions.SRP058181)*100)
df
}
# Stop cluster
stopCluster(cl = cl)
clu_prop_overlap <- clu_prop_overlap %>%
qdapTools::list_df2df() %>%
dplyr::select(-X1)
saveRDS(
clu_prop_overlap,
file.path(
path_to_results,
"leafcutter/diff_splicing_PCaxes/cluster_overlap_with_SRP058181.Rds"
)
)
clu_prop_overlap <-
readRDS(
file.path(
path_to_results,
"leafcutter/diff_splicing_PCaxes/cluster_overlap_with_SRP058181.Rds"
)
)
clu_prop_overlap %>%
tidyr::gather(key = "overlap", value = "prop_overlap", contains("prop_overlap")) %>%
dplyr::mutate(overlap = case_when(overlap == "prop_overlap.own" ~ "own --> SRP058181",
overlap == "prop_overlap.SRP058181" ~ "SRP058181 --> own")) %>%
ggplot(aes(x = prop_overlap, fill = overlap)) +
geom_histogram(binwidth = 5, alpha = 0.5, colour = "black") +
facet_wrap(~overlap) +
labs(x = "Proportion overlap of cluster from discovery dataset into validation dataset") +
theme_rhr +
theme(legend.position = "none")
Figure 6.1: Histogram of the proportion overlap between all clusters derived from own data that are also found in SRP058181, and vice versa.
cluster_significance
dataframe for both datasets, which contains the necessary information re. cluster-level differential splicing.# Extract cluster_significance from corresponding leafcutter lists
clu_significance <-
leafcutter_list%>%
lapply(., function(x){
x$cluster_significance %>%
dplyr::filter(comparison %in% c("Control_vs_PD", "Control_vs_PDD", "PD_vs_PDD"),
!str_detect(genes, ",")) %>%
dplyr::mutate(cluster_id = str_replace(cluster, "chr", "")) %>%
dplyr::select(comparison, cluster_id, everything(), -cluster)
})
# Filter cluster overlap proportions to determine clusters that "exactly" match
clu_prop_overlap_exact <- clu_prop_overlap %>%
dplyr::filter(prop_overlap.own != Inf,
prop_overlap.own >= 100,
prop_overlap.own == prop_overlap.SRP058181)
# Filter clu_overlap to include only overlapping clusters that "exactly" match
clu_overlap_exact <- clu_overlap %>%
dplyr::distinct(cluster_id.own, cluster_id.SRP058181) %>%
dplyr::filter(cluster_id.own %in% clu_prop_overlap_exact$cluster_id.own)
print(str_c("Number of clusters that overlap exactly between both datasets: ", length(clu_overlap_exact$cluster_id.own %>% unique())))
## [1] "Number of clusters that overlap exactly between both datasets: 13433"
# Construct contingency table of differential splicing
cont_table <- clu_overlap_exact %>%
dplyr::mutate(ds.own = fct_relevel(case_when(cluster_id.own %in% c(clu_significance$own %>%
dplyr::filter(p.adjust < 0.05) %>%
.[["cluster_id"]] %>%
unique()) ~ TRUE,
TRUE~ FALSE) %>% as.factor(),
levels = c("TRUE", "FALSE")),
ds.SRP058181 = fct_relevel(case_when(cluster_id.SRP058181 %in% c(clu_significance$SRP058181 %>%
dplyr::filter(p.adjust < 0.05) %>%
.[["cluster_id"]] %>%
unique()) ~ TRUE,
TRUE~ FALSE) %>% as.factor(),
levels = c("TRUE", "FALSE"))) %>%
with(., table(ds.SRP058181, ds.own, dnn = c("Differentially spliced in SRP058181?", "Differentially spliced in own data?")))
cont_table
## Differentially spliced in own data?
## Differentially spliced in SRP058181? TRUE FALSE
## TRUE 64 696
## FALSE 772 11901
# Fisher's exact test
cont_table %>%
fisher.test()
##
## Fisher's Exact Test for Count Data
##
## data: .
## p-value = 0.01312
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 1.068660 1.853993
## sample estimates:
## odds ratio
## 1.417495
# Construct contingency table of differential splicing
cont_table <- clu_overlap_exact %>%
dplyr::mutate(ds.own = fct_relevel(case_when(cluster_id.own %in% c(clu_significance$own %>%
dplyr::filter(p.adjust < 0.05) %>%
.[["cluster_id"]] %>%
unique()) ~ TRUE,
TRUE~ FALSE) %>% as.factor(),
levels = c("TRUE", "FALSE")),
ds.SRP058181 = fct_relevel(case_when(cluster_id.SRP058181 %in% c(clu_significance$SRP058181 %>%
dplyr::filter(p < 0.05) %>%
.[["cluster_id"]] %>%
unique()) ~ TRUE,
TRUE~ FALSE) %>% as.factor(),
levels = c("TRUE", "FALSE"))) %>%
with(., table(ds.SRP058181, ds.own, dnn = c("Differentially spliced in SRP058181?", "Differentially spliced in own data?")))
cont_table
## Differentially spliced in own data?
## Differentially spliced in SRP058181? TRUE FALSE
## TRUE 304 4020
## FALSE 532 8577
# Fisher's exact test
cont_table %>%
fisher.test()
##
## Fisher's Exact Test for Count Data
##
## data: .
## p-value = 0.008332
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 1.050127 1.413516
## sample estimates:
## odds ratio
## 1.219146
# Find overlapping junctions/clusters
overlapped_junctions <-
GenomicRanges::findOverlaps(query = clu$SRP058181,
subject = ds_clu$own,
ignore.strand = F,
type = "equal")
# Filter by cluster names from overlap
clu_overlap_ds <-
ds_clu$own[subjectHits(overlapped_junctions), "cluster_id"] %>%
as_tibble() %>%
dplyr::inner_join(clu$SRP058181[queryHits(overlapped_junctions), "cluster_id"] %>%
as_tibble(),
by = c("seqnames", "start", "end", "strand"),
suffix = c(".own", ".SRP058181"))
# Filter generated clu_prop_overlap for cluster ids from clu_overlap_ds
# clu_prop_overlap already filtered for ambiguous clusters
# Generate geom_density
clu_prop_overlap %>%
dplyr::filter(cluster_id.own %in% clu_overlap_ds$cluster_id.own) %>%
tidyr::gather(key = "overlap", value = "prop_overlap", contains("prop_overlap")) %>%
dplyr::mutate(overlap = case_when(overlap == "prop_overlap.own" ~ "ds own --> all SRP058181",
overlap == "prop_overlap.SRP058181" ~ "all SRP058181 --> ds own")) %>%
ggplot(aes(x = prop_overlap, fill = overlap)) +
geom_histogram(binwidth = 5, alpha = 0.5, colour = "black") +
facet_wrap(~overlap) +
labs(x = "Proportion overlap of cluster from discovery dataset into validation dataset") +
theme_rhr +
theme(legend.position = "none")
Figure 7.1: Histogram of the proportion overlap between differentially spliced clusters derived from own data that are also found in SRP058181.
overlapping_clusters
) with clusters from own data that:
cluster_significance
dataframe for those clusters from overlapping_clusters
. Then, for each pairwise comparison, perform FDR-correction using the smaller set of overlapping clusters from SRP058181.intron_usage
) with cluster-level data (cluster_significance
) and filter for only those cluster ids in overlapping_clusters
. Once this is done, merge output from each dataset (ensuring to filter SRP058181 for only those clusters that are now significant following FDR-correction).same_direction_of_effect
. This column evaluates whether junctions that were found differentially spliced in the same comparison across one or both datasets share the same direction of effect.# Filter for overlapping clusters with 100% overlap from own data into SRP058181
# AND that are differentially spliced in own data
overlapping_clusters <-
clu_prop_overlap %>%
dplyr::filter(prop_overlap.own != Inf,
prop_overlap.own >= 100,
cluster_id.own %in% clu_overlap_ds$cluster_id.own
)
print(str_c("Number of overlapping clusters differentially spliced in SRP058181, prior to FDR-correction within smaller set: ",
clu_significance$SRP058181 %>%
dplyr::filter(cluster_id %in% overlapping_clusters$cluster_id.SRP058181,
status == "Success",
p.adjust < 0.05) %>%
dplyr::distinct(cluster_id) %>%
nrow()))
## [1] "Number of overlapping clusters differentially spliced in SRP058181, prior to FDR-correction within smaller set: 88"
# Correct each comparison using fdr on smaller set
clu_significance_fdr <- clu_significance
clu_significance_fdr$SRP058181 <-
clu_significance$SRP058181 %>%
dplyr::filter(cluster_id %in% overlapping_clusters$cluster_id.SRP058181, status == "Success") %>%
dplyr::group_by(comparison) %>%
dplyr::mutate(p.adjust = p.adjust(p, method = "fdr"))
print(str_c("Number of overlapping clusters differentially spliced in SRP058181, following FDR-correction within smaller set: ",
clu_significance_fdr$SRP058181 %>%
dplyr::filter(cluster_id %in% overlapping_clusters$cluster_id.SRP058181,
status == "Success",
p.adjust < 0.05) %>%
dplyr::distinct(cluster_id) %>%
nrow()))
## [1] "Number of overlapping clusters differentially spliced in SRP058181, following FDR-correction within smaller set: 116"
# Create list with dataframes from each dataset containing merged intron usage and cluster significance only for overlapping_clusters
clu_val <- vector(mode = "list", length = 2)
for(i in 1:length(clu_val)){
# Get name of each dataset
name <- names(intron_usage)[i]
# Column name for cluster ids in each dataset
col_name <- str_c("cluster_id.", name)
# Join intron usage (i.e. junction-level data) with cluster significance (cluster-level significance)
# And filter for only those clusters found within overlapping_clusters
clu_val[[i]] <- intron_usage[[name]] %>%
# Filter for appropriate comparisons
dplyr::filter(comparison %in% c("Control_vs_PD", "Control_vs_PDD", "PD_vs_PDD")) %>%
dplyr::inner_join(clu_significance_fdr[[name]]) %>%
dplyr::select(chr, start, end, comparison, cluster_id, loglr, p.adjust, df, Control, PD, PDD, deltapsi, genes) %>%
# Add direction of effect
dplyr::mutate(direction_of_effect = case_when(deltapsi > 0 ~ "upregulated",
deltapsi < 0 ~ "downregulated",
deltapsi == 0 ~ "no_change")) %>%
dplyr::filter(cluster_id %in% c(overlapping_clusters %>%
.[[col_name]]))
names(clu_val)[i] <- name
}
# Join lists together
# Add additional column evaluating for those junctions shared between comparisons whether they share direction of effect
# Remove any junctions that are not shared between comparisons
ds_val <- clu_val$own %>%
full_join(clu_val$SRP058181 %>%
dplyr::filter(p.adjust < 0.05),
by = c("comparison", "chr", "start", "end", "genes"),
suffix = c(".own", ".SRP058181")) %>%
dplyr::mutate(same_direction_of_effect = case_when(direction_of_effect.own == direction_of_effect.SRP058181 ~ TRUE,
direction_of_effect.own != direction_of_effect.SRP058181 ~ FALSE,
TRUE ~ NA))
cont_table <-
ds_val %>%
dplyr::mutate(same_direction_of_effect = fct_relevel(as.factor(same_direction_of_effect),
levels = c("TRUE", "FALSE")),
ds_both = fct_relevel(case_when(p.adjust.own < 0.05 & p.adjust.SRP058181 < 0.05~ TRUE,
TRUE~ FALSE) %>% as.factor(),
levels = c("TRUE", "FALSE"))) %>%
dplyr::filter(!is.na(same_direction_of_effect)) %>%
with(., table(same_direction_of_effect, ds_both,
dnn = c("Same direction of effect?", "Differentially spliced (same comparison) in both datasets?")))
cont_table
## Differentially spliced (same comparison) in both datasets?
## Same direction of effect? TRUE FALSE
## TRUE 93 168
## FALSE 120 187
cont_table %>%
fisher.test()
##
## Fisher's Exact Test for Count Data
##
## data: .
## p-value = 0.434
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.603794 1.231333
## sample estimates:
## odds ratio
## 0.8628749
a <- ds_val %>%
dplyr::filter(same_direction_of_effect == TRUE) %>%
dplyr::select(chr, start, end, contains("comparison"), contains("deltapsi")) %>%
tidyr::gather(key = dataset, value = dPSI, deltapsi.own:deltapsi.SRP058181) %>%
dplyr::mutate(dataset = str_replace(dataset, ".*\\.", "")) %>%
ggplot(aes(x = abs(dPSI), fill = dataset)) +
geom_density(alpha = 0.5) +
coord_cartesian(ylim = c(0,40)) +
labs(x = expression("Absolute "*Delta*"PSI")) +
theme_rhr
b <- ds_val %>%
dplyr::filter(cluster_id.own %in% c(ds_val %>%
dplyr::filter(same_direction_of_effect == TRUE) %>%
.[["cluster_id.own"]])) %>%
dplyr::select(chr, start, end, contains("comparison"), contains("deltapsi")) %>%
tidyr::gather(key = dataset, value = dPSI, deltapsi.own:deltapsi.SRP058181) %>%
dplyr::mutate(dataset = str_replace(dataset, ".*\\.", "")) %>%
ggplot(aes(x = abs(dPSI), fill = dataset)) +
geom_density(alpha = 0.5) +
coord_cartesian(ylim = c(0,40)) +
labs(x = expression("Absolute "*Delta*"PSI")) +
theme_rhr
ggarrange(a, b,
labels = c("a", "b"),
common.legend = TRUE,
legend = "bottom")
Figure 7.2: Density plot of absolute delta PSI values for (a) those junctions that (i) overlap between our own data and SRP0518181, (ii) are within clusters that are differentially spliced in one or both datasets across the same comparisons, and (iii) share the same direction of effect and (b) all junctions found within 'validated' clusters that contain junctions shared as described in (a).
a <- ds_val %>%
dplyr::filter(same_direction_of_effect == TRUE) %>%
ggplot(aes(x = deltapsi.own, y = deltapsi.SRP058181)) +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm") +
labs(x = expression(Delta*"PSI in own data"), y = expression(Delta*"PSI in SRP058181")) +
facet_zoom(xlim = c(-0.05, 0.05), ylim = c(-0.025, 0.025), zoom.size = 1) +
theme_rhr
b <- ds_val %>%
dplyr::filter(cluster_id.own %in% c(ds_val %>%
dplyr::filter(same_direction_of_effect == TRUE) %>%
.[["cluster_id.own"]])) %>%
ggplot(aes(x = deltapsi.own, y = deltapsi.SRP058181)) +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm") +
labs(x = expression(Delta*"PSI in own data"), y = expression(Delta*"PSI in SRP058181")) +
facet_zoom(xlim = c(-0.05, 0.05), ylim = c(-0.025, 0.025), zoom.size = 1) +
theme_rhr
ggarrange(a, b,
ncol = 1,
labels = c("a", "b"))
Figure 7.3: Scatterplot of absolute delta PSI values for those junctions that (a) overlap between our own data and SRP0518181, are within clusters that are differentially spliced in one or both datasets across the same comparisons and share the same direction of effect and (b) all junctions found within 'validated' clusters that contain junctions shared as described in (a).
# Correlation of junctions with same direction of effect within ds clusters
ds_val %>%
dplyr::filter(same_direction_of_effect == TRUE) %>%
cor.test(~ deltapsi.own + deltapsi.SRP058181, data = ., method = "spearman")
##
## Spearman's rank correlation rho
##
## data: deltapsi.own and deltapsi.SRP058181
## S = 385454, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.8699206
# Correlation of all junctions within ds clusters that contain some junctions with same direction of effect
ds_val %>%
dplyr::filter(cluster_id.own %in% c(ds_val %>%
dplyr::filter(same_direction_of_effect == TRUE) %>%
.[["cluster_id.own"]])) %>%
cor.test(~ deltapsi.own + deltapsi.SRP058181, data = ., method = "spearman")
##
## Spearman's rank correlation rho
##
## data: deltapsi.own and deltapsi.SRP058181
## S = 20826034, p-value = 0.1909
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.0580042
ds_val %>%
dplyr::filter(cluster_id.own %in% c(ds_val %>%
dplyr::filter(same_direction_of_effect == TRUE) %>%
.[["cluster_id.own"]])) %>%
dplyr::group_by(cluster_id.own) %>%
dplyr::summarise(n = n(),
same_direction_of_effect = sum(same_direction_of_effect, na.rm = TRUE),
prop_own = same_direction_of_effect/n) %>%
ggplot(aes(x = prop_own)) +
geom_histogram(binwidth = 0.01) +
labs(x = "Proportion of junctions within a shared cluster that share the same direction of effect") +
coord_cartesian(xlim = c(0,1.05)) +
theme_rhr
Figure 7.4: Histogram of shared clusters and the proportion of junctions within each cluster that share the same direction of effect.
Gene lists derived by using genes overlapping "validated" clusters that are differentially spliced in both datasets that contain junctions that share the same direction of effect.
# Extract genes
genes_list <- setNames(ds_val %>%
dplyr::filter(same_direction_of_effect == TRUE,
p.adjust.own < 0.05,
p.adjust.SRP058181 < 0.05
) %>%
group_split(comparison),
ds_val %>%
dplyr::filter(same_direction_of_effect == TRUE,
p.adjust.own < 0.05,
p.adjust.SRP058181 < 0.05
) %>%
.[["comparison"]] %>%
unique() %>%
sort()) %>%
# For each dataframe, extract the gene column and remove duplicate genes
lapply(., function(x){
x %>%
.[["genes"]] %>%
unique()
})
lapply(genes_list, length)
## $Control_vs_PD
## [1] 3
##
## $Control_vs_PDD
## [1] 15
##
## $PD_vs_PDD
## [1] 10
# Background list
all_genes <- leafcutter_list$own$cluster_significance %>%
# remove clusters that overlap multiple genes
dplyr::filter(!str_detect(genes, ",")) %>%
.[["genes"]] %>%
unique()
UpSetR::upset(data = fromList(genes_list), sets = names(genes_list), keep.order = TRUE, nintersects = 25, order.by = "freq")
Figure 7.5: Overlap between genes lists derived from clusters found differentially spliced in the same comparison and with the same direction of effect in our own data & SRP058181. In the matrix (lower half of panel), rows represent the pairwise comparisons and the columns represent their intersections, with a single black filled circle representing those genes that were not found to be part of an intersection, while black filled circles connected by a vertical line represent genes that intersect across panels. The size of each intersection is shown as a bar chart above the matrix (upper half of panel), while the size of each gene set is shown to the left of the matrix.
genes_list
## $Control_vs_PD
## [1] "ARHGAP9" "KIAA1755" "TPMT"
##
## $Control_vs_PDD
## [1] "NAP1L4" "RAPGEF3" "PRKAG1" "RBM25" "POMT2" "RASGRF1" "RHOT1"
## [8] "AP3D1" "ARHGEF1" "RPS9" "ADORA1" "BCAS1" "LPIN1" "ACTR8"
## [15] "RUFY3"
##
## $PD_vs_PDD
## [1] "MVD" "DLGAP1-AS4" "AMPD2" "SCMH1" "CTSZ"
## [6] "CCNL1" "ARAP2" "CAST" "ABHD11" "RGS20"
gprofiler <- lapply(genes_list, function(x){
x %>%
gost(., organism = "hsapiens",
correction_method= "fdr", significant = TRUE,
user_threshold = 0.05,
custom_bg= all_genes,
sources = c("GO", "REAC", "KEGG", "OMIM"),
evcodes = TRUE)})
gprofiler %>%
lapply(., function(x){
x$result %>%
as_tibble()
}) %>%
qdapTools::list_df2df(., col = "comparison") %>%
dplyr::select(comparison, term_name, source, everything(), -query, -significant) %>%
dplyr::filter(term_size > 20 & term_size <= 2000) %>%
ggplot(aes(x = comparison,
y = term_name,
size = precision,
fill = p_value)) +
geom_point(pch = 21) +
scale_fill_viridis_c(direction = -1) +
labs(x = "Comparison", y = "Dataset: Cell type") +
theme_rhr
Figure 7.6: Gene ontology enrichments (GO, KEGG, REACTOME) of genes across each comparison. Size of dot indicates precision, which is the proportion of genes in the input list that are annotated to the function (defined as intersection_size/query_size). Fill of dot indicates FDR-adjusted p-value.
# Load internal specificity matrices
ctd_files <-
list.files(
file.path(
path_to_results,
"snRNA/specificity_matrices/2020_March/"
),
pattern = "ctd",
full.names = T)
ctd_list <- vector(mode = "list", length = length(ctd_files))
for(i in 1:length(ctd_files)){
# Load ctd
ctd_list[[i]] <- readRDS(ctd_files[i])
# Extract file name
title <-
ctd_files[i] %>%
str_replace(".*/", "") %>%
str_replace("\\..*", "") %>%
str_replace(".*_", "")
# Name list
names(ctd_list)[i] <- title
}
# Run EWCE
# Cannot run with Control_vs_PD, as list is too small for EWCE
ewce <- MarkerGenes::run_ewce_controlled(list_of_genes = genes_list[2:3],
ctd_list$Control, ctd_list$PD,
ctd_list$PDD, ctd_list$DLB,
celltypeLevel = 1,
reps = 100000,
genelistSpecies = "human",
sctSpecies = "human",
mouse_to_human = NULL)
saveRDS(
ewce,
file =
file.path(
path_to_results,
"leafcutter/ewce/ewce_leafcutter_ds_validated_celltype.Rds"
)
)
ewce <-
readRDS(
file.path(
path_to_results,
"leafcutter/ewce/ewce_leafcutter_ds_validated_celltype.Rds"
)
)
plot <- ewce %>%
dplyr::mutate(FDR.p = p.adjust(p, method="BH"),
Class = case_when(
CellType %in% c("Astrocyte", "ASC", "Astro") ~ "Astrocyte",
CellType %in% c("MG", "Microglia") ~ "Microglia",
CellType %in% c("END", "Endo", "Endothelial cell", "Vascular") ~ "Vascular",
CellType %in% c("ODC", "Oligo", "Oligodendrocyte") ~ "Oligodendrocyte",
CellType %in% c("exCA", "Excitatory", "exDG", "exPFC", "Glutamatergic") ~ "Excitatory \n neuron",
CellType %in% c("GABA", "GABAergic", "Inhibitory") ~ "Inhibitory \n neuron",
CellType == "OPC" ~ "OPC",
CellType == "NSC" ~ "NSC"),
Study = str_replace(Study, "list\\$", ""),
Study = fct_relevel(Study,
c("Control", "PD", "PDD", "DLB")),
joint_name = str_c(Study, CellType, sep = ":"),
sd_from_mean = ifelse(p < 0.05, sd_from_mean, NA)) %>%
dplyr::arrange(Class, Study) %>%
dplyr::mutate_at(vars(joint_name), list(~ factor(., levels = unique(.)))) %>%
ggplot(aes(x = GeneSet, y = forcats::fct_rev(joint_name))) +
geom_tile(aes(fill = sd_from_mean), colour = "black") +
facet_grid(rows = vars(Class), scales = "free", space = "free_y") +
scale_fill_viridis_c(na.value = "grey") +
labs(x = "", y = "Dataset: Cell type") +
theme_rhr +
theme(panel.grid = element_blank(),
strip.text.y = element_text(size = 8, angle = 0))
plot
Figure 7.7: EWCE results using only genes overlapping clusters validated in SRP058181 and single-cell RNA-seq data from our own internal snRNA. The x-axis denotes the disease groups compared in the differential splicing analysis, while the y-axis denotes the cell type and from which specificity matrix it is derived. Enrichments are grouped by overall cell-type classes. Standard deviations from the mean denotes the distance (in standard deviations) of the target list from the mean of the bootstrapped samples. No cell types passed multiple test correction performed across all results using FDR. Results with uncorrected p > 0.05 were coloured grey.
# Filter generated clu_prop_overlap for cluster ids from clu_overlap_ds
# clu_prop_overlap already filtered for ambiguous clusters
# Generate geom_density
clu_prop_overlap_exact %>%
dplyr::filter(cluster_id.own %in% clu_overlap_ds$cluster_id.own) %>%
tidyr::gather(key = "overlap", value = "prop_overlap", contains("prop_overlap")) %>%
dplyr::mutate(overlap = case_when(overlap == "prop_overlap.own" ~ "ds own --> all SRP058181",
overlap == "prop_overlap.SRP058181" ~ "all SRP058181 --> ds own")) %>%
ggplot(aes(x = prop_overlap, fill = overlap)) +
geom_histogram(binwidth = 5, alpha = 0.5, colour = "black") +
facet_wrap(~overlap) +
labs(x = "Proportion overlap of cluster from discovery dataset into validation dataset") +
theme_rhr +
theme(legend.position = "none")
Figure 7.8: Histogram of the proportion overlap between differentially spliced clusters derived from own data that exactly match clusters that are also found in SRP058181.
overlapping_clusters
) with clusters from own data that:
cluster_significance
dataframe for those clusters from overlapping_clusters
. Then, for each pairwise comparison, perform FDR-correction using the smaller set of overlapping clusters from SRP058181.intron_usage
) with cluster-level data (cluster_significance
) and filter for only those cluster ids in overlapping_clusters
. Once this is done, merge output from each dataset (ensuring to filter SRP058181 for only those clusters that are now significant following FDR-correction).same_direction_of_effect
. This column evaluates whether junctions that were found differentially spliced in the same comparison across one or both datasets share the same direction of effect.# Filter for overlapping clusters with 100% overlap from own data into SRP058181
# AND that are differentially spliced in own data
overlapping_clusters <-
clu_prop_overlap_exact %>%
dplyr::filter(cluster_id.own %in% clu_overlap_ds$cluster_id.own)
print(str_c("Number of overlapping clusters differentially spliced in SRP058181, prior to FDR-correction within smaller set: ",
clu_significance$SRP058181 %>%
dplyr::filter(cluster_id %in% overlapping_clusters$cluster_id.SRP058181,
status == "Success",
p.adjust < 0.05) %>%
dplyr::ungroup() %>%
dplyr::distinct(cluster_id) %>%
nrow()))
## [1] "Number of overlapping clusters differentially spliced in SRP058181, prior to FDR-correction within smaller set: 64"
# Correct each comparison using fdr on smaller set
clu_significance_fdr <- clu_significance
clu_significance_fdr$SRP058181 <-
clu_significance$SRP058181 %>%
dplyr::filter(cluster_id %in% overlapping_clusters$cluster_id.SRP058181, status == "Success") %>%
dplyr::group_by(comparison) %>%
dplyr::mutate(p.adjust = p.adjust(p, method = "fdr"))
print(str_c("Number of overlapping clusters differentially spliced in SRP058181, following FDR-correction within smaller set: ",
clu_significance_fdr$SRP058181 %>%
dplyr::filter(cluster_id %in% overlapping_clusters$cluster_id.SRP058181,
status == "Success",
p.adjust < 0.05) %>%
dplyr::ungroup() %>%
dplyr::distinct(cluster_id) %>%
nrow()))
## [1] "Number of overlapping clusters differentially spliced in SRP058181, following FDR-correction within smaller set: 69"
# Create list with dataframes from each dataset containing merged intron usage and cluster significance only for overlapping_clusters
clu_val <- vector(mode = "list", length = 2)
for(i in 1:length(clu_val)){
# Get name of each dataset
name <- names(intron_usage)[i]
# Column name for cluster ids in each dataset
col_name <- str_c("cluster_id.", name)
# Join intron usage (i.e. junction-level data) with cluster significance (cluster-level significance)
# And filter for only those clusters found within overlapping_clusters
clu_val[[i]] <- intron_usage[[name]] %>%
# Filter for appropriate comparisons
dplyr::filter(comparison %in% c("Control_vs_PD", "Control_vs_PDD", "PD_vs_PDD")) %>%
# dplyr::inner_join(clu_significance_fdr[[name]]) %>%
dplyr::inner_join(clu_significance_fdr[[name]]) %>%
dplyr::select(chr, start, end, comparison, cluster_id, loglr, p.adjust, df, Control, PD, PDD, deltapsi, genes) %>%
# Add direction of effect
dplyr::mutate(direction_of_effect = case_when(deltapsi > 0 ~ "upregulated",
deltapsi < 0 ~ "downregulated",
deltapsi == 0 ~ "no_change")) %>%
dplyr::filter(cluster_id %in% c(overlapping_clusters %>%
.[[col_name]]))
names(clu_val)[i] <- name
}
# Join lists together
# Add additional column evaluating for those junctions shared between comparisons whether they share direction of effect
# Remove any junctions that are not shared between comparisons
ds_val <- clu_val$own %>%
full_join(clu_val$SRP058181 %>%
dplyr::filter(p.adjust < 0.05),
by = c("comparison", "chr", "start", "end", "genes"),
suffix = c(".own", ".SRP058181")) %>%
dplyr::mutate(same_direction_of_effect = case_when(direction_of_effect.own == direction_of_effect.SRP058181 ~ TRUE,
direction_of_effect.own != direction_of_effect.SRP058181 ~ FALSE,
TRUE ~ NA))
cont_table <-
ds_val %>%
dplyr::mutate(same_direction_of_effect = fct_relevel(as.factor(same_direction_of_effect),
levels = c("TRUE", "FALSE")),
ds_both = fct_relevel(case_when(p.adjust.own < 0.05 & p.adjust.SRP058181 < 0.05~ TRUE,
TRUE~ FALSE) %>% as.factor(),
levels = c("TRUE", "FALSE"))) %>%
dplyr::filter(!is.na(same_direction_of_effect)) %>%
with(., table(same_direction_of_effect, ds_both,
dnn = c("Same direction of effect?", "Differentially spliced (same comparison) in both datasets?")))
cont_table
## Differentially spliced (same comparison) in both datasets?
## Same direction of effect? TRUE FALSE
## TRUE 49 114
## FALSE 67 111
cont_table %>%
fisher.test()
##
## Fisher's Exact Test for Count Data
##
## data: .
## p-value = 0.1697
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.4413735 1.1461547
## sample estimates:
## odds ratio
## 0.7128169
a <- ds_val %>%
dplyr::filter(same_direction_of_effect == TRUE) %>%
dplyr::select(chr, start, end, contains("comparison"), contains("deltapsi")) %>%
tidyr::gather(key = dataset, value = dPSI, deltapsi.own:deltapsi.SRP058181) %>%
dplyr::mutate(dataset = str_replace(dataset, ".*\\.", "")) %>%
ggplot(aes(x = abs(dPSI), fill = dataset)) +
geom_density(alpha = 0.5) +
coord_cartesian(ylim = c(0,40)) +
labs(x = expression("Absolute "*Delta*"PSI")) +
theme_rhr
b <- ds_val %>%
dplyr::filter(cluster_id.own %in% c(ds_val %>%
dplyr::filter(same_direction_of_effect == TRUE) %>%
.[["cluster_id.own"]])) %>%
dplyr::select(chr, start, end, contains("comparison"), contains("deltapsi")) %>%
tidyr::gather(key = dataset, value = dPSI, deltapsi.own:deltapsi.SRP058181) %>%
dplyr::mutate(dataset = str_replace(dataset, ".*\\.", "")) %>%
ggplot(aes(x = abs(dPSI), fill = dataset)) +
geom_density(alpha = 0.5) +
coord_cartesian(ylim = c(0,40)) +
labs(x = expression("Absolute "*Delta*"PSI")) +
theme_rhr
ggarrange(a, b,
labels = c("a", "b"),
common.legend = TRUE,
legend = "bottom")
Figure 7.9: Density plot of absolute delta PSI values for (a) those junctions that (i) overlap between our own data and SRP0518181, (ii) are within exactly-matching clusters that are differentially spliced in one or both datasets across the same comparisons, and (iii) share the same direction of effect and (b) all junctions found within 'validated' clusters that contain junctions shared as described in (a).
a <- ds_val %>%
dplyr::filter(same_direction_of_effect == TRUE) %>%
ggplot(aes(x = deltapsi.own, y = deltapsi.SRP058181)) +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm") +
labs(x = expression(Delta*"PSI in own data"), y = expression(Delta*"PSI in SRP058181")) +
facet_zoom(xlim = c(-0.05, 0.05), ylim = c(-0.025, 0.025), zoom.size = 1) +
theme_rhr
b <- ds_val %>%
dplyr::filter(cluster_id.own %in% c(ds_val %>%
dplyr::filter(same_direction_of_effect == TRUE) %>%
.[["cluster_id.own"]])) %>%
ggplot(aes(x = deltapsi.own, y = deltapsi.SRP058181)) +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm") +
labs(x = expression(Delta*"PSI in own data"), y = expression(Delta*"PSI in SRP058181")) +
facet_zoom(xlim = c(-0.05, 0.05), ylim = c(-0.025, 0.025), zoom.size = 1) +
theme_rhr
ggarrange(a, b,
ncol = 1,
labels = c("a", "b"))
Figure 7.10: Scatterplot of absolute delta PSI values for those junctions that (a) overlap between our own data and SRP0518181, are within exactly-matching clusters that are differentially spliced in one or both datasets across the same comparisons and share the same direction of effect and (b) all junctions found within 'validated' clusters that contain junctions shared as described in (a).
# Correlation of junctions with same direction of effect within ds clusters
ds_val %>%
dplyr::filter(same_direction_of_effect == TRUE) %>%
cor.test(~ deltapsi.own + deltapsi.SRP058181, data = ., method = "spearman")
##
## Spearman's rank correlation rho
##
## data: deltapsi.own and deltapsi.SRP058181
## S = 94434, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.8691622
# Correlation of all junctions within ds clusters that contain some junctions with same direction of effect
ds_val %>%
dplyr::filter(cluster_id.own %in% c(ds_val %>%
dplyr::filter(same_direction_of_effect == TRUE) %>%
.[["cluster_id.own"]])) %>%
cor.test(~ deltapsi.own + deltapsi.SRP058181, data = ., method = "spearman")
##
## Spearman's rank correlation rho
##
## data: deltapsi.own and deltapsi.SRP058181
## S = 3825230, p-value = 0.009352
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.1499394
ds_val %>%
dplyr::filter(cluster_id.own %in% c(ds_val %>%
dplyr::filter(same_direction_of_effect == TRUE) %>%
.[["cluster_id.own"]])) %>%
dplyr::group_by(cluster_id.own) %>%
dplyr::summarise(n = n(),
same_direction_of_effect = sum(same_direction_of_effect, na.rm = TRUE),
prop_own = same_direction_of_effect/n) %>%
ggplot(aes(x = prop_own)) +
geom_histogram(binwidth = 0.01) +
labs(x = "Proportion of junctions within a shared cluster that share the same direction of effect") +
coord_cartesian(xlim = c(0,1.05)) +
theme_rhr
Figure 7.11: Histogram of shared clusters and the proportion of junctions within each cluster that share the same direction of effect.
clu_significance_fdr$SRP058181 <- clu_significance_fdr$SRP058181 %>%
dplyr::bind_rows(clu_significance$SRP058181 %>%
dplyr::anti_join(clu_significance_fdr$SRP058181,
by = c("comparison", "cluster_id")))
# Construct contingency table of differential splicing
clu_overlap_exact_ds <- clu_overlap_exact %>%
dplyr::mutate(ds.own = fct_relevel(case_when(cluster_id.own %in% c(clu_significance_fdr$own %>%
dplyr::filter(p.adjust < 0.05) %>%
.[["cluster_id"]] %>%
unique()) ~ TRUE,
TRUE~ FALSE) %>% as.factor(),
levels = c("TRUE", "FALSE")),
ds.SRP058181 = fct_relevel(case_when(cluster_id.SRP058181 %in% c(clu_significance_fdr$SRP058181 %>%
dplyr::filter(p.adjust < 0.05) %>%
.[["cluster_id"]] %>%
unique()) ~ TRUE,
TRUE~ FALSE) %>% as.factor(),
levels = c("TRUE", "FALSE")))
cont_table <- clu_overlap_exact_ds %>%
with(., table(ds.SRP058181, ds.own, dnn = c("Differentially spliced in SRP058181?", "Differentially spliced in own data?")))
cont_table
## Differentially spliced in own data?
## Differentially spliced in SRP058181? TRUE FALSE
## TRUE 69 696
## FALSE 767 11901
# saveRDS(clu_overlap_exact_ds, file.path(path_to_results, "leafcutter/diff_splicing_PCaxes/cluster_overlap_exact_ds_with_SRP058181.Rds"))
# Fisher's exact test
cont_table %>%
fisher.test()
##
## Fisher's Exact Test for Count Data
##
## data: .
## p-value = 0.001956
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 1.170652 1.995428
## sample estimates:
## odds ratio
## 1.538183
Gene lists derived by using genes overlapping exactly-matching "validated" clusters that are differentially spliced in both datasets that contain junctions that share the same direction of effect.
# Extract genes
genes_list <- setNames(ds_val %>%
dplyr::filter(same_direction_of_effect == TRUE,
p.adjust.own < 0.05,
p.adjust.SRP058181 < 0.05
) %>%
group_split(comparison),
ds_val %>%
dplyr::filter(same_direction_of_effect == TRUE,
p.adjust.own < 0.05,
p.adjust.SRP058181 < 0.05
) %>%
.[["comparison"]] %>%
unique() %>%
sort()) %>%
# For each dataframe, extract the gene column and remove duplicate genes
lapply(., function(x){
x %>%
.[["genes"]] %>%
unique()
})
lapply(genes_list, length)
## $Control_vs_PD
## [1] 2
##
## $Control_vs_PDD
## [1] 8
##
## $PD_vs_PDD
## [1] 5
# Background list
all_genes <- leafcutter_list$own$cluster_significance %>%
# remove clusters that overlap multiple genes
dplyr::filter(!str_detect(genes, ",")) %>%
.[["genes"]] %>%
unique()
UpSetR::upset(data = fromList(genes_list), sets = names(genes_list), keep.order = TRUE, nintersects = 25, order.by = "freq")
Figure 7.12: Overlap between genes lists derived from clusters found differentially spliced in the same comparison and with the same direction of effect in our own data & SRP058181. In the matrix (lower half of panel), rows represent the pairwise comparisons and the columns represent their intersections, with a single black filled circle representing those genes that were not found to be part of an intersection, while black filled circles connected by a vertical line represent genes that intersect across panels. The size of each intersection is shown as a bar chart above the matrix (upper half of panel), while the size of each gene set is shown to the left of the matrix.
genes_list
## $Control_vs_PD
## [1] "ARHGAP9" "TPMT"
##
## $Control_vs_PDD
## [1] "PRKAG1" "RBM25" "POMT2" "RASGRF1" "ARHGEF1" "BCAS1" "LPIN1"
## [8] "ACTR8"
##
## $PD_vs_PDD
## [1] "DLGAP1-AS4" "SCMH1" "CCNL1" "ARAP2" "CAST"
gprofiler <- lapply(genes_list, function(x){
x %>%
gost(., organism = "hsapiens",
correction_method= "fdr", significant = TRUE,
user_threshold = 0.05,
custom_bg= all_genes,
sources = c("GO", "REAC", "KEGG", "OMIM"),
evcodes = TRUE)})
gprofiler %>%
lapply(., function(x){
x$result %>%
as_tibble()
}) %>%
qdapTools::list_df2df(., col = "comparison") %>%
dplyr::select(comparison, term_name, source, everything(), -query, -significant) %>%
dplyr::filter(term_size > 20 & term_size <= 2000) %>%
ggplot(aes(x = comparison,
y = term_name,
size = precision,
fill = p_value)) +
geom_point(pch = 21) +
scale_fill_viridis_c(direction = -1) +
labs(x = "Comparison", y = "Dataset: Cell type") +
theme_rhr
Figure 7.13: Gene ontology enrichments (GO, KEGG, REACTOME) of genes across each comparison. Size of dot indicates precision, which is the proportion of genes in the input list that are annotated to the function (defined as intersection_size/query_size). Fill of dot indicates FDR-adjusted p-value.
# Also, try running with clusterProfiler, which has a number of useful functionalities:
# `compareCluster`: permits comparison of GO enrichments across groups.
# Uses hypergeometric model to perform enrichment analysis within each group (i.e. does not account for semantic similarity in testing as GProfiler does, which could be a disadvantage)
# Allows semantic trimming of terms.
library(clusterProfiler)
ont <- c("BP", "CC", "MF")
cluster_compare_GO <- setNames(vector(mode = "list", length = length(ont)),
ont)
for(i in 1:length(ont)){
cluster_compare_GO[[i]] <-
genes_list %>%
qdapTools::list2df(col1 = "genes", col2 = "comparison") %>%
clusterProfiler::compareCluster(genes ~ comparison,
data=.,
keyType = "SYMBOL",
fun="enrichGO",
ont = ont[i],
pAdjustMethod = "BH",
qvalueCutoff = 0.05,
OrgDb='org.Hs.eg.db')
}
# saveRDS(cluster_compare_GO, file.path(path_to_results, "leafcutter/gprofiler/clusterProfiler_leafcutter_overlap_exact_ds_with_SRP058181.Rds"))
cluster_compare_GO <-
readRDS(
file.path(path_to_results, "leafcutter/gprofiler/clusterProfiler_leafcutter_overlap_exact_ds_with_SRP058181.Rds")
)
cluster_compare_GO %>%
lapply(., function(df){
df %>%
clusterProfiler::simplify(., cutoff=0.4, by="p.adjust", select_fun=min) %>%
clusterProfiler::dotplot(., showCategory = NULL, font.size = 8) +
geom_point(shape = 1,colour = "black") +
scale_size_continuous(name = "precision") +
scale_colour_viridis_c(direction = -1) +
theme_rhr
})
## $BP
##
## $CC
##
## $MF
# Load internal specificity matrices
ctd_files <-
list.files(
file.path(
path_to_results,
"snRNA/specificity_matrices/2020_March/"
),
pattern = "ctd",
full.names = T)
ctd_list <- vector(mode = "list", length = length(ctd_files))
for(i in 1:length(ctd_files)){
# Load ctd
ctd_list[[i]] <- readRDS(ctd_files[i])
# Extract file name
title <-
ctd_files[i] %>%
str_replace(".*/", "") %>%
str_replace("\\..*", "") %>%
str_replace(".*_", "")
# Name list
names(ctd_list)[i] <- title
}
# Run EWCE
# Cannot run with Control_vs_PD, as list is too small for EWCE
ewce <- MarkerGenes::run_ewce_controlled(list_of_genes = genes_list[2:3],
ctd_list$Control, ctd_list$PD,
ctd_list$PDD, ctd_list$DLB,
celltypeLevel = 1,
reps = 100000,
genelistSpecies = "human",
sctSpecies = "human",
mouse_to_human = NULL)
saveRDS(
ewce,
file =
file.path(
path_to_results,
"leafcutter/ewce/ewce_leafcutter_ds_exact_validated_celltype.Rds"
)
)
ewce <-
readRDS(
file.path(
path_to_results,
"leafcutter/ewce/ewce_leafcutter_ds_exact_validated_celltype.Rds"
)
)
plot <- ewce %>%
dplyr::filter(!Study %in% c("AIBS2018", "DRONC_human")) %>%
dplyr::mutate(FDR.p = p.adjust(p, method="BH"),
Class = case_when(
CellType %in% c("Astrocyte", "ASC", "Astro") ~ "Astrocyte",
CellType %in% c("MG", "Microglia") ~ "Microglia",
CellType %in% c("END", "Endo", "Endothelial cell", "Vascular") ~ "Vascular",
CellType %in% c("ODC", "Oligo", "Oligodendrocyte") ~ "Oligodendrocyte",
CellType %in% c("exCA", "Excitatory", "exDG", "exPFC", "Glutamatergic") ~ "Excitatory \n neuron",
CellType %in% c("GABA", "GABAergic", "Inhibitory") ~ "Inhibitory \n neuron",
CellType == "OPC" ~ "OPC",
CellType == "NSC" ~ "NSC"),
Study = str_replace(Study, "list\\$", ""),
Study = fct_relevel(Study,
c("Control", "PD", "PDD", "DLB")),
joint_name = str_c(Study, CellType, sep = ":"),
sd_from_mean = ifelse(p <= 0.05, sd_from_mean, NA)) %>%
dplyr::arrange(Class, Study) %>%
dplyr::mutate_at(vars(joint_name), list(~ factor(., levels = unique(.)))) %>%
ggplot(aes(x = GeneSet, y = forcats::fct_rev(joint_name))) +
geom_tile(aes(fill = sd_from_mean), colour = "black") +
facet_grid(rows = vars(Class), scales = "free", space = "free_y") +
scale_fill_viridis_c(na.value = "grey") +
labs(x = "", y = "Dataset: Cell type") +
theme_rhr +
theme(panel.grid = element_blank(),
strip.text.y = element_text(size = 8, angle = 0))
plot
Figure 7.14: EWCE results using only genes overlapping exactly-matching clusters validated in SRP058181 and single-cell RNA-seq data from our own internal snRNA. The x-axis denotes the disease groups compared in the differential splicing analysis, while the y-axis denotes the cell type and from which specificity matrix it is derived. Enrichments are grouped by overall cell-type classes. Standard deviations from the mean denotes the distance (in standard deviations) of the target list from the mean of the bootstrapped samples. Non-significant results (p > 0.05) were coloured grey.
sessionInfo()
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.6 LTS
##
## Matrix products: default
## BLAS: /usr/lib/libblas/libblas.so.3.6.0
## LAPACK: /usr/lib/lapack/liblapack.so.3.6.0
##
## locale:
## [1] LC_CTYPE=en_GB.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_GB.UTF-8 LC_COLLATE=en_GB.UTF-8
## [5] LC_MONETARY=en_GB.UTF-8 LC_MESSAGES=en_GB.UTF-8
## [7] LC_PAPER=en_GB.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 parallel stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] MarkerGenes_0.0.0.9000 EWCE_0.99.2 dasper_0.0.0.9000
## [4] testthat_2.3.2 UpSetR_1.4.0 rtracklayer_1.46.0
## [7] GenomicRanges_1.38.0 GenomeInfoDb_1.22.1 IRanges_2.20.2
## [10] S4Vectors_0.24.4 BiocGenerics_0.32.0 readxl_1.3.1
## [13] forcats_0.5.1 stringr_1.4.0 dplyr_1.0.2
## [16] purrr_0.3.4 readr_1.4.0 tidyr_1.1.1
## [19] tibble_3.0.3 tidyverse_1.3.0 ggsci_2.9
## [22] ggpubr_0.4.0 ggforce_0.3.2 ggplot2_3.3.2
## [25] gprofiler2_0.1.9 doParallel_1.0.15 iterators_1.0.12
## [28] foreach_1.5.0 devtools_2.3.2 usethis_1.6.3
## [31] data.table_1.13.0 clusterProfiler_3.14.3
##
## loaded via a namespace (and not attached):
## [1] utf8_1.1.4 tidyselect_1.1.0
## [3] RSQLite_2.2.0 AnnotationDbi_1.48.0
## [5] htmlwidgets_1.5.3 grid_3.6.1
## [7] BiocParallel_1.20.1 munsell_0.5.0
## [9] codetools_0.2-16 chron_2.3-56
## [11] DT_0.15 withr_2.2.0
## [13] colorspace_2.0-0 GOSemSim_2.17.1
## [15] Biobase_2.46.0 highr_0.8
## [17] knitr_1.29 rstudioapi_0.13
## [19] ggsignif_0.6.0 DOSE_3.12.0
## [21] labeling_0.4.2 urltools_1.7.3
## [23] GenomeInfoDbData_1.2.2 polyclip_1.10-0
## [25] bit64_4.0.2 farver_2.0.3
## [27] rprojroot_2.0.2 vctrs_0.3.2
## [29] generics_0.0.2 xfun_0.16
## [31] BiocFileCache_1.10.2 R6_2.5.0
## [33] graphlayouts_0.7.0 bitops_1.0-6
## [35] cachem_1.0.3 fgsea_1.12.0
## [37] gridGraphics_0.5-0 DelayedArray_0.12.3
## [39] assertthat_0.2.1 scales_1.1.1
## [41] ggraph_2.0.3 enrichplot_1.6.1
## [43] gtable_0.3.0 processx_3.4.5
## [45] tidygraph_1.2.0 rlang_0.4.7
## [47] splines_3.6.1 rstatix_0.6.0
## [49] lazyeval_0.2.2 broom_0.7.0
## [51] europepmc_0.4 BiocManager_1.30.10
## [53] yaml_2.2.1 reshape2_1.4.4
## [55] abind_1.4-5 modelr_0.1.8
## [57] crosstalk_1.1.0.1 backports_1.1.8
## [59] qvalue_2.18.0 tools_3.6.1
## [61] bookdown_0.21 qdapTools_1.3.5
## [63] ggplotify_0.0.5 ellipsis_0.3.1
## [65] RColorBrewer_1.1-2 ggdendro_0.1.21
## [67] sessioninfo_1.1.1 ggridges_0.5.2
## [69] Rcpp_1.0.5 plyr_1.8.6
## [71] progress_1.2.2 zlibbioc_1.32.0
## [73] RCurl_1.98-1.2 ps_1.3.4
## [75] prettyunits_1.1.1 openssl_1.4.2
## [77] viridis_0.5.1 cowplot_1.0.0
## [79] SummarizedExperiment_1.16.1 haven_2.3.1
## [81] ggrepel_0.8.2 here_1.0.0
## [83] fs_1.5.0 magrittr_2.0.1
## [85] DO.db_2.9 openxlsx_4.2.3
## [87] triebeard_0.3.0 reprex_1.0.0
## [89] matrixStats_0.56.0 pkgload_1.1.0
## [91] hms_1.0.0 evaluate_0.14
## [93] XML_3.99-0.3 rio_0.5.16
## [95] gridExtra_2.3 biomaRt_2.42.1
## [97] compiler_3.6.1 crayon_1.4.1
## [99] htmltools_0.5.1.1 mgcv_1.8-29
## [101] lubridate_1.7.9 DBI_1.1.1
## [103] tweenr_1.0.1 dbplyr_1.4.4
## [105] rappdirs_0.3.1 MASS_7.3-51.4
## [107] refGenome_1.7.7 Matrix_1.2-17
## [109] car_3.0-9 cli_2.2.0.9000
## [111] igraph_1.2.5 doBy_4.6.7
## [113] pkgconfig_2.0.3 rvcheck_0.1.8
## [115] GenomicAlignments_1.22.1 foreign_0.8-72
## [117] plotly_4.9.2.1 xml2_1.3.2
## [119] XVector_0.26.0 rvest_0.3.6
## [121] callr_3.5.1 digest_0.6.27
## [123] Biostrings_2.54.0 HGNChelper_0.8.1
## [125] rmarkdown_2.5 cellranger_1.1.0
## [127] fastmatch_1.1-0 Deriv_4.0
## [129] curl_4.3 Rsamtools_2.2.3
## [131] nlme_3.1-141 lifecycle_0.2.0
## [133] jsonlite_1.7.1 carData_3.0-4
## [135] fansi_0.4.2 limma_3.42.2
## [137] askpass_1.1 desc_1.2.0
## [139] viridisLite_0.3.0 pillar_1.4.6
## [141] lattice_0.20-38 fastmap_1.0.1
## [143] httr_1.4.2 pkgbuild_1.1.0
## [145] GO.db_3.10.0 glue_1.4.2
## [147] remotes_2.2.0 zip_2.1.0
## [149] bit_4.0.4 stringi_1.5.3
## [151] blob_1.2.1 memoise_2.0.0