Aim: use stratified LDSC to test whether heritability of various diseases (including AD and PD) enriches within genes found differentially expressed within a cell type across disease groups.
source(here::here("R", "file_paths.R"))
source(here::here("R", "EWCE_related_functions.R"))
source(here::here("R", "upsetR_common.R"))
s-LDSC is a method that allows you to determine the relative contribution of an annotation to disease heritability. We use the co-efficient p-value as our read-out for significance, as it tells us whether our annotation is significantly contributing to disease heritability after we have accounted for underlying genetic architecture (as represented by a 53-annotation baseline model, which tags coding regions, enhancer regions, histones, promoters, etc.).
comparison[2]_vs_comparison[1]
, where comparison[1]
is the baseline/reference value and comparison[2]
is in reference to the baseline value. That is, directionality of effect is the same as how we (at UCL) have been performing our analyses, albeit with a different naming convention. To reflect our own naming convention, I swapped theirs.# Thresholds
fc_threshold <- log2(1.5)
fdr_threshold <- 0.05
# File paths
DEG_files <-
list.files(
file.path(
path_to_results,
"snRNA/differential_gene_expr/2021_Mar"),
pattern = "_Allfiles2",
full.names = T
)
# Load across files
DEG_list <- setNames(DEG_files %>%
lapply(., function(file){
file %>%
readRDS() %>%
lapply(., function(df){
df %>%
dplyr::rename(gene = primerid)
})
}),
DEG_files %>%
str_replace(".*/", "") %>%
str_replace("_Allfiles2.*", ""))
# Create dataframe
DEG_df <-
DEG_list %>%
lapply(., function(DEG){
DEG %>%
qdapTools::list_df2df(col1 = "cell_type")
}) %>%
qdapTools::list_df2df(col1 = "comparison") %>%
# Change naming convention to reflect our convention
tidyr::separate(comparison, into = c("comparison_2", "comparison_1"), sep = "_") %>%
dplyr::mutate(comparison_1 = str_replace(comparison_1, pattern = "C", "Control"),
comparison = str_c(comparison_1, ".vs.", comparison_2),
# Add direction of effect
direction_of_effect = ifelse(coef > 0, "up", "down"),
cell_type =
case_when(cell_type == "Astrocytes" ~ "Astro",
cell_type == "Oligodendrocytes" ~ "Oligo",
TRUE ~ cell_type)) %>%
dplyr::select(comparison, everything(), -comparison_1, -comparison_2) %>%
dplyr::filter(fdr < fdr_threshold, abs(coef) > fc_threshold) %>%
as_tibble()
# Plot number of differentially expressed genes
DEG_df %>%
dplyr::mutate(direction_of_effect = ifelse(coef > 0, "up", "down")) %>%
dplyr::filter(fdr < fdr_threshold, abs(coef) > fc_threshold) %>%
dplyr::group_by(comparison, direction_of_effect, cell_type) %>%
dplyr::summarise(n = n()) %>%
ggplot(aes(x = cell_type, y = n)) +
geom_col(position = position_dodge()) +
labs(x = "Cell type", y = "Number of DEG (FDR < 0.05 & logFC > log2(1.5))") +
facet_wrap(vars(comparison, direction_of_effect)) +
theme_rhr
# Remove cell type/comparisons with less than 20 DEG
gene_list <-
DEG_df %>%
dplyr::filter(fdr < fdr_threshold, abs(coef) > fc_threshold) %>%
dplyr::mutate(cell_type_comparison = str_c(cell_type, ":", comparison)) %>%
dplyr::group_by(cell_type_comparison) %>%
dplyr::filter(n() >= 20) %>%
dplyr::ungroup() %>%
group_split(cell_type, comparison)
names(gene_list) <- gene_list %>%
lapply(., function(df){
df %>% .[["cell_type_comparison"]] %>% unique()
})
upset(fromList(gene_list %>%
lapply(., function(df){
df %>% .[["gene"]]
})),
sets = names(gene_list),
keep.order = TRUE,
nintersects = 15,
order.by = "freq")
cd /home/rreynolds/projects/Aim2_PDsequencing_wd/LBD-seq-bulk-analyses/
nohup Rscript \
/home/rreynolds/projects/Aim2_PDsequencing_wd/LBD-seq-bulk-analyses/misc_scripts/LDSC_cell_type_DEG.R \
&>/home/rreynolds/projects/Aim2_PDsequencing_wd/LBD-seq-bulk-analyses/nohup_logs/LDSC_cell_type_DEG.log&
file_paths <- list.files(path =
file.path(
path_to_raw_data,
"ldsc_annotations/celltype.DEG/"
),
pattern = ".results",
recursive = T,
full.names = T)
results <- LDSCforRyten::Assimilate_H2_results(path_to_results = file_paths) %>%
LDSCforRyten::Calculate_enrichment_SE_and_logP(., one_sided = "+") %>%
tidyr::separate(annot_name, into = c("comparison", "cell_type", "direction_of_effect"),sep = ":") %>%
dplyr::mutate(comparison = str_replace_all(comparison, "\\.", "_") %>%
fct_relevel(.,
c("Control_vs_PD", "Control_vs_PDD", "Control_vs_DLB", "PD_vs_PDD", "PD_vs_DLB", "PDD_vs_DLB"))) %>%
dplyr::select(GWAS, comparison, cell_type, direction_of_effect, everything())
write_delim(results,
path = file.path(path_to_results, "ldsc/sldsc_celltype_DEG.txt"),
delim = "\t")
results <- read_delim(file = file.path(path_to_results, "ldsc/sldsc_celltype_DEG.txt"),
delim = "\t")
print("Results for annotations with direction of effect")
## [1] "Results for annotations with direction of effect"
results %>%
dplyr::filter(!is.na(direction_of_effect)) %>%
dplyr::group_by(GWAS, comparison) %>%
dplyr::mutate(Z_score_FDR = p.adjust(Z_score_P, method = "fdr")) %>%
dplyr::filter(Z_score_FDR < 0.05) %>%
dplyr::select(-contains("SE"), -contains("log")) %>%
DT::datatable(rownames = FALSE,
options = list(scrollX = TRUE),
class = 'white-space: nowrap')
print("Results for joint lists")
## [1] "Results for joint lists"
results %>%
dplyr::filter(is.na(direction_of_effect)) %>%
dplyr::group_by(GWAS, comparison) %>%
dplyr::mutate(Z_score_FDR = p.adjust(Z_score_P, method = "fdr")) %>%
dplyr::filter(Z_score_FDR < 0.05) %>%
dplyr::select(-contains("SE"), -contains("log")) %>%
DT::datatable(rownames = FALSE,
options = list(scrollX = TRUE),
class = 'white-space: nowrap')
results %>%
dplyr::filter(Prop._h2 + Prop._h2_std_error < 0, `Coefficient_z-score` > 0) %>%
.[["GWAS"]] %>%
unique()
## [1] "PD2019.base.dementia.hg38" "PD2019.surv.depr.hg38"
## [3] "PD2019.surv.dyskinesia.hg38" "PD2019.surv.motorflux.hg38"
print("Results for annotations with direction of effect")
## [1] "Results for annotations with direction of effect"
results %>%
dplyr::filter(!is.na(direction_of_effect),
!str_detect(GWAS, "base"),
!str_detect(GWAS, "surv")) %>%
dplyr::group_by(GWAS, comparison) %>%
dplyr::mutate(Z_score_FDR = p.adjust(Z_score_P, method = "fdr")) %>%
dplyr::filter(Z_score_FDR < 0.05) %>%
dplyr::select(-contains("SE"), -contains("log")) %>%
DT::datatable(rownames = FALSE,
options = list(scrollX = TRUE),
class = 'white-space: nowrap')
print("Results for joint lists")
## [1] "Results for joint lists"
results %>%
dplyr::filter(is.na(direction_of_effect),
!str_detect(GWAS, "base"),
!str_detect(GWAS, "surv")) %>%
dplyr::group_by(GWAS, comparison) %>%
dplyr::mutate(Z_score_FDR = p.adjust(Z_score_P, method = "fdr")) %>%
dplyr::filter(Z_score_FDR < 0.05) %>%
dplyr::select(-contains("SE"), -contains("log")) %>%
DT::datatable(rownames = FALSE,
options = list(scrollX = TRUE),
class = 'white-space: nowrap')
print("Results for joint lists")
## [1] "Results for joint lists"
results %>%
dplyr::filter(is.na(direction_of_effect),
!str_detect(GWAS, "base"),
!str_detect(GWAS, "surv")) %>%
dplyr::group_by(GWAS, comparison) %>%
dplyr::filter(Z_score_P < 0.05) %>%
dplyr::select(-contains("SE"), -contains("log")) %>%
DT::datatable(rownames = FALSE,
options = list(scrollX = TRUE),
class = 'white-space: nowrap')
session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
## setting value
## version R version 3.6.1 (2019-07-05)
## os Ubuntu 16.04.6 LTS
## system x86_64, linux-gnu
## ui X11
## language (EN)
## collate en_GB.UTF-8
## ctype en_GB.UTF-8
## tz Europe/London
## date 2021-05-24
##
## ─ Packages ───────────────────────────────────────────────────────────────────
## package * version date lib
## abind 1.4-5 2016-07-21 [2]
## assertthat 0.2.1 2019-03-21 [2]
## backports 1.1.8 2020-06-17 [1]
## bitops 1.0-6 2013-08-17 [2]
## blob 1.2.1 2020-01-20 [1]
## bookdown 0.21 2020-10-13 [1]
## broom 0.7.0 2020-07-09 [1]
## cachem 1.0.3 2021-02-04 [1]
## callr 3.5.1 2020-10-13 [1]
## car 3.0-9 2020-08-11 [1]
## carData 3.0-4 2020-05-22 [1]
## caTools 1.18.0 2020-01-17 [1]
## cellranger 1.1.0 2016-07-27 [2]
## chron 2.3-56 2020-08-18 [1]
## cli 2.2.0.9000 2021-01-22 [1]
## colorspace 2.0-0 2020-11-11 [2]
## corrplot * 0.84 2017-10-16 [1]
## crayon 1.4.1 2021-02-08 [2]
## crosstalk 1.1.0.1 2020-03-13 [1]
## curl 4.3 2019-12-02 [2]
## data.table 1.13.0 2020-07-24 [1]
## DBI 1.1.1 2021-01-15 [2]
## dbplyr 1.4.4 2020-05-27 [1]
## desc 1.2.0 2018-05-01 [1]
## devtools * 2.3.2 2020-09-18 [1]
## digest 0.6.27 2020-10-24 [2]
## dplyr * 1.0.2 2020-08-18 [1]
## DT * 0.15 2020-08-05 [1]
## ellipsis 0.3.1 2020-05-15 [1]
## evaluate 0.14 2019-05-28 [2]
## farver 2.0.3 2020-01-16 [1]
## fastmap 1.0.1 2019-10-08 [1]
## forcats * 0.5.1 2021-01-27 [2]
## foreign 0.8-72 2019-08-02 [4]
## fs 1.5.0 2020-07-31 [1]
## gdata 2.18.0 2017-06-06 [1]
## GeneOverlap * 1.22.0 2019-10-29 [1]
## generics 0.0.2 2018-11-29 [1]
## ggdendro * 0.1.21 2020-08-11 [1]
## ggplot2 * 3.3.2 2020-06-19 [1]
## ggpubr * 0.4.0 2020-06-27 [1]
## ggsignif 0.6.0 2019-11-04 [1]
## glue 1.4.2 2020-08-27 [1]
## gplots 3.0.4 2020-07-05 [1]
## gridExtra 2.3 2017-09-09 [2]
## gtable 0.3.0 2019-03-25 [2]
## gtools 3.8.2 2020-03-31 [1]
## haven 2.3.1 2020-06-01 [1]
## here 1.0.0 2020-11-15 [1]
## highr 0.8 2019-03-20 [2]
## hms 1.0.0 2021-01-13 [2]
## htmltools 0.5.1.1 2021-01-22 [1]
## htmlwidgets 1.5.3 2020-12-10 [2]
## httr 1.4.2 2020-07-20 [1]
## jsonlite 1.7.1 2020-09-07 [1]
## KernSmooth 2.23-15 2015-06-29 [4]
## knitr 1.29 2020-06-23 [1]
## labeling 0.4.2 2020-10-20 [2]
## LDSCforRyten * 0.0.0.9000 2020-04-03 [1]
## lifecycle 0.2.0 2020-03-06 [1]
## lubridate 1.7.9 2020-06-08 [1]
## magrittr 2.0.1 2020-11-17 [2]
## MASS 7.3-51.4 2019-04-26 [4]
## memoise 2.0.0 2021-01-26 [2]
## modelr 0.1.8 2020-05-19 [1]
## munsell 0.5.0 2018-06-12 [2]
## openxlsx 4.2.3 2020-10-27 [1]
## pillar 1.4.6 2020-07-10 [1]
## pkgbuild 1.1.0 2020-07-13 [1]
## pkgconfig 2.0.3 2019-09-22 [2]
## pkgload 1.1.0 2020-05-29 [1]
## plyr 1.8.6 2020-03-03 [2]
## prettyunits 1.1.1 2020-01-24 [1]
## processx 3.4.5 2020-11-30 [1]
## ps 1.3.4 2020-08-11 [1]
## purrr * 0.3.4 2020-04-17 [1]
## qdapTools 1.3.5 2020-04-17 [1]
## R6 2.5.0 2020-10-28 [2]
## RColorBrewer 1.1-2 2014-12-07 [2]
## Rcpp 1.0.5 2020-07-06 [1]
## RCurl 1.98-1.2 2020-04-18 [1]
## readr * 1.4.0 2020-10-05 [2]
## readxl * 1.3.1 2019-03-13 [2]
## remotes 2.2.0 2020-07-21 [1]
## reprex 2.0.0 2021-04-02 [2]
## rio 0.5.16 2018-11-26 [1]
## rlang 0.4.7 2020-07-09 [1]
## rmarkdown 2.5 2020-10-21 [1]
## rprojroot 2.0.2 2020-11-15 [1]
## rstatix 0.6.0 2020-06-18 [1]
## rstudioapi 0.13 2020-11-12 [2]
## rvest 0.3.6 2020-07-25 [1]
## scales 1.1.1 2020-05-11 [1]
## sessioninfo 1.1.1 2018-11-05 [2]
## stringi 1.5.3 2020-09-09 [2]
## stringr * 1.4.0 2019-02-10 [2]
## testthat 2.3.2 2020-03-02 [1]
## tibble * 3.0.3 2020-07-10 [1]
## tidyr * 1.1.1 2020-07-31 [1]
## tidyselect 1.1.0 2020-05-11 [1]
## tidyverse * 1.3.0 2019-11-21 [1]
## UpSetR * 1.4.0 2019-05-22 [1]
## usethis * 1.6.3 2020-09-17 [1]
## vctrs 0.3.2 2020-07-15 [1]
## withr 2.2.0 2020-04-20 [1]
## xfun 0.16 2020-07-24 [1]
## xml2 1.3.2 2020-04-23 [1]
## yaml 2.2.1 2020-02-01 [1]
## zip 2.1.0 2020-08-10 [1]
## source
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## Github (r-lib/cli@4b41c51)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## Bioconductor
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## Github (const-ae/ggsignif@5a05d88)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## Github (RHReynolds/LDSCforRyten@e805257)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
##
## [1] /home/rreynolds/R/x86_64-pc-linux-gnu-library/3.6
## [2] /usr/local/lib/R/site-library
## [3] /usr/lib/R/site-library
## [4] /usr/lib/R/library