Aim
- Check overlap of blacklisted ENCODE regions with PD-implicated genes.
File paths for workflow
# Run the shared path-definition script, located via here::here() at R/file_paths.R.
source(here::here("R", "file_paths.R"))
# Folder holding the miscellaneous raw inputs read below.
# NOTE(review): `path_to_raw_data` is not defined in this section; presumably it is
# created by the sourced file_paths.R script -- confirm.
path_to_folder <- file.path(path_to_raw_data, "misc")
PD gene lists
- For sporadic PD, 2 sets of genes are available, both from Nalls et al. 2019:
- Those implicated by proximity to a PD locus.
- Those identified using Mendelian randomisation and 4 QTL datasets: a large meta-analysis of mRNA expression across brain tissues, mRNA expression in the substantia nigra, mRNA expression in blood, and methylation in blood. This is the list used below.
- Mendelian genes can also be used:
- Predicted Mendelian genes as reported by Genomics England (i.e. green genes).
- Those implicated by family studies: table 1 from Blauwendraat et al. 2020.
# Mendelian PD genes: Genomics England "green genes" export (tab-delimited).
PD_mend <-
  file.path(path_to_folder, "PD_risk_genes/20191128_GE_ParkinsonDiseaseComplexParkinsonism_greengenes.tsv") %>%
  read_delim(delim = "\t")

# PD genes implicated by family studies (tab-delimited).
PD_fam <-
  file.path(path_to_folder, "PD_risk_genes/Blauwendraat2019_PDgenes.txt") %>%
  read_delim(delim = "\t")

# Sporadic PD genes from the QTL Mendelian randomisation summary statistics;
# only rows whose `Pass Bonferroni` column equals "pass" are retained.
PD_MR <-
  dplyr::filter(
    read_excel(file.path(path_to_folder, "PD_risk_genes/TableS6_Complete_summary_statistics_for_QTL_Mendelian_randomization.xlsx")),
    `Pass Bonferroni` == "pass"
  )
Check overlap with ENCODE blacklist regions
# Read the saved ranges of blacklist regions that overlap Ensembl v97 elements,
# convert them to a tibble, and keep the distinct values of its `gene_name` column.
blacklist_genes <-
  unique(
    as_tibble(
      readRDS(file.path(path_to_folder, "homo_sapiens_v97_BR.rds"))
    )[["gene_name"]]
  )
# Mendelian gene lists: distinct, alphabetically sorted symbols that also appear
# among the blacklist-overlapping genes.
sort(unique(PD_mend$`Gene Symbol`[PD_mend$`Gene Symbol` %in% blacklist_genes]))
## [1] "PPP2R2B"
sort(unique(PD_fam$Gene[PD_fam$Gene %in% blacklist_genes]))
## character(0)
# Sporadic gene list (Mendelian randomisation): same membership check.
sort(unique(PD_MR$Gene[PD_MR$Gene %in% blacklist_genes]))
## [1] "AMPD3" "ELOVL7" "FCF1" "MCCC1" "PCBD2" "WDR5B"