library(cowplot) # For alignment of plots using axes
library(DT) # R interface to the JavaScript library DataTables
library(ggpubr) # For arranging plots in one panel
library(here) # For file path construction
library(readxl) # For loading excel workbooks
library(openxlsx) # For writing to excel workbooks
library(tidyverse) # For data manipulation
library(stringr) # For string manipulation
library(qdapTools) # For list manipulation
# To use MarkerGene package, clone from: https://github.com/RHReynolds/MarkerGenes
path_to_ewce_pkg <- "/home/rreynolds/packages/EWCE/" # necessary due to issues with R versioning, see https://github.com/NathanSkene/EWCE
path_to_markergenes_pkg <- "/home/rreynolds/projects/MarkerGenes/"
devtools::load_all(path_to_ewce_pkg) # Function for querying specificity matrices
devtools::load_all(path_to_markergenes_pkg) # Function for querying specificity matrices
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
path_for_figures <- here::here("manuscript", "figures/")
path_for_tables <- here::here("manuscript", "tables/")
source(here::here("R", "plotting_functions.R"))
# Set base_size for plots
theme_base_size <- 10
theme_base_size_small <- 8
# All coloc results
results <-
readRDS(here::here("results", "coloc_summary.Rds"))
# GWAS and eQTL points to plot
plot_data <-
setNames(
vector(mode = "list", length = 4),
c("psychencode", "eQTLGen", "gtex", "aibs")
)
plot_data[[1]] <-
readRDS(here::here("results", "psychencode", "RBD_psychencode_overlap.Rds")) %>%
qdapTools::list_df2df() %>%
dplyr::select(
SNP,
gene = gene_2,
pvalue_gwas = p.value_1,
pvalue_eqtl = p.value_2
) %>%
dplyr::inner_join(
results$robust %>%
dplyr::filter(dataset == "psychencode") %>%
dplyr::select(gene = gene_2, hgnc_symbol)
) %>%
tidyr::gather(
key = "Dataset",
value = "p.value",
-SNP, -gene, -hgnc_symbol
) %>%
tidyr::separate(col = "SNP", into = c("chr", "pos")) %>%
dplyr::mutate(
pos_mb = as.numeric(pos) / 1000000,
p.value = as.numeric(p.value),
log_pval = -log10(p.value),
hgnc_symbol = case_when(
gene == "ENSG00000247775" ~ "SNCA-AS1",
TRUE ~ hgnc_symbol
)
)
plot_data[[2]] <-
readRDS(here::here("results", "eQTLGen", "RBD_eQTLGen_overlap.Rds")) %>%
qdapTools::list_df2df() %>%
dplyr::select(
SNP,
gene = gene_2,
pvalue_gwas = p.value_1,
pvalue_eqtl = p.value_2
) %>%
dplyr::inner_join(
results$robust %>%
dplyr::filter(dataset == "eQTLGen") %>%
dplyr::select(gene = gene_2, hgnc_symbol)) %>%
tidyr::gather(
key = "Dataset",
value = "p.value",
-SNP, -gene, -hgnc_symbol
) %>%
tidyr::separate(col = "SNP", into = c("chr", "pos")) %>%
dplyr::mutate(
pos_mb = as.numeric(pos) / 1000000,
p.value = as.numeric(p.value),
log_pval = -log10(p.value)
)
# Load necessary specificity matrices
load(str_c(path_to_markergenes_pkg, "specificity_matrices/AIBS2018_MTG.rda"))
genes <- c("MMRN1", "SNCA-AS1")
plot_data$gtex <-
readRDS(str_c(path_to_markergenes_pkg, "specificity_df/GTEx_v8.Rds")) %>%
dplyr::filter(Description %in% genes) %>%
dplyr::arrange(Description, -specificity)
plot_data$aibs <-
MarkerGenes::query_gene_ctd(
genes = genes,
ctd_AIBS2018,
celltypeLevel = 1,
median_included = F,
genelistSpecies = "human",
ctdSpecies = "human"
)
fig_list <- vector(mode = "list", length = 4)
plot_tibble <-
tibble(
dataset = c("psychencode", "eQTLGen"),
GWAS_label = rep("RBD GWAS", 2),
dataset_label = c("PsychENCODE", "eQTLGen"),
figure_label = letters[1:2]
)
for (i in 1:nrow(plot_tibble)) {
fig_list[[i]] <-
plot_coloc_hits(
coloc_hits =
results$robust %>%
dplyr::filter(
dataset == plot_tibble$dataset[i],
PP.H4.abf >= 0.75
) %>%
dplyr::arrange(hgnc_symbol) %>%
.[["gene_2"]],
eQTL_GWAS_to_plot = plot_data[[i]],
facet_labels = c(pvalue_gwas = plot_tibble$GWAS_label[i],
pvalue_eqtl = plot_tibble$dataset_label[i]),
figure_labels = plot_tibble$figure_label[i],
theme_base_size = theme_base_size
)
}
fig_list[[3]] <-
plot_gtex_specificity(
specificity = plot_data$gtex,
theme_base_size = theme_base_size_small
)
fig_list[[4]] <-
plot_aibs_specificity(
specificity = plot_data$aibs,
theme_base_size = theme_base_size_small
)
fig <-
ggarrange(
ggarrange(
plotlist = fig_list[1:2],
align = "v",
ncol = 2,
nrow = 1,
common.legend = TRUE
),
ggarrange(
plotlist = fig_list[3:4],
nrow = 2,
align = "v",
labels = letters[3:4],
heights = c(3.5,1)
),
nrow = 2,
heights = c(1,2)
)
fig
Regional association plots for eQTL and RBD GWAS colocalisations and tissue and cell-type specificity of MMRN1 and SNCA-AS1. Regional association plots for eQTL (upper pane) and RBD GWAS association signals (lower pane) in the regions surrounding (a) SNCA-AS1 (PPH4 = 0.89) and (b) MMRN1 (PPH4 = 0.86). eQTLs are derived from (a) PsychENCODE's analysis of adult brain tissue from 1387 individuals or (b) the eQTLGen meta-analysis of 31,684 blood samples from 37 cohorts. In (a) and (b), the x-axis denotes chromosomal position in hg19, and the y-axis indicates association p-values on a -log10 scale. Plot of MMRN1 and SNCA-AS1 specificity in (c) 35 human tissues (GTEx dataset) and (d) 7 broad categories of cell type derived from human middle temporal gryus (AIBS dataset). Specificity represents the proportion of a gene’s total expression attributable to one cell type/tissue, with a value of 0 meaning a gene is not expressed in that cell type/tissue and a value of 1 meaning that a gene is only expressed in that cell type/tissue. In (c) tissues are coloured by whether they belong to the brain. In (c) and (d), tissues and cell types have been ordered by specificity from high to low.
fig_list <- vector(mode = "list", length = 2)
plot_tibble <-
tibble(
dataset = c("psychencode", "eQTLGen"),
GWAS_label = rep("RBD GWAS", 2),
dataset_label = c("psychencode", "eQTLGen"),
figure_label = letters[2:1]
)
for (i in 1:nrow(plot_tibble)) {
fig_list[[i]] <-
plot_coloc_hits(
coloc_hits =
results$robust %>%
dplyr::filter(
dataset == plot_tibble$dataset[i],
hgnc_symbol == "SCARB2"
) %>%
dplyr::arrange(hgnc_symbol) %>%
.[["gene_2"]],
eQTL_GWAS_to_plot = plot_data[[i]],
facet_labels = c(pvalue_gwas = plot_tibble$GWAS_label[i],
pvalue_eqtl = plot_tibble$dataset_label[i]),
figure_labels = plot_tibble$figure_label[i],
theme_base_size = theme_base_size
)
}
supp_fig <-
ggarrange(
plotlist = fig_list[2:1],
align = "v",
ncol = 2,
nrow = 1,
common.legend = TRUE
)
supp_fig
Regional association plot for eQTL and RBD GWAS colocalisation in the region surrounding SCARB2. Regional association plots for eQTL (upper pane) and RBD GWAS association signals (lower pane) in the region surrounding SCARB2, using eQTLs derived from (a) the eQTLGen meta-analysis of 31,684 blood samples from 37 cohorts (PPH3 = 0.99; PPH4 = 0.01) or (b) PsychENCODE's analysis of adult brain tissue from 1387 individuals (PPH3 = 0.66; PPH4 = 0.33). The x-axis denotes chromosomal position in hg19, and the y-axis indicates association p-values on a -log10 scale. eQTLs are derived from PsychENCODE's analysis of adult brain tissue from 1387 individuals. The x-axis denotes chromosomal position in hg19, and the y-axis indicates association p-values on a -log10 scale.
supp_table_coloc <-
setNames(
vector(mode = "list", length = 2),
c("ColumnDescriptions", "Results")
)
supp_table_coloc[[2]] <-
results %>%
qdapTools::list_df2df(col1 = "prior_type") %>%
dplyr::filter(prior_type == "robust", dataset %in% c("psychencode", "eQTLGen")) %>%
dplyr::mutate(hgnc_symbol = case_when(
gene_2 == "ENSG00000247775" ~ "SNCA-AS1",
TRUE ~ hgnc_symbol
)) %>%
dplyr::select(
dataset,
ensembl_id = gene_2, hgnc_symbol,
PP_H0 = PP.H0.abf, PP_H1 = PP.H1.abf,
PP_H2 = PP.H2.abf, PP_H3 = PP.H3.abf, PP_H4 = PP.H4.abf,
sum_PPH3_PPH4, ratio_PPH4_PPH3
) %>%
dplyr::arrange(dataset, ensembl_id)
supp_table_coloc[[1]] <-
tibble(
`Column name` = colnames(supp_table_coloc[[2]]),
Description = c(
"eQTL dataset name",
"Gene ensembl ID",
"Gene HGNC symbol",
"Posterior probability of hypothesis 0: No association with either trait",
"Posterior probability of hypothesis 1: Association with trait 1, not with trait 2",
"Posterior probability of hypothesis 2: Association with trait 2, not with trait 1",
"Posterior probability of hypothesis 3: Association with trait 1 and trait 2, two independent SNPs",
"Posterior probability of hypothesis 3: Association with trait 1 and trait 2, one shared SNP",
"Sum of PP_H3 and PP_H4",
"Ratio of PP_H4 to PP_H3"
)
)
Results of colocalisation analysis using the RBD GWAS and eQTLs derived from eQTLGen and PsychENCODE.
supp_table_specificity <- setNames(
vector(mode = "list", length = 2),
c("ColumnDescriptions", "Results")
)
supp_table_specificity[[2]] <-
plot_data$gtex %>%
dplyr::mutate(Study = "GTEx") %>%
dplyr::select(Study, Gene = Description, CellType_Tissue = Organ, Specificity = specificity, Mean_Expression = mean_expr) %>%
dplyr::bind_rows(plot_data$aibs %>%
dplyr::mutate(Study = Study %>%
str_replace(., "ctd_", "")) %>%
dplyr::select(Study, Gene, CellType_Tissue = CellType, Specificity, Mean_Expression))
supp_table_specificity[[1]] <-
tibble(
`Column name` = colnames(supp_table_specificity[[2]]),
Description = c(
"Dataset from which specificity values were derived",
"Gene HGNC symbol",
"Cell type or tissue name",
"Proportion of a gene's total expression attributable to one cell type or tissue",
"Mean expression across a cell type or tissue"
)
)
Specificity values of MMRN1 and SNCA-AS1 in GTEx and AIBS datasets.
ggsave("figure_coloc_specificity.png", fig,
device = "png",
path = path_for_figures,
width = 180,
height = 270,
units = "mm",
dpi = 300,
limitsize = TRUE
)
ggsave("figure_coloc_specificity.pdf", fig,
device = "pdf",
path = path_for_figures,
width = 180,
height = 270,
units = "mm",
dpi = 300,
limitsize = TRUE
)
ggsave("supp_figure_coloc.png", supp_fig,
device = "png",
path = path_for_figures,
width = 180,
height = 120,
units = "mm",
dpi = 300,
limitsize = TRUE
)
openxlsx::write.xlsx(supp_table_coloc,
file = str_c(path_for_tables, "supp_table_coloc.xlsx"),
row.names = FALSE,
headerStyle = openxlsx::createStyle(textDecoration = "BOLD"),
firstRow = TRUE,
append = TRUE
)
openxlsx::write.xlsx(supp_table_specificity,
file = str_c(path_for_tables, "supp_table_specificity.xlsx"),
row.names = FALSE,
headerStyle = openxlsx::createStyle(textDecoration = "BOLD"),
firstRow = TRUE,
append = TRUE
)
# PPH4 decision rule
pph4_decision <- 0.75
# Paths to coloc files from which to plot sensitivity analyses
plot_data <-
tibble(
dir =
list.files(
here::here("results"),
recursive = T,
full.names = T,
pattern = ".rda") %>%
str_replace(., "/[^/]*$", "") %>%
str_replace(., "/[^/]*$", ""),
file_path =
list.files(here::here("results"),
recursive = T,
full.names = T,
pattern = ".rda"),
file_name =
list.files(
here::here("results"),
recursive = T,
full.names = F,
pattern = ".rda") %>%
str_replace(., ".rda", "")
) %>%
tidyr::separate(
file_name,
into = c("dataset", "prior_type", "gene"),
sep = "/",
remove = F
) %>%
dplyr::mutate(
gene =
str_split(gene, pattern = "_") %>%
lapply(., function(x) x[str_detect(x, "ENSG")]) %>%
unlist()
) %>%
dplyr::inner_join(
results$robust %>%
dplyr::filter(
PP.H4.abf >= pph4_decision |
hgnc_symbol == "SCARB2") %>%
dplyr::mutate(
hgnc_symbol = case_when(
gene_2 == "ENSG00000247775" ~ "SNCA-AS1",
TRUE ~ hgnc_symbol
)) %>%
dplyr::distinct(gene_2, dataset, hgnc_symbol),
by = c("gene" = "gene_2", "dataset")
) %>%
dplyr::filter(prior_type == "robust") %>%
dplyr::arrange(dataset, hgnc_symbol)
for (i in 1:nrow(plot_data)) {
load(as.character(plot_data$file_path[i]))
print(str_c(plot_data$hgnc_symbol[i], " - ", plot_data$gene[i]))
png(
file = str_c(path_for_figures,
"supp_figure_",
plot_data$dataset[i],
"_",
plot_data$hgnc_symbol[i],
".png"
),
width = 180,
height = 120,
units = "mm",
res = 300
)
coloc::sensitivity(coloc_results_annotated, "H4 >= 0.75", plot.manhattans = F)
dev.off()
}
## [1] "MMRN1 - ENSG00000138722"
## [1] "SCARB2 - ENSG00000138760"
## [1] "SCARB2 - ENSG00000138760"
## [1] "SNCA-AS1 - ENSG00000247775"
Sensitivity analysis of MMRN1 colocalisation. Sensitivity analysis of colocalisation between eQTLGen-derived eQTLs regulating MMRN1 expression and RBD GWAS signals. Plot of prior (left) and posterior (right) probabilities for H0-H4 across varying p12 priors. Dashed vertical line indicates the value of p12 used in the initial analysis (p12 = 5 x \(10^{-6}\)). The green region in these plots show the region for which PPH4 \(\geq\) 0.75 would still be supported.
Sensitivity analysis of SNCA-AS1 colocalisation. Sensitivity analysis of colocalisation between PscyhENCODE-derived eQTLs regulating SNCA-AS1 expression and RBD GWAS signals. Plot of prior (left) and posterior (right) probabilities for H0-H4 across varying p12 priors. Dashed vertical line indicates the value of p12 used in the initial analysis (p12 = 5 x \(10^{-6}\)). The green region in these plots show the region for which PPH4 \(\geq\) 0.75 would still be supported.
Sensitivity analysis of SCARB2 colocalisation using eQTLGen-derived eQTLs. Sensitivity analysis of colocalisation between eQTLGen-derived eQTLs regulating SCARB2 expression and RBD GWAS signals. Plot of prior (left) and posterior (right) probabilities for H0-H4 across varying p12 priors. Dashed vertical line indicates the value of p12 used in the initial analysis (p12 = 5 x \(10^{-6}\)). The green region in these plots show the region for which PPH4 \(\geq\) 0.75 would still be supported.
Sensitivity analysis of SCARB2 colocalisation using PscyhENCODE-derived eQTLs.. Sensitivity analysis of colocalisation between PscyhENCODE-derived eQTLs regulating SCARB2 expression and RBD GWAS signals. Plot of prior (left) and posterior (right) probabilities for H0-H4 across varying p12 priors. Dashed vertical line indicates the value of p12 used in the initial analysis (p12 = 5 x \(10^{-6}\)). The green region in these plots show the region for which PPH4 \(\geq\) 0.75 would still be supported.
plot_data <-
readRDS(here::here("results", "psychencode", "RBD_SNCAAS1_beta_harmonised.Rds")) %>%
dplyr::rename(
pvalue_gwas = p.value_1,
pvalue_eqtl = p.value_2,
beta_gwas = beta_1,
beta_eqtl = beta_2
) %>%
dplyr::mutate(
genome_wide_signif =
case_when(
pvalue_gwas < 5e-8 | pvalue_eqtl < 5e-8 ~ "TRUE",
TRUE ~ "FALSE"
) %>%
fct_relevel(., levels = c("TRUE", "FALSE"))
)
fig <-
plot_data %>%
ggplot(aes(x = beta_eqtl, y = beta_gwas)) +
geom_point(aes(colour = genome_wide_signif), alpha = 0.5) +
geom_smooth(method = "lm", level = 0.99, colour = "black", fill = "#EE442F") +
# ggpubr::stat_cor(method = "pearson", cor.coef.name = "R", label.sep = "\n") +
labs(x = "PscyhENCODE eQTL: beta coefficient", y = "RBD GWAS: beta coefficient") +
scale_colour_manual(
values = c("#EE442F", "#888888"),
name = "Genome-wide significant?"
) +
theme_bw(base_size = theme_base_size) +
theme(legend.position = "top")
fig
Association of RBD risk with SNCA-AS1 expression. Scatterplot of beta coefficients for SNPs shared between the RBD GWAS and PsychENCODE eQTLs regulating SNCA-AS1 expression. SNPs passing genome-wide significance (p = 5 x \(10^{-8}\)) in the RBD GWAS and/or PyschENCODE are indicated in red. The black line represents a linear model fitted for the beta coefficients from either dataset, with the 99% confidence interval indicated with a red fill.
ggsave("supp_figure_betas.png", fig,
device = "png",
path = path_for_figures,
width = 140,
height = 120,
units = "mm",
dpi = 300,
limitsize = TRUE
)
ggsave("supp_figure_betas.pdf", fig,
device = "pdf",
path = path_for_figures,
width = 140,
height = 120,
units = "mm",
dpi = 300,
limitsize = TRUE
)
sessionInfo()
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.6 LTS
##
## Matrix products: default
## BLAS: /usr/lib/libblas/libblas.so.3.6.0
## LAPACK: /usr/lib/lapack/liblapack.so.3.6.0
##
## locale:
## [1] LC_CTYPE=en_GB.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_GB.UTF-8 LC_COLLATE=en_GB.UTF-8
## [5] LC_MONETARY=en_GB.UTF-8 LC_MESSAGES=en_GB.UTF-8
## [7] LC_PAPER=en_GB.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] MarkerGenes_0.0.0.9000 EWCE_0.99.2 qdapTools_1.3.5
## [4] forcats_0.5.1 stringr_1.4.0 dplyr_1.0.2
## [7] purrr_0.3.4 readr_1.4.0 tidyr_1.1.1
## [10] tibble_3.0.3 tidyverse_1.3.0 openxlsx_4.2.3
## [13] readxl_1.3.1 here_1.0.0 ggpubr_0.4.0
## [16] ggplot2_3.3.2 DT_0.15 cowplot_1.0.0
##
## loaded via a namespace (and not attached):
## [1] backports_1.1.8 BiocFileCache_1.10.2 coloc_4.0-4
## [4] plyr_1.8.6 splines_3.6.1 usethis_1.6.3
## [7] inline_0.3.17 digest_0.6.27 htmltools_0.5.1.1
## [10] viridis_0.5.1 magrittr_2.0.1 memoise_2.0.0
## [13] limma_3.42.2 remotes_2.2.0 modelr_0.1.8
## [16] askpass_1.1 prettyunits_1.1.1 colorspace_2.0-0
## [19] blob_1.2.1 rvest_0.3.6 rappdirs_0.3.1
## [22] rrcov_1.5-5 haven_2.3.1 xfun_0.16
## [25] callr_3.5.1 crayon_1.4.1 RCurl_1.98-1.2
## [28] jsonlite_1.7.1 survival_3.2-7 glue_1.4.2
## [31] gtable_0.3.0 zlibbioc_1.32.0 HGNChelper_0.8.1
## [34] car_3.0-9 pkgbuild_1.1.0 BiocGenerics_0.32.0
## [37] DEoptimR_1.0-8 abind_1.4-5 scales_1.1.1
## [40] mvtnorm_1.1-1 DBI_1.1.1 rstatix_0.6.0
## [43] Rcpp_1.0.5 viridisLite_0.3.0 progress_1.2.2
## [46] foreign_0.8-72 bit_4.0.4 stats4_3.6.1
## [49] htmlwidgets_1.5.3 httr_1.4.2 ellipsis_0.3.1
## [52] pkgconfig_2.0.3 XML_3.99-0.3 farver_2.0.3
## [55] dbplyr_1.4.4 tidyselect_1.1.0 labeling_0.4.2
## [58] rlang_0.4.7 reshape2_1.4.4 AnnotationDbi_1.48.0
## [61] munsell_0.5.0 cellranger_1.1.0 tools_3.6.1
## [64] cachem_1.0.3 cli_2.2.0.9000 generics_0.0.2
## [67] RSQLite_2.2.0 devtools_2.3.2 broom_0.7.0
## [70] evaluate_0.14 fastmap_1.0.1 ggdendro_0.1.21
## [73] yaml_2.2.1 processx_3.4.5 knitr_1.29
## [76] bit64_4.0.2 fs_1.5.0 zip_2.1.0
## [79] robustbase_0.93-7 nlme_3.1-141 leaps_3.1
## [82] xml2_1.3.2 biomaRt_2.42.1 compiler_3.6.1
## [85] rstudioapi_0.13 curl_4.3 testthat_2.3.2
## [88] ggsignif_0.6.0 reprex_1.0.0 pcaPP_1.9-73
## [91] stringi_1.5.3 ps_1.3.4 desc_1.2.0
## [94] lattice_0.20-38 Matrix_1.2-17 vctrs_0.3.2
## [97] pillar_1.4.6 lifecycle_0.2.0 snpStats_1.36.0
## [100] data.table_1.13.0 bitops_1.0-6 R6_2.5.0
## [103] gridExtra_2.3 rio_0.5.16 IRanges_2.20.2
## [106] sessioninfo_1.1.1 MASS_7.3-51.4 assertthat_0.2.1
## [109] pkgload_1.1.0 chron_2.3-56 openssl_1.4.2
## [112] BMA_3.18.12 rprojroot_2.0.2 withr_2.2.0
## [115] S4Vectors_0.24.4 mgcv_1.8-29 parallel_3.6.1
## [118] hms_1.0.0 grid_3.6.1 rmarkdown_2.5
## [121] carData_3.0-4 Biobase_2.46.0 lubridate_1.7.9