library(cowplot) # For alignment of plots using axes
library(DT) # R interface to the JavaScript library DataTables
library(ggpubr) # For arranging plots in one panel
library(here) # For file path construction
library(readxl) # For loading excel workbooks
library(openxlsx) # For writing to excel workbooks
library(tidyverse) # For data manipulation
library(stringr) # For string manipulation
library(qdapTools) # For list manipulation
# To use MarkerGene package, clone from: https://github.com/RHReynolds/MarkerGenes
path_to_ewce_pkg <- "/home/rreynolds/packages/EWCE/" # necessary due to issues with R versioning, see https://github.com/NathanSkene/EWCE
path_to_markergenes_pkg <- "/home/rreynolds/projects/MarkerGenes/"
devtools::load_all(path_to_ewce_pkg) # Function for querying specificity matrices
devtools::load_all(path_to_markergenes_pkg) # Function for querying specificity matrices
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
path_for_figures <- here::here("manuscript", "figures/")
path_for_tables <- here::here("manuscript", "tables/")
source(here::here("R", "plotting_functions.R"))
# Set base_size for plots
theme_base_size <- 10
theme_base_size_small <- 8
# All coloc results
results <-
readRDS(here::here("results", "coloc_summary.Rds"))
# GWAS and eQTL points to plot
plot_data <-
setNames(
vector(mode = "list", length = 4),
c("psychencode", "eQTLGen", "gtex", "aibs")
)
plot_data[[1]] <-
readRDS(here::here("results", "psychencode", "RBD_psychencode_overlap.Rds")) %>%
qdapTools::list_df2df() %>%
dplyr::select(
SNP,
gene = gene_2,
pvalue_gwas = p.value_1,
pvalue_eqtl = p.value_2
) %>%
dplyr::inner_join(
results$robust %>%
dplyr::filter(dataset == "psychencode") %>%
dplyr::select(gene = gene_2, hgnc_symbol)
) %>%
tidyr::gather(
key = "Dataset",
value = "p.value",
-SNP, -gene, -hgnc_symbol
) %>%
tidyr::separate(col = "SNP", into = c("chr", "pos")) %>%
dplyr::mutate(
pos_mb = as.numeric(pos) / 1000000,
p.value = as.numeric(p.value),
log_pval = -log10(p.value),
hgnc_symbol = case_when(
gene == "ENSG00000247775" ~ "SNCA-AS1",
TRUE ~ hgnc_symbol
)
)
plot_data[[2]] <-
readRDS(here::here("results", "eQTLGen", "RBD_eQTLGen_overlap.Rds")) %>%
qdapTools::list_df2df() %>%
dplyr::select(
SNP,
gene = gene_2,
pvalue_gwas = p.value_1,
pvalue_eqtl = p.value_2
) %>%
dplyr::inner_join(
results$robust %>%
dplyr::filter(dataset == "eQTLGen") %>%
dplyr::select(gene = gene_2, hgnc_symbol)) %>%
tidyr::gather(
key = "Dataset",
value = "p.value",
-SNP, -gene, -hgnc_symbol
) %>%
tidyr::separate(col = "SNP", into = c("chr", "pos")) %>%
dplyr::mutate(
pos_mb = as.numeric(pos) / 1000000,
p.value = as.numeric(p.value),
log_pval = -log10(p.value)
)
# Load necessary specificity matrices
load(str_c(path_to_markergenes_pkg, "specificity_matrices/AIBS2018_MTG.rda"))
genes <- c("MMRN1", "SNCA-AS1")
plot_data$gtex <-
readRDS(str_c(path_to_markergenes_pkg, "specificity_df/GTEx_v8.Rds")) %>%
dplyr::filter(Description %in% genes) %>%
dplyr::arrange(Description, -specificity)
plot_data$aibs <-
MarkerGenes::query_gene_ctd(
genes = genes,
ctd_AIBS2018,
celltypeLevel = 1,
median_included = F,
genelistSpecies = "human",
ctdSpecies = "human"
)
fig_list <- vector(mode = "list", length = 4)
plot_tibble <-
tibble(
dataset = c("psychencode", "eQTLGen"),
GWAS_label = rep("RBD GWAS", 2),
dataset_label = c("PsychENCODE", "eQTLGen"),
figure_label = letters[1:2]
)
for (i in 1:nrow(plot_tibble)) {
fig_list[[i]] <-
plot_coloc_hits(
coloc_hits =
results$robust %>%
dplyr::filter(
dataset == plot_tibble$dataset[i],
PP.H4.abf >= 0.75
) %>%
dplyr::arrange(hgnc_symbol) %>%
.[["gene_2"]],
eQTL_GWAS_to_plot = plot_data[[i]],
facet_labels = c(pvalue_gwas = plot_tibble$GWAS_label[i],
pvalue_eqtl = plot_tibble$dataset_label[i]),
figure_labels = plot_tibble$figure_label[i],
theme_base_size = theme_base_size
)
}
fig_list[[3]] <-
plot_gtex_specificity(
specificity = plot_data$gtex,
theme_base_size = theme_base_size_small
)
fig_list[[4]] <-
plot_aibs_specificity(
specificity = plot_data$aibs,
theme_base_size = theme_base_size_small
)
fig <-
ggarrange(
ggarrange(
plotlist = fig_list[1:2],
align = "v",
ncol = 2,
nrow = 1,
common.legend = TRUE
),
ggarrange(
plotlist = fig_list[3:4],
nrow = 2,
align = "v",
labels = letters[3:4],
heights = c(3.5,1)
),
nrow = 2,
heights = c(1,2)
)
fig
fig_list <- vector(mode = "list", length = 2)
plot_tibble <-
tibble(
dataset = c("psychencode", "eQTLGen"),
GWAS_label = rep("RBD GWAS", 2),
dataset_label = c("psychencode", "eQTLGen"),
figure_label = letters[2:1]
)
for (i in 1:nrow(plot_tibble)) {
fig_list[[i]] <-
plot_coloc_hits(
coloc_hits =
results$robust %>%
dplyr::filter(
dataset == plot_tibble$dataset[i],
hgnc_symbol == "SCARB2"
) %>%
dplyr::arrange(hgnc_symbol) %>%
.[["gene_2"]],
eQTL_GWAS_to_plot = plot_data[[i]],
facet_labels = c(pvalue_gwas = plot_tibble$GWAS_label[i],
pvalue_eqtl = plot_tibble$dataset_label[i]),
figure_labels = plot_tibble$figure_label[i],
theme_base_size = theme_base_size
)
}
supp_fig <-
ggarrange(
plotlist = fig_list[2:1],
align = "v",
ncol = 2,
nrow = 1,
common.legend = TRUE
)
supp_fig
supp_table_coloc <-
setNames(
vector(mode = "list", length = 2),
c("ColumnDescriptions", "Results")
)
supp_table_coloc[[2]] <-
results %>%
qdapTools::list_df2df(col1 = "prior_type") %>%
dplyr::filter(prior_type == "robust", dataset %in% c("psychencode", "eQTLGen")) %>%
dplyr::mutate(hgnc_symbol = case_when(
gene_2 == "ENSG00000247775" ~ "SNCA-AS1",
TRUE ~ hgnc_symbol
)) %>%
dplyr::select(
dataset,
ensembl_id = gene_2, hgnc_symbol,
PP_H0 = PP.H0.abf, PP_H1 = PP.H1.abf,
PP_H2 = PP.H2.abf, PP_H3 = PP.H3.abf, PP_H4 = PP.H4.abf,
sum_PPH3_PPH4, ratio_PPH4_PPH3
) %>%
dplyr::arrange(dataset, ensembl_id)
supp_table_coloc[[1]] <-
tibble(
`Column name` = colnames(supp_table_coloc[[2]]),
Description = c(
"eQTL dataset name",
"Gene ensembl ID",
"Gene HGNC symbol",
"Posterior probability of hypothesis 0: No association with either trait",
"Posterior probability of hypothesis 1: Association with trait 1, not with trait 2",
"Posterior probability of hypothesis 2: Association with trait 2, not with trait 1",
"Posterior probability of hypothesis 3: Association with trait 1 and trait 2, two independent SNPs",
"Posterior probability of hypothesis 3: Association with trait 1 and trait 2, one shared SNP",
"Sum of PP_H3 and PP_H4",
"Ratio of PP_H4 to PP_H3"
)
)
supp_table_specificity <- setNames(
vector(mode = "list", length = 2),
c("ColumnDescriptions", "Results")
)
supp_table_specificity[[2]] <-
plot_data$gtex %>%
dplyr::mutate(Study = "GTEx") %>%
dplyr::select(Study, Gene = Description, CellType_Tissue = Organ, Specificity = specificity, Mean_Expression = mean_expr) %>%
dplyr::bind_rows(plot_data$aibs %>%
dplyr::mutate(Study = Study %>%
str_replace(., "ctd_", "")) %>%
dplyr::select(Study, Gene, CellType_Tissue = CellType, Specificity, Mean_Expression))
supp_table_specificity[[1]] <-
tibble(
`Column name` = colnames(supp_table_specificity[[2]]),
Description = c(
"Dataset from which specificity values were derived",
"Gene HGNC symbol",
"Cell type or tissue name",
"Proportion of a gene's total expression attributable to one cell type or tissue",
"Mean expression across a cell type or tissue"
)
)
ggsave("figure_coloc_specificity.png", fig,
device = "png",
path = path_for_figures,
width = 180,
height = 270,
units = "mm",
dpi = 300,
limitsize = TRUE
)
ggsave("figure_coloc_specificity.pdf", fig,
device = "pdf",
path = path_for_figures,
width = 180,
height = 270,
units = "mm",
dpi = 300,
limitsize = TRUE
)
ggsave("supp_figure_coloc.png", supp_fig,
device = "png",
path = path_for_figures,
width = 180,
height = 120,
units = "mm",
dpi = 300,
limitsize = TRUE
)
openxlsx::write.xlsx(supp_table_coloc,
file = str_c(path_for_tables, "supp_table_coloc.xlsx"),
row.names = FALSE,
headerStyle = openxlsx::createStyle(textDecoration = "BOLD"),
firstRow = TRUE,
append = TRUE
)
openxlsx::write.xlsx(supp_table_specificity,
file = str_c(path_for_tables, "supp_table_specificity.xlsx"),
row.names = FALSE,
headerStyle = openxlsx::createStyle(textDecoration = "BOLD"),
firstRow = TRUE,
append = TRUE
)
# PPH4 decision rule
pph4_decision <- 0.75
# Paths to coloc files from which to plot sensitivity analyses
plot_data <-
tibble(
dir =
list.files(
here::here("results"),
recursive = T,
full.names = T,
pattern = ".rda") %>%
str_replace(., "/[^/]*$", "") %>%
str_replace(., "/[^/]*$", ""),
file_path =
list.files(here::here("results"),
recursive = T,
full.names = T,
pattern = ".rda"),
file_name =
list.files(
here::here("results"),
recursive = T,
full.names = F,
pattern = ".rda") %>%
str_replace(., ".rda", "")
) %>%
tidyr::separate(
file_name,
into = c("dataset", "prior_type", "gene"),
sep = "/",
remove = F
) %>%
dplyr::mutate(
gene =
str_split(gene, pattern = "_") %>%
lapply(., function(x) x[str_detect(x, "ENSG")]) %>%
unlist()
) %>%
dplyr::inner_join(
results$robust %>%
dplyr::filter(
PP.H4.abf >= pph4_decision |
hgnc_symbol == "SCARB2") %>%
dplyr::mutate(
hgnc_symbol = case_when(
gene_2 == "ENSG00000247775" ~ "SNCA-AS1",
TRUE ~ hgnc_symbol
)) %>%
dplyr::distinct(gene_2, dataset, hgnc_symbol),
by = c("gene" = "gene_2", "dataset")
) %>%
dplyr::filter(prior_type == "robust") %>%
dplyr::arrange(dataset, hgnc_symbol)
for (i in 1:nrow(plot_data)) {
load(as.character(plot_data$file_path[i]))
print(str_c(plot_data$hgnc_symbol[i], " - ", plot_data$gene[i]))
png(
file = str_c(path_for_figures,
"supp_figure_",
plot_data$dataset[i],
"_",
plot_data$hgnc_symbol[i],
".png"
),
width = 180,
height = 120,
units = "mm",
res = 300
)
coloc::sensitivity(coloc_results_annotated, "H4 >= 0.75", plot.manhattans = F)
dev.off()
}
## [1] "MMRN1 - ENSG00000138722"
## [1] "SCARB2 - ENSG00000138760"
## [1] "SCARB2 - ENSG00000138760"
## [1] "SNCA-AS1 - ENSG00000247775"
plot_data <-
readRDS(here::here("results", "psychencode", "RBD_SNCAAS1_beta_harmonised.Rds")) %>%
dplyr::rename(
pvalue_gwas = p.value_1,
pvalue_eqtl = p.value_2,
beta_gwas = beta_1,
beta_eqtl = beta_2
) %>%
dplyr::mutate(
genome_wide_signif =
case_when(
pvalue_gwas < 5e-8 | pvalue_eqtl < 5e-8 ~ "TRUE",
TRUE ~ "FALSE"
) %>%
fct_relevel(., levels = c("TRUE", "FALSE"))
)
fig <-
plot_data %>%
ggplot(aes(x = beta_eqtl, y = beta_gwas)) +
geom_point(aes(colour = genome_wide_signif), alpha = 0.5) +
geom_smooth(method = "lm", level = 0.99, colour = "black", fill = "#EE442F") +
# ggpubr::stat_cor(method = "pearson", cor.coef.name = "R", label.sep = "\n") +
labs(x = "PscyhENCODE eQTL: beta coefficient", y = "RBD GWAS: beta coefficient") +
scale_colour_manual(
values = c("#EE442F", "#888888"),
name = "Genome-wide significant?"
) +
theme_bw(base_size = theme_base_size) +
theme(legend.position = "top")
fig
ggsave("supp_figure_betas.png", fig,
device = "png",
path = path_for_figures,
width = 140,
height = 120,
units = "mm",
dpi = 300,
limitsize = TRUE
)
ggsave("supp_figure_betas.pdf", fig,
device = "pdf",
path = path_for_figures,
width = 140,
height = 120,
units = "mm",
dpi = 300,
limitsize = TRUE
)
sessionInfo()
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.6 LTS
##
## Matrix products: default
## BLAS: /usr/lib/libblas/libblas.so.3.6.0
## LAPACK: /usr/lib/lapack/liblapack.so.3.6.0
##
## locale:
## [1] LC_CTYPE=en_GB.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_GB.UTF-8 LC_COLLATE=en_GB.UTF-8
## [5] LC_MONETARY=en_GB.UTF-8 LC_MESSAGES=en_GB.UTF-8
## [7] LC_PAPER=en_GB.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] MarkerGenes_0.0.0.9000 EWCE_0.99.2 qdapTools_1.3.5
## [4] forcats_0.5.1 stringr_1.4.0 dplyr_1.0.2
## [7] purrr_0.3.4 readr_1.4.0 tidyr_1.1.1
## [10] tibble_3.0.3 tidyverse_1.3.0 openxlsx_4.2.3
## [13] readxl_1.3.1 here_1.0.0 ggpubr_0.4.0
## [16] ggplot2_3.3.2 DT_0.15 cowplot_1.0.0
##
## loaded via a namespace (and not attached):
## [1] backports_1.1.8 BiocFileCache_1.10.2 coloc_4.0-4
## [4] plyr_1.8.6 splines_3.6.1 usethis_1.6.3
## [7] inline_0.3.17 digest_0.6.27 htmltools_0.5.1.1
## [10] viridis_0.5.1 magrittr_2.0.1 memoise_2.0.0
## [13] limma_3.42.2 remotes_2.2.0 modelr_0.1.8
## [16] askpass_1.1 prettyunits_1.1.1 colorspace_2.0-0
## [19] blob_1.2.1 rvest_0.3.6 rappdirs_0.3.1
## [22] rrcov_1.5-5 haven_2.3.1 xfun_0.16
## [25] callr_3.5.1 crayon_1.4.1 RCurl_1.98-1.2
## [28] jsonlite_1.7.1 survival_3.2-7 glue_1.4.2
## [31] gtable_0.3.0 zlibbioc_1.32.0 HGNChelper_0.8.1
## [34] car_3.0-9 pkgbuild_1.1.0 BiocGenerics_0.32.0
## [37] DEoptimR_1.0-8 abind_1.4-5 scales_1.1.1
## [40] mvtnorm_1.1-1 DBI_1.1.1 rstatix_0.6.0
## [43] Rcpp_1.0.5 viridisLite_0.3.0 progress_1.2.2
## [46] foreign_0.8-72 bit_4.0.4 stats4_3.6.1
## [49] htmlwidgets_1.5.3 httr_1.4.2 ellipsis_0.3.1
## [52] pkgconfig_2.0.3 XML_3.99-0.3 farver_2.0.3
## [55] dbplyr_1.4.4 tidyselect_1.1.0 labeling_0.4.2
## [58] rlang_0.4.7 reshape2_1.4.4 AnnotationDbi_1.48.0
## [61] munsell_0.5.0 cellranger_1.1.0 tools_3.6.1
## [64] cachem_1.0.3 cli_2.2.0.9000 generics_0.0.2
## [67] RSQLite_2.2.0 devtools_2.3.2 broom_0.7.0
## [70] evaluate_0.14 fastmap_1.0.1 ggdendro_0.1.21
## [73] yaml_2.2.1 processx_3.4.5 knitr_1.29
## [76] bit64_4.0.2 fs_1.5.0 zip_2.1.0
## [79] robustbase_0.93-7 nlme_3.1-141 leaps_3.1
## [82] xml2_1.3.2 biomaRt_2.42.1 compiler_3.6.1
## [85] rstudioapi_0.13 curl_4.3 testthat_2.3.2
## [88] ggsignif_0.6.0 reprex_1.0.0 pcaPP_1.9-73
## [91] stringi_1.5.3 ps_1.3.4 desc_1.2.0
## [94] lattice_0.20-38 Matrix_1.2-17 vctrs_0.3.2
## [97] pillar_1.4.6 lifecycle_0.2.0 snpStats_1.36.0
## [100] data.table_1.13.0 bitops_1.0-6 R6_2.5.0
## [103] gridExtra_2.3 rio_0.5.16 IRanges_2.20.2
## [106] sessioninfo_1.1.1 MASS_7.3-51.4 assertthat_0.2.1
## [109] pkgload_1.1.0 chron_2.3-56 openssl_1.4.2
## [112] BMA_3.18.12 rprojroot_2.0.2 withr_2.2.0
## [115] S4Vectors_0.24.4 mgcv_1.8-29 parallel_3.6.1
## [118] hms_1.0.0 grid_3.6.1 rmarkdown_2.5
## [121] carData_3.0-4 Biobase_2.46.0 lubridate_1.7.9