Aim: determine whether RBD GWAS shares any common genetic causal variants with tissue-level eQTL datasets. If there are shared common genetic causal variants this will permit linking the variants within the GWAS locus to regulation of a gene's expression.
beta
) and standard error of the regression coefficient (se
) or p-value and MAF. We will be using beta
and se
(according to the original coloc paper, these are preferred over p-value and MAF).source(here::here("R", "tidy_GWAS.R"))
RBD <- fread("/data/LDScore/GWAS/RBD2020/RBD2020.txt")
# Tidy RBD GWAS
RBD_tidy <-
RBD %>%
tidy_GWAS()
# Get varbeta and check that the necessary columns for coloc are present and are the correct type (num, character)
RBD_tidy_w_varbeta <-
RBD_tidy %>%
colochelpR::get_varbeta() %>%
colochelpR::check_coloc_data_format(beta_or_pval = "beta", check_maf = F)
# Save and gzip output
write_delim(RBD_tidy_w_varbeta, path = here::here("data", "RBD_tidy_varbeta.txt"), delim = "\t")
R.utils::gzip(filename = here::here("data", "RBD_tidy_varbeta.txt"),
destname = here::here("data", "RBD_tidy_varbeta.txt.gz"),
remove = T)
beta == 0
, as this produces NaNs in coloc output.
beta == 0
.# Deal with multiallelic SNPs
# Find duplicates and choose by highest MAF
duplicates_top_maf <-
RBD_tidy_w_varbeta %>%
dplyr::filter(duplicated(SNP) | duplicated(SNP, fromLast = TRUE)) %>%
dplyr::group_by(SNP) %>%
dplyr::top_n(1, maf)
# Remove any remaining duplicates which do not differentiate on beta/p-value and maf
duplicates_top_maf <-
duplicates_top_maf %>%
dplyr::anti_join(duplicates_top_maf %>%
dplyr::filter(duplicated(SNP) | duplicated(SNP, fromLast = TRUE)))
# Above steps results in removal of all duplicate SNPs
# That is all SNP duplicates are duplicates (and not multiallelic SNPs)
nrow(duplicates_top_maf)
# Thus simply use dplyr::distinct() to find all unique entries
RBD_tidy_w_varbeta <-
RBD_tidy_w_varbeta %>%
dplyr::distinct(across(everything()))
# Finally omit any SNPs with beta = 0, as this produces NaNs in coloc output otherwise
RBD_tidy_w_varbeta <-
RBD_tidy_w_varbeta %>%
dplyr::filter(beta != 0)
# Save tidied output
write_delim(RBD_tidy_w_varbeta, path = here::here("data", "RBD_tidy_varbeta_noduplicates.txt"), delim = "\t")
R.utils::gzip(filename = here::here("data", "RBD_tidy_varbeta_noduplicates.txt"),
destname = here::here("data", "RBD_tidy_varbeta_noduplicates.txt.gz"),
remove = T)
colochelpR::get_genes_within_1Mb_of_signif_SNPs()
function. Note that we use GRCh37 gene definitions from ensembl, as the RBD GWAS has been mapped to GRCh37.# Extract all genes within +/- 1Mb of significant RBD hits
ensembl_gene_ids_overlapping_1Mb_window_hit <-
RBD_tidy_w_varbeta %>%
tidyr::separate(col = SNP, into = c("CHR", "BP"), sep = ":", remove = FALSE) %>%
dplyr::mutate(CHR = as.integer(CHR),
BP = as.integer(BP)) %>%
colochelpR::get_genes_within_1Mb_of_signif_SNPs(pvalue_column = "p.value",
CHR_column = "CHR",
BP_column = "BP",
mart = 37)
# Number of genes within +/- 1Mb of all significant hits in RBD GWAS
ensembl_gene_ids_overlapping_1Mb_window_hit <-
tibble(ensembl_id = ensembl_gene_ids_overlapping_1Mb_window_hit) %>%
colochelpR::biomart_df(.,
columnToFilter = "ensembl_id",
attributes = c("ensembl_gene_id", "hgnc_symbol",
"chromosome_name", "start_position", "end_position"),
filter = "ensembl_gene_id",
mart = 37)
saveRDS(ensembl_gene_ids_overlapping_1Mb_window_hit,
here::here("data", "ensembl_gene_ids_overlapping_1Mb_window_hit.Rds"))
MafDb.1Kgenomes.phase3.hs37d5
, which contains MAFs for a number of populations.mafdb <- MafDb.1Kgenomes.phase3.hs37d5
populations(mafdb)
## [1] "AF" "AFR_AF" "AMR_AF" "EAS_AF" "EUR_AF" "SAS_AF"
example_df <- data.frame(rs_id = c("rs12921634", "rs1476958", "rs56189750"))
# Example code
mafs <- GenomicScores::gscores(x = mafdb, ranges = example_df$rs_id %>% as.character(), pop = "EUR_AF")
mafs <-
mafs %>%
as.data.frame() %>%
tibble::rownames_to_column(var = "SNP") %>%
dplyr::rename(maf = EUR_AF) %>%
dplyr::select(SNP, maf)
example_df <- example_df %>%
inner_join(mafs, by = c("rs_id" = "SNP"))
coloc_RBD_eQTLGen.R
nohup Rscript /home/rreynolds/misc_projects/RBD-GWAS-analysis/scripts/coloc_RBD_eQTLGen.R &>/home/rreynolds/misc_projects/RBD-GWAS-analysis/logs/coloc_RBD_eQTLGen.log&
fread("/data/psychencode/DER-08a_hg19_eQTL.significant.txt") %>%
colnames()
## [1] "gene_id" "gene_chr" "gene_start"
## [4] "gene_end" "strand" "number_of_SNPs_tested"
## [7] "SNP_distance_to_TSS" "SNP_id" "SNP_chr"
## [10] "SNP_start" "SNP_end" "nominal_pval"
## [13] "regression_slope" "top_SNP" "FDR"
coloc_RBD_psychencode.R
nohup Rscript /home/rreynolds/misc_projects/RBD-GWAS-analysis/scripts/coloc_RBD_psychencode.R &>/home/rreynolds/misc_projects/RBD-GWAS-analysis/logs/coloc_RBD_psychencode.log&
colochelpR::tidy_eQTL_psychencode()
.results_dir <-
tibble(dir = list.files(here::here("results"), recursive = T, full.names = T, pattern = ".rda") %>%
str_replace(., "/[^/]*$", "") %>%
str_replace(., "/[^/]*$", ""),
file_path = list.files(here::here("results"), recursive = T, full.names = T, pattern = ".rda"),
file_name = list.files(here::here("results"), recursive = T, full.names = F, pattern = ".rda") %>%
str_replace(., ".rda", "")) %>%
tidyr::separate(file_name, into = c("dataset", "prior_type", "gene"), sep = "/", remove = F)
# Split into liberal/robust results
dataset_dir <- setNames(results_dir %>% dplyr::group_split(prior_type),
c(results_dir %>% .[["prior_type"]] %>% unique() %>% sort()))
# Priors df
priors_df <- tibble(prior_type = c("liberal", "robust"),
p12 = c(1e-05, 5e-06))
all_results <- setNames(vector(mode = "list", length = length(dataset_dir)),
names(dataset_dir))
for(i in 1:length(dataset_dir)){
priors_df_filtered <-
priors_df %>%
dplyr::filter(prior_type == names(dataset_dir[i]))
dataset_file_paths <- dataset_dir[[i]]
dataset_names <- dataset_file_paths$dataset %>% unique()
results_list <- vector(mode = "list", length = length(dataset_names))
for(j in 1:length(dataset_names)){
dataset <- dataset_names[j]
print(dataset)
dir_to_load <-
dataset_file_paths %>%
dplyr::filter(dataset == dataset_names[j]) %>%
.[["dir"]] %>%
unique()
dir_to_load <- str_c(dir_to_load, "/", priors_df_filtered$prior_type)
for(k in 1:length(dir_to_load)){
print(dir_to_load[k])
if(dataset %in% c("eQTLGen", "psychencode")){
results <-
colochelpR::merge_coloc_summaries(dir_to_load[k], add_signif_SNP = F, recursive = T, pattern = ".rda") %>%
dplyr::select(GWAS_1, gene_2, everything(), -eQTL_dataset_2)
}
results <-
results %>%
dplyr::mutate(dataset = dataset,
p12 = priors_df_filtered$p12) %>%
dplyr::select(GWAS_1, dataset, gene_2, everything())
if(k == 1){
results_list[[j]] <- results
} else{
results_list[[j]] <-
results_list[[j]] %>%
dplyr::bind_rows(results)
}
}
}
all_results[[i]] <- results_list
}
results <-
all_results %>%
lapply(., function(x){
x %>%
qdapTools::list_df2df() %>%
biomart_df(columnToFilter = "gene_2",
mart = 37,
attributes = c("ensembl_gene_id", "hgnc_symbol"),
filter = c("ensembl_gene_id")) %>%
dplyr::select(GWAS_1, dataset, gene_2, hgnc_symbol, everything(), -X1)
})
saveRDS(results, file = here::here("results", "coloc_summary.Rds"))
results <- readRDS(here::here("results", "coloc_summary.Rds"))
coloc <-
results %>%
qdapTools::list_df2df(., col1 = "prior_type") %>%
dplyr::mutate(hgnc_symbol = case_when(gene_2 == "ENSG00000247775" ~ "SNCA-AS1",
TRUE ~ hgnc_symbol)) %>%
dplyr::inner_join(results %>%
qdapTools::list_df2df(., col1 = "prior_type") %>%
dplyr::filter(prior_type == "liberal", PP.H4.abf >= 0.75) %>%
dplyr::distinct(dataset, gene_2))
coloc %>%
dplyr::arrange(dataset, prior_type, hgnc_symbol) %>%
dplyr::select(prior_type, p12, GWAS_1, dataset, gene_2, hgnc_symbol, everything()) %>%
DT::datatable(rownames = FALSE,
options = list(scrollX = TRUE),
class = 'white-space: nowrap')
coloc %>%
ggplot(aes(x = hgnc_symbol, y = PP.H4.abf, alpha = prior_type)) +
geom_col(position = position_dodge2(), colour = "black") +
geom_hline(yintercept = 0.75, linetype = "dashed", colour = "#63ACBE") +
geom_hline(yintercept = 0.9, linetype = "dashed", colour = "#EE442F") +
labs(x = "Gene", y = "Posterior probability of H4") +
facet_grid(cols = vars(dataset), scales = "free_x", space = "free_x") +
theme_rhr
# Extract coloc hits
coloc_ens <-
results %>%
lapply(., function(x){x %>%
dplyr::filter(PP.H4.abf > 0.75)}) %>%
qdapTools::list_df2df() %>%
dplyr::filter(dataset == "eQTLGen") %>%
dplyr::distinct(hgnc_symbol, gene_2) %>%
dplyr::arrange(hgnc_symbol)
coloc_ens
# Loading relevant files
results_dir <-
tibble(dir = list.files(here::here("results"), recursive = T, full.names = T, pattern = ".rda") %>%
str_replace(., "/[^/]*$", "") %>%
str_replace(., "/[^/]*$", ""),
file_path = list.files(here::here("results"), recursive = T, full.names = T, pattern = ".rda"),
file_name = list.files(here::here("results"), recursive = T, full.names = F, pattern = ".rda") %>%
str_replace(., ".rda", "")) %>%
tidyr::separate(file_name, into = c("dataset", "prior_type", "gene"), sep = "/", remove = F) %>%
dplyr::mutate(gene = str_split(gene, pattern = "_") %>%
lapply(., function(x) x[str_detect(x, "ENSG")]) %>% unlist())
coloc_files <-
results_dir %>%
dplyr::inner_join(coloc_ens, by = c("gene" = "gene_2")) %>%
dplyr::filter(prior_type == "robust", dataset == "eQTLGen") %>%
dplyr::arrange(hgnc_symbol)
coloc_list <- vector(mode = "list", length = nrow(coloc_files))
for(i in 1:length(coloc_list)){
load(as.character(coloc_files$file_path[i]))
print(str_c(coloc_files$hgnc_symbol[i], " - ", coloc_files$gene[i]))
coloc::sensitivity(coloc_results_annotated, "H4 >= 0.75", plot.manhattans = T)
}
## [1] "MMRN1 - ENSG00000138722"
results %>%
qdapTools::list_df2df(col1 = "prior_type") %>%
dplyr::filter(dataset == "eQTLGen", hgnc_symbol == "SNCA")
# Extract overlaps between GWAS and eQTL datasets and join both datasets
# Only do this for colocalisations of interest
eQTL_path <- "/data/eQTLGen/2019-12-11-cis-eQTLsFDR-ProbeLevel-CohortInfoRemoved-BonferroniAdded.txt.gz"
eQTL <- fread(eQTL_path)
# Tidy eQTL data
eQTL_tidy_filtered <-
eQTL %>%
dplyr::filter(Gene %in% c(coloc_ens$gene_2, "ENSG00000145335", "ENSG00000138760")) %>%
colochelpR::tidy_eQTL_eQTLGen() %>%
colochelpR::check_coloc_data_format(beta_or_pval = "pval", check_maf = F) %>%
dplyr::distinct(gene, SNP, .keep_all = TRUE)
# Find overlapping regions between GWAS and eQTLGen
eQTL_RBD_GWAS_joined <-
setNames(eQTL_tidy_filtered %>%
group_split(gene),
eQTL_tidy_filtered %>%
.[["gene"]] %>%
unique() %>%
sort()) %>%
lapply(., function(eQTL_gene_df){
colochelpR::join_coloc_datasets(df1 = RBD_tidy_w_varbeta %>%
dplyr::filter(!duplicated(SNP)),
df2 = eQTL_gene_df,
harmonise = F)
})
# Save as Rds
saveRDS(eQTL_RBD_GWAS_joined, here::here("results", "eQTLGen", "RBD_eQTLGen_overlap.Rds"))
eQTL_RBD_GWAS_joined <- readRDS(here::here("results", "eQTLGen", "RBD_eQTLGen_overlap.Rds"))
# Format data for plotting
eQTL_RBD_GWAS_to_plot <-
eQTL_RBD_GWAS_joined %>%
qdapTools::list_df2df() %>%
dplyr::select(SNP, gene = gene_2, pvalue_gwas = p.value_1, pvalue_eqtl = p.value_2) %>%
dplyr::inner_join(results$liberal %>%
dplyr::filter(dataset == "eQTLGen") %>%
dplyr::select(gene = gene_2, hgnc_symbol)) %>%
tidyr::gather(key = "Dataset", value = "p.value", -SNP, -gene, -hgnc_symbol) %>%
tidyr::separate(col = "SNP", into = c("chr", "pos")) %>%
dplyr::mutate(pos_mb = as.numeric(pos)/1000000,
p.value = as.numeric(p.value),
log_pval = -log10(p.value))
# P-value correlations
# Spread data for scatter plot
eQTL_RBD_GWAS_corr <-
eQTL_RBD_GWAS_to_plot %>%
dplyr::select(-p.value) %>%
tidyr::spread(key = Dataset, value = log_pval)
eQTL_RBD_GWAS_corr %>%
dplyr::filter(gene %in% c(coloc_ens$gene_2, "ENSG00000145335")) %>%
ggplot(aes(x = pvalue_eqtl, y = pvalue_gwas)) +
geom_point(size = 1, alpha = 0.5) +
geom_hline(yintercept = -log10(5*10^-8), linetype = "dashed") +
geom_vline(xintercept = -log10(5*10^-8), linetype = "dashed") +
ggpubr::stat_cor(method = "spearman", cor.coef.name = "rho", colour = "black", size = 4) +
facet_wrap(vars(hgnc_symbol), nrow = 2, scales = "free") +
labs(x = "-log10(eQTL p-value)", y = "-log10(GWAS p-value)") +
theme_pubr()
coloc_ens <-
results %>%
lapply(., function(x){x %>%
dplyr::filter(PP.H4.abf > 0.75)}) %>%
qdapTools::list_df2df() %>%
dplyr::filter(dataset == "psychencode") %>%
dplyr::distinct(hgnc_symbol, gene_2) %>%
dplyr::mutate(hgnc_symbol = case_when(gene_2 == "ENSG00000247775" ~ "SNCA-AS1",
TRUE ~ hgnc_symbol)) %>%
dplyr::arrange(hgnc_symbol)
coloc_ens
coloc_files <-
results_dir %>%
dplyr::inner_join(coloc_ens, by = c("gene" = "gene_2")) %>%
dplyr::filter(prior_type == "robust", dataset == "psychencode") %>%
dplyr::arrange(hgnc_symbol)
coloc_list <- vector(mode = "list", length = nrow(coloc_files))
for(i in 1:length(coloc_list)){
load(as.character(coloc_files$file_path[i]))
print(str_c(coloc_files$hgnc_symbol[i], " - ", coloc_files$gene[i]))
coloc::sensitivity(coloc_results_annotated, "H4 >= 0.75", plot.manhattans = T)
}
## [1] "DLG5 - ENSG00000151208"
## [1] "SNCA-AS1 - ENSG00000247775"
results %>%
qdapTools::list_df2df(col1 = "prior_type") %>%
dplyr::filter(dataset == "psychencode", hgnc_symbol %in% c("MMRN1", "SNCA"))
# Extract overlaps between GWAS and eQTL datasets and join both datasets
eQTL_path <- "/data/psychencode/Full_hg19_cis-eQTL.txt.gz"
eQTL <- fread(eQTL_path)
SNP_info <- fread("/data/psychencode/SNP_Information_Table_with_Alleles.txt")
# Assign column names using another psychencode-derived file
colnames(eQTL) <- colnames(fread("/data/psychencode/DER-08a_hg19_eQTL.significant.txt"))[!str_detect(colnames(fread("/data/psychencode/DER-08a_hg19_eQTL.significant.txt")), "FDR")]
# Filter eQTLs for gene, tidy eQTLs
eQTL_tidy_filtered <-
eQTL %>%
colochelpR::tidy_eQTL_psychencode(., psychencode_SNP_info = SNP_info, add_colnames = F) %>%
dplyr::filter(gene %in% c(coloc_ens$gene_2, "ENSG00000145335", "ENSG00000138722", "ENSG00000138760")) %>%
colochelpR::check_coloc_data_format(beta_or_pval = "pval", check_maf = F) %>%
dplyr::distinct(gene, SNP, .keep_all = TRUE)
# Find overlapping regions between GWAS and eQTLGen
eQTL_RBD_GWAS_joined <-
setNames(eQTL_tidy_filtered %>%
group_split(gene),
eQTL_tidy_filtered %>%
.[["gene"]] %>%
unique() %>%
sort()) %>%
lapply(., function(eQTL_gene_df){
colochelpR::join_coloc_datasets(df1 = RBD_tidy_w_varbeta %>%
dplyr::filter(!duplicated(SNP)),
df2 = eQTL_gene_df,
harmonise = F)
})
# Save as Rds
saveRDS(eQTL_RBD_GWAS_joined, here::here("results", "psychencode", "RBD_psychencode_overlap.Rds"))
# Shared SNPs between GWAS/eQTL
eQTL_RBD_GWAS_joined <- readRDS(here::here("results", "psychencode", "RBD_psychencode_overlap.Rds"))
# Format data for plotting
eQTL_RBD_GWAS_to_plot <-
eQTL_RBD_GWAS_joined %>%
qdapTools::list_df2df() %>%
dplyr::select(SNP, gene = gene_2, pvalue_gwas = p.value_1, pvalue_eqtl = p.value_2) %>%
dplyr::inner_join(results$liberal %>%
dplyr::filter(dataset == "psychencode") %>%
dplyr::select(gene = gene_2, hgnc_symbol)) %>%
tidyr::gather(key = "Dataset", value = "p.value", -SNP, -gene, -hgnc_symbol) %>%
tidyr::separate(col = "SNP", into = c("chr", "pos")) %>%
dplyr::mutate(pos_mb = as.numeric(pos)/1000000,
p.value = as.numeric(p.value),
log_pval = -log10(p.value),
hgnc_symbol = case_when(gene == "ENSG00000247775" ~ "SNCA-AS1",
TRUE ~ hgnc_symbol))
# Spread data for scatter plot
eQTL_RBD_GWAS_corr <-
eQTL_RBD_GWAS_to_plot %>%
dplyr::select(-p.value) %>%
tidyr::spread(key = Dataset, value = log_pval)
eQTL_RBD_GWAS_corr %>%
dplyr::filter(gene %in% c("ENSG00000145335", "ENSG00000138722", "ENSG00000247775")) %>%
ggplot(aes(x = pvalue_eqtl, y = pvalue_gwas)) +
geom_point(size = 1, alpha = 0.5) +
geom_hline(yintercept = -log10(5*10^-8), linetype = "dashed") +
geom_vline(xintercept = -log10(5*10^-8), linetype = "dashed") +
ggpubr::stat_cor(method = "spearman", cor.coef.name = "rho", colour = "black", size = 4) +
facet_wrap(vars(hgnc_symbol), nrow = 2, scales = "free") +
labs(x = "-log10(eQTL p-value)", y = "-log10(GWAS p-value)") +
theme_pubr()
As we have betas for both the GWAS and the Psychencode dataset, we can make some initial attempt to determine what the direction of effect at the locus might be.
SNP_info <- fread("/data/psychencode/SNP_Information_Table_with_Alleles.txt")
SNP_info %>% dplyr::filter(PEC_id == "16:11417802")
same_effect_and_alt_allele
will be assigned the value TRUE.directionality_check <-
RBD_tidy_w_varbeta %>%
dplyr::select(SNP, Al1, Al2) %>%
dplyr::filter(SNP %in% eQTL_RBD_GWAS_joined$ENSG00000247775$SNP) %>%
dplyr::inner_join(SNP_info %>%
dplyr::filter(PEC_id %in% eQTL_RBD_GWAS_joined$ENSG00000247775$SNP) %>%
dplyr::select(SNP = PEC_id, psychencode_ALT = ALT, psychencode_REF = REF)) %>%
dplyr::mutate(same_effect_and_alt_allele = case_when(Al1 == psychencode_ALT & Al2 == psychencode_REF ~ TRUE,
TRUE ~ FALSE))
directionality_check %>%
dplyr::group_by(same_effect_and_alt_allele) %>%
dplyr::count()
effect_and_alt_swapped
:
directionality_check <-
directionality_check %>%
dplyr::mutate(effect_and_alt_swapped = case_when(same_effect_and_alt_allele == FALSE & Al1 == psychencode_REF & Al2 == psychencode_ALT ~ TRUE,
TRUE ~ FALSE))
directionality_check %>%
dplyr::filter(same_effect_and_alt_allele == FALSE) %>%
dplyr::group_by(effect_and_alt_swapped) %>%
dplyr::count()
SNPs_to_exclude <- directionality_check %>%
dplyr::filter(same_effect_and_alt_allele == FALSE, effect_and_alt_swapped == FALSE) %>%
.[["SNP"]]
SNPs_to_swap <- directionality_check %>%
dplyr::filter(same_effect_and_alt_allele == FALSE, effect_and_alt_swapped == TRUE) %>%
.[["SNP"]]
harmonised_alleles <-
eQTL_RBD_GWAS_joined$ENSG00000247775 %>%
dplyr::filter(!SNP %in% SNPs_to_exclude) %>%
dplyr::mutate(beta_2 = case_when(SNP %in% SNPs_to_swap ~ (-beta_2),
TRUE ~ beta_2),
Al1_2 = case_when(SNP %in% SNPs_to_swap ~ Al1_1,
TRUE ~ Al1_2),
Al2_2 = case_when(SNP %in% SNPs_to_swap ~ Al2_1,
TRUE ~ Al2_2))
# saveRDS(harmonised_alleles, here::here("results", "psychencode", "RBD_SNCAAS1_beta_harmonised.Rds"))
data_to_plot <-
harmonised_alleles %>%
tidyr::gather(key = "statistic", value = "value", beta_1, p.value_1, beta_2, p.value_2) %>%
dplyr::select(SNP, statistic, value) %>%
dplyr::mutate(dataset = case_when(str_detect(statistic, "_1") ~ "RBD GWAS",
str_detect(statistic, "_2") ~ "PsychENCODE eQTL"),
statistic = str_replace(statistic, "_.*", "")) %>%
tidyr::spread(key = statistic, value = value) %>%
dplyr::mutate(significant_GWAS_SNP = case_when(SNP == "4:90755909" ~ TRUE,
TRUE ~ FALSE))
data_to_plot %>%
ggplot(aes(x = beta, y = -log10(p.value))) +
geom_point(data = subset(data_to_plot, significant_GWAS_SNP == FALSE), colour = "#888888",
alpha = 0.5) +
geom_point(data = subset(data_to_plot, significant_GWAS_SNP == TRUE), size = 2, colour = "#EE442F",
alpha = 1) +
geom_hline(yintercept = -log10(5*10^-8), linetype = "dashed") +
facet_wrap(~ dataset) +
labs(x = "beta", y = "-log10(p-value)") +
theme_pubr()
harmonised_alleles %>%
dplyr::mutate(genome_wide_signif = case_when(p.value_1 < 5e-8 | p.value_2 < 5e-8 ~ TRUE,
TRUE ~ FALSE)) %>%
tidyr::gather(key = "statistic", value = "value", beta_1, p.value_1, beta_2, p.value_2) %>%
dplyr::select(SNP, statistic, value, genome_wide_signif) %>%
dplyr::mutate(dataset = case_when(str_detect(statistic, "_1") ~ "RBD GWAS",
str_detect(statistic, "_2") ~ "PsychENCODE eQTL"),
statistic = str_replace(statistic, "_.*", "")) %>%
dplyr::filter(statistic == "beta") %>%
dplyr::select(-statistic) %>%
tidyr::spread(key = "dataset", value = "value") %>%
ggplot(aes(x = `PsychENCODE eQTL`, y = `RBD GWAS`)) +
geom_point(aes(colour = genome_wide_signif), alpha = 0.5) +
# geom_density2d(colour = "black", alpha = 0.8) +
geom_smooth(method = "lm", level = 0.99, colour = "black", fill = "#EE442F") +
labs(x = "beta - pscyhENCODE eQTL", y = "beta - RBD") +
scale_colour_manual(values = c("#888888", "#EE442F")) +
theme_pubr()
sessionInfo()
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.6 LTS
##
## Matrix products: default
## BLAS: /usr/lib/libblas/libblas.so.3.6.0
## LAPACK: /usr/lib/lapack/liblapack.so.3.6.0
##
## locale:
## [1] LC_CTYPE=en_GB.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_GB.UTF-8 LC_COLLATE=en_GB.UTF-8
## [5] LC_MONETARY=en_GB.UTF-8 LC_MESSAGES=en_GB.UTF-8
## [7] LC_PAPER=en_GB.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] forcats_0.5.0 stringr_1.4.0
## [3] dplyr_1.0.2 purrr_0.3.4
## [5] readr_1.3.1 tidyr_1.1.1
## [7] tibble_3.0.3 tidyverse_1.3.0
## [9] MafDb.1Kgenomes.phase3.hs37d5_3.10.0 GenomicScores_1.10.0
## [11] GenomicRanges_1.38.0 GenomeInfoDb_1.22.1
## [13] IRanges_2.20.2 S4Vectors_0.24.4
## [15] BiocGenerics_0.32.0 here_1.0.0
## [17] ggpubr_0.4.0 ggplot2_3.3.2
## [19] data.table_1.13.0 colochelpR_0.99.0
## [21] coloc_4.0-4
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 backports_1.1.8
## [3] AnnotationHub_2.18.0 qdapTools_1.3.5
## [5] BiocFileCache_1.10.2 splines_3.6.1
## [7] crosstalk_1.1.0.1 BiocParallel_1.20.1
## [9] inline_0.3.15 digest_0.6.25
## [11] htmltools_0.5.1.1 viridis_0.5.1
## [13] magrittr_1.5 memoise_1.1.0
## [15] BSgenome_1.54.0 openxlsx_4.2.3
## [17] Biostrings_2.54.0 modelr_0.1.8
## [19] matrixStats_0.56.0 R.utils_2.9.2
## [21] colorspace_1.4-1 blob_1.2.1
## [23] rvest_0.3.6 rappdirs_0.3.1
## [25] rrcov_1.5-5 haven_2.3.1
## [27] xfun_0.16 crayon_1.3.4
## [29] RCurl_1.98-1.2 jsonlite_1.7.1
## [31] survival_3.1-12 glue_1.4.2
## [33] gtable_0.3.0 zlibbioc_1.32.0
## [35] XVector_0.26.0 DelayedArray_0.12.3
## [37] car_3.0-9 DEoptimR_1.0-8
## [39] abind_1.4-5 scales_1.1.1
## [41] mvtnorm_1.1-1 DBI_1.1.0
## [43] rstatix_0.6.0 Rcpp_1.0.5
## [45] viridisLite_0.3.0 xtable_1.8-4
## [47] foreign_0.8-72 bit_4.0.4
## [49] DT_0.15 htmlwidgets_1.5.1
## [51] httr_1.4.2 ellipsis_0.3.1
## [53] farver_2.0.3 pkgconfig_2.0.3
## [55] XML_3.99-0.3 R.methodsS3_1.8.0
## [57] dbplyr_1.4.4 labeling_0.3
## [59] tidyselect_1.1.0 rlang_0.4.7
## [61] later_1.1.0.1 AnnotationDbi_1.48.0
## [63] munsell_0.5.0 BiocVersion_3.10.1
## [65] cellranger_1.1.0 tools_3.6.1
## [67] cli_2.2.0.9000 generics_0.0.2
## [69] RSQLite_2.2.0 broom_0.7.0
## [71] evaluate_0.14 fastmap_1.0.1
## [73] yaml_2.2.1 knitr_1.29
## [75] bit64_4.0.2 fs_1.5.0
## [77] zip_2.1.0 robustbase_0.93-6
## [79] nlme_3.1-141 mime_0.9
## [81] R.oo_1.23.0 leaps_3.1
## [83] xml2_1.3.2 compiler_3.6.1
## [85] rstudioapi_0.11 curl_4.3
## [87] interactiveDisplayBase_1.24.0 ggsignif_0.6.0
## [89] reprex_0.3.0 pcaPP_1.9-73
## [91] stringi_1.4.6 highr_0.8
## [93] lattice_0.20-38 Matrix_1.2-17
## [95] vctrs_0.3.2 pillar_1.4.6
## [97] lifecycle_0.2.0 BiocManager_1.30.10
## [99] snpStats_1.36.0 bitops_1.0-6
## [101] httpuv_1.5.4 rtracklayer_1.46.0
## [103] R6_2.4.1 promises_1.1.1
## [105] gridExtra_2.3 rio_0.5.16
## [107] assertthat_0.2.1 chron_2.3-56
## [109] SummarizedExperiment_1.16.1 BMA_3.18.12
## [111] rprojroot_2.0.2 withr_2.2.0
## [113] GenomicAlignments_1.22.1 Rsamtools_2.2.3
## [115] GenomeInfoDbData_1.2.2 mgcv_1.8-29
## [117] hms_0.5.3 grid_3.6.1
## [119] rmarkdown_2.5 carData_3.0-4
## [121] Biobase_2.46.0 shiny_1.5.0
## [123] lubridate_1.7.9