ld_blocks <-
qtl_tidy$bivar$window_100000 %>% .[["ld_block"]] %>% unique() %>% str_c("ld", .)
# Full results
results_by_block <-
qtl_tidy %>%
lapply(., function(list_of_df){
tmp <-
list_of_df %>%
lapply(., function(df){
setNames(
object =
df %>%
dplyr::group_split(ld_block),
nm =
df[["ld_block"]] %>%
unique() %>%
sort() %>%
str_c("ld", .)
)
})
})
col_descriptions <-
tibble(
column = colnames(results_by_block$univ$window_100000$ld681),
description =
c(
"LD block ID",
"eQTL dataset from which gene eQTLs are derived",
"Ensembl ID for gene for which eQTLs were tested in genic region",
"HGNC symbol for gene for which eQTLs were tested in genic region",
"Genic region chromosome",
"Genic region start base pair",
"Genic region end base pair",
"The number of SNPs within the genic region",
"The number of PCs retained within the genic region",
"Analysed phenotype",
"Observed local heritability",
"P-value from the univariate test (F-test for continuous, Chi-sq for binary)"
)
) %>%
dplyr::mutate(sheet = "univariate") %>%
dplyr::bind_rows(
tibble(
column = colnames(results_by_block$bivar$window_100000$ld681),
description =
c(
"LD block ID",
"eQTL dataset from which gene eQTLs are derived",
"Ensembl ID for gene for which eQTLs were tested in genic region",
"HGNC symbol for gene for which eQTLs were tested in genic region",
"Genic region chromosome",
"Genic region start base pair",
"Genic region end base pair",
"The number of SNPs within the genic region",
"The number of PCs retained within the genic region",
"Phenotype 1",
"Phenotype 2. Gene expression traits (i.e. gene eQTLs) are denoted by their ensembl gene id",
"Standardised coefficient for the local genetic correlation",
"Lower 95% confidence estimate for rho",
"Upper 95% confidence estimate for rho",
"Equivalent of taking the square of rho. Denotes the proportion of variance in genetic signal for phen1 explained by phen2 (and vice versa)",
"Lower 95% confidence estimate for r2",
"Upper 95% confidence estimate for r2",
"Simulation p-values for the local genetic correlation",
"FDR-corrected simulation p-values",
"Bonferroni-corrected simulation p-values"
)
) %>%
dplyr::mutate(sheet = "bivariate")
) %>%
dplyr::select(sheet, column, description)
for(window in names(qtl_tidy$bivar)){
# Create ld block figures ----
qtl_list <-
setNames(
qtl_tidy$bivar[[window]] %>%
dplyr::filter(fdr < 0.05) %>%
dplyr::group_split(ld_block),
nm =
str_c(ld_blocks)
)
qtl_genes <-
qtl_tidy$bivar[[window]] %>%
dplyr::filter(fdr < 0.05) %>%
dplyr::group_by(eqtl_dataset, gene_locus) %>%
dplyr::filter(str_detect(phen2, "ENSG")) %>%
.[["gene_locus"]] %>%
unique()
fig_list <- vector(mode = "list", length = length(ld_blocks))
for(block in ld_blocks){
locus_plot <-
plot_locus(
locus_gr = loci_gr[loci_gr$locus == str_remove(block, "ld")],
ref = ref,
highlight_gene = qtl_genes,
highlight_gene_label = "QTL - local rg?",
window = str_remove(window, "window_") %>% as.numeric()
)
edge_plot <-
plot_qtl_edge_diagram(
bivar_corr_qtl = qtl_list[[block]],
phen = fct_disease,
seed = 89
)
if(length(edge_plot)/4 > 1){
height <- c(1,2.5)
} else{
height <- c(1,0.75)
}
fig_list[[which(ld_blocks == block)]] <-
ggpubr::ggarrange(
locus_plot,
ggpubr::ggarrange(
plotlist = edge_plot,
ncol = 4,
nrow = ceiling(length(edge_plot)/4),
common.legend = TRUE,
legend = "top"
),
ncol = 1,
labels = letters[1:2],
heights = height
)
names(fig_list)[which(ld_blocks == block)] <- block
}
# Save to workbook ----
wb <- openxlsx::createWorkbook()
# Write column descriptions ----
openxlsx::addWorksheet(wb, sheetName = "column_descriptions")
openxlsx::writeData(
wb,
sheet = "column_descriptions",
x = col_descriptions,
row.names = FALSE,
headerStyle = openxlsx::createStyle(textDecoration = "BOLD")
)
openxlsx::freezePane(
wb,
sheet = "column_descriptions",
firstRow = TRUE
)
for(block in ld_blocks){
# Write results ----
for(test in names(results_by_block)){
sheetname <- str_c(block, "_", test)
openxlsx::addWorksheet(wb, sheetName = sheetname)
openxlsx::writeData(
wb,
sheet = sheetname,
x = results_by_block[[test]][[window]][[block]],
row.names = FALSE,
headerStyle = openxlsx::createStyle(textDecoration = "BOLD")
)
openxlsx::freezePane(
wb,
sheet = sheetname,
firstRow = TRUE
)
}
# Add figure ----
print(fig_list[[block]])
if(block == "ld2351"){
height <- 45
} else{
height <- 20
}
openxlsx::insertPlot(
wb,
sheet = str_c(block, "_bivar"),
width = 20,
height = height,
units = "cm",
startRow = 2,
startCol = 22
)
}
openxlsx::saveWorkbook(
wb,
file = file.path(out_dir, str_c("SupplementaryTable_lava_local_eqtl_", window,".xlsx")),
overwrite = TRUE
)
}
xlsx <-
setNames(
vector(mode = "list", length = 2),
c("column_descriptions", "results")
)
xlsx[[2]] <-
pcorr
xlsx[[1]] <-
tibble(
column = colnames(pcorr),
description =
c("eQTL dataset from which gene eQTLs are derived",
"Ensembl ID for gene for which eQTLs were tested in genic region",
"HGNC symbol for gene for which eQTLs were tested in genic region",
"Genic region chromosome",
"Genic region start base pair",
"Genic region end base pair",
"The number of SNPs within the genic region",
"The number of PCs retained within the genic region",
"Phenotype 1. Gene expression traits (i.e. gene eQTLs) are denoted by their ensembl gene id",
"Phenotype 2. Gene expression traits (i.e. gene eQTLs) are denoted by their ensembl gene id",
"Phenotype which the genetic correlation between phenotype 1 and 2 was conditioned on",
"The proportion of genetic signal in phenotype 1 explained by z",
"The proportion of genetic signal in phenotype 2 explained by z",
"The partial correlation between phen1 and phen2 conditioned on z",
"Lower bound of 95% confidence interval for partial correlation",
"Upper bound of 95% confidence interval for partial correlation",
"Simulation p-values for the partial genetic correlation"
)
)
openxlsx::write.xlsx(
xlsx,
file = file.path(out_dir, "SupplementaryTable9_pcorr.xlsx"),
row.names = FALSE,
headerStyle = openxlsx::createStyle(textDecoration = "BOLD"),
firstRow = TRUE,
append = TRUE,
overwrite = TRUE
)