Aim: determine the impact of excluding by-proxy cases on AD-LBD and PD-LBD local \(r_g\)

1 Background

Both AD and PD GWASs used in this work include by-proxy cases, which are defined as individuals who have a parent with the disease. In AD, it has been shown that the genetic correlation with clinically diagnosed Alzheimer's disease is high (PMID: 29777097). However, mislabelling by-proxy cases when parents suffer from other types of dementia could lead to spurious genetic correlations. This is particularly relevant in comparisons of AD to LBD, which both present with cognitive decline. To assess the impact of by-proxy inclusions on local \(r_g\)'s between pairwise comparisons of AD, LBD and PD, we used releases of the AD and PD GWASs where by-proxy cases were not included.

2 Supplementary code

Following section includes any intermediary code used in this .Rmd.

2.1 Filter LD blocks

source(here::here("scripts", "05a_filter_loci.R"))

2.2 Get sample overlap

source(here::here("scripts", "05b_get_sample_overlaps.R"))

Sample overlap matrix shown below.

sample_overlap <-
  read.table(
    file = here::here("results", "05_noproxy_univar_bivar", "sample_overlap.txt")
    )

sample_overlap %>% 
  as.matrix()
##                           AD2019 AD2019.Kunkle LBD2020 PD2019.ex23andMe.exUKBB
## AD2019                   1.00000       0.28250 0.02589                -0.00118
## AD2019.Kunkle            0.28250       1.00000 0.03368                -0.00207
## LBD2020                  0.02589       0.03368 1.00000                 0.00819
## PD2019.ex23andMe.exUKBB -0.00118      -0.00207 0.00819                 1.00000
## PD2019.meta5.ex23andMe   0.01193       0.00020 0.01281                 0.83183
##                         PD2019.meta5.ex23andMe
## AD2019                                 0.01193
## AD2019.Kunkle                          0.00020
## LBD2020                                0.01281
## PD2019.ex23andMe.exUKBB                0.83183
## PD2019.meta5.ex23andMe                 1.00000

2.3 Prepare input info file

phenotypes <- c("AD2019", "AD2019.Kunkle", "PD2019.ex23andMe.exUKBB", "PD2019.meta5.ex23andMe", "LBD2020") %>% sort()

gwas_details <- 
  readxl::read_xlsx(path = "/data/LDScore/GWAS/LDSC_GWAS_details.xlsx") %>% 
  dplyr::mutate(
    Original_name = Original_name %>% 
      stringr::str_replace_all("_", "\\.")
  ) %>% 
  dplyr::select(
    Original_name, 
    cases = N_case,
    controls = N_ctrl
  ) 

input_info <- 
  tibble(
    phenotype = 
      list.files(
        path = gwas_dir,
        full.names = F,
        recursive = T,
        pattern = ".lava.gz"
      ) %>% 
      stringr::str_remove("/.*") %>% 
      stringr::str_replace_all("_", "\\."),
    filename = 
      list.files(
        path = gwas_dir,
        full.names = T,
        recursive = T,
        pattern = ".lava.gz"
      )
  ) %>% 
  dplyr::mutate(
    phenotype =
      case_when(
        phenotype == "PD2019.meta5.ex23andMe.exUKBB" ~ "PD2019.ex23andMe.exUKBB",
        TRUE ~ phenotype
      )
  ) %>% 
  dplyr::filter(phenotype %in% phenotypes) %>% 
  dplyr::inner_join(
    gwas_details,
    by = c("phenotype" = "Original_name")
  ) %>% 
  dplyr::mutate(
    cases = readr::parse_number(cases),
    controls = readr::parse_number(controls)
  ) %>% 
  dplyr::select(phenotype, cases, controls, filename)

write_delim(
  input_info,
  file = file.path(here::here("results", "05_noproxy_univar_bivar"),
                   "input.info.txt"),
  delim = "\t"
)

2.4 Run univariate and bivariate tests

This was run using nohup:

# Have to navigate to root project folder for script to work (as it uses here package)
cd /home/rreynolds/misc_projects/neurodegen-psych-local-corr

nohup Rscript \
/home/rreynolds/misc_projects/neurodegen-psych-local-corr/scripts/05c_run_univar_bivar_test.R \
&>/home/rreynolds/misc_projects/neurodegen-psych-local-corr/logs/05c_run_univar_bivar_test.log&

2.5 Run multiple regression

This was run using nohup:

# Have to navigate to root project folder for script to work (as it uses here package)
cd /home/rreynolds/misc_projects/neurodegen-psych-local-corr

nohup Rscript \
/home/rreynolds/misc_projects/neurodegen-psych-local-corr/scripts/05d_run_multi_reg.R \
&>/home/rreynolds/misc_projects/neurodegen-psych-local-corr/logs/05d_run_multi_reg.log&

2.6 Loading results

Once run, results can be loaded using the following code chunk:

2.6.1 LAVA bivariate

result_dir <- 
  setNames(
    list(
      here::here("results",
               "02_univar_bivar_test"),
      here::here("results",
               "05_noproxy_univar_bivar")
    ),
    nm = c("proxy", "no_proxy")
  )

results <- 
  setNames(
    vector(mode = "list", length = 2),
    nm = c("univ", "bivar")
  ) %>% 
  lapply(., function(x){
    
    setNames(
      vector(mode = "list", length = 2),
      nm = c("proxy", "no_proxy")
    )
    
  })

for(test in names(results)){
  
  for(proxy in names(results[[test]])){
    
    results[[test]][[proxy]] <- 
      list.files(
        path = 
          result_dir[[proxy]],
        pattern = str_c(test, ".lava.rds"), 
        full.names = T
      )  %>% 
      readRDS() %>% 
      purrr::discard(is.null) %>% 
      qdapTools::list_df2df() %>% 
      dplyr::select(-X1)
    
    if(proxy == "proxy" && test == "univ"){
      
      results[[test]][[proxy]] <- 
        results[[test]][[proxy]] %>% 
        dplyr::filter(
          phen %in% c("AD2019", "LBD2020", "PD2019.meta5.ex23andMe"),
          locus %in% loci$LOC
          )
      
    }
    
    if(proxy == "proxy" && test == "bivar"){
      
      results[[test]][[proxy]] <- 
        results[[test]][[proxy]] %>% 
        dplyr::filter(
          phen1 %in% c("AD2019", "LBD2020") & phen2 %in% c("LBD2020", "PD2019.meta5.ex23andMe"),
          locus %in% loci$LOC
        )
      
    }
    
  }
  
}

2.6.2 LAVA multiple regression

multireg <-
  list.files(
    here::here(
      "results", 
      "05_noproxy_univar_bivar"
      ),
    pattern = ".multireg.lava.rds", 
    full.names = T
  ) %>% 
  readRDS() %>% 
  lapply(., function(x){
    
    x %>% 
      lapply(., function(y){
        
        y %>% 
          lapply(., function(z){
            
            z %>% 
              qdapTools::list_df2df(col1 = "model_number")
            
          }) %>% 
          qdapTools::list_df2df(col1 = "list_name")
        
      }) %>% 
      qdapTools::list_df2df()
    
  }) %>% 
    qdapTools::list_df2df(col1 = "locus") %>% 
    dplyr::select(-X1) %>% 
    dplyr::select(
      locus, contains("model"), 
      outcome, predictors, 
      everything(), -contains("list")
      )

2.6.3 LDSC

global <- 
  read_delim(
    file = here::here("results", "05_noproxy_univar_bivar", "ldsc_corr", "ldsc_correlations.txt"),
    delim = "\t"
    ) 

2.7 Loading plotting functions

source(here::here("R", "plot_global_corr.R"))
source(here::here("R", "plot_chord_diagram.R"))
source(here::here("R", "plot_locus.R"))
source(here::here("R", "plot_edge_diagram.R"))

3 Methods

3.1 AD/PD GWASs used

For global \(r_g\)'s, we used a total of 5 GWASs, including:

  • AD2019, Jansen et al., PMID: 30617256
  • AD2019, Kunkle et al., PMID: 30820047
  • LBD2020, Chia et al., PMID: 33589841
  • PD2019, Nalls et al., excluding 23andMe cohort, PMID: 31701892
  • PD2019, Nalls et al., excluding 23andMe and UK Biobank cohorts, PMID: 31701892; kindly provided by Cornelis Blauwendraat, IPDGC

For local \(r_g\)'s, as we already had results from analysis of AD and PD GWASs with by-proxy cases against the LBD GWAS, we simply re-ran the analysis using instead AD and PD GWASs excluding by-proxy cases.

3.2 LAVA

Methods are as described in 01_preparing_inputs_gwas.html. LD blocks were filtered to include only those where significant bivariate local \(r_g\)'s were observed between LBD and either by-proxy AD or by-proxy PD GWASs, in addition to between by-proxy AD and by-proxy PD GWASs. This limited the number of LD blocks to 21 (locus 10, 335, 375, 472, 681, 711, 758, 1055, 1186, 1190, 1273, 1367, 1726, 1823, 1840, 2054, 2113, 2128, 2192, 2281, 2351). Bivariate local correlations were only performed for pairs of traits which both exhibited a significant univariate local genetic signal (p < 0.05/21, where the denominator represents the total number of tested loci). This resulted in a total of 10 bivariate tests spanning 6 distinct loci.

3.3 Multiple regression

For LD block 2351, multiple regression was used to determine the extent to which the genetic component of LBD could be explained by the genetic components of AD and PD when by-proxy cases were excluded.

4 Results

4.1 Tabular results

4.1.1 Global correlations

print("Global correlation column descriptions:")
## [1] "Global correlation column descriptions:"
tibble(
  column = colnames(global),
  description = 
    c(
      "Phenotype 1",
      "Phenotype 2",
      "The estimated genetic correlation",
      "The bootstrap standard error of the genetic correlation estimate",
      "The bootstrap standard error of the genetic correlation estimate",
      "P-value for genetic correlation",
      "Estimated snp-heritability of the second phenotype ",
      "Standard error of h2 for phenotype 2",
      "Single-trait LD score regression intercept for phenotype 2",
      "Standard error for single-trait LD score regression intercept for phenotype 2",
      "Estimated genetic covariance between p1 and p2",
      "Bootstrap standard error of cross-trait LD score regression intercept"
    )
) %>%
  DT::datatable(rownames = FALSE,
                options = list(scrollX = TRUE),
                class = 'white-space: nowrap')
print("Global correlation results (p < 0.05/n_tests):")
## [1] "Global correlation results (p < 0.05/n_tests):"
combn <-
  global$p1 %>%
  unique() %>%
  combn(m = 2) %>%
  t() %>%
  as_tibble() %>%
  dplyr::rename(
    p1 = V1,
    p2 = V2
  )

global_signif <- 
  global %>% 
  dplyr::filter(
    !p1==p2
  ) %>% 
  dplyr::inner_join(combn) %>%
  dplyr::filter(
    p < 0.05/nrow(combn)
  )

global_signif %>%
  DT::datatable(rownames = FALSE,
                options = list(scrollX = TRUE),
                class = 'white-space: nowrap')

4.1.2 Univariate results (no proxy)

results$univ <- 
  results$univ %>% 
  lapply(., function(df){
    
    df %>% 
      dplyr::mutate(
        phen =
          phen %>% 
          stringr::str_remove("\\..*")
      )
    
  })

print("Univariate column descriptions:")
## [1] "Univariate column descriptions:"
tibble(
  column = colnames(results$univ),
  description = 
    c(
      "Locus ID",
      "Locus chromosome",
      "Locus start base pair",
      "Locus end base pair",
      "The number of SNPs within the locus",
      "The number of PCs retained within the locus",
      "Analysed phenotype",
      "Observed local heritability",
      "P-value from the univariate test (F-test for continuous, Chi-sq for binary)"
    )
) %>%
  DT::datatable(rownames = FALSE,
                options = list(scrollX = TRUE),
                class = 'white-space: nowrap')
print("Univariate results (p < 0.05/n_loci):")
## [1] "Univariate results (p < 0.05/n_loci):"
results$univ$no_proxy %>% 
  dplyr::filter(p < 0.05/nrow(loci)) %>%
  DT::datatable(rownames = FALSE,
                options = list(scrollX = TRUE),
                class = 'white-space: nowrap')

4.1.3 Bivariate results (no proxy)

results$bivar <- 
  results$bivar %>% 
  lapply(., function(df){
    
    df %>% 
      dplyr::mutate(
        fdr = p.adjust(p, method = "fdr"),
        phen1 =
          phen1 %>% 
          stringr::str_remove("\\..*"),
        phen2 =
          phen2 %>% 
          stringr::str_remove("\\..*")
      )
    
  })

print("Bivariate column descriptions:")
## [1] "Bivariate column descriptions:"
tibble(
  column = colnames(results$bivar),
  description = 
    c(
      "Locus ID",
      "Locus chromosome",
      "Locus start base pair",
      "Locus end base pair",
      "The number of SNPs within the locus",
      "The number of PCs retained within the locus",
      "Phenotype 1",
      "Phenotype 2",
      "Standardised coefficient for the local genetic correlation",
      "Lower 95% confidence estimate for rho",
      "Upper 95% confidence estimate for rho",
      "Equivalent of taking the square of rho. Denotes the proportion of variance in genetic signal for phen1 explained by phen2 (and vice versa)",
      "Lower 95% confidence estimate for r2",
      "Upper 95% confidence estimate for r2",
      "Simulation p-values for the local genetic correlation",
      "FDR-adjusted p-values."
    )
) %>%
  DT::datatable(rownames = FALSE,
                options = list(scrollX = TRUE),
                class = 'white-space: nowrap')
print("Bivariate results (FDR < 0.05):")
## [1] "Bivariate results (FDR < 0.05):"
bivar_fdr <- 
  results$bivar %>% 
  lapply(., function(df) df %>% dplyr::filter(fdr < 0.05))
  

bivar_fdr$no_proxy %>%
  DT::datatable(rownames = FALSE,
                options = list(scrollX = TRUE),
                class = 'white-space: nowrap')

4.1.4 Overlapping bivariate results (proxy/no proxy)

  • This includes:
    • All LD blocks where significant bivariate local \(r_g\)'s were observed between LBD and either by-proxy AD or by-proxy PD GWASs
    • All LD blocks where sufficient univariate signal was detected when using AD and PD GWASs without by-proxy cases
# Pivot results
results_long <- 
  results$bivar %>% 
  qdapTools::list_df2df(col1 = "proxy_inclusion") %>% 
  tidyr::pivot_longer( 
    cols = -c("proxy_inclusion", "locus", "phen1", "phen2"),
    names_to = "feature",
    values_to = "value"
  ) %>% 
  tidyr::pivot_wider(names_from = "proxy_inclusion") %>% 
  dplyr::mutate(
    doe_prox =
      case_when(
        feature == "rho" & proxy < 0 ~ "-",
        feature == "rho" & proxy > 0 ~ "+"
      ),
    doe_no_prox =
      case_when(
        feature == "rho" & no_proxy < 0 ~ "-",
        feature == "rho" & no_proxy > 0 ~ "+" 
      ),
    doe_same =
      case_when(
        doe_prox == doe_no_prox ~ TRUE,
        doe_prox != doe_no_prox ~ FALSE
      )
  )

results_long %>% 
  dplyr::filter(
    feature %in% 
      c("rho", "rho.lower", "rho.upper", "r2", "r2.lower", "r2.upper", "p", "fdr"),
    !is.na(proxy),
    !is.na(no_proxy)
  ) %>% 
  dplyr::select(-contains("doe")) %>% 
  DT::datatable(rownames = FALSE,
                options = list(scrollX = TRUE),
                class = 'white-space: nowrap')

4.2 Global correlations

4.2.1 Text

  • Global cross-trait correlations revealed a significant positive correlation between LBD and PD2019 (without by-proxy cases), as previously observed when using PD2019 (with by-proxy cases). Notably, the strength of correlation was higher when no by-proxy cases were included in the PD GWAS (no by-proxy, \(r_{g}\) = 0.81; with by-proxy, \(r_{g}\) = 0.62). Furthermore, global cross-trait correlations revealed a significant positive correlation between LBD and AD when no by-proxy cases were included; this was not detected when using a GWAS that includes by-proxy cases. As expected, the global \(r_{g}\) between GWASs of the same disease with and without by-proxy cases was 1 (Figure 4.1).

4.2.2 Figures

fig_global <- 
  global %>% 
  dplyr::mutate(
    rg =
      case_when(
        rg > 1 ~ 1,
        TRUE ~ rg
        )
  ) %>% 
  dplyr::inner_join(
    phen_df,
    by = c("p1" = "name")
  ) %>%
  dplyr::inner_join(
    phen_df,
    by = c("p2" = "name")
  ) %>%
  dplyr::select(
    -p1, -p2
  ) %>%
  dplyr::rename(
    p1 = tidy_name.x,
    p2 = tidy_name.y
  ) %>%
  dplyr::mutate(
    p1 =
      fct_relevel(
        p1,
        phen_df %>%
          dplyr::arrange(name) %>%
          dplyr::pull(tidy_name)
      ),
    p2 =
      fct_relevel(
        p2,
        phen_df %>%
          dplyr::arrange(name) %>%
          dplyr::pull(tidy_name)
      )
  ) %>%
plot_global_corr(
  n_phenotypes = 5,
  tidy_label = FALSE
)

fig_global
Global genetic correlations between pairs of phenotypes. Significant negative and positive correlations are indicated by blue and red fill, respectively. Non-significant correlations (p >= 0.05/n_tests) have a grey fill.

Figure 4.1: Global genetic correlations between pairs of phenotypes. Significant negative and positive correlations are indicated by blue and red fill, respectively. Non-significant correlations (p >= 0.05/n_tests) have a grey fill.

4.3 Local correlations

4.3.1 Text

  • In univariate tests, a negative variance estimate was calculated for AD2019 (without by-proxy cases) in 2 LD blocks (472, 1190) likely due to insufficient signal. In addition, AD2019 (without by-proxy cases) had sufficient univariate signal in only 3 LD blocks. This lack of signal could reflect either (i) a loss of power due to decreased case/control numbers in the AD2019 GWAS without by-proxy cases (as compared to the AD2019 GWAS with by-proxy cases) or (ii) a genuine lack of genetic signal in the tested LD blocks when no by-proxy cases are included.
  • In univariate tests, the PD GWAS without by-proxy cases had sufficient univariate signal in 14 of the 21 LD blocks tested; this included 2 of the LD blocks where there was sufficient AD2019 univariate signal.
  • Across those pairs of phenotypes with sufficient univariate signal to carry out a bivariate test using AD/PD GWASs with or without by-proxy cases, bivariate \(r^{2}\), \(\rho\) and -log10(p-value) estimates were found to be positively correlated (Figure 4.2). Only one \(\rho\) estimate was found to have a different direction of effect when using GWASs without by-proxy cases, although this \(\rho\) was non-significant.
  • Using a lenient FDR cut-off, we replicated two \(r_{g}\)'s using AD/PD GWASs without by-proxy cases (out of the 22 \(r_{g}\)'s that were significant using AD/PD GWASs with by-proxy cases). This included the positive correlation between AD and PD in LD block 1273 (which contains CLU) and the positive correlation between AD and LBD in LD block 2351 (which contains APOE). In LD block 2351, we did not replicate the significant negative correlation between LBD and PD that was observed using the PD GWAS with by-proxy cases; however, it is worth noting that while non-significant, the \(\rho\) was negative (no by-proxy: \(\rho\) = -0.201 (CI: -0.443 - 0.007); p = 0.061; by-proxy: \(\rho\) = -0.293 (CI: -0.405 - -0.184); p = 2.7510^{-7}).

4.3.2 Figures

temp <- 
  results_long %>% 
  dplyr::filter(
    feature %in% 
      c("rho", "p"),
    !is.na(proxy),
    !is.na(no_proxy)
  ) %>% 
  dplyr::mutate(
    across(
      .cols = contains("proxy"),
      ~ case_when(
        feature == "p" ~ -log10(.x),
        TRUE ~ .x
      )
    )
  ) %>%
  dplyr::mutate(
    feature = 
      case_when(
        feature == "p" ~ "-log10(p)",
        TRUE ~ feature
      )
  ) %>% 
  ggplot(
    aes(
      x = proxy,
      y = no_proxy
    )
  ) + 
  geom_point(
    size = 1.5,
    alpha = 0.8,
    shape = 21,
    colour = "black",
    aes(fill = doe_same)
  ) + 
  geom_abline(
    intercept = 0,
    linetype = "dashed",
    colour = "black"
  ) +
  ggpubr::stat_cor(
    method = "pearson",
    cor.coef.name = "R",
    size = 3
  ) +
  facet_wrap(
    vars(feature),
    scales = "free"
  ) +
  labs(
    x = "Proxy",
    y = "No proxy",
    fill = "Same direction of effect?"
  ) +
  scale_fill_discrete(na.translate = F) +
  theme_rhr + 
  theme(
    axis.text.x = element_text(angle = 0, hjust = 0.5),
    legend.position = "top"
  )

temp
Scatter plot of -log10(p-value) and rho for each pair of phenotypes with sufficient univariate signal to carry out a bivariate test using AD/PD GWASs with or without by-proxy cases.  In each panel, Peason's coefficient (R) and associated p-value (p) are displayed. The black dashed line represents the line x = y. Points are coloured, where applicable, by whether they share the same direction of effect.

Figure 4.2: Scatter plot of -log10(p-value) and rho for each pair of phenotypes with sufficient univariate signal to carry out a bivariate test using AD/PD GWASs with or without by-proxy cases. In each panel, Peason's coefficient (R) and associated p-value (p) are displayed. The black dashed line represents the line x = y. Points are coloured, where applicable, by whether they share the same direction of effect.

plots <- 
  vector(mode = "list", length = 4)

plots[[1]] <- 
  plot_edge_diagram(
  bivar_corr = 
    bivar_fdr$proxy %>% 
    dplyr::filter(
      locus %in% unique(bivar_fdr$no_proxy$locus)
    ) %>% 
    dplyr::mutate(
      phen1 = phen1 %>%
      stringr::str_replace_all("[:digit:]", "") %>%
      stringr::str_remove("\\..*"),
    phen2 = phen2 %>%
      stringr::str_replace_all("[:digit:]", "") %>%
      stringr::str_remove("\\..*")
    ),
  phen = c("AD", "LBD", "PD"),
  p_threshold = 0.05/1603,
  multiple_corr = FALSE, 
  locus_labels = 
    setNames(
      nm = c(1273, 2351),
      object = c("1273\nProxy", "2351\nProxy")
    ),
  seed = 77,
  geom_node_size = 2,
  geom_label_size = 1.7,
  base_size = 8,
  ncol = 4
)

plots[[2]] <- 
  results$bivar$proxy %>% 
  dplyr::filter(locus %in% unique(bivar_fdr$no_proxy$locus)) %>% 
  dplyr::mutate(
    rho_fill = 
      case_when(
             p < 0.05/1603 ~ round(rho, 2) # use cut-off from original analyses in 02_*.Rmd
           ),
    locus_label =
      str_c(locus, "\nProxy"),
    phen1 = phen1 %>%
      stringr::str_replace_all("[:digit:]", "") %>%
      stringr::str_remove("\\..*"),
    phen2 = phen2 %>%
      stringr::str_replace_all("[:digit:]", "") %>%
      stringr::str_remove("\\..*")
  ) %>% 
  ggplot(
    aes(
      x = phen1,
      y = phen2,
      fill = rho_fill,
      label = round(rho, 2)
    )
  ) +
  geom_tile(colour = "black") +
  geom_text(
    size = 2
    ) +
  labs(x = "", y = "") +
  facet_wrap(vars(locus_label), ncol = 5) +
  scale_fill_distiller(palette = "RdYlBu", direction = -1, na.value = "#cccccc", limits = c(-1, 1)) +
  theme_rhr + 
  theme(
    axis.text.x = element_text(angle = 0, hjust = 0.5)
  )

plots[[3]] <-
  plot_edge_diagram(
    bivar_corr = 
      bivar_fdr$no_proxy %>% 
      dplyr::mutate(
        phen1 = phen1 %>%
      stringr::str_replace_all("[:digit:]", "") %>%
      stringr::str_remove("\\..*"),
    phen2 = phen2 %>%
      stringr::str_replace_all("[:digit:]", "") %>%
      stringr::str_remove("\\..*")
      ),
    phen = c("AD", "LBD", "PD"),
    multiple_corr = FALSE, 
    locus_labels = 
      setNames(
        nm = c(1273, 2351),
        object = c("1273\nNo proxy", "2351\nNo proxy")
      ),
    seed = 77,
    geom_node_size = 2,
    geom_label_size = 1.7,
    base_size = 8,
    ncol = 4
  )

plots[[4]] <-
  results$bivar$no_proxy %>% 
  dplyr::filter(locus %in% unique(bivar_fdr$no_proxy$locus)) %>% 
  dplyr::mutate(
    rho_fill = 
      case_when(
        fdr < 0.05 ~ round(rho, 2)
      ),
    locus_label =
      str_c(locus, "\nNo proxy"),
    phen1 = phen1 %>%
      stringr::str_replace_all("[:digit:]", "") %>%
      stringr::str_remove("\\..*"),
    phen2 = phen2 %>%
      stringr::str_replace_all("[:digit:]", "") %>%
      stringr::str_remove("\\..*")
  ) %>% 
  ggplot(
    aes(
      x = phen1,
      y = phen2,
      fill = rho_fill,
      label = round(rho, 2)
    )
  ) +
  geom_tile(colour = "black") +
  geom_text(
    size = 2
  ) +
  labs(x = "", y = "") +
  facet_wrap(vars(locus_label), ncol = 5) +
  scale_fill_distiller(palette = "RdYlBu", direction = -1, na.value = "#cccccc", limits = c(-1, 1)) +
  theme_rhr + 
  theme(
    axis.text.x = element_text(angle = 0, hjust = 0.5)
  )

ggpubr::ggarrange(
  plotlist = plots[c(1,3,2,4)], 
  nrow = 2, ncol = 2, 
  align = "v",
  heights = c(1,1.5),
  labels = c("a", "c", "b", "d"),
  common.legend = T,
  legend = "bottom"
  )
Significant bivariate local genetic correlations using AD/PD GWASs (a,b) with or (c,d) without by-proxy cases. (a, c) Edge diagrams for each LD block show the standardised coefficient for genetic correlation (rho) for each significant bivariate local genetic correlations (p < 0.05/n_tests). Significant negative and positive correlations are indicated by blue and red colour, respectively. (b, d) Heatmaps show the standardised coefficient for genetic correlation (rho) for each association within the LD block. Significant negative and positive correlations are indicated by blue and red fill, respectively. Non-significant correlations have a grey fill.

Figure 4.3: Significant bivariate local genetic correlations using AD/PD GWASs (a,b) with or (c,d) without by-proxy cases. (a, c) Edge diagrams for each LD block show the standardised coefficient for genetic correlation (rho) for each significant bivariate local genetic correlations (p < 0.05/n_tests). Significant negative and positive correlations are indicated by blue and red colour, respectively. (b, d) Heatmaps show the standardised coefficient for genetic correlation (rho) for each association within the LD block. Significant negative and positive correlations are indicated by blue and red fill, respectively. Non-significant correlations have a grey fill.

4.4 Multiple regression

4.4.1 Text

  • AD (without by-proxy cases) was found to be a significant predictor of LBD in LD block 2351 and was positively associated we LBD, replicating what we observed when including by-proxy cases. In contrast, PD (without by-proxy cases) was not found to be a significant predictor of LBD. Notably, the regression coefficient for PD (without by-proxy cases) was negative (95% confidence interval: -0.25-0.13), as observed when by-proxy cases were included.
  • The multivariate r2 in LD block 2351, which was modelled with LBD as the outcome and AD and PD (both without by-proxy cases) as predictors, was 0.49 (95% confidence interval: 0.44-0.57). This is comparable with what was observed when by-proxy cases were included (r2 = 0.43; 95% confidence interval = 0.38-0.5).

4.4.2 Figures

data_to_plot <-   
  multireg %>% 
  dplyr::mutate(
    predictors = predictors %>%
      stringr::str_replace_all("[:digit:]", "") %>%
      stringr::str_remove("\\..*"),
    outcome = outcome %>% 
      stringr::str_replace_all("[:digit:]", "") %>%
      stringr::str_remove("\\..*"),
    p_text = case_when(
      p < 0.001 ~ "***",
      p < 0.01 ~ "**",
      p < 0.05 ~ "*"
    ),
    locus = 
      as.numeric(locus)
  )

data_to_plot <- 
  data_to_plot %>% 
  dplyr::inner_join(
    data_to_plot %>% 
      dplyr::group_by(outcome, locus) %>% 
      dplyr::summarise(predictors = str_c(predictors, collapse = " + ")) %>% 
      dplyr::mutate(model = str_c(outcome, " ~ ", predictors)) %>% 
      dplyr::select(outcome, locus, model)
  )

a <- 
  data_to_plot %>% 
  ggplot2::ggplot(
    ggplot2::aes(
      x = predictors, 
      y = gamma, 
      ymin = gamma.lower, 
      ymax = gamma.upper
      )
    ) +
  ggplot2::geom_pointrange(
    colour = "#888888", fatten = 0.25
    ) +
  ggplot2::geom_hline(yintercept = 0, linetype = "dashed") +
  ggplot2::geom_text(
    aes(
      y = gamma.upper + (0.1 * gamma.upper), 
      label = p_text
      )
    ) +
  ggplot2::facet_wrap(
    vars(locus, model), 
    scales = "free_x",
    nrow = 1
    ) +
  ggplot2::labs(x = "Predictors", y = "Standardised multiple\nregression coefficient") +
  theme_rhr

b <- 
    data_to_plot %>% 
  dplyr::mutate(
    locus_model =
      str_c(locus, model, sep = "\n")
  ) %>% 
  dplyr::distinct(
    locus, locus_model, r2, r2.lower, r2.upper
  ) %>% 
  ggplot2::ggplot(
    ggplot2::aes(
      x = locus_model %>% 
        fct_reorder(., .x = as.numeric(locus), .fun = max),
      y = r2,
      ymin = r2.lower,
      ymax = r2.upper
    )
  ) +
  geom_col(colour = "black", fill = "#d9d9d9") + 
  geom_errorbar(width=.2) +
  labs(
    x = "Locus and model",
    y = "Multivariate r2"
  ) +
  theme_rhr

  
plot <- 
  cowplot::plot_grid(
    a,b, 
    nrow = 1, 
    axis = "bt", 
    align = "h",
    rel_widths = c(3,1),
    labels = letters[1:2]
  )

plot
Results for multiple regression models across LD block 2351. (a) Plots of standardised coefficients for each predictor in multiple regression model in LD block 2351, with whiskers spanning the 95% confidence interval for the coefficients. (b) Mulivariate r2 for LD block 2351, where multivariate r2 represents the proportion of variance in genetic signal for LBD explained by AD and PD simultaneously. Whiskers span the 95% confidence interval for the r2. 3 asterisks, p < 0.001; 2 asterisks, p < 0.01; 1 asterisk, p < 0.05.

Figure 4.4: Results for multiple regression models across LD block 2351. (a) Plots of standardised coefficients for each predictor in multiple regression model in LD block 2351, with whiskers spanning the 95% confidence interval for the coefficients. (b) Mulivariate r2 for LD block 2351, where multivariate r2 represents the proportion of variance in genetic signal for LBD explained by AD and PD simultaneously. Whiskers span the 95% confidence interval for the r2. 3 asterisks, p < 0.001; 2 asterisks, p < 0.01; 1 asterisk, p < 0.05.



5 Conclusions

  • Global correlations using GWASs without by-proxy cases mirrored global correlations using GWASs with by-proxy cases. The only exception was a significant positive correlation between LBD and AD when no by-proxy cases were included. Further, exclusion of by-proxy cases increased the strength of correlation between LBD and PD.
  • Where sufficient univariate signal existed in GWASs without by-proxy cases, we were able to replicate results using GWASs with by-proxy cases.
  • However, in those cases where there was insufficient univariate signal, it is hard to know whether this reflects (i) a technical issue (i.e. a lack of statistical power to detect a genetic signal) or (ii) a genuine biological effect (i.e. a genuine lack of genetic signal, as the genetic signal reflected the contribution of by-proxy cases to trait \(h^2\) in the region).

6 Manuscript

6.1 Supplementary figures

supp_fig <- 
  cowplot::plot_grid(
    ggpubr::ggarrange(
      temp +
        facet_wrap(
          vars(feature),
          ncol = 1,
          scales = "free"
        ) +
        guides(
          fill = guide_legend(
            nrow = 2,
            title.position = "top"
          )
        ) +
        theme_rhr +
        theme(
          axis.text.x = element_text(angle = 0, hjust = 0.5),
          legend.position = "bottom"
          ),
      ggpubr::ggarrange(
        plotlist = plots[c(2,4)], 
        nrow = 2,
        align = "v",
        common.legend = T,
        legend = "bottom"
      ),
      ncol = 2,
      align = "h", 
      widths = c(1,2),
      labels = c("a", "b")
    ),
    cowplot::plot_grid(
      a,b, 
      nrow = 1, 
      axis = "bt", 
      align = "h",
      rel_widths = c(3,1)
    ),
    ncol = 1,
    rel_heights = c(2,1),
    labels = c("", "c")
  )

ggsave(
  supp_fig,
  path = here::here("results", "99_manuscript_figures"), 
  filename = "suppfig_3_proxy_analysis.png",
  device = "png", 
  width = 160, 
  height = 180, 
  dpi = 300, 
  units = "mm"
)

6.2 Supplementary table

xlsx <- 
  setNames(
    vector(mode = "list", length = 3),
    c("column_descriptions", "univariate", "bivariate")
  )

xlsx[[2]] <-
  results$univ$no_proxy

xlsx[[3]] <-
   results$bivar$no_proxy

xlsx[[1]] <-
  tibble(
    column = colnames(results$univ),
    description = 
      c(
        "LD block ID",
        "LD block chromosome",
        "LD block start base pair",
        "LD block end base pair",
        "The number of SNPs within the LD block",
        "The number of PCs retained within the LD block",
        "Analysed phenotype",
        "Observed local heritability",
        "P-value from the univariate test (F-test for continuous, Chi-sq for binary)"
      )
  ) %>%
  dplyr::mutate(sheet = "univariate") %>% 
  dplyr::bind_rows(
    tibble(
      column = colnames(results$bivar),
      description = 
        c(
          "LD block ID",
          "LD block chromosome",
          "LD block start base pair",
          "LD block end base pair",
          "The number of SNPs within the LD block",
          "The number of PCs retained within the LD block",
          "Phenotype 1",
          "Phenotype 2",
          "Standardised coefficient for the local genetic correlation",
          "Lower 95% confidence estimate for rho",
          "Upper 95% confidence estimate for rho",
          "Equivalent of taking the square of rho. Denotes the proportion of variance in genetic signal for phen1 explained by phen2 (and vice versa)",
          "Lower 95% confidence estimate for r2",
          "Upper 95% confidence estimate for r2",
          "Simulation p-values for the local genetic correlation"
        )
    ) %>% 
      dplyr::mutate(sheet = "bivariate")
  ) %>% 
  dplyr::select(sheet, everything())

openxlsx::write.xlsx(
  xlsx,
  file = file.path(here::here("results", "99_manuscript_figures"), "SupplementaryTable5_lava_local_noproxy.xlsx"),
  row.names = FALSE,
  headerStyle = openxlsx::createStyle(textDecoration = "BOLD"),
  firstRow = TRUE,
  append = TRUE,
  overwrite = TRUE
)

7 Session info

Show/hide

## ─ Session info ───────────────────────────────────────────────────────────────────────────────────────────────────────
##  setting  value                       
##  version  R version 4.0.5 (2021-03-31)
##  os       Ubuntu 16.04.7 LTS          
##  system   x86_64, linux-gnu           
##  ui       X11                         
##  language (EN)                        
##  collate  en_GB.UTF-8                 
##  ctype    en_GB.UTF-8                 
##  tz       Europe/London               
##  date     2022-12-29                  
## 
## ─ Packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────
##  ! package      * version  date       lib source         
##  P abind          1.4-5    2016-07-21 [?] CRAN (R 4.0.5) 
##  P assertthat     0.2.1    2019-03-21 [?] CRAN (R 4.0.5) 
##  P backports      1.3.0    2021-10-27 [?] CRAN (R 4.0.5) 
##  P bit            4.0.4    2020-08-04 [?] CRAN (R 4.0.5) 
##  P bit64          4.0.5    2020-08-30 [?] CRAN (R 4.0.5) 
##  P bitops         1.0-7    2021-04-24 [?] CRAN (R 4.0.5) 
##  P bookdown       0.22     2021-04-22 [?] CRAN (R 4.0.5) 
##  P broom          0.7.10   2021-10-31 [?] CRAN (R 4.0.5) 
##  P bslib          0.3.1    2021-10-06 [?] CRAN (R 4.0.5) 
##  P car            3.0-11   2021-06-27 [?] CRAN (R 4.0.5) 
##  P carData        3.0-4    2020-05-22 [?] CRAN (R 4.0.5) 
##  P cellranger     1.1.0    2016-07-27 [?] CRAN (R 4.0.5) 
##  P chron          2.3-56   2020-08-18 [?] CRAN (R 4.0.5) 
##  P cli            3.1.0    2021-10-27 [?] CRAN (R 4.0.5) 
##  P colorspace     2.0-2    2021-06-24 [?] CRAN (R 4.0.5) 
##  P cowplot        1.1.1    2020-12-30 [?] CRAN (R 4.0.5) 
##  P crayon         1.4.2    2021-10-29 [?] CRAN (R 4.0.5) 
##  P crosstalk      1.1.1    2021-01-12 [?] CRAN (R 4.0.5) 
##  P curl           4.3.2    2021-06-23 [?] CRAN (R 4.0.5) 
##  P data.table     1.14.2   2021-09-27 [?] CRAN (R 4.0.5) 
##  P DBI            1.1.1    2021-01-15 [?] CRAN (R 4.0.5) 
##  P dbplyr         2.1.1    2021-04-06 [?] CRAN (R 4.0.5) 
##  P digest         0.6.29   2021-12-01 [?] CRAN (R 4.0.5) 
##  P dplyr        * 1.0.7    2021-06-18 [?] CRAN (R 4.0.5) 
##  P DT             0.19     2021-09-02 [?] CRAN (R 4.0.5) 
##  P ellipsis       0.3.2    2021-04-29 [?] CRAN (R 4.0.5) 
##  P evaluate       0.14     2019-05-28 [?] CRAN (R 4.0.5) 
##  P fansi          0.5.0    2021-05-25 [?] CRAN (R 4.0.5) 
##  P farver         2.1.0    2021-02-28 [?] CRAN (R 4.0.5) 
##  P fastmap        1.1.0    2021-01-25 [?] CRAN (R 4.0.5) 
##  P forcats      * 0.5.1    2021-01-27 [?] CRAN (R 4.0.5) 
##  P foreign        0.8-81   2020-12-22 [?] CRAN (R 4.0.5) 
##  P fs             1.5.1    2021-11-30 [?] CRAN (R 4.0.5) 
##  P generics       0.1.1    2021-10-25 [?] CRAN (R 4.0.5) 
##  P ggforce        0.3.3    2021-03-05 [?] CRAN (R 4.0.5) 
##  P ggplot2      * 3.3.5    2021-06-25 [?] CRAN (R 4.0.5) 
##  P ggpubr         0.4.0    2020-06-27 [?] CRAN (R 4.0.5) 
##  P ggraph       * 2.0.5    2021-02-23 [?] CRAN (R 4.0.5) 
##  P ggrepel        0.9.1    2021-01-15 [?] CRAN (R 4.0.5) 
##  P ggsignif       0.6.3    2021-09-09 [?] CRAN (R 4.0.5) 
##  P glue           1.5.1    2021-11-30 [?] CRAN (R 4.0.5) 
##  P graphlayouts   0.7.1    2020-10-26 [?] CRAN (R 4.0.5) 
##  P gridExtra      2.3      2017-09-09 [?] CRAN (R 4.0.5) 
##  P gtable         0.3.0    2019-03-25 [?] CRAN (R 4.0.5) 
##  P haven          2.4.3    2021-08-04 [?] CRAN (R 4.0.5) 
##  P here           1.0.1    2020-12-13 [?] CRAN (R 4.0.5) 
##  P highr          0.9      2021-04-16 [?] CRAN (R 4.0.5) 
##  P hms            1.1.1    2021-09-26 [?] CRAN (R 4.0.5) 
##  P htmltools      0.5.2    2021-08-25 [?] CRAN (R 4.0.5) 
##  P htmlwidgets    1.5.4    2021-09-08 [?] CRAN (R 4.0.5) 
##  P httr           1.4.2    2020-07-20 [?] CRAN (R 4.0.5) 
##  P igraph         1.2.7    2021-10-15 [?] CRAN (R 4.0.5) 
##  P jquerylib      0.1.4    2021-04-26 [?] CRAN (R 4.0.5) 
##  P jsonlite       1.7.2    2020-12-09 [?] CRAN (R 4.0.5) 
##  P knitr          1.36     2021-09-29 [?] CRAN (R 4.0.5) 
##  P labeling       0.4.2    2020-10-20 [?] CRAN (R 4.0.5) 
##  P LAVA           0.0.6    2021-09-01 [?] xgit (@7be3421)
##  P lifecycle      1.0.1    2021-09-24 [?] CRAN (R 4.0.5) 
##  P lubridate      1.8.0    2021-10-07 [?] CRAN (R 4.0.5) 
##  P magrittr       2.0.1    2020-11-17 [?] CRAN (R 4.0.5) 
##  P MASS           7.3-54   2021-05-03 [?] CRAN (R 4.0.5) 
##  P modelr         0.1.8    2020-05-19 [?] CRAN (R 4.0.5) 
##  P munsell        0.5.0    2018-06-12 [?] CRAN (R 4.0.5) 
##  P openxlsx       4.2.4    2021-06-16 [?] CRAN (R 4.0.5) 
##  P pillar         1.6.4    2021-10-18 [?] CRAN (R 4.0.5) 
##  P pkgconfig      2.0.3    2019-09-22 [?] CRAN (R 4.0.5) 
##  P polyclip       1.10-0   2019-03-14 [?] CRAN (R 4.0.5) 
##  P purrr        * 0.3.4    2020-04-17 [?] CRAN (R 4.0.5) 
##  P qdapTools      1.3.5    2020-04-17 [?] CRAN (R 4.0.5) 
##  P R6             2.5.1    2021-08-19 [?] CRAN (R 4.0.5) 
##  P RColorBrewer   1.1-2    2014-12-07 [?] CRAN (R 4.0.5) 
##  P Rcpp           1.0.7    2021-07-07 [?] CRAN (R 4.0.5) 
##  P RCurl          1.98-1.5 2021-09-17 [?] CRAN (R 4.0.5) 
##  P readr        * 2.0.2    2021-09-27 [?] CRAN (R 4.0.5) 
##  P readxl         1.3.1    2019-03-13 [?] CRAN (R 4.0.5) 
##  P reprex         2.0.0    2021-04-02 [?] CRAN (R 4.0.5) 
##  P rio            0.5.27   2021-06-21 [?] CRAN (R 4.0.5) 
##  P rlang          0.4.12   2021-10-18 [?] CRAN (R 4.0.5) 
##  P rmarkdown      2.11     2021-09-14 [?] CRAN (R 4.0.5) 
##  P rprojroot      2.0.2    2020-11-15 [?] CRAN (R 4.0.5) 
##  P rstatix        0.7.0    2021-02-13 [?] CRAN (R 4.0.5) 
##  P rstudioapi     0.13     2020-11-12 [?] CRAN (R 4.0.5) 
##  P rvest          1.0.1    2021-07-26 [?] CRAN (R 4.0.5) 
##  P sass           0.4.0    2021-05-12 [?] CRAN (R 4.0.5) 
##  P scales         1.1.1    2020-05-11 [?] CRAN (R 4.0.5) 
##  P sessioninfo  * 1.1.1    2018-11-05 [?] CRAN (R 4.0.5) 
##  P stringi        1.7.6    2021-11-29 [?] CRAN (R 4.0.5) 
##  P stringr      * 1.4.0    2019-02-10 [?] CRAN (R 4.0.5) 
##  P tibble       * 3.1.6    2021-11-07 [?] CRAN (R 4.0.5) 
##  P tidygraph      1.2.0    2020-05-12 [?] CRAN (R 4.0.5) 
##  P tidyr        * 1.1.4    2021-09-27 [?] CRAN (R 4.0.5) 
##  P tidyselect     1.1.1    2021-04-30 [?] CRAN (R 4.0.5) 
##  P tidyverse    * 1.3.1    2021-04-15 [?] CRAN (R 4.0.5) 
##  P tweenr         1.0.2    2021-03-23 [?] CRAN (R 4.0.5) 
##  P tzdb           0.2.0    2021-10-27 [?] CRAN (R 4.0.5) 
##  P utf8           1.2.2    2021-07-24 [?] CRAN (R 4.0.5) 
##  P vctrs          0.3.8    2021-04-29 [?] CRAN (R 4.0.5) 
##  P viridis        0.6.2    2021-10-13 [?] CRAN (R 4.0.5) 
##  P viridisLite    0.4.0    2021-04-13 [?] CRAN (R 4.0.5) 
##  P vroom          1.5.5    2021-09-14 [?] CRAN (R 4.0.5) 
##  P withr          2.4.3    2021-11-30 [?] CRAN (R 4.0.5) 
##  P xfun           0.27     2021-10-18 [?] CRAN (R 4.0.5) 
##  P xml2           1.3.2    2020-04-23 [?] CRAN (R 4.0.5) 
##  P yaml           2.2.1    2020-02-01 [?] CRAN (R 4.0.5) 
##  P zip            2.2.0    2021-05-31 [?] CRAN (R 4.0.5) 
## 
## [1] /home/rreynolds/misc_projects/neurodegen-psych-local-corr/renv/library/R-4.0/x86_64-pc-linux-gnu
## [2] /opt/R/4.0.5/lib/R/library
## 
##  P ── Loaded and on-disk path mismatch.