# Script location: here::here("misc_scripts", "Formatting_PDGWAS.R")
# Read the PD 2019 (excluding 23andMe) summary statistics. `PD` is reused
# further down for the GRCh37 export.
PD <- fread(
  "/data/LDScore/GWAS/PD2019_meta5_ex23andMe/nallsEtAl2019_excluding23andMe_allVariants.tab.gz"
)

# Build the hg38 table one step at a time:
#   1. split the SNP column on ":" into CHR and BP,
#   2. run the hg19 -> hg38 liftover helper with the hg19ToHg38 chain file,
#   3. pass the result to add_RS_to_GWAS() together with dbsnp_151.
# NOTE: dbsnp_151 is not defined in this section; it must already exist in
# the session when this runs.
hg38 <- tidyr::separate(PD, col = SNP, into = c("CHR", "BP"), sep = ":")
hg38 <- LDSCforRyten::liftover_hg19_to_hg38(
  hg38,
  path_to_chain = "/data/liftover/hg19/hg19ToHg38.over.chain"
)
hg38 <- LDSCforRyten::add_RS_to_GWAS(hg38, dbsnp_151)

# Write the result as a tab-separated file.
fwrite(
  hg38,
  file = "/data/LDScore/GWAS/PD2019_meta5_ex23andMe/PD2019_ex23andMe_hg38.txt",
  sep = "\t"
)
# Created GRCh37 version, too (not included in script)
# GRCh37 export: no liftover here. The SNP column is split on ":" into CHR
# and BP, BP is cast to integer, and the table is handed to add_RS_to_GWAS()
# with dbsnp_144.
# NOTE: dbsnp_144 is not defined in this section; it must already exist in
# the session when this runs.
hg19 <- LDSCforRyten::add_RS_to_GWAS(
  PD %>%
    tidyr::separate(col = SNP, into = c("CHR", "BP"), sep = ":") %>%
    dplyr::mutate(BP = as.integer(BP)),
  dbsnp_144
)

# Write the result as a tab-separated file.
fwrite(
  hg19,
  file = "/data/LDScore/GWAS/PD2019_meta5_ex23andMe/PD2019_ex23andMe_hg19.txt",
  sep = "\t"
)
# Script location: here::here("misc_scripts", "Formatting_PDGWAS.R")
# Read the PD age-at-onset (2018) summary statistics.
PD_AOO <- fread(
  "/data/LDScore/GWAS/PD2018_AOO/sorted_AAO_april3_18_final_discovery.txt.gz"
)

# The variant identifier lives in MarkerName; split it on ":" straight into
# CHR and BP (same columns as renaming it to SNP first and then splitting),
# lift over with the hg19ToHg38 chain file, then pass to add_RS_to_GWAS()
# with dbsnp_151.
# NOTE: dbsnp_151 is not defined in this section; it must already exist in
# the session when this runs.
hg38 <- PD_AOO %>%
  tidyr::separate(col = MarkerName, into = c("CHR", "BP"), sep = ":") %>%
  LDSCforRyten::liftover_hg19_to_hg38(
    path_to_chain = "/data/liftover/hg19/hg19ToHg38.over.chain"
  ) %>%
  LDSCforRyten::add_RS_to_GWAS(dbsnp_151)

# Write the result as a tab-separated file.
fwrite(
  hg38,
  file = "/data/LDScore/GWAS/PD2018_AOO/PD2018_AOO_hg38.txt",
  sep = "\t"
)
# Script location: here::here("misc_scripts", "Formatting_PDprogressionGWAS.R")
#' Format every PD progression GWAS file found in a directory
#'
#' Lists the `.txt.gz` files in `path_to_GWAS`, skips the one whose phenotype
#' label is "reference", joins each remaining file to the reference table,
#' runs the result through `LDSCforRyten::liftover_hg19_to_hg38()` and
#' `LDSCforRyten::add_RS_to_GWAS()` (with the dbSNP 151 GRCh38 SNPlocs object),
#' and writes one tab-separated `<prefix>_<phenotype>_hg38.txt` per input file.
#'
#' File names are split on underscores: the text before the first underscore
#' becomes the prefix and the text after the last underscore (extension
#' removed) becomes the phenotype label.
#'
#' @param path_to_GWAS Directory containing the `.txt.gz` GWAS files.
#' @param ref_file_path Path to the reference table. It must contain the
#'   columns SNP, CHR, START, REF, ALT and MAF.
#' @param path_to_chain Path to the chain file handed to the liftover helper.
#' @param output_dir Directory the formatted files are written to. The default
#'   reproduces the location that was previously hard-coded.
#' @return `NULL`, invisibly. Called for its side effect of writing files.
format_PD_progression <- function(path_to_GWAS, ref_file_path, path_to_chain,
                                  output_dir = "/data/LDScore/GWAS/PD2019_Progression") {

  library(data.table)
  library(LDSCforRyten)
  library(tidyverse)
  library(stringr)
  library(SNPlocs.Hsapiens.dbSNP151.GRCh38)

  dbsnp <- SNPlocs.Hsapiens.dbSNP151.GRCh38

  # Only these reference columns are needed; select them once rather than on
  # every loop iteration.
  ref <- fread(ref_file_path) %>%
    dplyr::select(SNP, CHR, START, REF, ALT, MAF)

  # List the directory once. The pattern is a regular expression, so the dots
  # are escaped and the match is anchored to the end of the file name.
  gwas_paths <- list.files(path = path_to_GWAS, pattern = "\\.txt\\.gz$", full.names = TRUE)
  gwas_names <- str_replace(basename(gwas_paths), "\\.txt\\.gz$", "")

  GWAS_df <- data.frame(
    paths = gwas_paths,
    base_or_survival = str_replace(gwas_names, "_.*", ""),
    phenotype = str_replace(gwas_names, ".*_", ""),
    stringsAsFactors = FALSE
  ) %>%
    dplyr::filter(phenotype != "reference")

  if (nrow(GWAS_df) == 0) {
    warning("No GWAS files to format in: ", path_to_GWAS, call. = FALSE)
    return(invisible(NULL))
  }

  # seq_len() instead of 1:nrow() so an empty table can never yield c(1, 0).
  for (i in seq_len(nrow(GWAS_df))) {

    label <- str_c(GWAS_df$base_or_survival[i], "_", GWAS_df$phenotype[i])
    print(str_c("Formatting: ", label))

    # The join deliberately has no `by`: it matches on every column the GWAS
    # file shares with the selected reference columns (presumably just SNP --
    # confirm against the input files before pinning `by`).
    GWAS <- fread(GWAS_df$paths[i]) %>%
      dplyr::inner_join(ref) %>%
      dplyr::rename(BP = START,
                    A1 = ALT,
                    A2 = REF) %>%
      dplyr::select(CHR, BP, A1, A2, MAF, BETA, SE, P, N, NSTUDY)

    hg38 <- LDSCforRyten::liftover_hg19_to_hg38(GWAS, path_to_chain) %>%
      LDSCforRyten::add_RS_to_GWAS(., dbSNPref = dbsnp) %>%
      dplyr::select(SNP, everything())

    fwrite(hg38,
           file = file.path(output_dir, str_c(label, "_hg38.txt")),
           sep = "\t")

  }

  invisible(NULL)

}
# Run the progression formatter. All arguments are passed by name, so their
# order is irrelevant; the reference table sits in the same directory as the
# GWAS files.
format_PD_progression(
  path_to_chain = "/data/liftover/hg19/hg19ToHg38.over.chain",
  ref_file_path = "/data/LDScore/GWAS/PD2019_Progression/reference.txt.gz",
  path_to_GWAS = "/data/LDScore/GWAS/PD2019_Progression/"
)
# Read the LBD 2020 summary statistics.
LBD <- fread("/data/LDScore/GWAS/LBD2020/LBD2020.txt")

# Keep and rename the columns of interest, turn CHR into a factor, then pass
# the table to add_RS_to_GWAS() with dbsnp_151. No liftover step is applied
# in this block.
# NOTE: dbsnp_151 is not defined in this section; it must already exist in
# the session when this runs.
LBD_hg38 <- dplyr::select(
  LBD,
  CHR = CHROM, BP = POS, A1, A2, P, MAF = A1_FREQ, Z_STAT, BETA, SE, N = OBS_CT
)
LBD_hg38 <- dplyr::mutate(LBD_hg38, CHR = as.factor(CHR))
LBD_hg38 <- LDSCforRyten::add_RS_to_GWAS(LBD_hg38, dbsnp_151)

# The output path ends in ".gz"; whether fwrite compresses on that basis
# depends on the installed data.table version -- confirm if it matters.
fwrite(
  LBD_hg38,
  file = "/data/LDScore/GWAS/LBD2020/LBD2020_rsids.txt.gz",
  sep = "\t"
)