将vcf文件转成孟德尔随机化分析格式
以https://gwas.mrcieu.ac.uk/datasets/ukb-b-7330/
为例:
原始文件形如:
转换代码
library(vcfR)
getwd()
a_data = read.vcfR('../ukb-b-7330.vcf.gz')
str(a_data)
head(a_data@meta,12)
head(a_data@fix)
head(a_data@gt)
fix = as.data.frame(a_data@fix[,(1:5)])
gt = as.data.frame(a_data@gt[,2])
colnames(gt)= 'GT'
beta = as.numeric(unlist(strsplit(as.character(gt$GT),split = ":"))[seq(1,dim(gt)[1]*5,5)])
se = as.numeric(unlist(strsplit(as.character(gt$GT),split = ":"))[seq(2,dim(gt)[1]*5,5)])
eaf = as.numeric(unlist(strsplit(as.character(gt$GT),split = ":"))[seq(4,dim(gt)[1]*5,5)])
pval = as.numeric(unlist(strsplit(as.character(gt$GT),split = ":"))[seq(3,dim(gt)[1]*5,5)])
mydata = data.frame(beta=beta,se=se,pval=pval,maf=eaf)
mydata = cbind(fix,mydata)
mydata$pval = 10^(-mydata$pval) #还原p值
mydata$N=40899
write.csv(mydata,'vcf2.csv')
输出结果: