sequenza细胞纯度计算
安装sequenza
bam文件要放在前面,否侧会-f命令可能识别错误
samtools mpileup a.bam -f hg19.fasta -Q 20 |gzip > normal.pileup.gz
samtools mpileup b.sorted.bam -f hg19.fasta -Q 20 |gzip > tumor.pileup.gz
··········································································································
将R语言里sequenza里的sequenza-utils.py复制到/usr/local/bin/下
运行:
#Generating a genome-wide GC content file
python sequenza-utils.py GC-windows -w 50 hg19.fasta |gzip > hg19.gc50Base.txt.gz
#Generate a seqz file
python sequenza-utils.py pileup2seqz -gc hg19.gc50Base.txt.gz -n normal.pileup.gz -t tumor.pileup.gz |gzip > out.seqz.gz
#Trim the seqz file
python sequenza-utils.py seqz-binning -w 50 -s out.seqz.gz | gzip > out_small.seqz.gz
············································································································
R
library(sequenza)
seqz.data <- read.seqz("out_small.seqz.gz") str(seqz.data, vec.len = 2)
gc.stats <- gc.sample.stats("out_small.seqz.gz")
str(gc.stats)
par(mfrow = c(1,2), cex = 1, las = 1, bty = 'l')
matplot(gc.stats$gc.values, gc.stats$raw,
type = 'b', col = 1, pch = c(1, 19, 1), lty = c(2, 1, 2),
xlab = 'GC content (%)', ylab = 'Uncorrected depth ratio')
legend('topright', legend = colnames(gc.stats$raw), pch = c(1, 19, 1))
hist2(seqz.data$depth.ratio, seqz.data$adjusted.ratio,
breaks = prettyLog, key = vkey, panel.first = abline(0, 1, lty = 2),
xlab = 'Uncorrected depth ratio', ylab = 'GC-adjusted depth ratio')
dev.off()
#可以生成RPlots.pdf文件
·······································································································
test <- sequenza.extract("out_small.seqz.gz",assembly = "hg19",chromosome.list=c((1:22),"X","Y","M"))
names(test)
chromosome.view(mut.tab = test$mutations[[1]], baf.windows = test$BAF[[1]], ratio.windows = test$ratio[[1]], min.N.ratio = 1, segments = test$segments[[1]], main = test$chromosomes[1])
CP.example <- sequenza.fit(test)
#导出结果
sequenza.results(sequenza.extract = test, cp.table = CP.example, sample.id="TEST",out.dir="TEST")