vcf2maf
1. install VEP
(1) prerequisite
# Become root for the system-wide package installs below.
su
apt-get update
apt-get upgrade -y
apt-get install -y perl

# Install the required Perl packages.
cpanm DBI
cpanm Archive::Zip
# DBD::mysql is installed through cpanm because there is no DBD::mysql in conda.
cpanm DBD::mysql

# Fix for a "C compiler not found" error: install the build toolchain.
apt-get install -y build-essential
conda update --all -c conda/label/cf201901
conda update --all -c conda-forge/label/cf201901

# Workaround for "xlocale.h not found" on Ubuntu while installing:
# expose locale.h under the old header name (skipped if it already exists).
[ -e /usr/include/xlocale.h ] || ln -s /usr/include/locale.h /usr/include/xlocale.h

# Install zlib, htslib, samtools and liftover with conda
# (export the conda PATH in .bashrc).
conda install -c bioconda/label/cf201901 ucsc-liftover
(2) download VEP
# Build tools and libraries used while fetching and installing VEP.
apt-get install -y build-essential git libncurses-dev

# VEP_PATH: where the VEP code is unpacked.
# VEP_DATA: where the cache and other data files are stored.
# VER:      the VEP release number used throughout these notes.
export VEP_PATH="$HOME/vep"
export VEP_DATA="$HOME/.vep"
export VER=96

# Create the data directory at the exact path VEP_DATA points to; a bare
# "mkdir .vep" only matches it when the current directory happens to be $HOME.
mkdir -p "$VEP_DATA"

# Download VEP release $VER and unpack it into $VEP_PATH.
# The archive is only removed, and the tree only moved, if unzip succeeded.
curl -L -O "https://github.com/Ensembl/ensembl-vep/archive/release/${VER}.zip"
unzip "${VER}.zip" && rm "${VER}.zip" && mv "ensembl-vep-release-${VER}" "$VEP_PATH"

# htslib and tabix must be in the same folder so that cache_convert can work.
# (${PERL5LIB:+...} avoids a dangling ":" when PERL5LIB was previously unset.)
export PERL5LIB="$VEP_PATH${PERL5LIB:+:$PERL5LIB}"
cd "$VEP_PATH"
# Download the cache file.
# Don't use rsync: it is too slow and always errors out.
cd "$VEP_DATA"
curl -O "ftp://ftp.ensembl.org/pub/release-${VER}/variation/vep/homo_sapiens_vep_${VER}_GRCh37.tar.gz"
tar -izxf "homo_sapiens_vep_${VER}_GRCh37.tar.gz" -C "$VEP_DATA"

# INSTALL.pl and convert_cache.pl ship with the VEP code unpacked in $VEP_PATH,
# so switch back there before running them (the download above left us in
# $VEP_DATA, where those scripts do not exist).
cd "$VEP_PATH"

# Download the API.
perl INSTALL.pl --AUTO a --DESTDIR "$VEP_PATH" --CACHEDIR "$VEP_DATA" --NO_HTSLIB

# Download the reference FASTA.
perl INSTALL.pl --AUTO f --SPECIES homo_sapiens --ASSEMBLY GRCh37 --DESTDIR "$VEP_PATH" --CACHEDIR "$VEP_DATA"

# Convert the cache.
perl convert_cache.pl --species homo_sapiens --version "${VER}_GRCh37" --dir "$VEP_DATA"
# Download the ExAC r0.3.1 VCF into the VEP data directory.
cd "$VEP_DATA"
exac_vcf="$VEP_DATA/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz"
exac_fixed="$VEP_DATA/ExAC_nonTCGA.r0.3.1.sites.fixed.vcf.gz"
curl -L ftp://ftp.broadinstitute.org/pub/ExAC_release/release0.3.1/subsets/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz > "$exac_vcf"

# Write the extra FILTER header line that bcftools annotate adds below.
# (The original 'echo"##FILTER..."' had no space after echo, so no echo ran.)
printf '%s\n' '##FILTER=<ID=AC_Adj0_Filter,Description="Only low quality genotype calls containing alternate alleles are present">' > header_line.tmp
curl -LO https://raw.githubusercontent.com/mskcc/vcf2maf/v1.6.16/data/known_somatic_sites.bed

# Add the header line, strip annotations according to the --remove list, and
# filter against known_somatic_sites.bed (see the bcftools manual for the
# meaning of the "^" prefixes), writing a bgzip-compressed VCF.
bcftools annotate --header-lines header_line.tmp \
  --remove FMT,^INF/AF,INF/AC,INF/AN,INF/AC_Adj,INF/AN_Adj,INF/AC_AFR,INF/AC_AMR,INF/AC_EAS,INF/AC_FIN,INF/AC_NFE,INF/AC_OTH,INF/AC_SAS,INF/AN_AFR,INF/AN_AMR,INF/AN_EAS,INF/AN_FIN,INF/AN_NFE,INF/AN_OTH,INF/AN_SAS \
  "$exac_vcf" \
  | bcftools filter --targets-file ^known_somatic_sites.bed --output-type z --output "$exac_fixed"

# Replace the downloaded file with the fixed one and index it.
mv -f "$exac_fixed" "$exac_vcf"
tabix -p vcf "$exac_vcf"

# Test run of VEP on the bundled example VCF. ./vep and examples/ are relative
# to the VEP code directory, so change into it first.
cd "$VEP_PATH"
./vep --species homo_sapiens --assembly GRCh37 --offline --no_progress --no_stats \
  --sift b --ccds --uniprot --hgvs --symbol --numbers --domains --gene_phenotype \
  --canonical --protein --biotype --tsl --pubmed --variant_class --shift_hgvs 1 \
  --check_existing --total_length --allele_number --no_escape --xref_refseq \
  --failed 1 --vcf --minimal --flag_pick_allele \
  --pick_order canonical,tsl,biotype,rank,ccds,length \
  --dir "$VEP_DATA" \
  --fasta "$VEP_DATA/homo_sapiens/${VER}_GRCh37/Homo_sapiens.GRCh37.75.dna.primary_assembly.fa.gz" \
  --input_file examples/homo_sapiens_GRCh37.vcf \
  --output_file examples/homo_sapiens_GRCh37.vep.vcf \
  --polyphen b --af --af_1kg --af_esp --regulatory
error:
2. install vcf2maf
# Look up the tarball URL of the first release listed by the GitHub API.
VCF2MAF_URL=$(curl -sL https://api.github.com/repos/mskcc/vcf2maf/releases | grep -m1 tarball_url | cut -d'"' -f4)
export VCF2MAF_URL
# Stop here with a clear message if the lookup returned nothing.
: "${VCF2MAF_URL:?could not determine the vcf2maf tarball URL}"

# Download and unpack the release, then enter the extracted directory;
# each step only runs if the previous one succeeded.
curl -L -o mskcc-vcf2maf.tar.gz "$VCF2MAF_URL" && tar -zxf mskcc-vcf2maf.tar.gz && cd mskcc-vcf2maf-*

# Show the vcf2maf manual to confirm the script runs.
perl vcf2maf.pl --man
# --- end of restored content ---
# NOTE(review): these commands repeat the ExAC header/filter step that appears
# earlier in these notes — possibly a paste artifact; confirm before removing.

# Write the extra FILTER header line that bcftools annotate adds below.
# (The original 'echo"##FILTER..."' had no space after echo, so no echo ran.)
printf '%s\n' '##FILTER=<ID=AC_Adj0_Filter,Description="Only low quality genotype calls containing alternate alleles are present">' > header_line.tmp
curl -LO https://raw.githubusercontent.com/mskcc/vcf2maf/v1.6.16/data/known_somatic_sites.bed

# Add the header line, strip annotations according to the --remove list, and
# filter against known_somatic_sites.bed (see the bcftools manual for the
# meaning of the "^" prefixes), writing a bgzip-compressed VCF.
bcftools annotate --header-lines header_line.tmp \
  --remove FMT,^INF/AF,INF/AC,INF/AN,INF/AC_Adj,INF/AN_Adj,INF/AC_AFR,INF/AC_AMR,INF/AC_EAS,INF/AC_FIN,INF/AC_NFE,INF/AC_OTH,INF/AC_SAS,INF/AN_AFR,INF/AN_AMR,INF/AN_EAS,INF/AN_FIN,INF/AN_NFE,INF/AN_OTH,INF/AN_SAS \
  "$VEP_DATA/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz" \
  | bcftools filter --targets-file ^known_somatic_sites.bed --output-type z --output "$VEP_DATA/ExAC_nonTCGA.r0.3.1.sites.fixed.vcf.gz"