VEP安装指南

# Install system dependencies

sudo apt-get install -y \
  curl rsync tar make perl perl-base tabix

# Set the Perl environment variable: PERL_PATH points at ~/perl5.

PERL_PATH=~/perl5
export PERL_PATH

# Install the required Perl modules into $PERL_PATH with cpanminus.
# The installer script is piped straight into perl, so it is fetched over
# HTTPS rather than plain HTTP.

curl -L https://cpanmin.us \
  | perl - --notest -l "$PERL_PATH" \
    LWP::Simple LWP::Protocol::https \
    Archive::Extract Archive::Tar Archive::Zip \
    CGI DBI Time::HiRes

# Point Perl's module search path at the modules installed above.
export PERL5LIB="$PERL_PATH/lib/perl5:$PERL_PATH/lib/perl5/x86_64-linux"

# Define the VEP installation directory (VEP_PATH) and the VEP data
# directory (VEP_DATA).

VEP_PATH=~/vep
VEP_DATA=~/.vep
export VEP_PATH VEP_DATA

# Download VEP (ensembl-tools release 84)

# -p keeps a re-run from failing on directories that already exist, and &&
# prevents the cd from running when the directories could not be created.
mkdir -p "$VEP_PATH" "$VEP_DATA" && cd "$VEP_PATH"

# -f makes curl fail on an HTTP error instead of saving the error page
# as 84.tar.gz.
curl -fLO https://github.com/Ensembl/ensembl-tools/archive/release/84.tar.gz

# Extract from the variant_effect_predictor member onwards; the --transform
# expression strips the leading directory components from extracted names.
tar -zxf 84.tar.gz --starting-file variant_effect_predictor --transform='s|.*/|./|g'

# Set environment variables: prepend the VEP directory to PERL5LIB and its
# htslib subdirectory to PATH.

PERL5LIB="${VEP_PATH}:${PERL5LIB}"
PATH="${VEP_PATH}/htslib:${PATH}"
export PERL5LIB PATH

# Download the annotation caches (release 84): human GRCh37 and GRCh38,
# and mouse GRCm38. $VEP_DATA is quoted so a path containing spaces is
# passed as a single argument.

rsync -zvh rsync://ftp.ensembl.org/ensembl/pub/release-84/variation/VEP/homo_sapiens_vep_84_GRCh{37,38}.tar.gz "$VEP_DATA"
rsync -zvh rsync://ftp.ensembl.org/ensembl/pub/release-84/variation/VEP/mus_musculus_vep_84_GRCm38.tar.gz "$VEP_DATA"

# Unpack all three archives in a single pass; -i lets tar keep reading past
# the end-of-archive blocks between the concatenated tarballs.
cat "$VEP_DATA"/*_vep_84_GRC{h37,h38,m38}.tar.gz | tar -izxf - -C "$VEP_DATA"

# Install the Ensembl API and the reference FASTAs, then convert the caches.
# Both scripts are run from the current directory. Each entry below is a
# species:assembly pair; all installs run first, then all conversions.

vep_builds=(homo_sapiens:GRCh37 homo_sapiens:GRCh38 mus_musculus:GRCm38)

for build in "${vep_builds[@]}"; do
  # ${build%%:*} is the species, ${build##*:} is the assembly.
  perl INSTALL.pl --AUTO af \
    --SPECIES "${build%%:*}" --ASSEMBLY "${build##*:}" \
    --DESTDIR "$VEP_PATH" --CACHEDIR "$VEP_DATA"
done

for build in "${vep_builds[@]}"; do
  # The cache version is the release number joined to the assembly name.
  perl convert_cache.pl --species "${build%%:*}" --version "84_${build##*:}" --dir "$VEP_DATA"
done

# Download the ExAC r0.3 VCF. The link is hosted by Google and may not be
# reachable; if the download fails, fetch the file from another site.

exac_vcf="$VEP_DATA/ExAC.r0.3.sites.minus_somatic.vcf.gz"

# -f makes curl fail on an HTTP error instead of writing the error page into
# the .vcf.gz; tabix only indexes the file when the download succeeded.
curl -fL -o "$exac_vcf" https://googledrive.com/host/0B6o74flPT8FAYnBJTk9aTF9WVnM \
  && tabix -p vcf "$exac_vcf"

# Installation complete; run a test annotation on the GRCh37 example VCF.
# Path arguments are quoted so a $VEP_DATA containing spaces stays one word.

perl variant_effect_predictor.pl \
  --species homo_sapiens --assembly GRCh37 \
  --offline --no_progress --everything \
  --shift_hgvs 1 --check_existing --check_alleles \
  --total_length --allele_number --no_escape --xref_refseq \
  --dir "$VEP_DATA" \
  --fasta "$VEP_DATA/homo_sapiens/84_GRCh37/Homo_sapiens.GRCh37.75.dna.primary_assembly.fa.gz" \
  --plugin "ExAC,$VEP_DATA/ExAC.r0.3.sites.minus_somatic.vcf.gz" \
  --input_file example_GRCh37.vcf \
  --output_file example_GRCh37.vep.txt

#可以看到下列输出,即注释成功。

- Read existing cache info
- Loaded plugin: ExAC
- Starting...
- Detected format of input file as vcf
- Read 173 variants into buffer
- Checking for existing variations
- Reading transcript data from cache and/or database
- Retrieved 3097 transcripts (0 mem, 3162 cached, 0 DB, 65 duplicates)
- Reading regulatory data from cache and/or database
- Retrieved 14876 regulatory features (0 mem, 14877 cached, 0 DB, 1 duplicates)
- Analyzing chromosome 21
- Analyzing variants
- Analyzing RegulatoryFeatures
- Analyzing MotifFeatures
- Calculating consequences
- Analyzing chromosome 22
- Analyzing variants
- Analyzing RegulatoryFeatures
- Analyzing MotifFeatures
- Calculating consequences
- Processed 173 total variants (29 vars/sec, 29 vars/sec total)
- Wrote stats summary to example_GRCh37.vep.txt_summary.html
- Finished!

 

 

 

 

 

 

 

 

 


















posted @ 2016-06-05 15:35  qinqinyang  阅读(4945)  评论(0编辑  收藏  举报