22、IDP-ASE

IDPASE

https://github.com/bdeonovic/IDPASE.jl

 

Prepare necessary input files

 (1)FASTQ file of your hybrid-Seq data                                ##cat fq1 fq2

cat fq1 fq2 >fq

(2)  PSL alignment file of your hybrid-Seq data                    ##对bam 文件转换成psl格式,要有参考基因组fa和比对文件bam

/share/nas2/genome/biosoft/Python/2.7.8/bin/python /share/nas1/wenyh/develop/tools/Au-public-master/iron/utilities/sam_to_psl.py  -r transcript.fa T16.bam >T16.psl

(3) GPD file in the Extended format (Gene Predictions (Extended)): https://genome.ucsc.edu/FAQ/FAQformat.html#format9

/share/nas1/wenyh/develop/tools/gtfToGenePred transcript.gtf -genePredExt transcript.gpd.tmp                                                                                            #gtf convert to gpd format

awk '{print 0"\t"$0}' transcript.gpd.tmp >transcript.gpd.tmp2 #perl -lane '{print "0\t$_"}' transcript.gpd.tmp |less

/share/nas1/wenyh/develop/pacbio/IDP-ASE/julia/bin/julia /home/wenyh/.julia/v0.4/IDPASE/scripts/convert_gpd.jl transcript.gpd.tmp2 >transcript.gpd.tmp3                                    

(4) VCF file                                                                         ##筛选杂合的vcf文件  

/share/nas1/yangch/script/Ref_Trans/v2.9.1/bin/snp_analysis/v2.0/SNP_Trans_main_Ref.pl          

/share/nas1/yangch/script/Ref_Trans/v2.9.1/bin/snp_analysis/v2.0/SNP_indel_anno/snp_indel_anno.pl     -id  inputdir    -r    ref file   -queue   middle.q  -s xx                  #SNP注释

 

cut -f 1-9,11 ../SNP/SNP_Trans/SNP/final.snp.vcf |grep -vP '\.\/\.' |grep -vP '0\/0'|grep -vP '1\/1'|grep -v '#'|less -S|perl -lane '{print join "\t",@F[0..7],"GT\t0|1"}' |less -S  >Heter.snp.vcf         ##选取杂合SNP

awk '$10!~/1\/1/ && $10!~/\.\/\./{print}'|le >final.snp.anno.vcf1

cut -f 1 |sort |uniq -c | awk '{print $2,$1}'|less -S >Snp.distribution && awk '$2>=10{print }' Snp.distribution |less                                                                                                                         ##染色体SNP分布

le final.snp.anno.vcf1|grep -v '#'|cut -f 1 |sort |uniq -c | awk '{print $2,$1}'|less -S|sort -k 2nr|le >Snp.distribution

 

le 11.Heter.snp.vcf |perl -le 'while(<>){chomp;@F=split;$a=$F[7];$b="\;SNPEFF_GENE_NAME=$F[0]";$c=$a.$b;print (join "\t",@F[0..6],$c,@F[8..9])}'|le                         #加入注释

 

5、

mkdir temp/; mkdir gene_files; mkdir isoform_files; mkdir gene_out; mkdir isoform_out;

 

posted @ 2017-08-18 17:53  风中之铃  阅读(428)  评论(0)  编辑  收藏  举报