生信分析常用脚本(一)
1.计算reads PE长度脚本
01.Cal_PE_Depth.sh
Read_count=`gzip -dc Reads1.fq.gz |wc -l |awk '{print $1/4}'` echo "read pair count : $Read_count" average_depth=`expr $Read_count \* 200 / 6000000 ` echo average depth: $average_depth
2.计算reads覆盖度
02.coverage.R
depth<-(1599999*100*2/6e6) print(depth) coverage<-ppois(40,lambda=depth,lower=F) print(coverage)
3. 计算N50
03.Cal_N50.pl
#!/usr/bin/perl use strict; my $file = shift; open (IN,"$file") or die ("can't open the file!\n"); open (OUT,">scaf_sort_length.txt") or die ("can't create the file!\n"); my $total_len = 0; my $ref_len = 6e6; my $flag = 1; my @count_len; my %scaf_hash; $/=">"; <IN>; while(<IN>){ chomp; my ($name,@seq) = split(/\n/,$_); my $scafseq = join("",@seq); $total_len += length($scafseq); my $ID = (split(/\s+/,$name))[0]; $scaf_hash{$ID}=length($scafseq); } my $count = 0; foreach my $key (sort {$scaf_hash{$b}<=>$scaf_hash{$a}} keys %scaf_hash){ print OUT "$scaf_hash{$key}\n"; push(@count_len,$scaf_hash{$key}); } foreach my $len (@count_len){ $count += $len; if ($count /$total_len >=0.5 && $flag){ print "N50: $len\n"; $flag = 0; } if ($count/$total_len>=0.9){ print "N90: $len\n"; last; } } close IN; close OUT;
4. 长度累积曲线分布图
04.scaf_acclen_graph.R
data<-read.table("scaf_sort_length.txt",header=F) len<-data[,1] head(len) sum<-0 acclen<-numeric(length(len)) for (i in 1:nrow(data)){ sum <-sum + len[i]; acclen[i]<-sum; } head(acclen) pdf("Length.pdf") plot(x=(1:length(acclen)),y=acclen,xlab="accseq ID",ylab="acclen",type="l",col="blue",main="scaSeq accumulate length grap") dev.off()
越努力,越幸运,越是烦躁,越要静心,it's be OK!