【代码速记】GeneMapper 软件导出数据的转换与预处理
# Reshape a GeneMapper peak table into a sample-by-locus matrix of peak heights.
#
# Input : aflp1.txt in the current directory, one peak per line, e.g.
#         Dye/Sample FileName Size Height Area DataPoint
# Output: result-<epoch>/resultData.txt, tab-separated. The first column is the
#         sample name; every other column is "locus_<rounded size>" and holds
#         the rounded peak height (empty when the sample has no such peak).
use strict;
use warnings;

use Math::Round;
use Scalar::Util qw(looks_like_number);

my $input_file = 'aflp1.txt';
open my $raw_fh, '<', $input_file or die "cannot open file:$!";

# Each run writes into its own time-stamped directory.
my $output_name = 'result-' . time();
mkdir $output_name, 0777 or die "cannot mkdir";

my $result_file = "$output_name/resultData.txt";
open my $result_fh, '>', $result_file or die "cannot open file:$!";

my %data;         # $data{sample}{"locus_<size>"} = rounded peak height
my %seen_size;    # every rounded fragment size found in the input

# A single pass over the peaks is enough: each row belongs to exactly one
# locus (its own rounded size), so no loci x rows double loop is needed.
# When a sample has several peaks in one bin, the last one read wins.
while (my $line = <$raw_fh>) {
    $line =~ s/\r?\n\z//;

    # NOTE(review): fields are split on a single space, as in the original
    # script; GeneMapper exports are often tab-delimited -- confirm.
    my @fields = split / /, $line;
    my ($file_name, $size, $height) = @fields[1 .. 3];

    # Skip the header row and peaks without a numeric size; otherwise they
    # would all be binned into a bogus "locus_0" column.
    next unless looks_like_number($size);

    # The sample name is characters 3-5 of the file-name field.
    $file_name //= '';
    my $name = length($file_name) >= 2 ? substr($file_name, 2, 3) : '';

    my $locus = round($size);
    $seen_size{$locus} = 1;
    $data{$name}{"locus_$locus"}
        = looks_like_number($height) ? round($height) : 0;
}
close $raw_fh;

# Fragment sizes are numbers: sort numerically so 99 comes before 100.
my @loci = sort { $a <=> $b } keys %seen_size;

# Header line (every cell, including the last, is followed by a tab).
print {$result_fh} "sample\t";
print {$result_fh} "locus_$_\t" for @loci;

# One line per sample, in sorted order so repeated runs give identical files.
for my $sample (sort keys %data) {
    my $height_of = $data{$sample};
    print {$result_fh} "\n$sample\t";
    for my $locus (@loci) {
        my $value = $height_of->{"locus_$locus"} // '';
        print {$result_fh} "$value\t";
    }
}

# Buffered write errors only surface at close time.
close $result_fh or die "cannot close $result_file: $!";
增加限制酶种类和重复后的脚本
# Reshape a GeneMapper peak table into a sample-by-locus matrix of peak
# heights, renaming samples through two lookup files.
#
# Input : msap2.txt   one peak per line, e.g.
#                     Dye/Sample FileName Size Height Area DataPoint
#         order.txt   "<code> <label>" per line, applied to characters 3-5 of
#                     the file-name field
#         repeat.txt  "<code> <label>" per line, applied to characters 7-8 of
#                     the file-name field
#         NOTE(review): presumably one lookup names the restriction enzyme /
#         sample and the other the replicate -- confirm against the data.
# Output: result-<epoch>/data_msap2.txt, tab-separated. The first column is
#         "<order label>_<repeat label>"; every other column is
#         "locus_<rounded size>" and holds the rounded peak height (empty
#         when the sample has no such peak).
use strict;
use warnings;

use Math::Round;
use Scalar::Util qw(looks_like_number);

# Read a two-column, space-separated lookup file and return a hash reference
# mapping the first field (code) to the second field (label).
# Dies if the file cannot be opened; blank lines are ignored.
sub read_lookup {
    my ($path) = @_;

    open my $fh, '<', $path or die "cannot open file:$!";
    my %label_for;
    while (my $line = <$fh>) {
        $line =~ s/\r?\n\z//;
        my ($code, $label) = split / /, $line;
        next unless defined $code && length $code;
        $label_for{$code} = $label;
    }
    close $fh;

    return \%label_for;
}

# Open and read every input before creating the output directory, so a
# missing input file does not leave an empty result directory behind.
my $input_file = 'msap2.txt';
open my $raw_fh, '<', $input_file or die "cannot open file:$!";
my $order_for  = read_lookup('order.txt');
my $repeat_for = read_lookup('repeat.txt');

# Each run writes into its own time-stamped directory.
my $output_name = 'result-' . time();
mkdir $output_name, 0777 or die "cannot mkdir";

my $result_file = "$output_name/data_msap2.txt";
open my $result_fh, '>', $result_file or die "cannot open file:$!";

my %data;         # $data{sample}{"locus_<size>"} = rounded peak height
my %seen_size;    # every rounded fragment size found in the input

# A single pass over the peaks is enough: each row belongs to exactly one
# locus (its own rounded size), so no loci x rows double loop is needed.
# When a sample has several peaks in one bin, the last one read wins.
while (my $line = <$raw_fh>) {
    $line =~ s/\r?\n\z//;

    # NOTE(review): fields are split on a single space, as in the original
    # script; GeneMapper exports are often tab-delimited -- confirm.
    my @fields = split / /, $line;
    my ($file_name, $size, $height) = @fields[1 .. 3];

    # Skip the header row and peaks without a numeric size; otherwise they
    # would all be binned into a bogus "locus_0" column.
    next unless looks_like_number($size);

    $file_name //= '';
    my $order_code  = length($file_name) >= 2 ? substr($file_name, 2, 3) : '';
    my $repeat_code = length($file_name) >= 6 ? substr($file_name, 6, 2) : '';

    # Plain hash lookups instead of s/$code/$label/, which treated the code
    # as a regex. A code missing from its lookup file is kept as-is, so
    # unmapped samples stay distinguishable instead of collapsing together.
    my $order_label  = $order_for->{$order_code}   // $order_code;
    my $repeat_label = $repeat_for->{$repeat_code} // $repeat_code;
    my $name         = $order_label . '_' . $repeat_label;

    my $locus = round($size);
    $seen_size{$locus} = 1;
    $data{$name}{"locus_$locus"}
        = looks_like_number($height) ? round($height) : 0;
}
close $raw_fh;

# Fragment sizes are numbers: sort numerically so 99 comes before 100.
my @loci = sort { $a <=> $b } keys %seen_size;

# Header line (every cell, including the last, is followed by a tab).
print {$result_fh} "sample\t";
print {$result_fh} "locus_$_\t" for @loci;

# One line per sample, in sorted order so repeated runs give identical files.
for my $sample (sort keys %data) {
    my $height_of = $data{$sample};
    print {$result_fh} "\n$sample\t";
    for my $locus (@loci) {
        my $value = $height_of->{"locus_$locus"} // '';
        print {$result_fh} "$value\t";
    }
}

# Buffered write errors only surface at close time.
close $result_fh or die "cannot close $result_file: $!";