【代码速记】GeneMapper 软件数据转换预处理

 use strict;
use warnings;

use Math::Round;
use Scalar::Util qw(looks_like_number);

# Reshape a GeneMapper peak table into a sample-by-locus matrix of peak heights.
#
# Input : aflp1.txt, one peak per line, with the columns
#         Dye/Sample, FileName, Size, Height, Area, DataPoint.
# Output: result-<epoch seconds>/resultData.txt. The first line holds "sample"
#         followed by one "locus_<size>" column per rounded fragment size; each
#         following line holds a sample name and the rounded peak height at
#         every locus (empty cell when the sample has no peak of that size).
#
# Dies when the input cannot be opened or the output directory/file cannot be
# created or written.

# Where the sample code sits inside the FileName column.
my $NAME_OFFSET = 2;
my $NAME_LENGTH = 3;

# Column separator. The script as published splits on four literal spaces;
# a real tab is accepted too, on the assumption that the four spaces are a tab
# that was expanded when the code was pasted -- TODO confirm against the data.
my $SEPARATOR = qr/\t| {4}/;

# input: open the origin file
my $input_file = 'aflp1.txt';
open my $raw_fh, '<', $input_file
    or die "cannot open file $input_file: $!";

# output
my $output_name = 'result-' . time();
mkdir $output_name, 0777
    or die "cannot mkdir $output_name: $!";
my $result_file = "$output_name/resultData.txt";
open my $result_fh, '>', $result_file
    or die "cannot open file $result_file: $!";

my %is_locus;    # rounded fragment size => 1
my %data;        # sample name => { rounded fragment size => rounded height }

# One pass over the peaks is enough: every rounded size is a locus by
# construction, so there is no need to rescan all rows once per locus.
while ( my $line = <$raw_fh> ) {
    $line =~ s/\r?\n\z//;
    my ( $file_name, $raw_size, $raw_height ) =
        ( split $SEPARATOR, $line )[ 1 .. 3 ];

    # A header line, a blank line or a peak without a size would otherwise be
    # rounded to 0 and show up as a bogus "locus_0" column.
    next unless defined($raw_size) && looks_like_number($raw_size);

    my $size   = round($raw_size);
    my $height = ( defined($raw_height) && looks_like_number($raw_height) )
        ? round($raw_height)
        : 0;

    # File names too short to hold a sample code are grouped under ''.
    my $name = ( defined($file_name) && length($file_name) >= $NAME_OFFSET )
        ? substr( $file_name, $NAME_OFFSET, $NAME_LENGTH )
        : '';

    $is_locus{$size} = 1;

    # When a sample has several peaks rounding to the same size, the last
    # one in the file wins.
    $data{$name}{$size} = $height;
}
close $raw_fh;

# Fragment sizes are numbers: a plain sort would order them 100, 1000, 99.
my @loci = sort { $a <=> $b } keys %is_locus;

# Every cell, including the last one of a line, is followed by a tab and the
# file does not end with a newline; this layout is kept so existing consumers
# of resultData.txt see the same columns.
print {$result_fh} "sample\t";
print {$result_fh} "locus_$_\t" for @loci;

# Sorted so that the row order is the same on every run.
for my $name ( sort keys %data ) {
    my $height_at = $data{$name};
    print {$result_fh} "\n$name\t";
    for my $size (@loci) {
        my $cell = $height_at->{$size} // '';
        print {$result_fh} "$cell\t";
    }
}

# Buffered write errors only surface when the handle is closed.
close $result_fh
    or die "cannot write $result_file: $!";

 增加限制酶种类和重复后的脚本

use strict;
use warnings;

use Math::Round;
use Scalar::Util qw(looks_like_number);

# Reshape a GeneMapper peak table into a sample-by-locus matrix of peak
# heights, renaming each sample from two lookup tables.
#
# Input : msap2.txt   one peak per line, with the columns
#                     Dye/Sample, FileName, Size, Height, Area, DataPoint.
#         order.txt   two columns: code found in FileName, replacement text.
#         repeat.txt  two columns: code found in FileName, replacement text.
# Output: result-<epoch seconds>/data_msap2.txt. The first line holds "sample"
#         followed by one "locus_<size>" column per rounded fragment size; each
#         following line holds "<order>_<repeat>" and the rounded peak height
#         at every locus (empty cell when the sample has no peak of that size).
#
# Dies when an input cannot be opened or the output directory/file cannot be
# created or written.

# Where the two codes sit inside the FileName column.
my $ORDER_OFFSET  = 2;
my $ORDER_LENGTH  = 3;
my $REPEAT_OFFSET = 6;
my $REPEAT_LENGTH = 2;

# Column separator. The script as published splits on four literal spaces;
# a real tab is accepted too, on the assumption that the four spaces are a tab
# that was expanded when the code was pasted -- TODO confirm against the data.
my $SEPARATOR = qr/\t| {4}/;

# Load a two-column lookup file into a hash reference (first column => second
# column). Lines that do not provide both columns are ignored.
sub read_lookup {
    my ($file) = @_;

    open my $fh, '<', $file
        or die "cannot open file $file: $!";

    my %value_for;
    while ( my $line = <$fh> ) {
        $line =~ s/\r?\n\z//;    # also drops the \r of CRLF files
        my ( $code, $value ) = split $SEPARATOR, $line;
        next unless defined($code) && length($code) && defined($value);
        $value_for{$code} = $value;
    }
    close $fh;

    return \%value_for;
}

# Return the $length characters of $text starting at $offset, or '' when
# $text is missing or too short to reach $offset.
sub code_at {
    my ( $text, $offset, $length ) = @_;
    return '' unless defined($text) && length($text) >= $offset;
    return substr $text, $offset, $length;
}

my $order_for  = read_lookup('order.txt');
my $repeat_for = read_lookup('repeat.txt');

# input: open the origin file
my $input_file = 'msap2.txt';
open my $raw_fh, '<', $input_file
    or die "cannot open file $input_file: $!";

# output
my $output_name = 'result-' . time();
mkdir $output_name, 0777
    or die "cannot mkdir $output_name: $!";
my $result_file = "$output_name/data_msap2.txt";
open my $result_fh, '>', $result_file
    or die "cannot open file $result_file: $!";

my %is_locus;    # rounded fragment size => 1
my %data;        # sample name => { rounded fragment size => rounded height }

# One pass over the peaks is enough: every rounded size is a locus by
# construction, so there is no need to rescan all rows once per locus.
while ( my $line = <$raw_fh> ) {
    $line =~ s/\r?\n\z//;
    my ( $file_name, $raw_size, $raw_height ) =
        ( split $SEPARATOR, $line )[ 1 .. 3 ];

    # A header line, a blank line or a peak without a size would otherwise be
    # rounded to 0 and show up as a bogus "locus_0" column.
    next unless defined($raw_size) && looks_like_number($raw_size);

    my $size   = round($raw_size);
    my $height = ( defined($raw_height) && looks_like_number($raw_height) )
        ? round($raw_height)
        : 0;

    # Plain hash lookups: the codes are data, not patterns, so they must not
    # be run through s///. A code missing from its table is kept as-is so
    # that unmapped samples stay distinguishable instead of all becoming "_".
    my $order_code  = code_at( $file_name, $ORDER_OFFSET,  $ORDER_LENGTH );
    my $repeat_code = code_at( $file_name, $REPEAT_OFFSET, $REPEAT_LENGTH );
    my $name = ( $order_for->{$order_code} // $order_code ) . '_'
        . ( $repeat_for->{$repeat_code} // $repeat_code );

    $is_locus{$size} = 1;

    # When a sample has several peaks rounding to the same size, the last
    # one in the file wins.
    $data{$name}{$size} = $height;
}
close $raw_fh;

# Fragment sizes are numbers: a plain sort would order them 100, 1000, 99.
my @loci = sort { $a <=> $b } keys %is_locus;

# Every cell, including the last one of a line, is followed by a tab and the
# file does not end with a newline; this layout is kept so existing consumers
# of data_msap2.txt see the same columns.
print {$result_fh} "sample\t";
print {$result_fh} "locus_$_\t" for @loci;

# Sorted so that the row order is the same on every run.
for my $name ( sort keys %data ) {
    my $height_at = $data{$name};
    print {$result_fh} "\n$name\t";
    for my $size (@loci) {
        my $cell = $height_at->{$size} // '';
        print {$result_fh} "$cell\t";
    }
}

# Buffered write errors only surface when the handle is closed.
close $result_fh
    or die "cannot write $result_file: $!";

 

posted @ 2017-12-13 14:41  LeleLiu  阅读(655)  评论(0)  编辑  收藏  举报