Perl FASTA文件拆分合并

1、合并一代测序的 .seq 纯文本文件并转换为 FASTA 格式文件

复制代码
use strict;
use warnings;

# Merge every *.seq plain-text read in the current directory into a single
# FASTA file, result.fst.  Each input becomes one record whose header line
# is ">" followed by the input file name.
my $out_name = 'result.fst';

opendir(my $dir_handle, './') or die "can't open the directory: $!";
# Anchor the extension so names such as "x.seq.bak" are not picked up, keep
# plain files only, and sort because readdir returns names in no fixed order.
my @filelist = sort grep { /\.seq\z/ && -f $_ } readdir $dir_handle;
closedir($dir_handle);

open(my $out, '>', $out_name) or die "cannot create $out_name: $!";

foreach my $file (@filelist) {
    open(my $in, '<', $file) or die "cannot open $file: $!";
    print {$out} ">$file\n";

    # Stream line by line instead of slurping, remembering the last line so
    # we can tell whether the input ended with a newline.
    my $last_line;
    while (my $line = <$in>) {
        print {$out} $line;
        $last_line = $line;
    }
    close($in);

    # A read whose final line has no newline would otherwise glue the next
    # ">" header onto its sequence.
    print {$out} "\n" if defined $last_line && $last_line !~ /\n\z/;
}

# Buffered write errors (e.g. a full disk) only surface at close.
close($out) or die "cannot finish writing $out_name: $!";
复制代码

2、分别合并 T、R 两个文件夹下的纯文本文件

复制代码
use strict;
use warnings;

# Concatenate every plain file in ./T/ into T.fas and every plain file in
# ./R/ into R.fas.

# Return the sorted names of the plain files directly inside $dir.
# Dies if the directory cannot be opened.
sub _plain_files_in {
    my ($dir) = @_;

    opendir(my $dh, "./$dir/") or die "cannot open this dir $dir: $!";
    # -f drops ".", ".." and any subdirectory, which cannot be read as text;
    # sort because readdir returns names in no fixed order.
    my @names = sort grep { -f "$dir/$_" } readdir $dh;
    closedir($dh);

    return @names;
}

# Append the contents of each file named in $names (relative to $dir) to a
# freshly created $out_name.  Dies on any open/close failure.
sub _concatenate {
    my ($dir, $names, $out_name) = @_;

    open(my $out, '>', $out_name) or die "cannot create $out_name: $!";

    for my $name (@{$names}) {
        open(my $in, '<', "$dir/$name") or die "cannot open $dir/$name: $!";

        my $last_line;
        while (my $line = <$in>) {
            print {$out} $line;
            $last_line = $line;
        }
        close($in);

        # Keep files separated even when one lacks a trailing newline, so the
        # first line of the next file is not fused onto the previous one.
        print {$out} "\n" if defined $last_line && $last_line !~ /\n\z/;
    }

    # Buffered write errors only surface at close.
    close($out) or die "cannot finish writing $out_name: $!";
    return;
}

# List both directories before writing anything, so a missing directory is
# reported before either output file is produced.
my @t = _plain_files_in('T');
my @r = _plain_files_in('R');

_concatenate('T', \@t, 'T.fas');
_concatenate('R', \@r, 'R.fas');
复制代码

3、批量序列拼接:按指定的 T、R 序列编号,将两段序列拼接为一条单倍型序列

use strict;
use warnings;
 
# Open the two input FASTA files and the output file, then read both inputs
# fully into memory (one array element per line, newlines kept).
# The handles stay as the barewords T, R and H because combine() and the
# closing code at the end of the script refer to them by those names.
open(T, '<', 'T_a.fas')        or die "cannot open T_a.fas: $!";
open(R, '<', 'R_a.fas')        or die "cannot open R_a.fas: $!";
open(H, '>', 'Haplotypes.fas') or die "cannot create Haplotypes.fas: $!";
my @t = <T>;
my @r = <R>;
# Write one combined record to the H output handle.
#
# Positional arguments:
#   1. name written after ">" in the new record's header line
#   2. ID of the record whose sequence lines are copied from @t
#   3. ID of the record whose sequence lines are copied from @r
#
# The sequence lines of the @t record are written first, followed by those
# of the @r record.  A warning is emitted when either ID is not found, since
# the resulting record would otherwise be silently incomplete.
sub combine {
    my ($name, $t_id, $r_id) = @_;

    print H ">$name\n";

    _copy_record(\@t, $t_id)
        or warn "combine: record '$t_id' not found (building '$name')\n";
    _copy_record(\@r, $r_id)
        or warn "combine: record '$r_id' not found (building '$name')\n";

    return;
}

# Print to H every line that follows a ">$id" header in @$lines, up to the
# next header.  Returns true if at least one matching header was seen.
sub _copy_record {
    my ($lines, $id) = @_;

    # \Q...\E keeps the ID literal; the trailing alternative stops "T1" from
    # matching "T10" and also accepts a header that ends the file.
    my $wanted_header = qr/>\Q$id\E(?:\s|\z)/;

    my $found             = 0;
    my $copying           = 0;
    my $ends_with_newline = 1;

    for my $line (@{$lines}) {
        if ($line =~ $wanted_header) {
            $found   = 1;
            $copying = 1;
        }
        elsif ($line =~ />/) {
            # Any other header ends the record being copied.
            $copying = 0;
        }
        elsif ($copying) {
            print H $line;
            $ends_with_newline = $line =~ /\n\z/ ? 1 : 0;
        }
    }

    # If the copied record was the last one in a file without a trailing
    # newline, terminate the line so the next header starts on its own line.
    print H "\n" if !$ends_with_newline;

    return $found;
}
  
# Call combine() once per haplotype, in this order.
# Each row is: [ output record name, ID in T_a.fas, ID in R_a.fas ].
my @haplotypes = (
    [ 'O',  'T7',  'R4'  ],
    [ 'P',  'T1',  'R5'  ],
    [ 'M',  'T1',  'R4'  ],
    [ 'L',  'T5',  'R4'  ],
    [ 'U',  'T1',  'R9'  ],
    [ 'I',  'T1',  'R3'  ],
    [ 'AT', 'T25', 'R5'  ],
    [ 'AS', 'T1',  'R26' ],
    [ 'BF', 'T1',  'R36' ],
    [ 'BG', 'T36', 'R4'  ],
    [ 'BH', 'T1',  'R37' ],
    [ 'BI', 'T37', 'R5'  ],
    [ 'BJ', 'T38', 'R5'  ],
);

for my $haplotype (@haplotypes) {
    combine(@{$haplotype});
}

close(T);
close(R);
# Buffered write errors (e.g. a full disk) only surface at close, so the
# output handle is the one whose close must be checked.
close(H) or die "cannot finish writing Haplotypes.fas: $!";

 

posted @   LeleLiu  阅读(1782)  评论(0编辑  收藏  举报
编辑推荐:
· DeepSeek 解答了困扰我五年的技术问题
· 为什么说在企业级应用开发中,后端往往是效率杀手?
· 用 C# 插值字符串处理器写一个 sscanf
· Java 中堆内存和栈内存上的数据分布和特点
· 开发中对象命名的一点思考
阅读排行:
· 为什么说在企业级应用开发中,后端往往是效率杀手?
· DeepSeek 解答了困扰我五年的技术问题。时代确实变了!
· 本地部署DeepSeek后,没有好看的交互界面怎么行!
· 趁着过年的时候手搓了一个低代码框架
· 推荐一个DeepSeek 大模型的免费 API 项目!兼容OpenAI接口!
点击右上角即可分享
微信分享提示