【WDL】6. 实践:fastp质控测序数据

功能

输入(Pair End)测序序列文件,利用fastp进行QC和质量过滤(包括质量QC统计,Adapter去除,序列trimming,过滤等),生成Clean Reads文件,以及HTML和JSON两种格式的报告。

input.json

{
    "fastp.pair_end.adapter_sequence": "auto",
    "fastp.pair_end.adapter_sequence_r2": "auto",
    "fastp.pair_end.cpu": 2,
    "fastp.pair_end.disks": "local-disk 50 cloud_ssd",
    "fastp.pair_end.fix_mgi_id": false,
    "fastp.pair_end.html": "fastp.html",
    "fastp.pair_end.in1": "/path/to/reads_1.fastq",
    "fastp.pair_end.in2": "/path/to/reads_2.fastq",
    "fastp.pair_end.json": "fastp.json",
    "fastp.pair_end.memory": "4G",
    "fastp.pair_end.phred64": false,
    "fastp.pair_end.reads_to_process": 0,
    "fastp.pair_end.report_title": "'fastp report'"
}

WDL

version 1.0

workflow fastp {

    # Single-step workflow: run the paired-end fastp task. No inputs are wired
    # in the call, so the task's inputs are supplied from outside the workflow
    # (e.g. an inputs JSON using "fastp.pair_end.<name>" keys).
    call pair_end

    # Re-export the task's results as workflow-level outputs.
    output {
        # QC reports
        File html_report = pair_end.html_report
        File json_report = pair_end.json_report

        # Filtered reads (R1 / R2)
        File clean_out1 = pair_end.out1
        File clean_out2 = pair_end.out2
    }

}

# Run fastp on one pair of FASTQ files (paired-end mode).
#
# Inputs:
#   in1, in2             read-1 / read-2 FASTQ files
#   phred64              emit --phred64 when true
#   fix_mgi_id           emit --fix_mgi_id when true
#   adapter_sequence     value for --adapter_sequence; unset or "" omits the flag
#   adapter_sequence_r2  value for --adapter_sequence_r2; unset or "" omits the flag
#   reads_to_process     value for --reads_to_process; unset omits the flag
#   json, html           file names of the JSON / HTML reports
#   report_title         value for --report_title (default carries its own shell quotes)
#   cpu, memory, disks   passed straight through to the runtime section
#
# Outputs:
#   out1, out2           filtered reads, named "clean-" + basename of the input
#   json_report          the JSON report
#   html_report          the HTML report
task pair_end {

    input {

        # I/O options
        File in1
        File in2

        Boolean? phred64 = false
        Boolean? fix_mgi_id = false

        String? adapter_sequence
        String? adapter_sequence_r2

        Int? reads_to_process # specify how many reads/pairs to be processed. Default 0 means process all reads.

        # reporting options
        String json = "fastp.json"
        String html = "fastp.html"
        String report_title = "\'fastp report\'"

        # execution environment
        Int cpu = 2
        String memory = "4G"
        String disks = "local-disk 50 cloud_ssd"

    }

    # Output files are written to the working directory under a "clean-" prefix.
    String out1_name = "clean-" + basename(in1)
    String out2_name = "clean-" + basename(in2)

    # Treat an empty adapter string the same as "not provided", so the flag is
    # never emitted without a value (which would make the next token on the
    # command line look like its value).
    String adapter_r1 = select_first([adapter_sequence, ""])
    String adapter_r2 = select_first([adapter_sequence_r2, ""])
    String adapter_r1_opt = if adapter_r1 != "" then "--adapter_sequence " + adapter_r1 else ""
    String adapter_r2_opt = if adapter_r2 != "" then "--adapter_sequence_r2 " + adapter_r2 else ""

    command <<<

        # Required I/O and reporting arguments come first, then the optional
        # flags. The optional placeholders expand to an empty string when the
        # input is unset/false, so the flag simply disappears; this pattern is
        # worth reusing for optional parameters.
        #
        # Keep this a single continued command: no blank or comment lines may
        # appear between the backslash-terminated lines, otherwise the shell
        # ends the fastp command early and the remaining flags are not passed.
        /opt/conda/bin/fastp \
        --in1 ~{in1} \
        --in2 ~{in2} \
        --out1 ~{out1_name} \
        --out2 ~{out2_name} \
        --json ~{json} \
        --html ~{html} \
        --report_title ~{report_title} \
        ~{ true="--phred64 " false="" phred64 } \
        ~{ "--reads_to_process " + reads_to_process } \
        ~{ true="--fix_mgi_id " false="" fix_mgi_id } \
        ~{adapter_r1_opt} \
        ~{adapter_r2_opt}

    >>>

    runtime {
        cpu: cpu
        memory: memory
        disks: disks
        docker: "fastp:v0.20.1_cv1"
    }

    output {
        File out1 = out1_name
        File out2 = out2_name
        File json_report = json
        File html_report = html
    }

}

Referenced from aliyun

posted @ 2022-05-14 17:02  生物信息与育种  阅读(396)  评论(0)  编辑  收藏  举报