linux shell实现 GWAS显著性区域的合并
001、
root@DESKTOP-1N42TVH:/home/test5# ls record.sh region.bed root@DESKTOP-1N42TVH:/home/test5# wc -l region.bed ## 测试数据 1058 region.bed root@DESKTOP-1N42TVH:/home/test5# head -n 5 region.bed ## 测试数据 Chr1 1560051 1570051 3.6112E-10 Chr1 1570053 1580053 3.6112E-10 Chr1 5178621 5188621 5.6283E-8 Chr1 5178672 5188672 4.6785E-8 Chr1 5178703 5188703 6.9055E-8 root@DESKTOP-1N42TVH:/home/test5# cat record.sh ## 脚本 #!/bin/bash ##step1 awk '{OFS = "\t"; print $1, $2, $3}' $1 | sort -k 1V,1V -k 2n,2n > region2.bed ##step2 awk '{print $1}' region2.bed | uniq | while read i; do grep -w "^$i" region2.bed | awk 'BEGIN {tmp = 1000000000} {OFS = "\t"; print $0, $2 - tmp; tmp = $3}' | awk 'BEGIN {tmp = 1}{OFS = "\t"; if($4 <= 0) {print $0, tmp} else {tmp++; print $0, tmp}}' > $i.txt; done ## step3 cut -f 1 region2.bed | uniq | while read i; do awk '{print $NF}' $i.txt | uniq | while read j; do awk -v a=$j '$5 == a' $i.txt | awk '{if(NR == 1) {min = $2; max = $3} else {if($2 < min) {min = $2}; if($3 > max) {max = $3}}} END {OFS = "\t"; print $1, min, max}' >> $i.result ; done; done ## step4 cut -f 1 region2.bed | uniq | while read i; do cat $i.result >> finalresult.txt; rm -f $i.txt $i.result ; done rm -f region2.bed root@DESKTOP-1N42TVH:/home/test5# bash record.sh region.bed ## 执行命令 root@DESKTOP-1N42TVH:/home/test5# ls finalresult.txt record.sh region.bed root@DESKTOP-1N42TVH:/home/test5# head finalresult.txt ## 合并结果文件 Chr1 1560051 1570051 Chr1 1570053 1580053 Chr1 5178621 5200653 Chr1 6893986 6903986 Chr1 6903988 6913988 Chr1 7032660 7042660 Chr1 7042662 7052662 Chr1 8522115 8532115 Chr1 8532117 8542117 Chr1 25624455 25634455
root@DESKTOP-1N42TVH:/home/test5# ls finalresult.txt record.sh region.bed root@DESKTOP-1N42TVH:/home/test5# wc -l finalresult.txt 218 finalresult.txt root@DESKTOP-1N42TVH:/home/test5# sort finalresult.txt | md5sum ## 排序并生成MD5 0320c34c92b2465d8da93fb939cd7cb2 -
002、bedtools软件验证
root@DESKTOP-1N42TVH:/home/test5# ls finalresult.txt record.sh region.bed root@DESKTOP-1N42TVH:/home/test5# bedtools merge -i region.bed > bedtools_reuslt.txt ## bedtools合并 root@DESKTOP-1N42TVH:/home/test5# ls bedtools_reuslt.txt finalresult.txt record.sh region.bed root@DESKTOP-1N42TVH:/home/test5# head bedtools_reuslt.txt ## 结果文件 Chr1 1560051 1570051 Chr1 1570053 1580053 Chr1 5178621 5200653 Chr1 6893986 6903986 Chr1 6903988 6913988 Chr1 7032660 7042660 Chr1 7042662 7052662 Chr1 8522115 8532115 Chr1 8532117 8542117 Chr1 25624455 25634455 root@DESKTOP-1N42TVH:/home/test5# wc -l bedtools_reuslt.txt 218 bedtools_reuslt.txt root@DESKTOP-1N42TVH:/home/test5# sort bedtools_reuslt.txt | md5sum ## 排序并生成MD5 0320c34c92b2465d8da93fb939cd7cb2 -