linux shell实现 GWAS显著性区域的合并

 

001、

root@DESKTOP-1N42TVH:/home/test5# ls
record.sh  region.bed
root@DESKTOP-1N42TVH:/home/test5# wc -l region.bed     ## 测试数据
1058 region.bed
root@DESKTOP-1N42TVH:/home/test5# head -n 5 region.bed    ## 测试数据
Chr1    1560051 1570051 3.6112E-10
Chr1    1570053 1580053 3.6112E-10
Chr1    5178621 5188621 5.6283E-8
Chr1    5178672 5188672 4.6785E-8
Chr1    5178703 5188703 6.9055E-8
root@DESKTOP-1N42TVH:/home/test5# cat record.sh     ## 脚本
#!/bin/bash
##step1
awk '{OFS = "\t"; print $1, $2, $3}' $1  | sort -k 1V,1V -k 2n,2n > region2.bed
##step2
awk '{print $1}' region2.bed | uniq | while read i; do grep -w "^$i" region2.bed | awk 'BEGIN {tmp = 1000000000} {OFS = "\t"; print $0, $2 - tmp; tmp = $3}' | awk 'BEGIN {tmp = 1}{OFS = "\t"; if($4 <= 0) {print $0, tmp} else {tmp++; print $0, tmp}}' > $i.txt; done
## step3
cut -f 1 region2.bed | uniq | while read i; do awk '{print $NF}' $i.txt | uniq | while read j; do awk -v a=$j '$5 == a' $i.txt | awk '{if(NR == 1) {min = $2; max = $3} else {if($2 < min) {min = $2}; if($3 > max) {max = $3}}} END {OFS = "\t"; print $1, min, max}' >> $i.result ; done; done
## step4
cut -f 1 region2.bed | uniq | while read i; do cat $i.result >> finalresult.txt; rm -f $i.txt $i.result ; done
rm -f region2.bed
root@DESKTOP-1N42TVH:/home/test5# bash record.sh region.bed    ## 执行命令
root@DESKTOP-1N42TVH:/home/test5# ls
finalresult.txt  record.sh  region.bed
root@DESKTOP-1N42TVH:/home/test5# head finalresult.txt   ## 合并结果文件
Chr1    1560051 1570051
Chr1    1570053 1580053
Chr1    5178621 5200653
Chr1    6893986 6903986
Chr1    6903988 6913988
Chr1    7032660 7042660
Chr1    7042662 7052662
Chr1    8522115 8532115
Chr1    8532117 8542117
Chr1    25624455        25634455
root@DESKTOP-1N42TVH:/home/test5# ls
finalresult.txt  record.sh  region.bed
root@DESKTOP-1N42TVH:/home/test5# wc -l finalresult.txt
218 finalresult.txt
root@DESKTOP-1N42TVH:/home/test5# sort finalresult.txt | md5sum   ## 排序并生成MD5
0320c34c92b2465d8da93fb939cd7cb2  -

 

002、bedtools软件验证

root@DESKTOP-1N42TVH:/home/test5# ls
finalresult.txt  record.sh  region.bed
root@DESKTOP-1N42TVH:/home/test5# bedtools merge -i region.bed > bedtools_reuslt.txt  ## bedtools合并
root@DESKTOP-1N42TVH:/home/test5# ls
bedtools_reuslt.txt  finalresult.txt  record.sh  region.bed
root@DESKTOP-1N42TVH:/home/test5# head bedtools_reuslt.txt     ## 结果文件
Chr1    1560051 1570051
Chr1    1570053 1580053
Chr1    5178621 5200653
Chr1    6893986 6903986
Chr1    6903988 6913988
Chr1    7032660 7042660
Chr1    7042662 7052662
Chr1    8522115 8532115
Chr1    8532117 8542117
Chr1    25624455        25634455
root@DESKTOP-1N42TVH:/home/test5# wc -l bedtools_reuslt.txt
218 bedtools_reuslt.txt
root@DESKTOP-1N42TVH:/home/test5# sort bedtools_reuslt.txt | md5sum   ## 排序并生成MD5
0320c34c92b2465d8da93fb939cd7cb2  -

 

posted @ 2022-07-19 12:51  小鲨鱼2018  阅读(85)  评论(0编辑  收藏  举报