linux shell实现 GWAS显著性区域的合并

 

001、

复制代码
root@DESKTOP-1N42TVH:/home/test5# ls
record.sh  region.bed
root@DESKTOP-1N42TVH:/home/test5# wc -l region.bed     ## 测试数据
1058 region.bed
root@DESKTOP-1N42TVH:/home/test5# head -n 5 region.bed    ## 测试数据
Chr1    1560051 1570051 3.6112E-10
Chr1    1570053 1580053 3.6112E-10
Chr1    5178621 5188621 5.6283E-8
Chr1    5178672 5188672 4.6785E-8
Chr1    5178703 5188703 6.9055E-8
root@DESKTOP-1N42TVH:/home/test5# cat record.sh     ## 脚本
#!/bin/bash
##step1
awk '{OFS = "\t"; print $1, $2, $3}' $1  | sort -k 1V,1V -k 2n,2n > region2.bed
##step2
awk '{print $1}' region2.bed | uniq | while read i; do grep -w "^$i" region2.bed | awk 'BEGIN {tmp = 1000000000} {OFS = "\t"; print $0, $2 - tmp; tmp = $3}' | awk 'BEGIN {tmp = 1}{OFS = "\t"; if($4 <= 0) {print $0, tmp} else {tmp++; print $0, tmp}}' > $i.txt; done
## step3
cut -f 1 region2.bed | uniq | while read i; do awk '{print $NF}' $i.txt | uniq | while read j; do awk -v a=$j '$5 == a' $i.txt | awk '{if(NR == 1) {min = $2; max = $3} else {if($2 < min) {min = $2}; if($3 > max) {max = $3}}} END {OFS = "\t"; print $1, min, max}' >> $i.result ; done; done
## step4
cut -f 1 region2.bed | uniq | while read i; do cat $i.result >> finalresult.txt; rm -f $i.txt $i.result ; done
rm -f region2.bed
root@DESKTOP-1N42TVH:/home/test5# bash record.sh region.bed    ## 执行命令
root@DESKTOP-1N42TVH:/home/test5# ls
finalresult.txt  record.sh  region.bed
root@DESKTOP-1N42TVH:/home/test5# head finalresult.txt   ## 合并结果文件
Chr1    1560051 1570051
Chr1    1570053 1580053
Chr1    5178621 5200653
Chr1    6893986 6903986
Chr1    6903988 6913988
Chr1    7032660 7042660
Chr1    7042662 7052662
Chr1    8522115 8532115
Chr1    8532117 8542117
Chr1    25624455        25634455
复制代码
root@DESKTOP-1N42TVH:/home/test5# ls
finalresult.txt  record.sh  region.bed
root@DESKTOP-1N42TVH:/home/test5# wc -l finalresult.txt
218 finalresult.txt
root@DESKTOP-1N42TVH:/home/test5# sort finalresult.txt | md5sum   ## 排序并生成MD5
0320c34c92b2465d8da93fb939cd7cb2  -

 

002、bedtools软件验证

复制代码
root@DESKTOP-1N42TVH:/home/test5# ls
finalresult.txt  record.sh  region.bed
root@DESKTOP-1N42TVH:/home/test5# bedtools merge -i region.bed > bedtools_reuslt.txt  ## bedtools合并
root@DESKTOP-1N42TVH:/home/test5# ls
bedtools_reuslt.txt  finalresult.txt  record.sh  region.bed
root@DESKTOP-1N42TVH:/home/test5# head bedtools_reuslt.txt     ## 结果文件
Chr1    1560051 1570051
Chr1    1570053 1580053
Chr1    5178621 5200653
Chr1    6893986 6903986
Chr1    6903988 6913988
Chr1    7032660 7042660
Chr1    7042662 7052662
Chr1    8522115 8532115
Chr1    8532117 8542117
Chr1    25624455        25634455
root@DESKTOP-1N42TVH:/home/test5# wc -l bedtools_reuslt.txt
218 bedtools_reuslt.txt
root@DESKTOP-1N42TVH:/home/test5# sort bedtools_reuslt.txt | md5sum   ## 排序并生成MD5
0320c34c92b2465d8da93fb939cd7cb2  -
复制代码

 

posted @   小鲨鱼2018  阅读(91)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 单元测试从入门到精通
· 上周热点回顾(3.3-3.9)
· winform 绘制太阳,地球,月球 运作规律
历史上的今天:
2021-07-19 c语言中打印浮点数
2021-07-19 linux系统中diff命令
点击右上角即可分享
微信分享提示