GWAS分析中 显著性区域的基因注释

 

001、

复制代码
root@DESKTOP-1N42TVH:/home/test6/test# ls
all.gff3  region_merged.bed
root@DESKTOP-1N42TVH:/home/test6/test# grep -v "^#" all.gff3 | awk '$3 == "gene"' > gene.gff3 ## 提取注释文件gene信息
root@DESKTOP-1N42TVH:/home/test6/test# ls
all.gff3  gene.gff3  region_merged.bed
root@DESKTOP-1N42TVH:/home/test6/test# head -n 2 gene.gff3   ## 注释文件格式
Chr1    MSU_osa1r7      gene    2903    10817   .       +       .       ID=LOC_Os01g01010;Name=LOC_Os01g01010;Note=TBC%20domain%20containing%20protein%2C%20expressed
Chr1    MSU_osa1r7      gene    11218   12435   .       +       .       ID=LOC_Os01g01019;Name=LOC_Os01g01019;Note=expressed%20protein
root@DESKTOP-1N42TVH:/home/test6/test# head -n 2 region_merged.bed   ## 显著性区域格式
Chr1    1560051 1570051
Chr1    1570053 1580053
root@DESKTOP-1N42TVH:/home/test6/test# cat region_merged.bed | while read {i,j,k}; do awk -v a=$i -v b=$j -v c=$k '{if($1 == a && $4 >= b && $4 <= c && $5 >= c || $1 == a && $4 <= b && $5 >= c || $1 == a && $4 <= b && $5 >= b && $5 <= c || $1 == a && $4 >= b && $5 <= c) {print $0}}' gene.gff3 >> result.txt; done
root@DESKTOP-1N42TVH:/home/test6/test# ls
all.gff3  gene.gff3  region_merged.bed  result.txt
root@DESKTOP-1N42TVH:/home/test6/test# wc -l result.txt
611 result.txt
root@DESKTOP-1N42TVH:/home/test6/test# sort -u result.txt > result2.txt   ## 去重复
root@DESKTOP-1N42TVH:/home/test6/test# wc -l result2.txt
554 result2.txt
root@DESKTOP-1N42TVH:/home/test6/test# head -n 3 result2.txt
Chr1    MSU_osa1r7      gene    1562612 1566554 .       -       .       ID=LOC_Os01g03740;Name=LOC_Os01g03740;Note=nuclease%20PA3%2C%20putative%2C%20expressed
Chr1    MSU_osa1r7      gene    1566138 1569472 .       +       .       ID=LOC_Os01g03730;Name=LOC_Os01g03730;Note=nuclease%20PA3%2C%20putative%2C%20expressed
Chr1    MSU_osa1r7      gene    1569585 1570939 .       -       .       ID=LOC_Os01g03750;Name=LOC_Os01g03750;Note=expressed%20protein
复制代码

 

002、bedtools验证

复制代码
root@DESKTOP-1N42TVH:/home/test6/test# ls
all.gff3  gene.gff3  region_merged.bed  result.txt  result2.txt       ## bedtools软件进行注释
root@DESKTOP-1N42TVH:/home/test6/test# bedtools  intersect -wo -nonamecheck -a region_merged.bed -b gene.gff3 | awk -F "\t"  '{print $4"\t"$7"\t"$8"\t"$10"\t"$12}' | sort -u  > bedtoolsresult.txt
root@DESKTOP-1N42TVH:/home/test6/test# ls
all.gff3  bedtoolsresult.txt  gene.gff3  region_merged.bed  result.txt  result2.txt
root@DESKTOP-1N42TVH:/home/test6/test# wc -l bedtoolsresult.txt
554 bedtoolsresult.txt
root@DESKTOP-1N42TVH:/home/test6/test# head -n 2 bedtoolsresult.txt
Chr1    1562612 1566554 -       ID=LOC_Os01g03740;Name=LOC_Os01g03740;Note=nuclease%20PA3%2C%20putative%2C%20expressed
Chr1    1566138 1569472 +       ID=LOC_Os01g03730;Name=LOC_Os01g03730;Note=nuclease%20PA3%2C%20putative%2C%20expressed
root@DESKTOP-1N42TVH:/home/test6/test# head -n 2 result2.txt
Chr1    MSU_osa1r7      gene    1562612 1566554 .       -       .       ID=LOC_Os01g03740;Name=LOC_Os01g03740;Note=nuclease%20PA3%2C%20putative%2C%20expressed
Chr1    MSU_osa1r7      gene    1566138 1569472 .       +       .       ID=LOC_Os01g03730;Name=LOC_Os01g03730;Note=nuclease%20PA3%2C%20putative%2C%20expressed
root@DESKTOP-1N42TVH:/home/test6/test# awk '{OFS = "\t"; print $1, $4, $5, $7, $9}' result2.txt | head -n 2 ## 修改为bedtools注释结果同样的格式
Chr1    1562612 1566554 -       ID=LOC_Os01g03740;Name=LOC_Os01g03740;Note=nuclease%20PA3%2C%20putative%2C%20expressed
Chr1    1566138 1569472 +       ID=LOC_Os01g03730;Name=LOC_Os01g03730;Note=nuclease%20PA3%2C%20putative%2C%20expressed
root@DESKTOP-1N42TVH:/home/test6/test# awk '{OFS = "\t"; print $1, $4, $5, $7, $9}' result2.txt | sort | md5sum   ## 排序生成MD5
2d32d952202fa3f1b7cc6dbf4fe1a694  -
root@DESKTOP-1N42TVH:/home/test6/test# sort bedtoolsresult.txt | md5sum  ## 排序生成MD5
2d32d952202fa3f1b7cc6dbf4fe1a694  -
复制代码

 

posted @   小鲨鱼2018  阅读(292)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 单元测试从入门到精通
· 上周热点回顾(3.3-3.9)
· winform 绘制太阳,地球,月球 运作规律
历史上的今天:
2021-07-19 c语言中打印浮点数
2021-07-19 linux系统中diff命令
点击右上角即可分享
微信分享提示