linux shell实现统计 位点缺失率

 

1、脚本

复制代码
[root@centos79 test]# cat test.sh
#!/bin/bash

#step1 check ped file
uniqn=$(sed 's/\r//g' $1 | cut -d " " -f 7- | sed 's/ /\n/g' | sort -u | wc -l)
if [ $uniqn -gt 5 ]
then
echo "error, exception nucleotide: "
sed 's/^M//g' $1 | cut -d " " -f 7- | sed 's/ /\n/g' | sort -u | grep -v [ATGC0]
exit
fi


temp1=$(head -n 1 $1 | awk '{print NF}')
for i in $(seq `sed -n "$=" $1`)
do
temp2=$(sed -n "$i"p $1 | awk '{print NF}')
if [ $temp2 -ne $temp1 ]
then
echo "inconsistent number of columns!"
echo -e "colun1 1: $temp1\ncolume $i: $temp2"
exit
fi
done
echo "step 1 done!"


#step2 check consistence of ped and map file

mapline=$(sed -n "$=" $2)
locinum=$(head -n 1 $1 | awk '{print (NF - 6)/2}')

if [ $mapline -ne $locinum ]
then
echo "error, map file and ped file do not match!"
echo -e "mapline: $mapline\nlocinum: $locinum"
exit
fi
echo "step 2 done!"

#step 3 statistics
lines=$(sed -n "$=" $1)
for i in $(seq `head -n 1 $1 | awk '{print (NF - 6)/2}'`); do awk -v a=$i '{print $(a * 2 + 5), $(a * 2 + 6)}' $1 | awk -v a=$lines -v RS="@#$j" '{print gsub(/0/,"&")/(a*2)}' >> missresult1; done
echo "step 3 done!"

#step 4 add loci and headline
cut -f 1-2,4 $2 | paste - missresult1 | sed '1i chr\tsnp\tpos\tmissing_rate' > missresult.txt
rm -f  missresult1
echo "fineshed!"
复制代码

 

2、测试数据及用法

复制代码
[root@centos79 test]# ls
outcome.map  outcome.ped  test  test.sh
[root@centos79 test]# cat outcome.map
1       snp1    0       55910
1       snp2    0       85204
1       snp3    0       122948
1       snp4    0       203750
1       snp5    0       312707
1       snp6    0       356863
1       snp7    0       400518
1       snp8    0       487423
[root@centos79 test]# cat outcome.ped
DOR 1 0 0 0 -9 G G 0 0 0 0 0 0 A A T T G G 0 0
DOR 2 0 0 0 -9 0 0 G G 0 0 0 0 A A T T A G 0 0
DOR 3 0 0 0 -9 0 0 C C 0 0 G G G G T T A G 0 0
DOR 4 0 0 0 -9 0 0 C C 0 0 G G G G A A G G G G
DOR 5 0 0 0 -9 G G C C 0 0 G G G G A A A G G C
DOR 6 0 0 0 -9 G G C C 0 0 G G G G A A A A C C
[root@centos79 test]# bash test.sh outcome.ped outcome.map
step 1 done!
step 2 done!
step 3 done!
fineshed!
[root@centos79 test]# ls
missresult.txt  outcome.map  outcome.ped  test  test.sh
[root@centos79 test]# cat missresult.txt
chr     snp     pos     missing_rate
1       snp1    55910   0.5
1       snp2    85204   0.166667
1       snp3    122948  1
1       snp4    203750  0.333333
1       snp5    312707  0
1       snp6    356863  0
1       snp7    400518  0
1       snp8    487423  0.5
复制代码

 

3、plink软件验证

复制代码
[root@centos79 test]# ls
missresult.txt  outcome.map  outcome.ped  test  test.sh
[root@centos79 test]# plink --file outcome --missing --out temp > /dev/null; rm *.log *.nosex
[root@centos79 test]# ls
missresult.txt  outcome.map  outcome.ped  temp.imiss  temp.lmiss  test  test.sh
[root@centos79 test]# cat  temp.lmiss
 CHR  SNP   N_MISS   N_GENO   F_MISS
   1 snp1        3        6      0.5
   1 snp2        1        6   0.1667
   1 snp3        6        6        1
   1 snp4        2        6   0.3333
   1 snp5        0        6        0
   1 snp6        0        6        0
   1 snp7        0        6        0
   1 snp8        3        6      0.5
[root@centos79 test]# cat missresult.txt
chr     snp     pos     missing_rate
1       snp1    55910   0.5
1       snp2    85204   0.166667
1       snp3    122948  1
1       snp4    203750  0.333333
1       snp5    312707  0
1       snp6    356863  0
1       snp7    400518  0
1       snp8    487423  0.5
复制代码

 

posted @   小鲨鱼2018  阅读(103)  评论(0编辑  收藏  举报
编辑推荐:
· 基于Microsoft.Extensions.AI核心库实现RAG应用
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
· Linux系列:如何用 C#调用 C方法造成内存泄露
阅读排行:
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 单元测试从入门到精通
· 上周热点回顾(3.3-3.9)
· winform 绘制太阳,地球,月球 运作规律
历史上的今天:
2020-10-31 linux系统 服务的访问控制列表
2020-10-31 linux系统中firewalld防火墙管理工具firewall-config(GUI图形用户界面)
点击右上角即可分享
微信分享提示