linux shell 实现 plink --recode A 命令

1、

复制代码
#!/usr/bin/env bash
#
# Emulate `plink --recode A`: turn a PED/MAP pair into an additive
# minor-allele dosage table.
#
# Usage:  bash record.r <file.ped> <file.map>
# Output: result.paw in the current directory. The first line is the header
#         "FID IID PAT MAT SEX PHENOTYPE <variant>_<counted allele> ...";
#         every following line holds the six leading PED columns and one
#         dosage (0, 1, 2 or NA) per variant, all separated by single spaces.
#
# No `set -e` here: every failure path below is checked explicitly, and the
# status of `main` on the last line becomes the exit status of the script.
set -u

#######################################
# Recode a PED file into per-variant counts of the minor allele.
#
# The counted allele of a variant is the one seen least often across all
# samples; a tie goes to the allele that sorts first bytewise. A variant
# with a single observed allele counts that allele.
# The missing-genotype code "0" is never tallied as an allele, and a
# genotype with a missing allele is written as NA (the PLINK .raw
# convention). A variant with no observed allele at all is labelled "_0".
#
# Arguments:
#   $1 - PED file (whitespace-delimited, 6 leading columns + allele pairs)
#   $2 - MAP file (whitespace-delimited, variant identifier in column 2)
#   $3 - output path (optional, default: result.paw)
# Outputs:
#   Writes the table to the output path; diagnostics go to stderr.
# Returns:
#   0 on success, 1 on unreadable or inconsistent input.
#######################################
recode_a() {
  local ped=$1 map=$2 out=${3:-result.paw}
  local tmp=${out}.tmp.$$

  if [[ ! -r $ped ]]; then
    printf 'recode_a: cannot read ped file: %s\n' "$ped" >&2
    return 1
  fi
  if [[ ! -r $map ]]; then
    printf 'recode_a: cannot read map file: %s\n' "$map" >&2
    return 1
  fi

  # awk treats operands shaped like name=value as assignments, so relative
  # paths are anchored with ./ to make sure they are always read as files.
  [[ $ped == /* ]] || ped=./$ped
  [[ $map == /* ]] || map=./$map

  # One awk process, three passes selected by the `pass` operand:
  #   1 = map (variant names), 2 = ped (allele tallies), 3 = ped (dosages).
  # The ped file is read twice so that it never has to fit in memory.
  # LC_ALL=C keeps the tie-break between equally rare alleles reproducible.
  if LC_ALL=C awk '
    function fail(msg) {
      print "recode_a: " msg > "/dev/stderr"
      failed = 1
      exit 1
    }

    # Choose the counted allele of every variant and print the header line.
    function emit_header(   key, part, s, al, line) {
      for (key in tally) {
        split(key, part, SUBSEP)
        s = part[1]
        al = part[2] ""   # force string semantics for numeric-looking alleles
        if (!(s in minor) || tally[key] < fewest[s] ||
            (tally[key] == fewest[s] && al < minor[s])) {
          minor[s] = al
          fewest[s] = tally[key]
        }
      }
      line = "FID IID PAT MAT SEX PHENOTYPE"
      for (s = 1; s <= nsnp; s++) {
        if (!(s in minor))
          minor[s] = "0"   # every genotype of this variant is missing
        line = line " " name[s] "_" minor[s]
      }
      print line
    }

    # Tolerate CRLF line endings and blank lines in either input file.
    { sub(/\r$/, "") }
    NF == 0 { next }

    pass == 1 {
      name[++nmap] = $2
      next
    }

    pass == 2 {
      if (NF < 6 || (NF - 6) % 2)
        fail(FILENAME ":" FNR ": expected 6 leading columns plus allele pairs, got " NF " fields")
      if (!rows++) {
        nsnp = (NF - 6) / 2
        if (nsnp != nmap)
          fail("ped file has " nsnp " variants but map file lists " nmap)
      } else if (NF != 6 + 2 * nsnp) {
        fail(FILENAME ":" FNR ": has " (NF - 6) / 2 " variants, expected " nsnp)
      }
      for (s = 1; s <= nsnp; s++) {
        a = $(5 + 2 * s)
        b = $(6 + 2 * s)
        if (a != "0") tally[s, a]++
        if (b != "0") tally[s, b]++
      }
      next
    }

    pass == 3 {
      if (!header_done) {
        emit_header()
        header_done = 1
      }
      line = $1 " " $2 " " $3 " " $4 " " $5 " " $6
      for (s = 1; s <= nsnp; s++) {
        a = $(5 + 2 * s) ""
        b = $(6 + 2 * s) ""
        if (a == "0" || b == "0")
          dose = "NA"
        else
          dose = (a == minor[s]) + (b == minor[s])   # whole-allele match
        line = line " " dose
      }
      print line
    }

    END {
      if (failed)
        exit 1
      if (!rows)
        fail("ped file contains no samples")
    }
  ' pass=1 "$map" pass=2 "$ped" pass=3 "$ped" > "$tmp"; then
    # Rename only after a complete, successful run so that a failure never
    # leaves a truncated table behind or destroys an earlier result.
    mv -f -- "$tmp" "$out" || return 1
  else
    rm -f -- "$tmp"
    return 1
  fi
}

main() {
  if (($# < 2)); then
    printf 'Usage: %s <file.ped> <file.map>\n' "${0##*/}" >&2
    return 2
  fi
  recode_a "$1" "$2"
}

main "$@"
复制代码

用法:

复制代码
root@PC1:/home/test/test/test2# ls
outcome.map  outcome.ped  record.r
root@PC1:/home/test/test/test2# bash record.r outcome.ped outcome.map
root@PC1:/home/test/test/test2# ls
outcome.map  outcome.ped  record.r  result.paw
root@PC1:/home/test/test/test2# cat result.paw
FID IID PAT MAT SEX PHENOTYPE snp1_C snp2_G snp3_T snp4_A snp5_A snp6_G
DOR 1 0 0 0 -9 2 0 2 0 1 2
DOR 2 0 0 0 -9 1 1 0 0 0 2
DOR 3 0 0 0 -9 0 0 0 0 0 0
DOR 4 0 0 0 -9 0 0 0 0 0 0
DOR 5 0 0 0 -9 0 0 0 0 0 0
DOR 6 0 0 0 -9 0 0 0 0 0 0
DOR 7 0 0 0 -9 0 0 0 1 2 0
DOR 9 0 0 0 -9 0 0 0 1 2 0
root@PC1:/home/test/test/test2# cat outcome.ped
DOR 1 0 0 0 -9 C C C C T T G G A G G G
DOR 2 0 0 0 -9 C G G C G G G G G G G G
DOR 3 0 0 0 -9 G G C C G G G G G G A A
DOR 4 0 0 0 -9 G G C C G G G G G G A A
DOR 5 0 0 0 -9 G G C C G G G G G G A A
DOR 6 0 0 0 -9 G G C C G G G G G G A A
DOR 7 0 0 0 -9 G G C C G G A G A A A A
DOR 9 0 0 0 -9 G G C C G G A G A A A A
复制代码

 

2、plink软件验证

复制代码
root@PC1:/home/test/test/test2# ls
outcome.map  outcome.ped  record.r
root@PC1:/home/test/test/test2# bash record.r outcome.ped outcome.map
root@PC1:/home/test/test/test2# ls
outcome.map  outcome.ped  record.r  result.paw
root@PC1:/home/test/test/test2# plink --file outcome --recode A --out temp > /dev/null; rm *log *.nosex
root@PC1:/home/test/test/test2# ls
outcome.map  outcome.ped  record.r  result.paw  temp.raw
root@PC1:/home/test/test/test2# cat temp.raw
FID IID PAT MAT SEX PHENOTYPE snp1_C snp2_G snp3_T snp4_A snp5_A snp6_G
DOR 1 0 0 0 -9 2 0 2 0 1 2
DOR 2 0 0 0 -9 1 1 0 0 0 2
DOR 3 0 0 0 -9 0 0 0 0 0 0
DOR 4 0 0 0 -9 0 0 0 0 0 0
DOR 5 0 0 0 -9 0 0 0 0 0 0
DOR 6 0 0 0 -9 0 0 0 0 0 0
DOR 7 0 0 0 -9 0 0 0 1 2 0
DOR 9 0 0 0 -9 0 0 0 1 2 0
root@PC1:/home/test/test/test2# cat result.paw
FID IID PAT MAT SEX PHENOTYPE snp1_C snp2_G snp3_T snp4_A snp5_A snp6_G
DOR 1 0 0 0 -9 2 0 2 0 1 2
DOR 2 0 0 0 -9 1 1 0 0 0 2
DOR 3 0 0 0 -9 0 0 0 0 0 0
DOR 4 0 0 0 -9 0 0 0 0 0 0
DOR 5 0 0 0 -9 0 0 0 0 0 0
DOR 6 0 0 0 -9 0 0 0 0 0 0
DOR 7 0 0 0 -9 0 0 0 1 2 0
DOR 9 0 0 0 -9 0 0 0 1 2 0
root@PC1:/home/test/test/test2# md5sum result.paw temp.raw
563fbde796e2d64dfc9c4570e71a925f  result.paw
563fbde796e2d64dfc9c4570e71a925f  temp.raw
复制代码

 

posted @   小鲨鱼2018  阅读(436)  评论(0)  编辑  收藏  举报
编辑推荐:
· 基于Microsoft.Extensions.AI核心库实现RAG应用
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
· Linux系列:如何用 C#调用 C方法造成内存泄露
阅读排行:
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 单元测试从入门到精通
· 上周热点回顾(3.3-3.9)
· winform 绘制太阳,地球,月球 运作规律
历史上的今天:
2020-11-02 网卡是什么?
点击右上角即可分享
微信分享提示