使用基因型数据计算近交系数

 

001、plink --het (纯合度近交系数)

root@PC1:/home/test# ls
outcome.map  outcome.ped
root@PC1:/home/test# plink --file outcome --het 1> /dev/null
root@PC1:/home/test# ls
outcome.map  outcome.ped  plink.het  plink.log
root@PC1:/home/test# head plink.het      ## 最后一列F为近交系数
    FID     IID       O(HOM)       E(HOM)        N(NM)            F
   DOR1    DOR1        28943    2.669e+04        42629       0.1412
   DOR2    DOR2        29737    2.669e+04        42629       0.1911
   DOR3    DOR3        28395    2.669e+04        42629       0.1068
   DOR4    DOR4        28430    2.669e+04        42629        0.109
   DOR5    DOR5        29235    2.669e+04        42629       0.1596
   DOR6    DOR6        28082    2.669e+04        42629       0.0872
   DOR7    DOR7        27779    2.669e+04        42629      0.06819
   DOR8    DOR8        29014    2.669e+04        42629       0.1457
   DOR9    DOR9        28135    2.669e+04        42629      0.09053

 

002、ROH 近交系数

root@PC1:/home/test# ls
outcome.map  outcome.ped                                                        ## plink检测ROH
root@PC1:/home/test# plink --file outcome --homozyg-snp 30 --homozyg-kb 1000 --homozyg-density 1000 --homozyg-gap 1000 --homozyg-window-snp 50 --homozyg-window-het 1 --homozyg-window-missing 1 --out roh 1> /dev/null
root@PC1:/home/test# ls
outcome.map  outcome.ped  roh.hom  roh.hom.indiv  roh.hom.summary  roh.log                                 ## 统计染色体的长度
root@PC1:/home/test# cut -f 1 outcome.map | sort -k 1n,1n| uniq | while read i; do grep -w "^$i" outcome.map | sort -k 4rn,4rn | head -n 1 >>chr_len.txt; done
root@PC1:/home/test# ls
chr_len.txt  outcome.map  outcome.ped  roh.hom  roh.hom.indiv  roh.hom.summary  roh.log
root@PC1:/home/test# geno_len=$(awk '{sum += $4} END {print sum / 1000}' chr_len.txt )  ## 计算基因组的长度                                           
root@PC1:/home/test# awk -v a=$geno_len '{print $2, $(NF - 1)/a}' roh.hom.indiv | head ## 计算基因组近交系数 IID 0 DOR1 0.141861 DOR2 0.193722 DOR3 0.115116 DOR4 0.114285 DOR5 0.162598 DOR6 0.0992099 DOR7 0.0779776 DOR8 0.14785 DOR9 0.0968709

 

003、gcta计算近交系数

root@PC1:/home/test# ls
outcome.map  outcome.ped
root@PC1:/home/test# plink --file outcome --make-bed 1> /dev/null
root@PC1:/home/test# ls
outcome.map  outcome.ped  plink.bed  plink.bim  plink.fam  plink.log
root@PC1:/home/test# /home/software/gcta_v1.94.0Beta_linux_kernel_3_x86_64/gcta_v1.94.0Beta_linux_kernel_3_x86_64_static --bfile plink --ibc --out result 1> /dev/null
root@PC1:/home/test# ls
outcome.map  outcome.ped  plink.bed  plink.bim  plink.fam  plink.log  result.ibc  result.log
root@PC1:/home/test# head result.ibc
FID     IID     NOMISS  Fhat1   Fhat2   Fhat3
DOR1    DOR1    42629   0.0462481       0.154288        0.100268
DOR2    DOR2    42629   0.0309685       0.217734        0.124351
DOR3    DOR3    42629   0.0261114       0.114167        0.0701392
DOR4    DOR4    42629   -0.0207756      0.130706        0.0549651
DOR5    DOR5    42629   -0.0187196      0.189537        0.0854086
DOR6    DOR6    42629   -0.0154794      0.106942        0.0457311
DOR7    DOR7    42629   -0.0635135      0.0992687       0.0178776
DOR8    DOR8    42629   -0.0352431      0.177956        0.0713562
DOR9    DOR9    42629   0.0317887       0.0974181       0.0646034

 

posted @ 2022-07-29 23:49  小鲨鱼2018  阅读(1063)  评论(0编辑  收藏  举报