ChIP-seq 数据库的建立

这里以小鼠为例子下载相应的注释文件,基因组版本为mm10

# Extract columns 2, 3 and 5 of every gene_info row whose first column is the
# tax ID 10090 (the mouse example used here) into ../mm10.ID.
# Presumably these columns are GeneID, symbol and synonyms -- TODO confirm
# against the NCBI gene_info header.
# gzip -dc is used instead of `less -S`, which only decompresses .gz input when
# a less input preprocessor (LESSOPEN) happens to be configured.
gzip -dc gene_info.gz \
  | awk -F '\t' -v OFS='\t' '$1 == "10090" { print $2, $3, $5 }' > ../mm10.ID

# Extract columns 2 and 13 of refGene into mm10.name.
# Presumably transcript accession and gene symbol -- TODO confirm against the
# UCSC refGene schema.
# The input is refGene.txt.gz, the same file the later perl steps read (the
# original command had the file name truncated to "refGene.txt.g").
gzip -dc refGene.txt.gz \
  | awk -F '\t' -v OFS='\t' '{ print $2, $13 }' > mm10.name

# Combine both tables into mm10-id-gene (gene.pl is not shown here; the later
# turn.pl step consumes its third argument as input).
perl gene.pl ../mm10.ID mm10.name mm10-id-gene

# Write the unsorted gene table Gene.bed.2 from the ID/name mapping and the
# refGene annotation (turn.pl is not shown here).
perl turn.pl mm10-id-gene refGene.txt.gz Gene.bed.2
# Sort by field 1 (text) and then field 2 (numeric) -- presumably chromosome
# and start coordinate. sort reads the file directly instead of going through
# a pager, and the intermediate file is removed only if the sort succeeded, so
# a failed sort does not destroy the only copy of the data.
sort -k 1,1 -k 2,2n Gene.bed.2 > Gene.bed && rm Gene.bed.2

# Sort file $1 by field 1 (text) then field 2 (numeric) and write it to $2.
sort_bed() { sort -k 1,1 -k 2,2n "$1" > "$2"; }

# Gene2exon_intron.pl is not shown here; the following steps expect it to have
# written exon.bed and intron.bed into the current directory.
perl Gene2exon_intron.pl refGene.txt.gz
sort_bed exon.bed Exon.bed
sort_bed intron.bed Intron.bed
# Drop the unsorted intermediates.
rm exon.bed intron.bed

# Derive intergenic regions from the sorted gene table. The meaning of the
# second argument (2000) is defined by get4Intergenic.pl -- presumably a
# distance in bp; TODO confirm.
perl get4Intergenic.pl Gene.bed 2000
  • GO and KEGG
# --- GO ---
# Keep only gene2go rows whose first column is exactly the tax ID 10090.
# An exact field comparison is used (as for gene_info) because the prefix
# match "^10090" would also select other tax IDs such as 100901.
gzip -dc /ldfssz1/ST_BIGDATA/USER/yueyao/12.Pro/04.RNASeq/gene2go.gz \
  | awk -F '\t' '$1 == "10090"' > mm10.go
# NOTE(review): mm10.annot is not produced by any command visible here (the
# step above writes mm10.go) -- confirm how mm10.annot is derived before
# running this step.
perl /ifs4/BC_PUB/biosoft/pipeline/RNA/RNA_RNAdenovo/RNA_RNAdenovo_2016a/Annotation/annot2goa.pl /ifs4/BC_PUB/biosoft/db/Pub/go/RNA/20171220/gene_ontology.1_2.obo mm10.annot /ldfssz1/ST_BIGDATA/USER/yueyao/16.Pipeline/chipseq_test/mmdatabase/GO/mm10
perl dealGOObo.pl -go /ifs4/BC_PUB/biosoft/db/Pub/go/RNA/20171220/gene_ontology.1_2.obo -prefix go

# --- KEGG ---
# Keep only rows whose ID starts with the organism code "mmu". The pattern is
# anchored on the trailing colon (the ID is split on ":" below) so that other
# organism codes beginning with "mmu" are not picked up.
# grep reads the file directly; the original "les" is not a standard command.
grep '^mmu:' /ifs4/BC_PUB/biosoft/db/Pub/kegg/RNA/84.0/animal.id.annot.xls > mm10.kegg
# For each row, print "<gene>\t<KO>", where <gene> is the part of the first
# whitespace-separated field after the colon and <KO> is the first KO
# identifier on the line. KO identifiers are "K" followed by five digits; the
# original pattern K\d{4} cut off the last digit.
perl -ne 'my $id = (split)[0]; my $gene = (split /:/, $id)[1]; print "$gene\t$1\n" if /(K\d{5})/;' mm10.kegg > mm10.ko
posted @ 2019-05-24 13:52  raisok  阅读(594)  评论(0)  编辑  收藏  举报