circos准备输入文件及绘图

摘录自:http://events.jianshu.io/p/7c594d01fede

记录circos输入文件的准备过程。

##染色体长度文件

perl ~/biosoft/bin/getFalen.pl -i MAH1.fa -o mac1s.lens 

##制作滑动窗

bedtools makewindows -g mac1s.lens -w 100000 >mac1s.windows #设置window为100k

##获取基因密度

#gff3文件获取基因的位置
grep '[[:blank:]]gene[[:blank:]]' mac1s.gff | cut -f 1,4,5 | awk '{print $1"\t"$2"\t"$3}'| grep chr > mac1s.genes.bed
#获取基因覆盖率
bedtools coverage -a mac1s.windows -b mac1s.genes.bed | cut -f 1-4 > mac1s.genes.100kb.bed

##染色体配置文件

cat mac1s.lens | awk '{print "chr - "$1" "$1" 0 "$2" "$1}' >mac1s.karyotype.txt

##获取GC含量

bedtools nuc -fi MAH1.fa -bed mac1s.windows | cut -f 1-3,5 | sed '1d' >mac1s.GC.100kb.bed

##准备可视化LTR相关数据

#获取Copia类型转座子位置
cat MAH1.fa.mod.pass.list | grep Copia | cut -f1 | tr : "\t" | sed 's/\.\./\t/g' >mac1s.Copia.bed
#获取Copia覆盖率
bedtools coverage -a mac1s.windows -b mac1s.Copia.bed |cut -f 1-4 >mac1s.copia.100kb.bed
#准备Gypsy类型转座子
cat MAH1.fa.mod.pass.list | grep Gypsy | cut -f1 | tr : "\t" | sed 's/\.\./\t/g' >mac1s.Gypsy.bed
#获取Gypsy覆盖率
bedtools coverage -a mac1s.windows -b mac1s.Gypsy.bed |cut -f 1-4 >mac1s.gypsy.100kb.bed

###准备LAI的文件
cat MAH1.fa.mod.out.LAI | cut -f 1,2,3,7 | sed '1,2d' >LAI_num.txt
###完整LTR的百分比(每个windowsize的完整LTR的百分比)
cat MAH1.fa.mod.out.LAI | awk '{print $1"\t"$2"\t"$3"\t"$4*100}' | sed '1,2d' >full_LTR_num.txt
###所有LTR的百分比(每个windowsize的LTR的百分比)
cat MAH1.fa.mod.out.LAI | awk '{print $1"\t"$2"\t"$3"\t"$5*100}' | sed '1,2d' >LTR_num.txt

##准备共线性文件

python -m jcvi.formats.gff bed --type=mRNA --key=Name Vvinifera_145_Genoscope.12X.gene.gff3.gz -o grape.bed
python -m jcvi.formats.fasta format Vvinifera_145_Genoscope.12X.cds.fa.gz grape.cds
python -m jcvi.compara.catalog ortholog grape grape --cscore=.99 --no_strip_names --notex
#####从共线性的基因对,提取整合成circos的bed格式。
cat banana.banana.anchors | grep -v \#| while read line;
do
        gene_array=($line)
        gene1_pos=`fgrep ${gene_array[0]} banana.bed | cut -f 1-3`
        gene2_pos=`fgrep ${gene_array[1]} banana.bed | cut -f 1-3`
        bed=${gene1_pos}" "${gene2_pos}
        echo $bed >>banana.coline_num.txt
done
###把染色体中的chr0转换成chr.(所有的用于circos画图的染色体都需要改成chr这种,否则,默认的circos不会识别chr01这种格式)
cat mac.coline_num.txt | sed 's/chr0/chr/g' >coline_num.txt
cat mac1s.karyotype.txt | sed 's/chr0/chr/g' >mac1s.karyotype.new.txt

##circos文件及环境配置

01.mac1s.karyotype.new.txt(染色体长度文件)
circos.conf(配置文件)
ticks.conf(配置文件)
02.colline_num.new.txt(共线性文件)
GC_num.txt(GC含量文件)
03.full_LTR_num.new.txt(完整LTR的百分比)
05.copia_num.txt(copia的百分比)
06.gypsy_num.txt(gypsy的百分比)
04.LTR_num.new.txt(所有LTR的百分比)
07.genes_num.txt(基因密度)
08.LAI_num.new.txt(LAI的分布)

##karyotype.txt内容

chr - Chr1 Chr1 0 27519054 Chr1
chr - Chr2 Chr2 0 37982726 Chr2
chr - Chr3 Chr3 0 37451735 Chr3
chr - Chr4 Chr4 0 28204096 Chr4
chr - Chr5 Chr5 0 45635594 Chr5
chr - Chr6 Chr6 0 32349783 Chr6
chr - Chr7 Chr7 0 36685546 Chr7
chr - Chr8 Chr8 0 31371431 Chr8
chr - Chr9 Chr9 0 35059391 Chr9
chr - Chr10 Chr10 0 43700000 Chr10
chr - Chr11 Chr11 0 39651545 Chr11

##ticks.conf内容

#刻度ticks配置文件
chromosomes_units = 1000000
show_ticks          = yes
show_tick_labels    = yes

<ticks>

radius           = 1r
color            = black
thickness        = 2p

multiplier       = 1e-6 #输出的标签为实际长度与其相乘

format           = %d # %d表示显示整数
tick_separation = 3p #管理刻度和外圈之间的距离
label_separation        = 10p
<tick>
spacing        = 1u
size           = 5p
</tick>

<tick>
spacing        = 5u
size           = 10p
show_label     = yes
label_size     = 10p
label_offset   = 10p
format         = %d
</tick>

</ticks>

##circos.conf内容

#首尾染色体之间有间隔,添加了共线性,同时调整了半径

karyotype = karyotype.txt #定义基因组的染色体大小文件
#chromosomes_color = Chr1=rdylbu-11-div-1,Chr2=rdylbu-11-div-3,Chr3=rdylbu-11-div-5,Chr4=rdylbu-11-div-7,Chr5=rdylbu-11-div-9 #定义染色体的颜色,染色体名称是karyotype.Spo.txt中第3列的名称Chr1-17
<<include ticks.conf>> #引入自定义的刻度配置文件,目录是相对路径

##共线性图
<links>

<link>
file          = coline_num.txt #共线性文件
radius        = 0.40r #外圈半径
color         = blue_a4 #默认颜色
thickness = 1p #定义线条的粗细
ribbon = yes
######rules指定线条的颜色,
<rules>
<rule>
condition = var(chr1) eq "Chr1" #共线性文件中,左边是Chr1的,指定颜色。var(chr1)这个不用修改,指的是file里左侧第一个,后面的“Chr1”根据实际的Chr来修改。
color = rdylgn-11-div-1
</rule>

<rule>
condition = var(chr1) eq "Chr2"
color = rdylgn-11-div-2
</rule>

<rule>
condition = var(chr1) eq "Chr3"
color = rdylgn-11-div-3
</rule>

<rule>
condition = var(chr1) eq "Chr4"
color = rdylgn-11-div-4
</rule>

<rule>
condition = var(chr1) eq "Chr5"
color = rdylgn-11-div-5
</rule>

<rule>
condition = var(chr1) eq "Chr6"
color = rdylgn-11-div-6
</rule>

<rule>
condition = var(chr1) eq "Chr7"
color = rdylgn-11-div-7
</rule>

<rule>
condition = var(chr1) eq "Chr8"
color = rdylgn-11-div-8
</rule>

<rule>
condition = var(chr1) eq "Chr9"
color = rdylgn-11-div-9
</rule>

<rule>
condition = var(chr1) eq "Chr10"
color = rdylgn-11-div-10
</rule>

<rule>
condition = var(chr1) eq "Chr11"
color = rdylgn-11-div-11
</rule>

<rule>
condition = var(chr1) eq "Chr12"
color = rdylbu-9-div-1
</rule>

<rule>
condition = var(chr1) eq "Chr13"
color = rdylbu-9-div-2
</rule>

<rule>
condition = var(chr1) eq "Chr14"
color = rdylbu-9-div-3
</rule>

<rule>
condition = var(chr1) eq "Chr15"
color = rdylbu-9-div-4
</rule>

<rule>
condition = var(chr1) eq "Chr16"
color = rdylbu-9-div-5
</rule>

<rule>
condition = var(chr1) eq "Chr17"
color = rdylbu-9-div-6
</rule>

</rules>

</link>
</links>



<plots>

#完整的LTR的百分比直方图
<plot>
type = histogram
file = full_LTR_num.txt
fill_color = 231,41,138 # 填充色
r1   = 0.50r
r0   = 0.41r
</plot>


#LTR的百分比直方图
<plot>
type = histogram
file = LTR_num.txt
fill_color = 254,196,79 # 填充色
r1   = 0.60r
r0   = 0.51r
</plot>



#Gypsy的热图
<plot>
type = heatmap
file = Gypsy_num.txt
color = oranges-9-seq #填充色必须是一个list,格式是 color = reds-9-seq 或者color = red,green,blue
r1   = 0.70r
r0   = 0.61r
</plot>

#Copia的热图
<plot>
type    = heatmap
file    = Copia_num.txt
color   = reds-9-seq #填充色
r1      = 0.80r
r0      = 0.71r
</plot>

##LAI的散点图 参考https://blog.csdn.net/weixin_43569478/article/details/83824929
<plot>
type = scatter #散点图
fill_color       = grey # 点的填充色
stroke_color     = black #边框的颜色
glyph            = circle
glyph_size       = 3 # 元素大小
file = LAI_num.txt
r1   = 0.90r
r0   = 0.81r

#划分3个区间,每个区间不同的颜色。用于LAI的划分
<backgrounds>
<background>
color = vvlgreen
y0 = 20
</background>
<background>
color = vlgrey
y1 = 20
y0 = 10
</background>
<background>
color = vvlred
y0 =10
</background>

</backgrounds>
##设置y轴刻度线
<axes>
<axis>
color = lgreen
thickness = 1p
spacing = 0.05r
y1 = 20
y0 = 10
</axis>
</axes>

</plot>

##基因密度直方图
<plot>
type = histogram
file = genes_num.txt
fill_color = blue # 填充色
r1   = 0.99r
r0   = 0.91r
</plot>

</plots>


<ideogram>
<spacing>
default = 0.005r

#设置染色体之间空出个距离,用来标图例a,b,c,d,e,f
<pairwise Chr17;Chr1>  #染色体名称是karyotype.Spo.txt中第3列的名称,根据你实际的物种第一条和最后一条来修改此处
spacing = 5r #设置第一个和最后一个染色体中间空出5*default(0.005r)的距离
</pairwise>

</spacing>

radius           = 0.85r #指定画图的半径(此半径和plots里的半径不一致,这个是总的0.85r,决定了整个圈图的最大的半径,plots里的0.99r半径是以这个0.85r作为100%来分配99%)
thickness        = 10p #坐标轴的染色体的厚度
fill             = yes
show_bands      = yes #设置染色体条带填充,如果不想用条带填充,只用颜色,就设置为no
fill_bands      = yes
stroke_color     = dgrey
stroke_thickness = 2p
show_label     = yes #展示label
label_font     = default # 字体
label_radius   = dims(ideogram,radius) + 0.075r #位置
label_size     = 12 # 字体大小
label_parallel = yes # 是否平行
label_format   = eval(sprintf("%s",var(chr))) # 格式

</ideogram>

<image>
# 覆盖在 etc/image.conf 中定义的 angle_offset
angle_offset* = -82  #指定旋转角度,用于插入图例的位置
dir* = . #输出文件夹
radius* = 500p #图片半径
svg* = yes #是否输出svg,默认是yes
<<include etc/image.conf>>
</image>

<<include etc/colors_fonts_patterns.conf>>
<<include etc/housekeeping.conf>>

 

posted @ 2023-02-04 11:20  pd_liu  阅读(772)  评论(0编辑  收藏  举报