发表级ggplot绘图流程和技巧 | 论文发表
2024年08月27日
好用的color theme:https://github.com/nanxstats/ggsci
2023年10月18日
barplot截取y轴,非常规操作。
You could try, with library(scales):
+ scale_y_continuous(limits=c(2000,2500),oob = rescale_none)
2023年09月20日
Seurat的violin plot迅速加mean
1 2 3 4 5 6 7 8 9 10 11 12 | library(ggpubr) median.stat <- function (x){ out <- quantile(x, probs = c(0.5)) names(out) <- c( "ymed" ) return (out) } # single plot VlnPlot(clustered_colon, features = c( "EZH2" )) + stat_summary(fun.y = median.stat, geom= 'point' , size = 5, colour = "black" , shape = 21) # or for multiple plots VlnPlot(clustered_colon, features = c( "EZH2" )) & stat_summary(fun.y = median.stat, geom= 'point' , size = 5, colour = "black" , shape = 21) |
1 2 3 4 5 | options (repr.plot.width=8, repr.plot.height=5) VlnPlot (GC.IM.seuset, features = c ( "JARID2_imputation" , "EZH2_imputation" , "SOX9_imputation" ), group.by = "source" ) & stat_summary (fun.y = median.stat, geom= 'point' , size = 5, colour = "black" , shape = 21) & stat_compare_means (comparisons = list ( c ( "Normal" , "Cancer" )), label.y = 0.20, label = "p.signif" , method = "wilcox.test" ) |
2023年05月05日
最常用的themes
1 2 | annotate ( "text" , label = NES, parse = T, x = 10000, y = 0.2, size = 6, colour = "black" , fontface = "plain" ) |
1 2 3 4 5 6 7 8 9 10 11 | options(repr.plot.width=4, repr.plot.height=4) labs(x = "Log2 fold change" ,y = "-Log10(P-value)" , title = my_title) + # geom_hline(aes(yintercept=1), colour="grey50", linetype="dashed", size=0.2) + # geom_vline(aes(xintercept=0.5), colour="red", linetype="dashed", size=0.2) + scale_color_manual(values=c( "#619CFF" , "lightgrey" , "#F8766D" )) + scale_fill_manual(values=c( "blue" , "lightgrey" , "red" )) + scale_y_continuous(position= "left" , breaks = seq (0, 15, by = 5), limits=c(0, 15)) + geom_text_repel(data=mark_data, aes(label=gene), size=5, fontface= "italic" , arrow = arrow(ends= "first" , length = unit(0.01, "npc" )), box.padding = 0.2, point.padding = 0.3, segment.color = 'black' , segment.size = 0.3, force = 1, max.iter = 3e3) + annotate( "text" , label = "1509 DOWN" , x = -3.5, y = 14, size = 4, colour = "#619CFF" , fontface = "bold" ) |
1 2 3 4 5 6 7 8 9 | cns_theme <- theme_classic() + # theme_bw() + # theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), # panel.border = element_rect(size=0.8, colour = "black")) + theme(axis.title =element_text(size = 13, color = "black" , face = "bold" ), axis.text =element_text(size = 13, color = "black" , face = "bold" ), plot.title =element_text(hjust = 0.5, size = 16, color = "black" , face = "bold" )) + # theme(legend.title=element_blank(), legend.key.size = unit(0.8, 'lines')) + theme(legend.position = "none" ) |
2022年09月01日
现在基本的图形已经得心应手了。
问题是如何配色,使图形更加fancy,组合图如何美观统一。【向CNS靠齐,最少也要以CNS子刊的标准来出图】
参考:
- 要想文章好,图片少不了,22个技能助您获得美图(带字幕视频) - AI教程 【字体线条一旦不统一,出图将会非常丑】
我的私人库:https://github.com/leezx/FigureCode
The R Graph Gallery - R绘图代码库、案例大全
themes - https://www.r-graph-gallery.com/ggplot2-package.html#themes
要开始修饰以前的核心图片,准备发表论文了。
把之前比较raw的图修饰格式,统一生成高清晰图片,准备放入paper中。
会慢慢补充所有常见的绘图代码。
一个raw image的代码
1 2 3 4 5 | p1 <- ggplot(oxidation. df , aes(x=group, y=score, color=group)) + geom_boxplot() + geom_jitter(shape=16, position=position_jitter(0.2)) + labs(title = "Fatty acid metabolism" ) p1 |
第一步:修改df里的标签
1 2 3 | oxidation.df$group <- plyr:: mapvalues (oxidation.df$group, from = c ( "GFP- early" , "GFP+ early" , "GFP- late" , "GFP+ late" ), to = c ( "HhOFF early" , "HhON early" , "HhOFF late" , "HhON late" )) |
第二步:修改标签顺序
1 | oxidation.df$group <- factor (oxidation.df$group, levels = c ( "HhOFF early" , "HhON early" , "HhOFF late" , "HhON late" )) |
第三步:精修格式主题字体
主题
常用的主题:https://www.r-graph-gallery.com/ggplot2-package.html#themes
- theme_bw - 去掉了灰白背景,加了边框,最常用
- theme_classic - 只留下了加粗的左下边框,最经典,适合实验图
- egg::theme_article - 只有四周的边框,最适合发表文章,缺点:图例间隔太小
- theme_minimal - 只留下了grid,没有边框
- theme_minimal_hgrid - 只留下了hgrid
- theme_void - 只留下了图例,适合tSNE图
1 | theme_bw () |
1 | theme_void () |
1 2 | # remove grid theme (panel.grid.major = element_blank (), panel.grid.minor = element_blank ()) |
修改title
1 | labs (x = "" , y = "Pathway score\n" , title = "Fatty acid metabolism" ) |
xy轴标签字体大小
1 2 3 | theme (axis.text.x = element_text (face= "plain" , angle=30, size = 14, color = "black" , vjust=0.6), axis.text.y = element_text (size = 10), axis.title.y = element_text (size = 14)) |
去掉多余的图例
1 | theme (legend.position = "none" ) |
填充颜色
1 | library (RColorBrewer) |
1 2 | scale_fill_manual (values= brewer.pal (9, "Paired" )) scale_color_manual (values= brewer.pal (9, "Paired" )[ c (3,4,5,6)]) |
其他细节
限制xy坐标范围
1 2 | scale_x_continuous (limits = c (0,2.5)) scale_y_continuous (limits = c (0,2.5)) |
一组数据的比较
1 2 | library (ggpubr) stat_compare_means (label.y = 2.3, label.x = 1, size=5) |
多组数据的比较
1 2 3 4 5 6 | library (ggpubr) my_comparisons <- list ( c ( "HhOFF early" , "HhON early" ), c ( "HhOFF late" , "HhON late" )) stat_compare_means (method = "anova" , label.y = 1.29) + # global stat_compare_means (comparisons = my_comparisons, label.y = 1, label = "p.signif" ) + # paired scale_y_continuous (limits = c (-0.52, 1.3)) |
代码汇总
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 | tmp$group <- plyr:: mapvalues (tmp$group, from = c ( "GFP- early" , "GFP+ early" , "GFP- late" , "GFP+ late" ), to = c ( "HhOFF early" , "HhON early" , "HhOFF late" , "HhON late" )) tmp$group <- factor (tmp$group, levels = c ( "HhOFF early" , "HhON early" , "HhOFF late" , "HhON late" )) library (ggpubr) my_comparisons <- list ( c ( "HhOFF early" , "HhON early" ), c ( "HhOFF late" , "HhON late" )) options (repr.plot.width=4, repr.plot.height=4) p1 <- ggplot (tmp, aes (x=group, y=score, color=group)) + geom_boxplot () + theme_bw () + labs (x = "" , y = "Pathway score\n" , title = "Fatty acid metabolism" ) + geom_jitter (shape=16, position= position_jitter (0.2)) + theme (legend.position = "none" ) + theme (axis.text.x = element_text (face= "plain" , angle=30, size = 14, color = "black" , vjust=0.6), axis.text.y = element_text (size = 10), axis.title.y = element_text (size = 14)) + # scale_fill_manual(values=brewer.pal(9,"Paired")) scale_color_manual (values= brewer.pal (9, "Paired" )[ c (3,4,5,6)]) + stat_compare_means (method = "anova" , label.y = 1.29) + # global stat_compare_means (comparisons = my_comparisons, label.y = 1, label = "p.signif" ) + # paired scale_y_continuous (limits = c (-0.52, 1.3)) p1 |
多图拼接
1 2 | options (repr.plot.width=8, repr.plot.height=9) cowplot:: plot_grid (p1,p2,p3,p4,ncol = 2) |
PDF出图
1 | ggsave (filename = "HhOFF HhON metabolic pathways.pdf" , width = 8, height = 9) |
有些图不能这么保存,比如heatmap,这时就要用到pdf函数
1 2 3 4 | # traditional save pdf ( "manuscript/HSCR.cluster.heatmap.pdf" , width=8, height=7) p dev.off () |
lnkscape里修改文字【对齐,上下标、斜体等等】
结果
OK, 一个准发表级的图就制作好了,可能还需要精修。
其余细节
点的类型
1 2 | # change the border of point geom_point (shape = 21, colour = "black" , fill = "white" , size = 5, stroke = 5) |
把点拟合成线
1 | stat_smooth (method = "loess" , size = 1.1, se = F, span = 0.2) |
在散点图边缘显示其分布,需要一个额外的包ggExtra,非常简单
1 | ggMarginal (p, type= "histogram" , fill= "black" , color= NA , size=7, xparams = list (bins=200), yparams = list (bins=200)) |
散点图显示mean点
1 | stat_summary (fun.y=mean, geom= "point" , shape=20, size=7, color= "black" , fill= "black" ) + |
线
直线
1 | geom_hline (yintercept=20, linetype= "dashed" , color = "red" , size=2) |
线段
1 | geom_segment (data=seg_1, aes (x=x,y=y,xend=xend,yend=yend), arrow= arrow (length= unit (0.3, "cm" )), size=1) |
矩形
1 | geom_rect (data=AS_1, aes (xmin=EXONSTART, xmax=EXONEND,ymin= -0.1,ymax=0.1),fill= "#4DAF4A" ) |
图例,比如改legend title,改点大小,去掉图例
1 | labs (x = "\nTranscriptional level" ,y = "\nPost-transcriptional level" , title = "" , color = "Clinical score" ) |
1 2 | # change legend dot size guides (colour = guide_legend (override.aes = list (size=10))) |
1 2 | # ggplot remove legend title theme (legend.title = element_blank ()) |
1 2 | # position theme (legend.text = element_text (size = 12), legend.position = c (0.8, 0.75)) |
1 2 | # remove legend background theme (legend.background= element_blank ()) |
去掉legend的白色背景布,一步到位
1 2 | theme (legend.title = element_blank (), legend.text = element_text (size = 11), legend.position = c (0.85, 0.15), legend.background = element_blank ()) |
1 2 | # 改变图例元素的顺序 scale_fill_manual (values= brewer.pal (3, "Set1" )[ c (3,2,1)], breaks= c ( "H3K9Ac" , "H4K8Ac" , "H3K27Ac" )) |
标题格式,比如居中
1 | theme (plot.title = element_text (hjust = 0.5, size = 18)) |
去掉边框,轴线,刻度;去掉右上边框
1 2 3 4 5 | # empty border, ticks, text theme (panel.border = element_blank (), panel.grid.major = element_blank (), panel.grid.minor = element_blank (), axis.line = element_blank ()) + labs (x = "" ,y = "" , title = "" ) + theme (axis.title= element_blank (), axis.text= element_blank (), axis.ticks= element_blank ()) |
1 2 3 | # remove top and right border theme (axis.line = element_line (colour = "black" ), panel.grid.major = element_blank (), panel.grid.minor = element_blank (), panel.border = element_blank (), panel.background = element_blank ()) |
坐标轴,比如修改起点,范围
1 2 | # force y start from 0 scale_y_continuous (expand = c (0, 0), limits = c (0, NA )) |
去掉画布中的网格线条
1 2 | # just remove inside grid theme (panel.grid.major = element_blank (), panel.grid.minor = element_blank ()) |
添加文本
1 2 | # add text annotate ( "text" , label = "Wilcoxon test\nP-value = 1.48e-12" , x = 0.5, y =2, size = 6, colour = "black" ) |
添加背景色
1 2 | # add background color to mark different region geom_rect (xmin=0, xmax=2.5, ymin=-2, ymax=-1, fill= "#4DAF4A" , alpha=1, color= NA ) |
修改填充颜色
1 2 | # color scale_color_manual (values= brewer.pal (9, "Set1" )[ c (1:5,7:9)]) |
精准控制圈图的两种alpha,比如venn图
1 2 | scale_color_manual (values = sample.colors) + scale_fill_manual (values = alpha (sample.colors, .2)) |
快速统计分析
1 2 3 4 | # quick statistic testing # Wilcoxon test wilcox.test ( subset (time.df,GeneSet== "Common risk" )$Time, subset (time.df,GeneSet== "L-HSCR specific" )$Time, alternative = "two.sided" ) |
# packages
# significance
https://github.com/const-ae/ggsignif
数据的基本处理
分组计算,如取mean,单列
1 2 3 | # quick data process # get group mean weather %>% group_by (city) %>% summarise (mean_temperature = mean (temperature)) |
分组取mean,多列
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 | d <- read.table (text= 'Name Month Rate1 Rate2 Aira 1 12 23 Aira 2 18 73 Aira 3 19 45 Ben 1 53 19 Ben 2 22 87 Ben 3 19 45 Cat 1 22 87 Cat 2 67 43 Cat 3 45 32', header= TRUE ) aggregate (d[, 3:4], list (d$Name), mean) Group.1 Rate1 Rate2 1 Aira 16.33333 47.00000 2 Ben 31.33333 50.33333 3 Cat 44.66667 54.00000 |
小数点保留,科学计数法
1 2 3 4 | # format decimals formatC (0.46, format = "e" , digits = 1) library (scales) scientific (0.46, digits = 2) |
查看默认的颜色 - 画图的结果数据
1 2 3 | # see the colors in ggplot # To see what colors are used to make your plot you can use function ggplot_build() and then look at data part of this object (in column colour are codes). ggplot_build (p)$data |
其他图种
柱状图barplot
改bar的顺序,以及添加边框
1 | geom_bar (stat= "identity" , alpha=1, position = position_fill (reverse = TRUE ), color = "grey50" ) + # position="fill", |
配对的箱线图、柱状图、折线图 - 用于比较case和control
data
aes里加一个fill就可以把x轴分组(颜色填充默认就会分组),非常适合case和control的比较。
这里想加点需要用另一个函数geom_dotplot
1 2 3 4 5 | lineage lineage.sub stage S.Score G2M.Score cc.score <chr> <fct> <chr> <dbl> <dbl> <dbl> ctrl_AAACCTGAGACATAAC NP NPlate Control -0.8162696 -0.98076576 -0.8162696 ctrl_AAACCTGCAAGTAATG BP BP Control 0.3118349 -0.05584626 0.3118349 ctrl_AAACCTGCATGCTAGT GP GP Control 0.4443853 0.27702244 0.4443853 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 | # http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/76-add-p-values-and-significance-levels-to-ggplots/ library (ggpubr) options (repr.plot.width=5, repr.plot.height=4) p <- ggplot (cc.df, aes (x=lineage.sub, y=cc.score, fill=stage)) + geom_boxplot (position= position_dodge (1)) + geom_dotplot (binaxis= 'y' , stackdir= 'center' , position = "dodge" , dotsize=0.15, binwidth=1/25, binpositions= "all" ) + theme_bw () + labs (x = "" , y = "Proliferation score\n" , title = "" ) + theme (axis.text.x = element_text (face= "plain" , angle=0, size = 14, color = "black" , vjust=0.6), axis.text.y = element_text (size = 10), axis.title.y = element_text (size = 14)) + scale_fill_manual (values= c ( "blue" , "red" )) + stat_compare_means ( aes (group = stage), label = "p.signif" , label.y = 4) + theme (legend.title = element_blank ()) p |
封装好的函数
1 2 3 4 5 6 7 8 | ggbarplot (ToothGrowth, x = "dose" , y = "len" , add = "mean_se" , color = "supp" , palette = "jco" , position = position_dodge (0.8))+ stat_compare_means ( aes (group = supp), label = "p.signif" , label.y = 29) ggline (ToothGrowth, x = "dose" , y = "len" , add = "mean_se" , color = "supp" , palette = "jco" )+ stat_compare_means ( aes (group = supp), label = "p.signif" , label.y = c (16, 25, 29)) |
热图 - 最直观
# heatmap
https://jokergoo.github.io/ComplexHeatmap-reference/book/
热图骚操作
平滑热图 - smooth heatmap
monocle里面的一种热图,很多顶刊都在用,也确实很漂亮。对应函数:plot_pseudotime_heatmap
问题是不够灵活,需要用monocle处理后才行,需要自定义一个处理函数。
小提琴图marker - 分布
stacked violin plot for visualizing single-cell data in Seurat
参见:mouse/singleCell/case/Kif7_ENCC/Kif7-integration/integration_public_and_Kif7.ipynb
Venn韦恩图/UpSetR图 - 交集
Beeswarm Plot 蜂群图 - 序列数据展开
https://github.com/eclarke/ggbeeswarm
1 2 3 4 | #With different beeswarm point distribution priority dat <- data.frame (x= rep (1:3, c (20,40,80))) dat$y <- rnorm ( nrow (dat),dat$x) dat$z <- 1 |
1 2 3 4 | ggplot (dat, aes (z,y)) + geom_beeswarm (size=2,priority= 'descending' , cex=3) + ggtitle ( 'Descending' ) + scale_x_continuous (expand= expansion (add= c (0.5, 0.5))) |
我的代码
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 | set.seed (49) library (ggplot2) library (ggbeeswarm) pca_HSCR2$z <- 1 pca_HSCR2$pseudotime <- -pca_HSCR2$X2 options (repr.plot.width=6, repr.plot.height=4) ggplot (pca_HSCR2, aes (x=z, y=pseudotime, fill=severity, color=severity)) + geom_beeswarm (size=1.2,priority= 'ascending' , cex=1.4) + # ggtitle('ascending') + # Descending scale_x_continuous (expand= expansion (add= c (0.5, 0.5))) + coord_flip () + theme_void () + scale_color_manual (values=severity.colors) |
基因模块在pseudotime表达的line图
参见:mouse/singleCell/case/Kif7_ENCC/Kif7/Kif7_basic_analysis.ipynb
火山图/对角线图 - 特殊散点图
参考:mouse/singleCell/case/Kif7_ENCC/Kif7-integration/Ezh2_analysis.ipynb
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 | # prepare data log2FC <- data.frame (gene= rownames (HSCR.DEG.log2FC.df.final), S_log2FC=S.log2FC, L_log2FC=L.log2FC) # add color label log2FC$color <- "none" log2FC[log2FC$gene % in % c ( "HDAC1" ),]$color <- "red" # the genes want to be labeled label.genes <- c ( 'RAMP2' , 'HEY1' , 'STAMBP' , 'CCNB1IP1' , 'LMOD3' , 'NUP107' , 'HEY2' , 'FOXO1' , 'CRLF1' , 'ZFP36L2' , 'NR2F2' , 'TUBB3' , 'ZNF385A' , 'TMEM14C' , 'FLNA' , 'TFAP2A' , 'SOX11' , 'HDAC1' , 'GLI3' , 'BCL11A' ) label.df <- subset (log2FC, gene % in % label.genes) options (repr.plot.width=4.5, repr.plot.height=5) library (ggplot2) library ( "ggrepel" ) # Basic scatter plot ggplot (log2FC, aes (x=S_log2FC, y=L_log2FC, color=color)) + # , color=coregene geom_hline (yintercept=0) + geom_vline (xintercept=0) + geom_abline (intercept = 0, slope = 1, color= "black" , linetype= "dashed" , size=1) + geom_point (size=0.5) + geom_point (data = label.df, size=2, color = "red" ) + theme_bw () + labs (x = "\nLog2FC in S-HSCR" ,y = "Log2FC in L-HSCR" , title = "" ) + theme (legend.title= element_blank ()) + # Change fontface. Allowed values : 1(normal), 2(bold), 3(italic), 4(bold.italic) geom_text_repel (data=label.df, aes (label = gene), size = 3.5, fontface=3, color= "red" , box.padding = 0.4, max.overlaps = Inf ) + theme (legend.position = "none" , axis.text = element_text (size = 10), # axis.text.y = element_text(size = 10), axis.title = element_text (size = 16, face= "plain" )) + scale_x_continuous (limits = c (-8, 8)) + scale_y_continuous (limits = c (-8, 8)) + scale_color_manual (values= c ( "grey" , "red" )) |
点的文本标记
geom_text_repel,基本用法
进阶篇 - 风格统一
为什么顶刊的图那么的赏心悦目?而自己的图拼到一起却那么的不和谐,都被自己丑哭了,却不知从何下手。
这里有几个教程还不错:
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· AI与.NET技术实操系列(二):开始使用ML.NET
· 记一次.NET内存居高不下排查解决与启示
· 探究高空视频全景AR技术的实现原理
· 理解Rust引用及其生命周期标识(上)
· 浏览器原生「磁吸」效果!Anchor Positioning 锚点定位神器解析
· 全程不用写代码,我用AI程序员写了一个飞机大战
· DeepSeek 开源周回顾「GitHub 热点速览」
· 记一次.NET内存居高不下排查解决与启示
· MongoDB 8.0这个新功能碉堡了,比商业数据库还牛
· .NET10 - 预览版1新功能体验(一)