发表级ggplot绘图流程和技巧 | 论文发表
2024年08月27日
好用的color theme:https://github.com/nanxstats/ggsci
2023年10月18日
barplot截取y轴,非常规操作。
You could try, with library(scales):
+ scale_y_continuous(limits=c(2000,2500),oob = rescale_none)
2023年09月20日
Seurat的violin plot迅速加mean
library(ggpubr) median.stat <- function(x){ out <- quantile(x, probs = c(0.5)) names(out) <- c("ymed") return(out) } # single plot VlnPlot(clustered_colon, features = c("EZH2")) + stat_summary(fun.y = median.stat, geom='point', size = 5, colour = "black", shape = 21) # or for multiple plots VlnPlot(clustered_colon, features = c("EZH2")) & stat_summary(fun.y = median.stat, geom='point', size = 5, colour = "black", shape = 21)
options(repr.plot.width=8, repr.plot.height=5) VlnPlot(GC.IM.seuset, features = c("JARID2_imputation", "EZH2_imputation","SOX9_imputation"), group.by = "source") & stat_summary(fun.y = median.stat, geom='point', size = 5, colour = "black", shape = 21) & stat_compare_means(comparisons = list(c("Normal","Cancer")), label.y = 0.20, label = "p.signif", method = "wilcox.test")
2023年05月05日
最常用的themes
annotate("text", label = NES, parse = T, x = 10000, y = 0.2, size = 6, colour = "black", fontface = "plain")
options(repr.plot.width=4, repr.plot.height=4) labs(x = "Log2 fold change",y = "-Log10(P-value)", title = my_title) + # geom_hline(aes(yintercept=1), colour="grey50", linetype="dashed", size=0.2) + # geom_vline(aes(xintercept=0.5), colour="red", linetype="dashed", size=0.2) + scale_color_manual(values=c("#619CFF", "lightgrey", "#F8766D")) + scale_fill_manual(values=c("blue", "lightgrey", "red")) + scale_y_continuous(position="left", breaks = seq(0, 15, by = 5), limits=c(0, 15)) + geom_text_repel(data=mark_data, aes(label=gene), size=5, fontface="italic", arrow = arrow(ends="first", length = unit(0.01, "npc")), box.padding = 0.2, point.padding = 0.3, segment.color = 'black', segment.size = 0.3, force = 1, max.iter = 3e3) + annotate("text", label = "1509 DOWN", x = -3.5, y = 14, size = 4, colour = "#619CFF", fontface = "bold")
cns_theme <- theme_classic() + # theme_bw() + # theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), # panel.border = element_rect(size=0.8, colour = "black")) + theme(axis.title =element_text(size = 13, color = "black", face = "bold"), axis.text =element_text(size = 13, color = "black", face = "bold"), plot.title =element_text(hjust = 0.5, size = 16, color = "black", face = "bold")) + # theme(legend.title=element_blank(), legend.key.size = unit(0.8, 'lines')) + theme(legend.position = "none")
2022年09月01日
现在基本的图形已经得心应手了。
问题是如何配色,使图形更加fancy,组合图如何美观统一。【向CNS靠齐,最少也要以CNS子刊的标准来出图】
参考:
- 要想文章好,图片少不了,22个技能助您获得美图(带字幕视频) - AI教程 【字体线条一旦不统一,出图将会非常丑】
我的私人库:https://github.com/leezx/FigureCode
The R Graph Gallery - R绘图代码库、案例大全
themes - https://www.r-graph-gallery.com/ggplot2-package.html#themes
要开始修饰以前的核心图片,准备发表论文了。
把之前比较raw的图修饰格式,统一生成高清晰图片,准备放入paper中。
会慢慢补充所有常见的绘图代码。
一个raw image的代码
p1 <- ggplot(oxidation.df, aes(x=group, y=score, color=group)) + geom_boxplot() + geom_jitter(shape=16, position=position_jitter(0.2)) + labs(title = "Fatty acid metabolism") p1
第一步:修改df里的标签
oxidation.df$group <- plyr::mapvalues(oxidation.df$group, from = c("GFP- early","GFP+ early","GFP- late","GFP+ late"), to = c("HhOFF early", "HhON early", "HhOFF late", "HhON late"))
第二步:修改标签顺序
oxidation.df$group <- factor(oxidation.df$group, levels = c("HhOFF early", "HhON early", "HhOFF late", "HhON late"))
第三步:精修格式主题字体
主题
常用的主题:https://www.r-graph-gallery.com/ggplot2-package.html#themes
- theme_bw - 去掉了灰白背景,加了边框,最常用
- theme_classic - 只留下了加粗的左下边框,最经典,适合实验图
- egg::theme_article - 只有四周的边框,最适合发表文章,缺点:图例间隔太小
- theme_minimal - 只留下了grid,没有边框
- theme_minimal_hgrid - 只留下了hgrid
- theme_void - 只留下了图例,适合tSNE图
theme_bw()
theme_void()
# remove grid theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
修改title
labs(x = "", y = "Pathway score\n", title = "Fatty acid metabolism")
xy轴标签字体大小
theme(axis.text.x = element_text(face="plain", angle=30, size = 14, color = "black", vjust=0.6), axis.text.y = element_text(size = 10), axis.title.y = element_text(size = 14))
去掉多余的图例
theme(legend.position = "none")
填充颜色
library(RColorBrewer)
scale_fill_manual(values=brewer.pal(9,"Paired")) scale_color_manual(values=brewer.pal(9,"Paired")[c(3,4,5,6)])
其他细节
限制xy坐标范围
scale_x_continuous(limits = c(0,2.5)) scale_y_continuous(limits = c(0,2.5))
一组数据的比较
library(ggpubr) stat_compare_means(label.y = 2.3, label.x = 1, size=5)
多组数据的比较
library(ggpubr) my_comparisons <- list(c("HhOFF early", "HhON early"), c("HhOFF late", "HhON late")) stat_compare_means(method = "anova", label.y = 1.29) + # global stat_compare_means(comparisons = my_comparisons, label.y = 1, label = "p.signif") + # paired scale_y_continuous(limits = c(-0.52, 1.3))
代码汇总
tmp$group <- plyr::mapvalues(tmp$group, from = c("GFP- early","GFP+ early","GFP- late","GFP+ late"), to = c("HhOFF early", "HhON early", "HhOFF late", "HhON late")) tmp$group <- factor(tmp$group, levels = c("HhOFF early", "HhON early", "HhOFF late", "HhON late")) library(ggpubr) my_comparisons <- list(c("HhOFF early", "HhON early"), c("HhOFF late", "HhON late")) options(repr.plot.width=4, repr.plot.height=4) p1 <- ggplot(tmp, aes(x=group, y=score, color=group)) + geom_boxplot() + theme_bw() + labs(x = "", y = "Pathway score\n", title = "Fatty acid metabolism") + geom_jitter(shape=16, position=position_jitter(0.2)) + theme(legend.position = "none") + theme(axis.text.x = element_text(face="plain", angle=30, size = 14, color = "black", vjust=0.6), axis.text.y = element_text(size = 10), axis.title.y = element_text(size = 14)) + # scale_fill_manual(values=brewer.pal(9,"Paired")) scale_color_manual(values=brewer.pal(9,"Paired")[c(3,4,5,6)]) + stat_compare_means(method = "anova", label.y = 1.29) + # global stat_compare_means(comparisons = my_comparisons, label.y = 1, label = "p.signif") + # paired scale_y_continuous(limits = c(-0.52, 1.3)) p1
多图拼接
options(repr.plot.width=8, repr.plot.height=9) cowplot::plot_grid(p1,p2,p3,p4,ncol = 2)
PDF出图
ggsave(filename = "HhOFF HhON metabolic pathways.pdf", width = 8, height = 9)
有些图不能这么保存,比如heatmap,这时就要用到pdf函数
# traditional save pdf("manuscript/HSCR.cluster.heatmap.pdf", width=8, height=7) p dev.off()
lnkscape里修改文字【对齐,上下标、斜体等等】
结果
OK, 一个准发表级的图就制作好了,可能还需要精修。
其余细节
点的类型
# change the border of point geom_point(shape = 21, colour = "black", fill = "white", size = 5, stroke = 5)
把点拟合成线
stat_smooth(method = "loess", size = 1.1, se = F, span = 0.2)
在散点图边缘显示其分布,需要一个额外的包ggExtra,非常简单
ggMarginal(p, type="histogram", fill="black", color=NA, size=7, xparams = list(bins=200), yparams = list(bins=200))
散点图显示mean点
stat_summary(fun.y=mean, geom="point", shape=20, size=7, color="black", fill="black") +
线
直线
geom_hline(yintercept=20, linetype="dashed", color = "red", size=2)
线段
geom_segment(data=seg_1, aes(x=x,y=y,xend=xend,yend=yend), arrow=arrow(length=unit(0.3,"cm")), size=1)
矩形
geom_rect(data=AS_1, aes(xmin=EXONSTART, xmax=EXONEND,ymin= -0.1,ymax=0.1),fill="#4DAF4A")
图例,比如改legend title,改点大小,去掉图例
labs(x = "\nTranscriptional level",y = "\nPost-transcriptional level", title = "", color = "Clinical score")
# change legend dot size guides(colour = guide_legend(override.aes = list(size=10)))
# ggplot remove legend title theme(legend.title = element_blank())
# position theme(legend.text = element_text(size = 12), legend.position = c(0.8, 0.75))
# remove legend background theme(legend.background=element_blank())
去掉legend的白色背景布,一步到位
theme(legend.title = element_blank(), legend.text = element_text(size = 11), legend.position = c(0.85, 0.15), legend.background = element_blank())
# 改变图例元素的顺序 scale_fill_manual(values=brewer.pal(3,"Set1")[c(3,2,1)], breaks=c("H3K9Ac", "H4K8Ac", "H3K27Ac"))
标题格式,比如居中
theme(plot.title = element_text(hjust = 0.5, size = 18))
去掉边框,轴线,刻度;去掉右上边框
# empty border, ticks, text theme(panel.border = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_blank()) + labs(x = "",y = "", title = "") + theme(axis.title=element_blank(), axis.text=element_blank(), axis.ticks=element_blank())
# remove top and right border theme(axis.line = element_line(colour = "black"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border = element_blank(), panel.background = element_blank())
坐标轴,比如修改起点,范围
# force y start from 0 scale_y_continuous(expand = c(0, 0), limits = c(0, NA))
去掉画布中的网格线条
# just remove inside grid theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
添加文本
# add text annotate("text", label = "Wilcoxon test\nP-value = 1.48e-12", x = 0.5, y =2, size = 6, colour = "black")
添加背景色
# add background color to mark different region geom_rect(xmin=0, xmax=2.5, ymin=-2, ymax=-1, fill="#4DAF4A", alpha=1, color=NA)
修改填充颜色
# color scale_color_manual(values=brewer.pal(9,"Set1")[c(1:5,7:9)])
精准控制圈图的两种alpha,比如venn图
scale_color_manual(values = sample.colors) + scale_fill_manual(values = alpha(sample.colors, .2))
快速统计分析
# quick statistic testing # Wilcoxon test wilcox.test(subset(time.df,GeneSet=="Common risk")$Time, subset(time.df,GeneSet=="L-HSCR specific")$Time, alternative = "two.sided")
# packages
# significance
https://github.com/const-ae/ggsignif
数据的基本处理
分组计算,如取mean,单列
# quick data process # get group mean weather %>% group_by(city) %>% summarise(mean_temperature = mean(temperature))
分组取mean,多列
d <- read.table(text= 'Name Month Rate1 Rate2 Aira 1 12 23 Aira 2 18 73 Aira 3 19 45 Ben 1 53 19 Ben 2 22 87 Ben 3 19 45 Cat 1 22 87 Cat 2 67 43 Cat 3 45 32', header=TRUE) aggregate(d[, 3:4], list(d$Name), mean) Group.1 Rate1 Rate2 1 Aira 16.33333 47.00000 2 Ben 31.33333 50.33333 3 Cat 44.66667 54.00000
小数点保留,科学计数法
# format decimals formatC(0.46, format = "e", digits = 1) library(scales) scientific(0.46, digits = 2)
查看默认的颜色 - 画图的结果数据
# see the colors in ggplot # To see what colors are used to make your plot you can use function ggplot_build() and then look at data part of this object (in column colour are codes). ggplot_build(p)$data
其他图种
柱状图barplot
改bar的顺序,以及添加边框
geom_bar(stat="identity", alpha=1, position = position_fill(reverse = TRUE), color = "grey50") + # position="fill",
配对的箱线图、柱状图、折线图 - 用于比较case和control
data
aes里加一个fill就可以把x轴分组(颜色填充默认就会分组),非常适合case和control的比较。
这里想加点需要用另一个函数geom_dotplot
lineage lineage.sub stage S.Score G2M.Score cc.score <chr> <fct> <chr> <dbl> <dbl> <dbl> ctrl_AAACCTGAGACATAAC NP NPlate Control -0.8162696 -0.98076576 -0.8162696 ctrl_AAACCTGCAAGTAATG BP BP Control 0.3118349 -0.05584626 0.3118349 ctrl_AAACCTGCATGCTAGT GP GP Control 0.4443853 0.27702244 0.4443853
# http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/76-add-p-values-and-significance-levels-to-ggplots/ library(ggpubr) options(repr.plot.width=5, repr.plot.height=4) p <- ggplot(cc.df, aes(x=lineage.sub, y=cc.score, fill=stage)) + geom_boxplot(position=position_dodge(1)) + geom_dotplot(binaxis='y', stackdir='center', position = "dodge", dotsize=0.15, binwidth=1/25, binpositions="all") + theme_bw() + labs(x = "", y = "Proliferation score\n", title = "") + theme(axis.text.x = element_text(face="plain", angle=0, size = 14, color = "black", vjust=0.6), axis.text.y = element_text(size = 10), axis.title.y = element_text(size = 14)) + scale_fill_manual(values=c("blue","red")) + stat_compare_means(aes(group = stage), label = "p.signif", label.y = 4) + theme(legend.title = element_blank()) p
封装好的函数
ggbarplot(ToothGrowth, x = "dose", y = "len", add = "mean_se", color = "supp", palette = "jco", position = position_dodge(0.8))+ stat_compare_means(aes(group = supp), label = "p.signif", label.y = 29) ggline(ToothGrowth, x = "dose", y = "len", add = "mean_se", color = "supp", palette = "jco")+ stat_compare_means(aes(group = supp), label = "p.signif", label.y = c(16, 25, 29))
热图 - 最直观
# heatmap
https://jokergoo.github.io/ComplexHeatmap-reference/book/
热图骚操作
平滑热图 - smooth heatmap
monocle里面的一种热图,很多顶刊都在用,也确实很漂亮。对应函数:plot_pseudotime_heatmap
问题是不够灵活,需要用monocle处理后才行,需要自定义一个处理函数。
小提琴图marker - 分布
stacked violin plot for visualizing single-cell data in Seurat
参见:mouse/singleCell/case/Kif7_ENCC/Kif7-integration/integration_public_and_Kif7.ipynb
Venn韦恩图/UpSetR图 - 交集
Beeswarm Plot 蜂群图 - 序列数据展开
https://github.com/eclarke/ggbeeswarm
#With different beeswarm point distribution priority dat <- data.frame(x=rep(1:3,c(20,40,80))) dat$y <- rnorm(nrow(dat),dat$x) dat$z <- 1
ggplot(dat, aes(z,y)) + geom_beeswarm(size=2,priority='descending', cex=3) + ggtitle('Descending') + scale_x_continuous(expand=expansion(add=c(0.5, 0.5)))
我的代码
set.seed(49) library(ggplot2) library(ggbeeswarm) pca_HSCR2$z <- 1 pca_HSCR2$pseudotime <- -pca_HSCR2$X2 options(repr.plot.width=6, repr.plot.height=4) ggplot(pca_HSCR2, aes(x=z, y=pseudotime, fill=severity, color=severity)) + geom_beeswarm(size=1.2,priority='ascending', cex=1.4) + # ggtitle('ascending') + # Descending scale_x_continuous(expand=expansion(add=c(0.5, 0.5))) + coord_flip() + theme_void() + scale_color_manual(values=severity.colors)
基因模块在pseudotime表达的line图
参见:mouse/singleCell/case/Kif7_ENCC/Kif7/Kif7_basic_analysis.ipynb
火山图/对角线图 - 特殊散点图
参考:mouse/singleCell/case/Kif7_ENCC/Kif7-integration/Ezh2_analysis.ipynb
# prepare data log2FC <- data.frame(gene=rownames(HSCR.DEG.log2FC.df.final), S_log2FC=S.log2FC, L_log2FC=L.log2FC) # add color label log2FC$color <- "none" log2FC[log2FC$gene %in% c("HDAC1"),]$color <- "red" # the genes want to be labeled label.genes <- c('RAMP2', 'HEY1', 'STAMBP', 'CCNB1IP1', 'LMOD3', 'NUP107', 'HEY2', 'FOXO1', 'CRLF1', 'ZFP36L2', 'NR2F2', 'TUBB3', 'ZNF385A', 'TMEM14C', 'FLNA', 'TFAP2A', 'SOX11', 'HDAC1', 'GLI3', 'BCL11A') label.df <- subset(log2FC, gene %in% label.genes) options(repr.plot.width=4.5, repr.plot.height=5) library(ggplot2) library("ggrepel") # Basic scatter plot ggplot(log2FC, aes(x=S_log2FC, y=L_log2FC, color=color)) + # , color=coregene geom_hline(yintercept=0) + geom_vline(xintercept=0) + geom_abline(intercept = 0, slope = 1, color="black", linetype="dashed", size=1) + geom_point(size=0.5) + geom_point(data = label.df, size=2, color = "red") + theme_bw() + labs(x = "\nLog2FC in S-HSCR",y = "Log2FC in L-HSCR", title = "") + theme(legend.title=element_blank()) + # Change fontface. Allowed values : 1(normal), 2(bold), 3(italic), 4(bold.italic) geom_text_repel(data=label.df, aes(label = gene), size = 3.5, fontface=3, color="red", box.padding = 0.4, max.overlaps = Inf) + theme(legend.position = "none", axis.text = element_text(size = 10), # axis.text.y = element_text(size = 10), axis.title = element_text(size = 16, face="plain")) + scale_x_continuous(limits = c(-8, 8)) + scale_y_continuous(limits = c(-8, 8)) + scale_color_manual(values=c("grey","red"))
点的文本标记
geom_text_repel,基本用法
进阶篇 - 风格统一
为什么顶刊的图那么的赏心悦目?而自己的图拼到一起却那么的不和谐,都被自己丑哭了,却不知从何下手。
这里有几个教程还不错: