教小高改bug

  博客园 :: 首页 :: 博问 :: 闪存 :: :: 联系 :: :: 管理 ::

0. 准备

setwd("D:/R/CHOL")
rm(list = ls())  
load(file = "step4output.Rdata")

library(clusterProfiler)
library(dplyr)
library(ggplot2)

 构建kegg_plot函数(以up_kegg和down_kegg为输入数据做图):

kegg_plot <- function(up_kegg,down_kegg){
  dat=rbind(up_kegg,down_kegg)
  colnames(dat)
  dat$pvalue = -log10(dat$pvalue)
  dat$pvalue = dat$pvalue*dat$group 
  dat = dat[order(dat$pvalue,decreasing = F),]
  
  g_kegg <- ggplot(dat, aes(x = reorder(Description,order(pvalue, decreasing = F)), y = pvalue, fill = group)) + 
    geom_bar(stat = "identity") + 
    scale_fill_gradient(low = "blue",high = "red",guide = "none") + 
    scale_x_discrete(name = "Pathway names") +
    scale_y_continuous(name = "-log10Pvalue") +
    coord_flip() + 
    theme_bw()+
    theme(plot.title = element_text(hjust = 0.5))+
    ggtitle("Pathway Enrichment") 
}

 1. GO富集分析(GO database analysis)

输入数据

gene_up = deg[deg$change == 'up','ENTREZID']
gene_down = deg[deg$change == 'down','ENTREZID']
gene_diff = c(gene_up,gene_down)
gene_all = deg[,'ENTREZID']

1. 上调基因的ENTREZID。gene_up = deg$ENTREZID[deg$change == 'up']可实现相同功能。
2. 下调基因的ENTREZID
3. 所有差异基因的ENTREZID
4. 所有基因的ENTREZID

GO分析

细胞组分(Cellular Component,CC)、生物过程(Biological Process,BP)、分子功能(Molecular Function,MF):

ego_CC <- enrichGO(gene = gene_diff,
                     OrgDb= org.Hs.eg.db,
                     ont = "CC",
                     pAdjustMethod = "BH",
                     minGSSize = 1,
                     pvalueCutoff = 0.01,
                     qvalueCutoff = 0.01,
                     readable = TRUE)
ego_BP <- enrichGO(gene = gene_diff,
                     OrgDb= org.Hs.eg.db,
                     ont = "BP",
                     pAdjustMethod = "BH",
                     minGSSize = 1,
                     pvalueCutoff = 0.01,
                     qvalueCutoff = 0.01,
                     readable = TRUE)
ego_MF <- enrichGO(gene = gene_diff,
                     OrgDb= org.Hs.eg.db,
                     ont = "MF",
                     pAdjustMethod = "BH",
                     minGSSize = 1,
                     pvalueCutoff = 0.01,
                     qvalueCutoff = 0.01,
                     readable = TRUE)

可视化

条带图:

barplot(ego_CC,showCategory = 20)
默认显示8个类别,一般设置为20,可修改

气泡图:

dotplot(ego_CC)

其他图

下面的图需要映射颜色

geneList = deg$logFC
names(geneList) = deg$ENTREZID
geneList = sort(geneList,decreasing = T)

1. 设置geneList向量的值为logFC
2. 修改其中元素的名字为ENTREZID,
3. 按logFC从大到小排序

1. cnetplot(Gene-Concept Network,通路-基因网络图):展示top5通路下的基因网络

cnetplot(ego_CC, categorySize = "pvalue", foldChange = geneList,showCategory = 5,colorEdge = TRUE,color_category = "steelblue")
cnetplot(ego_CC, foldChange=geneList, circular = TRUE, colorEdge = TRUE)

1. 散布状分布

2. 圈状分布

2. emapplot(Enrichment Map):展示有共同基因的通路

library(enrichplot)
emapplot(pairwise_termsim(ego_CC))

emapplot富集图将被富集的术语组织成一个边缘连接重叠基因集的网络,相互重叠的基因集往往会聚集在一起,因此有助于我们识别功能模块。

3. goplot(GO通路网络图):展示通路关系

goplot(ego_CC)

4. heatplot(Heatmap-like functional classification,热图)

heatplot(ego_CC,foldChange = geneList,showCategory = 5)

与cnetplot展示内容类似,但是将不同富集通路的相同基因聚在了一起,有助于我们更好识别基因模块


 2. KEGG富集分析(KEGG pathway analysis)

输入数据

gene_up = deg[deg$change == 'up','ENTREZID'] 
gene_down = deg[deg$change == 'down','ENTREZID'] 
gene_diff = c(gene_up,gene_down)
gene_all = deg[,'ENTREZID']

对上调/下调/所有差异基因进行富集分析

kk.up <- enrichKEGG(gene           = gene_up,
                      organism     = 'hsa',
                      universe     = gene_all,
                      pvalueCutoff = 0.9,
                      qvalueCutoff = 0.9)
kk.down <- enrichKEGG(gene           = gene_down,
                        organism     = 'hsa',
                        universe     = gene_all,
                        pvalueCutoff = 0.9,
                        qvalueCutoff =0.9)
kk.diff <- enrichKEGG(gene           = gene_diff,
                        organism     = 'hsa',
                        pvalueCutoff = 0.9)

从富集结果中提取出结果数据框

kegg_diff_dt <- kk.diff@result

按照pvalue筛选通路

在enrichKEGG时没有设置pvalueCutoff,在此处筛选:

down_kegg <- kk.down@result %>%
  filter(pvalue < 0.05) %>%
  mutate(group = -1)
up_kegg <- kk.up@result %>%
  filter(pvalue < 0.05) %>%
  mutate(group = 1)

3. 将p<0.05的下调基因标记为-1

6. 将p>0.05的下调基因标记为1

可视化

g_kegg <- kegg_plot(up_kegg,down_kegg)
g_kegg + scale_y_continuous(labels = c(20,15,10,5,0,5))

2. 图中的横坐标有误,需手动修改横坐标(>0),修改时结合图中对应的横坐标

保存图片

ggsave(g_kegg,filename = 'kegg_up_down.png')

 3. GSEA作KEGG富集分析(了解)

查看示例数据

data(geneList, package="DOSE")

将数据转换成示例数据的格式

geneList=deg$logFC
names(geneList) = deg$ENTREZID
geneList = sort(geneList,decreasing = T)

富集分析

kk_gse <- gseKEGG(geneList     = geneList,
                  organism     = 'hsa',
                  nPerm        = 1000,
                  minGSSize    = 120,
                  pvalueCutoff = 0.9,
                  verbose      = FALSE)
down_kegg <- kk_gse[kk_gse$pvalue < 0.05 & kk_gse$enrichmentScore < 0,];down_kegg$group = -1
up_kegg <- kk_gse[kk_gse$pvalue < 0.05 & kk_gse$enrichmentScore > 0,];up_kegg$group = 1

可视化

kegg_plot <- kegg_plot(up_kegg,down_kegg)

保存图片

ggsave('kegg_up_down_gsea.png')

 

posted on 2022-09-16 00:09  小高不高  阅读(1274)  评论(0编辑  收藏  举报