R:microtable包alpha多样性计算+Beta多样性(MetaCyc)

rm(list = ls())
setwd("C:\\Users\\Administrator\\Desktop\\machine learning\\Diversity_pathway")
library(microeco)
library(magrittr)
library(file2meco)

abund_file_path <- "HUMAnN_MetaCyc_abund.tsv"
match_file_path <- "metagenome_match_table.tsv"
sample_file_path <- "metagenome_sample_info.tsv"

#humann2meco(abund_file_path, db = "MetaCyc", sample_table = sample_file_path, match_table = match_file_path)

dataset <- humann2meco(abund_file_path, db = "MetaCyc", sample_table = sample_file_path, match_table = match_file_path)
dataset$tidy_dataset() #整理数据集

dataset$cal_abund(select_cols = 1:3, rel = FALSE)
dataset$taxa_abund$Superclass1 %<>% .[!grepl("unclass", rownames(.)), ]
dataset$taxa_abund$Superclass2 %<>% .[!grepl("unclass", rownames(.)), ]
dataset$taxa_abund$pathway %<>% .[!grepl("unclass", rownames(.)), ]

write.table(dataset$taxa_abund$Superclass1, file = "Superclass1.txt", sep = "\t", quote = FALSE, row.names = TRUE)
write.table(dataset$taxa_abund$Superclass2, file = "Superclass2.txt", sep = "\t", quote = FALSE, row.names = TRUE)
write.table(dataset$taxa_abund$pathway, file = "pathway.txt", sep = "\t", quote = FALSE, row.names = TRUE)

test1 <- trans_abund$new(test, taxrank = "Superclass1", ntaxa = 10, use_percentage = FALSE)
test1$ylabname <- "Abundance (RPK)"
test1$plot_bar(facet = "Group", bar_full = FALSE)

test$cal_abund(select_cols = c("Superclass1", "Phylum", "Genus"), rel = TRUE)
test1 <- trans_abund$new(test, taxrank = "Phylum", ntaxa = 10, delete_taxonomy_lineage = FALSE)
test1$plot_bar(facet = "Group")

# functional biomarker
dataset <- trans_diff$new(dataset, method = "lefse", group = "Group",taxa_level = "pathway")
dataset$plot_diff_bar(use_number = 1:30)
write.table(dataset$res_diff, file = "lefse.txt", sep = "\t", quote = FALSE, row.names = TRUE)

dataset$otu_table[dataset$otu_table < 0] <- 0
dataset$cal_alphadiv(PD = FALSE) #计算 alpha 多样性
alpha_div_table <- dataset$alpha_diversity
write.csv(alpha_div_table, file = "alpha_diversity.csv", row.names = TRUE)

t1 <- trans_alpha$new(dataset = dataset, group = "Group") #设置数据与样本分组
t1 <- trans_alpha$new(dataset = dataset, group = "Subgroup", by_group = "Type") #设置数据与样本分组
t1 <- trans_alpha$new(dataset = dataset, group = "Group", by_group = "Type") #设置数据与样本分组
t1 <- trans_alpha$new(dataset = dataset, group = "Type", by_group = "Group") #设置数据与样本分组


t1$cal_diff(method = "KW")
t1$cal_diff(method = "wilcox")

rm(t1)
#head(t1$res_diff)
write.table(t1$res_diff, file = "alpha_diversity_differences_gene.txt", sep = "\t", quote = FALSE, row.names = FALSE)
write.table(t1$res_diff, file = "alpha_diversity_differences_DAS.txt", sep = "\t", quote = FALSE, row.names = FALSE)
write.table(t1$res_diff, file = "alpha_diversity_differences_time.txt", sep = "\t", quote = FALSE, row.names = FALSE)

t1$plot_alpha(measure = "Pielou", add_sig_text_size = 6, add = "jitter", order_x_mean = TRUE)
###############################################################
library(ggplot2)
library(ggpubr)
library(dplyr)
library(multcompView)
index <- read.table('Shannon_index.txt', header = TRUE, row.names = 1)

# 绘制箱线图
p <- ggboxplot(index, x = "Type", y = "Shannon", color = "Group", width = 0.75,
               size = 1.5,  # 直接在ggboxplot中设置箱线图的线条粗细
               add = "jitter",
               add.params = list(size = 5, alpha = 0.7)) +
  theme_minimal(base_size = 16) +
  labs(x = "", y = "Shannon") +  # 设置y轴标题
  theme(legend.position = "top",
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        axis.line = element_line(colour = "black", linewidth = 1.5),
        axis.text.x = element_text(size = 22, face = "plain", color = "black", angle = 0, hjust = 0.5),
        axis.text.y = element_text(size = 22, face = "plain", color = "black"),
        axis.title.y = element_text(size = 36, face = "plain", color = "black"),  # 自定义Y轴标题文字大小
        legend.title = element_blank(),  # 移除图例标题
        axis.ticks = element_line(color = "black"),
        axis.ticks.length = unit(0.25, "cm"),
        legend.key.size = unit(1.2, "cm"),
        legend.text = element_text(size = 18)) +
  scale_color_manual(values = c("B73" = "#8FC9E2", "Mo17" = "#ECC97F"))

# 手动添加显著性标记
dodge_width <- 0.375
p <- p + 
  annotate("segment", x = 1 - dodge_width, xend = 1 + dodge_width, y = 4.61, yend = 4.61, size = 1.2) +
  annotate("text", x = 1, y = 4.61, label = "**", size = 10, vjust = 0) +
  annotate("segment", x = 2 - dodge_width, xend = 2 + dodge_width, y = 4.61, yend = 4.61, size = 1.2) +
  annotate("text", x = 2, y = 4.62, label = "ns", size = 10, vjust = 0) +
  annotate("segment", x = 3 - dodge_width, xend = 3 + dodge_width, y = 4.61, yend = 4.61, size = 1.2) +
  annotate("text", x = 3, y = 4.62, label = "ns", size = 10, vjust = 0) +
  annotate("segment", x = 4 - dodge_width, xend = 4 + dodge_width, y = 4.61, yend = 4.61, size = 1.2) +
  annotate("text", x = 4, y = 4.61, label = "*", size = 10, vjust = 0)
# 显示图表
print(p)
# 保存图形
ggsave("boxplot.png", plot = p, width = 8, height = 8, dpi = 1200, bg = "white")

rm(dodge_width)
rm(index)
rm(p)
###############################################################
library(ggplot2)
library(ggpubr)
library(dplyr)
library(multcompView)
index <- read.table('Simpson_index.txt', header = TRUE, row.names = 1)

# 绘制箱线图
p <- ggboxplot(index, x = "Type", y = "Simpson", color = "Group", width = 0.75,
               size = 1.5,  # 直接在ggboxplot中设置箱线图的线条粗细
               add = "jitter",
               add.params = list(size = 5, alpha = 0.7)) +
  theme_minimal(base_size = 16) +
  labs(x = "", y = "Simpson") +  # 设置y轴标题
  theme(legend.position = "top",
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        axis.line = element_line(colour = "black", linewidth = 1.5),
        axis.text.x = element_text(size = 22, face = "plain", color = "black", angle = 0, hjust = 0.5),
        axis.text.y = element_text(size = 22, face = "plain", color = "black"),
        axis.title.y = element_text(size = 36, face = "plain", color = "black"),  # 自定义Y轴标题文字大小
        legend.title = element_blank(),  # 移除图例标题
        axis.ticks = element_line(color = "black"),
        axis.ticks.length = unit(0.25, "cm"),
        legend.key.size = unit(1.2, "cm"),
        legend.text = element_text(size = 18)) +
  scale_color_manual(values = c("B73" = "#8FC9E2", "Mo17" = "#ECC97F"))

# 手动添加显著性标记
dodge_width <- 0.375
p <- p + 
  annotate("segment", x = 1 - dodge_width, xend = 1 + dodge_width, y = 0.914, yend = 0.914, size = 1.2) +
  annotate("text", x = 1, y = 0.914, label = "*", size = 10, vjust = 0) +
  annotate("segment", x = 2 - dodge_width, xend = 2 + dodge_width, y = 0.914, yend = 0.914, size = 1.2) +
  annotate("text", x = 2, y = 0.914, label = "*", size = 10, vjust = 0) +
  annotate("segment", x = 3 - dodge_width, xend = 3 + dodge_width, y = 0.914, yend = 0.914, size = 1.2) +
  annotate("text", x = 3, y = 0.915, label = "ns", size = 10, vjust = 0) +
  annotate("segment", x = 4 - dodge_width, xend = 4 + dodge_width, y = 0.914, yend = 0.914, size = 1.2) +
  annotate("text", x = 4, y = 0.914, label = "*", size = 10, vjust = 0)
# 显示图表
print(p)
# 保存图形
ggsave("Simpson.png", plot = p, width = 8, height = 8, dpi = 1200, bg = "white")
###############################################################
library(ggplot2)
library(ggpubr)
library(dplyr)
library(multcompView)
index <- read.table('Pielou_index.txt', header = TRUE, row.names = 1)

# 绘制箱线图
p <- ggboxplot(index, x = "Type", y = "Pielou", color = "Group", width = 0.75,
               size = 1.5,  # 直接在ggboxplot中设置箱线图的线条粗细
               add = "jitter",
               add.params = list(size = 5, alpha = 0.7)) +
  theme_minimal(base_size = 16) +
  labs(x = "", y = "Pielou") +  # 设置y轴标题
  theme(legend.position = "top",
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        axis.line = element_line(colour = "black", linewidth = 1.5),
        axis.text.x = element_text(size = 22, face = "plain", color = "black", angle = 0, hjust = 0.5),
        axis.text.y = element_text(size = 22, face = "plain", color = "black"),
        axis.title.y = element_text(size = 36, face = "plain", color = "black"),  # 自定义Y轴标题文字大小
        legend.title = element_blank(),  # 移除图例标题
        axis.ticks = element_line(color = "black"),
        axis.ticks.length = unit(0.25, "cm"),
        legend.key.size = unit(1.2, "cm"),
        legend.text = element_text(size = 18)) +
  scale_color_manual(values = c("B73" = "#8FC9E2", "Mo17" = "#ECC97F"))

# 手动添加显著性标记
dodge_width <- 0.375
p <- p + 
  annotate("segment", x = 1 - dodge_width, xend = 1 + dodge_width, y = 0.515, yend = 0.515, size = 1.2) +
  annotate("text", x = 1, y = 0.515, label = "**", size = 10, vjust = 0) +
  annotate("segment", x = 2 - dodge_width, xend = 2 + dodge_width, y = 0.515, yend = 0.515, size = 1.2) +
  annotate("text", x = 2, y = 0.516, label = "ns", size = 10, vjust = 0) +
  annotate("segment", x = 3 - dodge_width, xend = 3 + dodge_width, y = 0.515, yend = 0.515, size = 1.2) +
  annotate("text", x = 3, y = 0.516, label = "ns", size = 10, vjust = 0) +
  annotate("segment", x = 4 - dodge_width, xend = 4 + dodge_width, y = 0.515, yend = 0.515, size = 1.2) +
  annotate("text", x = 4, y = 0.515, label = "*", size = 10, vjust = 0)
# 显示图表
print(p)
# 保存图形
ggsave("Pielou.png", plot = p, width = 8, height = 8, dpi = 1200, bg = "white")
###############################################################
dataset$otu_table[dataset$otu_table < 0] <- 0
dataset$cal_betadiv(unifrac = FALSE) #beta 多样性
bray_curtis <- dataset$beta_diversity$bray
#write.csv(bray_curtis, file = "bray.csv", row.names = TRUE)
jaccard <- dataset$beta_diversity$jaccard
#write.csv(jaccard, file = "jaccard.csv", row.names = TRUE)
stability_matrix <- 1 - as.matrix(bray_curtis)

t1 <- trans_beta$new(dataset = dataset, group = "Group", measure = "bray")
t1$cal_group_distance(within_group = TRUE)
t1$cal_group_distance_diff(method = "wilcox")
t1$plot_group_distance(add = "mean")

rm(t1)
library(dplyr)
print(t1$res_group_distance)
#write.table(t1$res_group_distance, file = "res_group_distance.txt", sep = "\t", quote = FALSE, row.names = FALSE)

# 计算稳定性并赋值到数据框
stability <- t1$res_group_distance %>%
  mutate(
    Stability = 1 - Value # 计算稳定性
  )

print(t1$res_group_distance_diff)
#write.table(t1$res_group_distance_diff, file = "res_group_distance_diff.txt", sep = "\t", quote = FALSE, row.names = FALSE)

# 计算每组的均值和标准差
gene_distance_summary <- stability %>%
  group_by(Group) %>%  # 按 Group 分组
  summarise(
    Mean_Distance = mean(Stability),      # 计算均值
    SD_Distance = sd(Stability),          # 计算标准差
    Lower_Bound = mean(Stability) - sd(Stability),  # 均值 - SD
    Upper_Bound = mean(Stability) + sd(Stability)   # 均值 + SD
  )

# 查看结果
print(gene_distance_summary)
write.table(gene_distance_summary, file = "gene_distance_summary.txt", sep = "\t", quote = FALSE, row.names = FALSE)

t1 <- trans_beta$new(dataset = dataset, group = "Subgroup", measure = "bray")
# 执行RMANOVA分析
t1$cal_manova(manova_all = FALSE)
# 获取分析结果
manova_results <- t1$res_manova
# 将结果保存到TXT文件
write.table(manova_results, file = "RMANOVA_results.txt", sep = "\t", quote = FALSE)

t1$cal_manova(manova_all = TRUE)
manova_results <- t1$res_manova
write.table(manova_results, file = "RMANOVA_results_all.txt", sep = "\t", quote = FALSE)

t1$cal_manova(manova_all = FALSE, group = "Type", by_group = "Group")
manova_results <- t1$res_manova
write.table(manova_results, file = "RMANOVA_results_Group.txt", sep = "\t", quote = FALSE)

t1$cal_manova(manova_all = FALSE, group = "Group", by_group = "Type")
manova_results <- t1$res_manova
write.table(manova_results, file = "RMANOVA_results_Gene.txt", sep = "\t", quote = FALSE)

t1 <- trans_beta$new(dataset = dataset, group = "Group", measure = "bray")
t1$cal_manova(manova_all = FALSE)
manova_results <- t1$res_manova
write.table(manova_results, file = "RMANOVA_results_P.txt", sep = "\t", quote = FALSE)
###############################################################
library(vegan)
library(tidyverse)
library(ggalt)
library(car)
library(ggforce)
library(ggpubr)
library(patchwork)

pcoa <- cmdscale(bray_curtis, k = 2, eig = TRUE) 
points <- as.data.frame(pcoa$points) %>% dplyr::rename(x = "V1", y = "V2") 
eig <- pcoa$eig

rownames(points) <- rownames(sample_table)
points$ID <- rownames(points)
points$Subgroup <- sample_table[match(rownames(points), rownames(sample_table)), "Subgroup"]
n <- 0.85
colors <- c("B73_DAS28"="#8FC9E2","B73_DAS42"="#8FC9E2","B73_DAS56"="#8FC9E2","B73_DAS70"="#8FC9E2","Mo17_DAS28"="#ECC97F","Mo17_DAS42"="#ECC97F","Mo17_DAS56"="#ECC97F","Mo17_DAS70"="#ECC97F")
# 定义了颜色和形状的映射关系,用于后续的可视化。
shapes <- c("B73_DAS28"=24, "B73_DAS42"=22, "B73_DAS56"=21, "B73_DAS70"=23, 
            "Mo17_DAS28"=24, "Mo17_DAS42"=22, "Mo17_DAS56"=21, "Mo17_DAS70"=23)
levels_order <- c("B73_DAS28", "B73_DAS42", "B73_DAS56", "B73_DAS70", "Mo17_DAS28", "Mo17_DAS42", "Mo17_DAS56", "Mo17_DAS70") #定义顺序
points$Subgroup <- factor(points$Subgroup, levels = levels_order)
# 在ggplot中使用这些形状
p1 <- ggplot(points, aes(x = x, y = y, fill = Subgroup, shape = Subgroup)) + #创建了一个ggplot对象,设置了数据框points作为数据,设置了x和y作为x轴和y轴的变量,设置了Group作为填充色和形状的变量。
  geom_point(alpha = .7, size = 5) + #添加了散点图层。alpha = .7设置了点的透明度,size = 5设置了点的大小。
  scale_shape_manual(values = shapes) +  # 设置了形状和填充色的映射关系。使用了之前定义的shapes和colors变量。
  scale_fill_manual(values = colors) +
  labs(x = paste("PCo1 (", format(100 * eig[1] / sum(eig), digits = 4), "%)", sep = ""),
       y = paste("PCo2 (", format(100 * eig[2] / sum(eig), digits = 4), "%)", sep = "")) +
  #设置了x轴、y轴和标题的标签。使用了paste函数将多个字符串连接在一起,使用了format函数将数字格式化为指定的小数位数。
  geom_mark_ellipse(aes(fill = Subgroup), alpha = 0.1, color = "grey", linetype = 3) +
  #添加了椭圆图层。每个椭圆代表一个组的样本,椭圆的位置和大小由组的样本的均值和标准差决定。
  theme_bw() +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        axis.text = element_text(color = "black", size = 25),
        axis.title.x = element_text(size = 28),
        axis.title.y = element_text(size = 28),
        plot.title = element_text(size = 16, face = "bold"),
        legend.key.size = unit(1, "cm"),
        legend.title = element_blank(),
        legend.text = element_text(size = 14),
        panel.border = element_blank(), # 去掉整个图的边框
        axis.ticks = element_line(size = 1.5),
        axis.ticks.length = unit(0.3, "cm"),
        legend.position = "top",
        legend.direction = "horizontal",
        axis.line = element_line(color = "black", size = 1)) # 添加此行以显示坐标轴线
p1
# 使用ggsave保存PCoA图为PNG格式
ggsave(filename = "PCoA_plot.png", plot = p1, width = 10, height = 10, units = "in", dpi = 1200)
rm(colors)
###############################################################
# 使用 prcomp 计算 PCA
pca <- prcomp(bray_curtis, scale. = TRUE)
# 提取 PCA 结果
points <- as.data.frame(pca$x) %>% dplyr::rename(x = "PC1", y = "PC2")
eig <- pca$sdev^2  # 提取特征值
rownames(points) <- rownames(sample_table)
points$ID <- rownames(points)
points$Subgroup <- sample_table[match(rownames(points), rownames(sample_table)), "Subgroup"]
# 定义颜色和形状的映射关系
colors <- c("B73_DAS28"="#8FC9E2","B73_DAS42"="#8FC9E2","B73_DAS56"="#8FC9E2","B73_DAS70"="#8FC9E2",
            "Mo17_DAS28"="#ECC97F","Mo17_DAS42"="#ECC97F","Mo17_DAS56"="#ECC97F","Mo17_DAS70"="#ECC97F")
shapes <- c("B73_DAS28"=24, "B73_DAS42"=22, "B73_DAS56"=21, "B73_DAS70"=23, 
            "Mo17_DAS28"=24, "Mo17_DAS42"=22, "Mo17_DAS56"=21, "Mo17_DAS70"=23)
levels_order <- c("B73_DAS28", "B73_DAS42", "B73_DAS56", "B73_DAS70", 
                  "Mo17_DAS28", "Mo17_DAS42", "Mo17_DAS56", "Mo17_DAS70")
points$Subgroup <- factor(points$Subgroup, levels = levels_order)
# 绘制 PCA 图
p1 <- ggplot(points, aes(x = x, y = y, fill = Subgroup, shape = Subgroup)) +
  geom_point(alpha = .7, size = 5) +
  scale_shape_manual(values = shapes) +
  scale_fill_manual(values = colors) +
  labs(x = paste("PC1 (", format(100 * eig[1] / sum(eig), digits = 4), "%)", sep = ""),
       y = paste("PC2 (", format(100 * eig[2] / sum(eig), digits = 4), "%)", sep = "")) +
  geom_mark_ellipse(aes(fill = Subgroup), alpha = 0.1, color = "grey", linetype = 3) +
  theme_bw() +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        axis.text = element_text(color = "black", size = 25),
        axis.title.x = element_text(size = 28),
        axis.title.y = element_text(size = 28),
        plot.title = element_text(size = 16, face = "bold"),
        legend.key.size = unit(1, "cm"),
        legend.title = element_blank(),
        legend.text = element_text(size = 14),
        panel.border = element_blank(),
        axis.ticks = element_line(size = 1.5),
        axis.ticks.length = unit(0.3, "cm"),
        legend.position = "top",
        legend.direction = "horizontal",
        axis.line = element_line(color = "black", size = 1))

p1
# 保存 PCA 图为 PNG 格式
ggsave(filename = "PCA_plot.png", plot = p1, width = 10, height = 10, units = "in", dpi = 1200)
###############################################################

 

posted @ 2024-11-26 10:39  王哲MGG_AI  阅读(5)  评论(0编辑  收藏  举报