R:microtable包alpha多样性计算+Beta多样性(微生物丰度)

rm(list = ls())
setwd("C:\\Users\\Administrator\\Desktop\\machine learning\\Diversity")
library(microeco)
library(magrittr)
feature_table <- read.table('Bac_all.txt', header = TRUE, row.names = 1, sep = "\t", fill = TRUE) #特征表
# 检查并处理缺失值
if (any(is.na(feature_table))) {
  feature_table[is.na(feature_table)] <- 0  # 将缺失值填充为零
}
sample_table <- read.table('sample_table.txt', header = TRUE, row.names = 1, sep = "\t") #样本表
tax_table <- read.table('tax_table_a.txt', header = TRUE, row.names = 1, sep = "\t", fill = TRUE) #分类表

dataset <- microtable$new(sample_table = sample_table,
                          otu_table = feature_table, 
                          tax_table = tax_table)
dataset$tidy_dataset() 
dataset$sample_sums() %>% range
dataset$rarefy_samples(sample.size = 31628516)

dataset$save_table(dirpath = "basic_files", sep = ",")


dataset$cal_alphadiv(PD = FALSE) #计算 alpha 多样性
alpha_div_table <- dataset$alpha_diversity
write.csv(alpha_div_table, file = "alpha_diversity.csv", row.names = TRUE)


t1 <- trans_alpha$new(dataset = dataset, group = "Group") #设置数据与样本分组
t1 <- trans_alpha$new(dataset = dataset, group = "Subgroup", by_group = "Type") #设置数据与样本分组
t1 <- trans_alpha$new(dataset = dataset, group = "Group", by_group = "Type") #设置数据与样本分组
t1 <- trans_alpha$new(dataset = dataset, group = "Type", by_group = "Group") #设置数据与样本分组


head(t1$data_stat)

t1$cal_diff(method = "KW")
t1$cal_diff(method = "wilcox")


rm(t1)
#head(t1$res_diff)
write.table(t1$res_diff, file = "alpha_diversity_differences.txt", sep = "\t", quote = FALSE, row.names = FALSE)
###############################################################
library(ggplot2)
library(ggpubr)
library(dplyr)
library(multcompView)
index <- read.table('Shannon_index.txt', header = TRUE, row.names = 1)

# 绘制箱线图
p <- ggboxplot(index, x = "Type", y = "Shannon", color = "Group", width = 0.75,
               size = 1.5,  # 直接在ggboxplot中设置箱线图的线条粗细
               add = "jitter",
               add.params = list(size = 5, alpha = 0.7)) +
  theme_minimal(base_size = 16) +
  labs(x = "", y = "Shannon") +  # 设置y轴标题
  theme(legend.position = "top",
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        axis.line = element_line(colour = "black", linewidth = 1.5),
        axis.text.x = element_text(size = 22, face = "plain", color = "black", angle = 0, hjust = 0.5),
        axis.text.y = element_text(size = 22, face = "plain", color = "black"),
        axis.title.y = element_text(size = 36, face = "plain", color = "black"),  # 自定义Y轴标题文字大小
        legend.title = element_blank(),  # 移除图例标题
        axis.ticks = element_line(color = "black"),
        axis.ticks.length = unit(0.25, "cm"),
        legend.key.size = unit(1.2, "cm"),
        legend.text = element_text(size = 18)) +
  scale_color_manual(values = c("B73" = "#8FC9E2", "Mo17" = "#ECC97F"))

# 手动添加显著性标记
dodge_width <- 0.375
p <- p + 
  annotate("segment", x = 1 - dodge_width, xend = 1 + dodge_width, y = 4.61, yend = 4.61, size = 1.2) +
  annotate("text", x = 1, y = 4.61, label = "**", size = 10, vjust = 0) +
  annotate("segment", x = 2 - dodge_width, xend = 2 + dodge_width, y = 4.61, yend = 4.61, size = 1.2) +
  annotate("text", x = 2, y = 4.62, label = "ns", size = 10, vjust = 0) +
  annotate("segment", x = 3 - dodge_width, xend = 3 + dodge_width, y = 4.61, yend = 4.61, size = 1.2) +
  annotate("text", x = 3, y = 4.62, label = "ns", size = 10, vjust = 0) +
  annotate("segment", x = 4 - dodge_width, xend = 4 + dodge_width, y = 4.61, yend = 4.61, size = 1.2) +
  annotate("text", x = 4, y = 4.61, label = "*", size = 10, vjust = 0)
# 显示图表
print(p)
# 保存图形
ggsave("boxplot.png", plot = p, width = 8, height = 8, dpi = 1200, bg = "white")

rm(dodge_width)
rm(index)
rm(p)

###############################################################
library(ggplot2)
library(ggpubr)
library(dplyr)
library(multcompView)
index <- read.table('Simpson_index.txt', header = TRUE, row.names = 1)

# 绘制箱线图
p <- ggboxplot(index, x = "Type", y = "Simpson", color = "Group", width = 0.75,
               size = 1.5,  # 直接在ggboxplot中设置箱线图的线条粗细
               add = "jitter",
               add.params = list(size = 5, alpha = 0.7)) +
  theme_minimal(base_size = 16) +
  labs(x = "", y = "Simpson") +  # 设置y轴标题
  theme(legend.position = "top",
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        axis.line = element_line(colour = "black", linewidth = 1.5),
        axis.text.x = element_text(size = 22, face = "plain", color = "black", angle = 0, hjust = 0.5),
        axis.text.y = element_text(size = 22, face = "plain", color = "black"),
        axis.title.y = element_text(size = 36, face = "plain", color = "black"),  # 自定义Y轴标题文字大小
        legend.title = element_blank(),  # 移除图例标题
        axis.ticks = element_line(color = "black"),
        axis.ticks.length = unit(0.25, "cm"),
        legend.key.size = unit(1.2, "cm"),
        legend.text = element_text(size = 18)) +
  scale_color_manual(values = c("B73" = "#8FC9E2", "Mo17" = "#ECC97F"))

# 手动添加显著性标记
dodge_width <- 0.375
p <- p + 
  annotate("segment", x = 1 - dodge_width, xend = 1 + dodge_width, y = 0.914, yend = 0.914, size = 1.2) +
  annotate("text", x = 1, y = 0.914, label = "*", size = 10, vjust = 0) +
  annotate("segment", x = 2 - dodge_width, xend = 2 + dodge_width, y = 0.914, yend = 0.914, size = 1.2) +
  annotate("text", x = 2, y = 0.914, label = "*", size = 10, vjust = 0) +
  annotate("segment", x = 3 - dodge_width, xend = 3 + dodge_width, y = 0.914, yend = 0.914, size = 1.2) +
  annotate("text", x = 3, y = 0.915, label = "ns", size = 10, vjust = 0) +
  annotate("segment", x = 4 - dodge_width, xend = 4 + dodge_width, y = 0.914, yend = 0.914, size = 1.2) +
  annotate("text", x = 4, y = 0.914, label = "*", size = 10, vjust = 0)
# 显示图表
print(p)
# 保存图形
ggsave("Simpson.png", plot = p, width = 8, height = 8, dpi = 1200, bg = "white")
###############################################################
library(ggplot2)
library(ggpubr)
library(dplyr)
library(multcompView)
index <- read.table('Pielou_index.txt', header = TRUE, row.names = 1)

# 绘制箱线图
p <- ggboxplot(index, x = "Type", y = "Pielou", color = "Group", width = 0.75,
               size = 1.5,  # 直接在ggboxplot中设置箱线图的线条粗细
               add = "jitter",
               add.params = list(size = 5, alpha = 0.7)) +
  theme_minimal(base_size = 16) +
  labs(x = "", y = "Pielou") +  # 设置y轴标题
  theme(legend.position = "top",
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        axis.line = element_line(colour = "black", linewidth = 1.5),
        axis.text.x = element_text(size = 22, face = "plain", color = "black", angle = 0, hjust = 0.5),
        axis.text.y = element_text(size = 22, face = "plain", color = "black"),
        axis.title.y = element_text(size = 36, face = "plain", color = "black"),  # 自定义Y轴标题文字大小
        legend.title = element_blank(),  # 移除图例标题
        axis.ticks = element_line(color = "black"),
        axis.ticks.length = unit(0.25, "cm"),
        legend.key.size = unit(1.2, "cm"),
        legend.text = element_text(size = 18)) +
  scale_color_manual(values = c("B73" = "#8FC9E2", "Mo17" = "#ECC97F"))

# 手动添加显著性标记
dodge_width <- 0.375
p <- p + 
  annotate("segment", x = 1 - dodge_width, xend = 1 + dodge_width, y = 0.515, yend = 0.515, size = 1.2) +
  annotate("text", x = 1, y = 0.515, label = "**", size = 10, vjust = 0) +
  annotate("segment", x = 2 - dodge_width, xend = 2 + dodge_width, y = 0.515, yend = 0.515, size = 1.2) +
  annotate("text", x = 2, y = 0.516, label = "ns", size = 10, vjust = 0) +
  annotate("segment", x = 3 - dodge_width, xend = 3 + dodge_width, y = 0.515, yend = 0.515, size = 1.2) +
  annotate("text", x = 3, y = 0.516, label = "ns", size = 10, vjust = 0) +
  annotate("segment", x = 4 - dodge_width, xend = 4 + dodge_width, y = 0.515, yend = 0.515, size = 1.2) +
  annotate("text", x = 4, y = 0.515, label = "*", size = 10, vjust = 0)
# 显示图表
print(p)
# 保存图形
ggsave("Pielou.png", plot = p, width = 8, height = 8, dpi = 1200, bg = "white")
###############################################################
dataset$cal_betadiv(unifrac = FALSE) #beta 多样性
bray_curtis <- dataset$beta_diversity$bray
#write.csv(bray_curtis, file = "bray.csv", row.names = TRUE)
jaccard <- dataset$beta_diversity$jaccard
#write.csv(jaccard, file = "jaccard.csv", row.names = TRUE)
stability_matrix <- 1 - as.matrix(bray_curtis)

t1 <- trans_beta$new(dataset = dataset, group = "Group", measure = "bray")
t1$cal_group_distance(within_group = TRUE)
t1$cal_group_distance_diff(method = "wilcox")
t1$plot_group_distance(add = "mean")

library(dplyr)
print(t1$res_group_distance)
#write.table(t1$res_group_distance, file = "res_group_distance.txt", sep = "\t", quote = FALSE, row.names = FALSE)

# 计算稳定性并赋值到数据框
stability <- t1$res_group_distance %>%
  mutate(
    Stability = 1 - Value # 计算稳定性
  )

print(t1$res_group_distance_diff)
#write.table(t1$res_group_distance_diff, file = "res_group_distance_diff.txt", sep = "\t", quote = FALSE, row.names = FALSE)

# 计算每组的均值和标准差
gene_distance_summary <- stability %>%
  group_by(Group) %>%  # 按 Group 分组
  summarise(
    Mean_Distance = mean(Stability),      # 计算均值
    SD_Distance = sd(Stability),          # 计算标准差
    Lower_Bound = mean(Stability) - sd(Stability),  # 均值 - SD
    Upper_Bound = mean(Stability) + sd(Stability)   # 均值 + SD
  )

# 查看结果
print(gene_distance_summary)
write.table(gene_distance_summary, file = "gene_distance_summary.txt", sep = "\t", quote = FALSE, row.names = FALSE)

t1 <- trans_beta$new(dataset = dataset, group = "Subgroup", measure = "bray")
# 执行RMANOVA分析
t1$cal_manova(manova_all = FALSE)
# 获取分析结果
manova_results <- t1$res_manova
# 将结果保存到TXT文件
write.table(manova_results, file = "RMANOVA_results.txt", sep = "\t", quote = FALSE)

t1$cal_manova(manova_all = TRUE)
manova_results <- t1$res_manova
write.table(manova_results, file = "RMANOVA_results_all.txt", sep = "\t", quote = FALSE)

t1$cal_manova(manova_all = FALSE, group = "Type", by_group = "Group")
manova_results <- t1$res_manova
write.table(manova_results, file = "RMANOVA_results_Group.txt", sep = "\t", quote = FALSE)

t1$cal_manova(manova_all = FALSE, group = "Group", by_group = "Type")
manova_results <- t1$res_manova
write.table(manova_results, file = "RMANOVA_results_Gene.txt", sep = "\t", quote = FALSE)

t1 <- trans_beta$new(dataset = dataset, group = "Group", measure = "bray")
t1$cal_manova(manova_all = FALSE)
manova_results <- t1$res_manova
write.table(manova_results, file = "RMANOVA_results_P.txt", sep = "\t", quote = FALSE)
###############################################################
library(vegan)
library(tidyverse)
library(ggalt)
library(car)
library(ggforce)
library(ggpubr)
library(patchwork)

pcoa <- cmdscale(bray_curtis, k = 2, eig = TRUE) 
points <- as.data.frame(pcoa$points) %>% dplyr::rename(x = "V1", y = "V2") 
eig <- pcoa$eig

rownames(points) <- rownames(sample_table)
points$ID <- rownames(points)
points$Subgroup <- sample_table[match(rownames(points), rownames(sample_table)), "Subgroup"]
n <- 0.85
colors <- c("B73_DAS28"="#8FC9E2","B73_DAS42"="#8FC9E2","B73_DAS56"="#8FC9E2","B73_DAS70"="#8FC9E2","Mo17_DAS28"="#ECC97F","Mo17_DAS42"="#ECC97F","Mo17_DAS56"="#ECC97F","Mo17_DAS70"="#ECC97F")
# 定义了颜色和形状的映射关系,用于后续的可视化。
shapes <- c("B73_DAS28"=24, "B73_DAS42"=22, "B73_DAS56"=21, "B73_DAS70"=23, 
            "Mo17_DAS28"=24, "Mo17_DAS42"=22, "Mo17_DAS56"=21, "Mo17_DAS70"=23)
levels_order <- c("B73_DAS28", "B73_DAS42", "B73_DAS56", "B73_DAS70", "Mo17_DAS28", "Mo17_DAS42", "Mo17_DAS56", "Mo17_DAS70") #定义顺序
points$Subgroup <- factor(points$Subgroup, levels = levels_order)
# 在ggplot中使用这些形状
p1 <- ggplot(points, aes(x = x, y = y, fill = Subgroup, shape = Subgroup)) + #创建了一个ggplot对象,设置了数据框points作为数据,设置了x和y作为x轴和y轴的变量,设置了Group作为填充色和形状的变量。
  geom_point(alpha = .7, size = 5) + #添加了散点图层。alpha = .7设置了点的透明度,size = 5设置了点的大小。
  scale_shape_manual(values = shapes) +  # 设置了形状和填充色的映射关系。使用了之前定义的shapes和colors变量。
  scale_fill_manual(values = colors) +
  labs(x = paste("PCo1 (", format(100 * eig[1] / sum(eig), digits = 4), "%)", sep = ""),
       y = paste("PCo2 (", format(100 * eig[2] / sum(eig), digits = 4), "%)", sep = "")) +
  #设置了x轴、y轴和标题的标签。使用了paste函数将多个字符串连接在一起,使用了format函数将数字格式化为指定的小数位数。
  geom_mark_ellipse(aes(fill = Subgroup), alpha = 0.1, color = "grey", linetype = 3) +
  #添加了椭圆图层。每个椭圆代表一个组的样本,椭圆的位置和大小由组的样本的均值和标准差决定。
  theme_bw() +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        axis.text = element_text(color = "black", size = 25),
        axis.title.x = element_text(size = 28),
        axis.title.y = element_text(size = 28),
        plot.title = element_text(size = 16, face = "bold"),
        legend.key.size = unit(1, "cm"),
        legend.title = element_blank(),
        legend.text = element_text(size = 14),
        panel.border = element_blank(), # 去掉整个图的边框
        axis.ticks = element_line(size = 1.5),
        axis.ticks.length = unit(0.3, "cm"),
        legend.position = "top",
        legend.direction = "horizontal",
        axis.line = element_line(color = "black", size = 1)) # 添加此行以显示坐标轴线
p1
# 使用ggsave保存PCoA图为PNG格式
ggsave(filename = "PCoA_plot.png", plot = p1, width = 10, height = 10, units = "in", dpi = 1200)
rm(colors)
###############################################################
# 使用 prcomp 计算 PCA
pca <- prcomp(bray_curtis, scale. = TRUE)
# 提取 PCA 结果
points <- as.data.frame(pca$x) %>% dplyr::rename(x = "PC1", y = "PC2")
eig <- pca$sdev^2  # 提取特征值
rownames(points) <- rownames(sample_table)
points$ID <- rownames(points)
points$Subgroup <- sample_table[match(rownames(points), rownames(sample_table)), "Subgroup"]
# 定义颜色和形状的映射关系
colors <- c("B73_DAS28"="#8FC9E2","B73_DAS42"="#8FC9E2","B73_DAS56"="#8FC9E2","B73_DAS70"="#8FC9E2",
            "Mo17_DAS28"="#ECC97F","Mo17_DAS42"="#ECC97F","Mo17_DAS56"="#ECC97F","Mo17_DAS70"="#ECC97F")
shapes <- c("B73_DAS28"=24, "B73_DAS42"=22, "B73_DAS56"=21, "B73_DAS70"=23, 
            "Mo17_DAS28"=24, "Mo17_DAS42"=22, "Mo17_DAS56"=21, "Mo17_DAS70"=23)
levels_order <- c("B73_DAS28", "B73_DAS42", "B73_DAS56", "B73_DAS70", 
                  "Mo17_DAS28", "Mo17_DAS42", "Mo17_DAS56", "Mo17_DAS70")
points$Subgroup <- factor(points$Subgroup, levels = levels_order)
# 绘制 PCA 图
p1 <- ggplot(points, aes(x = x, y = y, fill = Subgroup, shape = Subgroup)) +
  geom_point(alpha = .7, size = 5) +
  scale_shape_manual(values = shapes) +
  scale_fill_manual(values = colors) +
  labs(x = paste("PC1 (", format(100 * eig[1] / sum(eig), digits = 4), "%)", sep = ""),
       y = paste("PC2 (", format(100 * eig[2] / sum(eig), digits = 4), "%)", sep = "")) +
  geom_mark_ellipse(aes(fill = Subgroup), alpha = 0.1, color = "grey", linetype = 3) +
  theme_bw() +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        axis.text = element_text(color = "black", size = 25),
        axis.title.x = element_text(size = 28),
        axis.title.y = element_text(size = 28),
        plot.title = element_text(size = 16, face = "bold"),
        legend.key.size = unit(1, "cm"),
        legend.title = element_blank(),
        legend.text = element_text(size = 14),
        panel.border = element_blank(),
        axis.ticks = element_line(size = 1.5),
        axis.ticks.length = unit(0.3, "cm"),
        legend.position = "top",
        legend.direction = "horizontal",
        axis.line = element_line(color = "black", size = 1))

p1
# 保存 PCA 图为 PNG 格式
ggsave(filename = "PCA_plot.png", plot = p1, width = 10, height = 10, units = "in", dpi = 1200)
###############################################################

 

posted @ 2024-11-26 10:38  王哲MGG_AI  阅读(9)  评论(0编辑  收藏  举报