R:microtable包alpha多样性计算+Beta多样性(MetaCyc)
rm(list = ls())
setwd("C:\\Users\\Administrator\\Desktop\\machine learning\\Diversity_pathway")
library(microeco)
library(magrittr)
library(file2meco)
abund_file_path <- "HUMAnN_MetaCyc_abund.tsv"
match_file_path <- "metagenome_match_table.tsv"
sample_file_path <- "metagenome_sample_info.tsv"
#humann2meco(abund_file_path, db = "MetaCyc", sample_table = sample_file_path, match_table = match_file_path)
dataset <- humann2meco(abund_file_path, db = "MetaCyc", sample_table = sample_file_path, match_table = match_file_path)
dataset$tidy_dataset() #整理数据集
dataset$cal_abund(select_cols = 1:3, rel = FALSE)
dataset$taxa_abund$Superclass1 %<>% .[!grepl("unclass", rownames(.)), ]
dataset$taxa_abund$Superclass2 %<>% .[!grepl("unclass", rownames(.)), ]
dataset$taxa_abund$pathway %<>% .[!grepl("unclass", rownames(.)), ]
write.table(dataset$taxa_abund$Superclass1, file = "Superclass1.txt", sep = "\t", quote = FALSE, row.names = TRUE)
write.table(dataset$taxa_abund$Superclass2, file = "Superclass2.txt", sep = "\t", quote = FALSE, row.names = TRUE)
write.table(dataset$taxa_abund$pathway, file = "pathway.txt", sep = "\t", quote = FALSE, row.names = TRUE)
test1 <- trans_abund$new(test, taxrank = "Superclass1", ntaxa = 10, use_percentage = FALSE)
test1$ylabname <- "Abundance (RPK)"
test1$plot_bar(facet = "Group", bar_full = FALSE)
test$cal_abund(select_cols = c("Superclass1", "Phylum", "Genus"), rel = TRUE)
test1 <- trans_abund$new(test, taxrank = "Phylum", ntaxa = 10, delete_taxonomy_lineage = FALSE)
test1$plot_bar(facet = "Group")
# functional biomarker
dataset <- trans_diff$new(dataset, method = "lefse", group = "Group",taxa_level = "pathway")
dataset$plot_diff_bar(use_number = 1:30)
write.table(dataset$res_diff, file = "lefse.txt", sep = "\t", quote = FALSE, row.names = TRUE)
dataset$otu_table[dataset$otu_table < 0] <- 0
dataset$cal_alphadiv(PD = FALSE) #计算 alpha 多样性
alpha_div_table <- dataset$alpha_diversity
write.csv(alpha_div_table, file = "alpha_diversity.csv", row.names = TRUE)
t1 <- trans_alpha$new(dataset = dataset, group = "Group") #设置数据与样本分组
t1 <- trans_alpha$new(dataset = dataset, group = "Subgroup", by_group = "Type") #设置数据与样本分组
t1 <- trans_alpha$new(dataset = dataset, group = "Group", by_group = "Type") #设置数据与样本分组
t1 <- trans_alpha$new(dataset = dataset, group = "Type", by_group = "Group") #设置数据与样本分组
t1$cal_diff(method = "KW")
t1$cal_diff(method = "wilcox")
rm(t1)
#head(t1$res_diff)
write.table(t1$res_diff, file = "alpha_diversity_differences_gene.txt", sep = "\t", quote = FALSE, row.names = FALSE)
write.table(t1$res_diff, file = "alpha_diversity_differences_DAS.txt", sep = "\t", quote = FALSE, row.names = FALSE)
write.table(t1$res_diff, file = "alpha_diversity_differences_time.txt", sep = "\t", quote = FALSE, row.names = FALSE)
t1$plot_alpha(measure = "Pielou", add_sig_text_size = 6, add = "jitter", order_x_mean = TRUE)
###############################################################
library(ggplot2)
library(ggpubr)
library(dplyr)
library(multcompView)
index <- read.table('Shannon_index.txt', header = TRUE, row.names = 1)
# 绘制箱线图
p <- ggboxplot(index, x = "Type", y = "Shannon", color = "Group", width = 0.75,
size = 1.5, # 直接在ggboxplot中设置箱线图的线条粗细
add = "jitter",
add.params = list(size = 5, alpha = 0.7)) +
theme_minimal(base_size = 16) +
labs(x = "", y = "Shannon") + # 设置y轴标题
theme(legend.position = "top",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
axis.line = element_line(colour = "black", linewidth = 1.5),
axis.text.x = element_text(size = 22, face = "plain", color = "black", angle = 0, hjust = 0.5),
axis.text.y = element_text(size = 22, face = "plain", color = "black"),
axis.title.y = element_text(size = 36, face = "plain", color = "black"), # 自定义Y轴标题文字大小
legend.title = element_blank(), # 移除图例标题
axis.ticks = element_line(color = "black"),
axis.ticks.length = unit(0.25, "cm"),
legend.key.size = unit(1.2, "cm"),
legend.text = element_text(size = 18)) +
scale_color_manual(values = c("B73" = "#8FC9E2", "Mo17" = "#ECC97F"))
# 手动添加显著性标记
dodge_width <- 0.375
p <- p +
annotate("segment", x = 1 - dodge_width, xend = 1 + dodge_width, y = 4.61, yend = 4.61, size = 1.2) +
annotate("text", x = 1, y = 4.61, label = "**", size = 10, vjust = 0) +
annotate("segment", x = 2 - dodge_width, xend = 2 + dodge_width, y = 4.61, yend = 4.61, size = 1.2) +
annotate("text", x = 2, y = 4.62, label = "ns", size = 10, vjust = 0) +
annotate("segment", x = 3 - dodge_width, xend = 3 + dodge_width, y = 4.61, yend = 4.61, size = 1.2) +
annotate("text", x = 3, y = 4.62, label = "ns", size = 10, vjust = 0) +
annotate("segment", x = 4 - dodge_width, xend = 4 + dodge_width, y = 4.61, yend = 4.61, size = 1.2) +
annotate("text", x = 4, y = 4.61, label = "*", size = 10, vjust = 0)
# 显示图表
print(p)
# 保存图形
ggsave("boxplot.png", plot = p, width = 8, height = 8, dpi = 1200, bg = "white")
rm(dodge_width)
rm(index)
rm(p)
###############################################################
library(ggplot2)
library(ggpubr)
library(dplyr)
library(multcompView)
index <- read.table('Simpson_index.txt', header = TRUE, row.names = 1)
# 绘制箱线图
p <- ggboxplot(index, x = "Type", y = "Simpson", color = "Group", width = 0.75,
size = 1.5, # 直接在ggboxplot中设置箱线图的线条粗细
add = "jitter",
add.params = list(size = 5, alpha = 0.7)) +
theme_minimal(base_size = 16) +
labs(x = "", y = "Simpson") + # 设置y轴标题
theme(legend.position = "top",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
axis.line = element_line(colour = "black", linewidth = 1.5),
axis.text.x = element_text(size = 22, face = "plain", color = "black", angle = 0, hjust = 0.5),
axis.text.y = element_text(size = 22, face = "plain", color = "black"),
axis.title.y = element_text(size = 36, face = "plain", color = "black"), # 自定义Y轴标题文字大小
legend.title = element_blank(), # 移除图例标题
axis.ticks = element_line(color = "black"),
axis.ticks.length = unit(0.25, "cm"),
legend.key.size = unit(1.2, "cm"),
legend.text = element_text(size = 18)) +
scale_color_manual(values = c("B73" = "#8FC9E2", "Mo17" = "#ECC97F"))
# 手动添加显著性标记
dodge_width <- 0.375
p <- p +
annotate("segment", x = 1 - dodge_width, xend = 1 + dodge_width, y = 0.914, yend = 0.914, size = 1.2) +
annotate("text", x = 1, y = 0.914, label = "*", size = 10, vjust = 0) +
annotate("segment", x = 2 - dodge_width, xend = 2 + dodge_width, y = 0.914, yend = 0.914, size = 1.2) +
annotate("text", x = 2, y = 0.914, label = "*", size = 10, vjust = 0) +
annotate("segment", x = 3 - dodge_width, xend = 3 + dodge_width, y = 0.914, yend = 0.914, size = 1.2) +
annotate("text", x = 3, y = 0.915, label = "ns", size = 10, vjust = 0) +
annotate("segment", x = 4 - dodge_width, xend = 4 + dodge_width, y = 0.914, yend = 0.914, size = 1.2) +
annotate("text", x = 4, y = 0.914, label = "*", size = 10, vjust = 0)
# 显示图表
print(p)
# 保存图形
ggsave("Simpson.png", plot = p, width = 8, height = 8, dpi = 1200, bg = "white")
###############################################################
library(ggplot2)
library(ggpubr)
library(dplyr)
library(multcompView)
index <- read.table('Pielou_index.txt', header = TRUE, row.names = 1)
# 绘制箱线图
p <- ggboxplot(index, x = "Type", y = "Pielou", color = "Group", width = 0.75,
size = 1.5, # 直接在ggboxplot中设置箱线图的线条粗细
add = "jitter",
add.params = list(size = 5, alpha = 0.7)) +
theme_minimal(base_size = 16) +
labs(x = "", y = "Pielou") + # 设置y轴标题
theme(legend.position = "top",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
axis.line = element_line(colour = "black", linewidth = 1.5),
axis.text.x = element_text(size = 22, face = "plain", color = "black", angle = 0, hjust = 0.5),
axis.text.y = element_text(size = 22, face = "plain", color = "black"),
axis.title.y = element_text(size = 36, face = "plain", color = "black"), # 自定义Y轴标题文字大小
legend.title = element_blank(), # 移除图例标题
axis.ticks = element_line(color = "black"),
axis.ticks.length = unit(0.25, "cm"),
legend.key.size = unit(1.2, "cm"),
legend.text = element_text(size = 18)) +
scale_color_manual(values = c("B73" = "#8FC9E2", "Mo17" = "#ECC97F"))
# 手动添加显著性标记
dodge_width <- 0.375
p <- p +
annotate("segment", x = 1 - dodge_width, xend = 1 + dodge_width, y = 0.515, yend = 0.515, size = 1.2) +
annotate("text", x = 1, y = 0.515, label = "**", size = 10, vjust = 0) +
annotate("segment", x = 2 - dodge_width, xend = 2 + dodge_width, y = 0.515, yend = 0.515, size = 1.2) +
annotate("text", x = 2, y = 0.516, label = "ns", size = 10, vjust = 0) +
annotate("segment", x = 3 - dodge_width, xend = 3 + dodge_width, y = 0.515, yend = 0.515, size = 1.2) +
annotate("text", x = 3, y = 0.516, label = "ns", size = 10, vjust = 0) +
annotate("segment", x = 4 - dodge_width, xend = 4 + dodge_width, y = 0.515, yend = 0.515, size = 1.2) +
annotate("text", x = 4, y = 0.515, label = "*", size = 10, vjust = 0)
# 显示图表
print(p)
# 保存图形
ggsave("Pielou.png", plot = p, width = 8, height = 8, dpi = 1200, bg = "white")
###############################################################
dataset$otu_table[dataset$otu_table < 0] <- 0
dataset$cal_betadiv(unifrac = FALSE) #beta 多样性
bray_curtis <- dataset$beta_diversity$bray
#write.csv(bray_curtis, file = "bray.csv", row.names = TRUE)
jaccard <- dataset$beta_diversity$jaccard
#write.csv(jaccard, file = "jaccard.csv", row.names = TRUE)
stability_matrix <- 1 - as.matrix(bray_curtis)
t1 <- trans_beta$new(dataset = dataset, group = "Group", measure = "bray")
t1$cal_group_distance(within_group = TRUE)
t1$cal_group_distance_diff(method = "wilcox")
t1$plot_group_distance(add = "mean")
rm(t1)
library(dplyr)
print(t1$res_group_distance)
#write.table(t1$res_group_distance, file = "res_group_distance.txt", sep = "\t", quote = FALSE, row.names = FALSE)
# 计算稳定性并赋值到数据框
stability <- t1$res_group_distance %>%
mutate(
Stability = 1 - Value # 计算稳定性
)
print(t1$res_group_distance_diff)
#write.table(t1$res_group_distance_diff, file = "res_group_distance_diff.txt", sep = "\t", quote = FALSE, row.names = FALSE)
# 计算每组的均值和标准差
gene_distance_summary <- stability %>%
group_by(Group) %>% # 按 Group 分组
summarise(
Mean_Distance = mean(Stability), # 计算均值
SD_Distance = sd(Stability), # 计算标准差
Lower_Bound = mean(Stability) - sd(Stability), # 均值 - SD
Upper_Bound = mean(Stability) + sd(Stability) # 均值 + SD
)
# 查看结果
print(gene_distance_summary)
write.table(gene_distance_summary, file = "gene_distance_summary.txt", sep = "\t", quote = FALSE, row.names = FALSE)
t1 <- trans_beta$new(dataset = dataset, group = "Subgroup", measure = "bray")
# 执行RMANOVA分析
t1$cal_manova(manova_all = FALSE)
# 获取分析结果
manova_results <- t1$res_manova
# 将结果保存到TXT文件
write.table(manova_results, file = "RMANOVA_results.txt", sep = "\t", quote = FALSE)
t1$cal_manova(manova_all = TRUE)
manova_results <- t1$res_manova
write.table(manova_results, file = "RMANOVA_results_all.txt", sep = "\t", quote = FALSE)
t1$cal_manova(manova_all = FALSE, group = "Type", by_group = "Group")
manova_results <- t1$res_manova
write.table(manova_results, file = "RMANOVA_results_Group.txt", sep = "\t", quote = FALSE)
t1$cal_manova(manova_all = FALSE, group = "Group", by_group = "Type")
manova_results <- t1$res_manova
write.table(manova_results, file = "RMANOVA_results_Gene.txt", sep = "\t", quote = FALSE)
t1 <- trans_beta$new(dataset = dataset, group = "Group", measure = "bray")
t1$cal_manova(manova_all = FALSE)
manova_results <- t1$res_manova
write.table(manova_results, file = "RMANOVA_results_P.txt", sep = "\t", quote = FALSE)
###############################################################
library(vegan)
library(tidyverse)
library(ggalt)
library(car)
library(ggforce)
library(ggpubr)
library(patchwork)
pcoa <- cmdscale(bray_curtis, k = 2, eig = TRUE)
points <- as.data.frame(pcoa$points) %>% dplyr::rename(x = "V1", y = "V2")
eig <- pcoa$eig
rownames(points) <- rownames(sample_table)
points$ID <- rownames(points)
points$Subgroup <- sample_table[match(rownames(points), rownames(sample_table)), "Subgroup"]
n <- 0.85
colors <- c("B73_DAS28"="#8FC9E2","B73_DAS42"="#8FC9E2","B73_DAS56"="#8FC9E2","B73_DAS70"="#8FC9E2","Mo17_DAS28"="#ECC97F","Mo17_DAS42"="#ECC97F","Mo17_DAS56"="#ECC97F","Mo17_DAS70"="#ECC97F")
# 定义了颜色和形状的映射关系,用于后续的可视化。
shapes <- c("B73_DAS28"=24, "B73_DAS42"=22, "B73_DAS56"=21, "B73_DAS70"=23,
"Mo17_DAS28"=24, "Mo17_DAS42"=22, "Mo17_DAS56"=21, "Mo17_DAS70"=23)
levels_order <- c("B73_DAS28", "B73_DAS42", "B73_DAS56", "B73_DAS70", "Mo17_DAS28", "Mo17_DAS42", "Mo17_DAS56", "Mo17_DAS70") #定义顺序
points$Subgroup <- factor(points$Subgroup, levels = levels_order)
# 在ggplot中使用这些形状
p1 <- ggplot(points, aes(x = x, y = y, fill = Subgroup, shape = Subgroup)) + #创建了一个ggplot对象,设置了数据框points作为数据,设置了x和y作为x轴和y轴的变量,设置了Group作为填充色和形状的变量。
geom_point(alpha = .7, size = 5) + #添加了散点图层。alpha = .7设置了点的透明度,size = 5设置了点的大小。
scale_shape_manual(values = shapes) + # 设置了形状和填充色的映射关系。使用了之前定义的shapes和colors变量。
scale_fill_manual(values = colors) +
labs(x = paste("PCo1 (", format(100 * eig[1] / sum(eig), digits = 4), "%)", sep = ""),
y = paste("PCo2 (", format(100 * eig[2] / sum(eig), digits = 4), "%)", sep = "")) +
#设置了x轴、y轴和标题的标签。使用了paste函数将多个字符串连接在一起,使用了format函数将数字格式化为指定的小数位数。
geom_mark_ellipse(aes(fill = Subgroup), alpha = 0.1, color = "grey", linetype = 3) +
#添加了椭圆图层。每个椭圆代表一个组的样本,椭圆的位置和大小由组的样本的均值和标准差决定。
theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
axis.text = element_text(color = "black", size = 25),
axis.title.x = element_text(size = 28),
axis.title.y = element_text(size = 28),
plot.title = element_text(size = 16, face = "bold"),
legend.key.size = unit(1, "cm"),
legend.title = element_blank(),
legend.text = element_text(size = 14),
panel.border = element_blank(), # 去掉整个图的边框
axis.ticks = element_line(size = 1.5),
axis.ticks.length = unit(0.3, "cm"),
legend.position = "top",
legend.direction = "horizontal",
axis.line = element_line(color = "black", size = 1)) # 添加此行以显示坐标轴线
p1
# 使用ggsave保存PCoA图为PNG格式
ggsave(filename = "PCoA_plot.png", plot = p1, width = 10, height = 10, units = "in", dpi = 1200)
rm(colors)
###############################################################
# 使用 prcomp 计算 PCA
pca <- prcomp(bray_curtis, scale. = TRUE)
# 提取 PCA 结果
points <- as.data.frame(pca$x) %>% dplyr::rename(x = "PC1", y = "PC2")
eig <- pca$sdev^2 # 提取特征值
rownames(points) <- rownames(sample_table)
points$ID <- rownames(points)
points$Subgroup <- sample_table[match(rownames(points), rownames(sample_table)), "Subgroup"]
# 定义颜色和形状的映射关系
colors <- c("B73_DAS28"="#8FC9E2","B73_DAS42"="#8FC9E2","B73_DAS56"="#8FC9E2","B73_DAS70"="#8FC9E2",
"Mo17_DAS28"="#ECC97F","Mo17_DAS42"="#ECC97F","Mo17_DAS56"="#ECC97F","Mo17_DAS70"="#ECC97F")
shapes <- c("B73_DAS28"=24, "B73_DAS42"=22, "B73_DAS56"=21, "B73_DAS70"=23,
"Mo17_DAS28"=24, "Mo17_DAS42"=22, "Mo17_DAS56"=21, "Mo17_DAS70"=23)
levels_order <- c("B73_DAS28", "B73_DAS42", "B73_DAS56", "B73_DAS70",
"Mo17_DAS28", "Mo17_DAS42", "Mo17_DAS56", "Mo17_DAS70")
points$Subgroup <- factor(points$Subgroup, levels = levels_order)
# 绘制 PCA 图
p1 <- ggplot(points, aes(x = x, y = y, fill = Subgroup, shape = Subgroup)) +
geom_point(alpha = .7, size = 5) +
scale_shape_manual(values = shapes) +
scale_fill_manual(values = colors) +
labs(x = paste("PC1 (", format(100 * eig[1] / sum(eig), digits = 4), "%)", sep = ""),
y = paste("PC2 (", format(100 * eig[2] / sum(eig), digits = 4), "%)", sep = "")) +
geom_mark_ellipse(aes(fill = Subgroup), alpha = 0.1, color = "grey", linetype = 3) +
theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
axis.text = element_text(color = "black", size = 25),
axis.title.x = element_text(size = 28),
axis.title.y = element_text(size = 28),
plot.title = element_text(size = 16, face = "bold"),
legend.key.size = unit(1, "cm"),
legend.title = element_blank(),
legend.text = element_text(size = 14),
panel.border = element_blank(),
axis.ticks = element_line(size = 1.5),
axis.ticks.length = unit(0.3, "cm"),
legend.position = "top",
legend.direction = "horizontal",
axis.line = element_line(color = "black", size = 1))
p1
# 保存 PCA 图为 PNG 格式
ggsave(filename = "PCA_plot.png", plot = p1, width = 10, height = 10, units = "in", dpi = 1200)
###############################################################