R:PLS-DA模型
# 清除所有变量
rm(list = ls())
# 设置工作目录
setwd("C:\\Users\\Administrator\\Desktop\\新建文件夹\\PLS_Pathway")
# 1. 加载所需的库
library(vegan)
library(tidyverse)
library(ggpubr)
library(patchwork)
library(ggforce)
library(mixOmics)
library(patchwork)
# 2. 读取和处理数据
otu <- read.table("./Path_table_A.txt", row.names = 1, sep = "\t", header = TRUE) %>% as.data.frame()
map <- read.table("./group.txt", sep = "\t", header = TRUE)
colnames(map)[1] <- "ID"
row.names(map) <- map$ID
idx <- rownames(map) %in% colnames(otu)
map1 <- map[idx,]
otu <- otu[, rownames(map1)]
# 准备PLS-DA分析的数据和分组信息
X <- t(otu) # Transposed OTU data
Y <- map1$Group # Group information
# 进行PLS-DA分析
plsda_result <- plsda(X, Y, ncomp = 2) # ncomp是你希望保留的成分数目
# 可视化PLS-DA结果
# plotIndiv函数用于绘制个体样本的分布,其中包括了样本的分类信息
plotIndiv(plsda_result,
group = Y,
title = 'PLS-DA',
ellipse = TRUE,
legend = TRUE)
# 对PLS-DA模型进行性能评估,这次使用正确的参数值'Mfold'
plsda.perf <- perf(plsda_result, validation = "Mfold", folds = 8, progressBar = FALSE)
# 获取第一和第二主成分的X和Y的解释率
R2X_comp1 <- round(plsda_result$prop_expl_var$X[1] * 100, 2)
R2X_comp2 <- round(plsda_result$prop_expl_var$X[2] * 100, 2)
R2Y_comp1 <- round(plsda_result$prop_expl_var$Y[1] * 100, 2)
R2Y_comp2 <- round(plsda_result$prop_expl_var$Y[2] * 100, 2)
# 构造坐标轴标题
xlab_text <- paste("Component 1 (R2X =", R2X_comp1, "%, R2Y =", R2Y_comp1, "%)")
ylab_text <- paste("Component 2 (R2X =", R2X_comp2, "%, R2Y =", R2Y_comp2, "%)")
# 假设已有plsda.perf结果
# 访问错误率,可能需要根据perf对象的具体内容调整
error_rates <- plsda.perf$error.rate
# 选择第一个成分的overall错误率和"max.dist"距离度量
selected_error_rate <- error_rates$overall["comp1", "max.dist"]
# 计算一个近似的Q2值
q2_approx <- 1 - selected_error_rate
# 打印近似的Q2值
print(q2_approx)
title_text <- paste("PLS-DA: Q2 =", round(q2_approx * 100, 2), "%")
# 直接从PLS-DA结果对象中提取X方向的得分
scores_plsda <- plsda_result$variates$X
# 首先将矩阵转换为数据框
scores_plsda_df <- as.data.frame(scores_plsda)
# 然后使用dplyr的rename函数重命名列
points_plsda <- dplyr::rename(scores_plsda_df, x = comp1, y = comp2)
# 将样本的分组信息添加到数据框架 - 确保这一步是在重命名之后执行
points_plsda$Group <- map1$Group[match(rownames(points_plsda), rownames(map1))]
# 定义颜色和形状
colors <- c("B73_DAS28"="#8FC9E2","B73_DAS42"="#8FC9E2","B73_DAS56"="#8FC9E2","B73_DAS70"="#8FC9E2","Mo17_DAS28"="#ECC97F","Mo17_DAS42"="#ECC97F","Mo17_DAS56"="#ECC97F","Mo17_DAS70"="#ECC97F")
shapes <- c("B73_DAS28"=24, "B73_DAS42"=22, "B73_DAS56"=21, "B73_DAS70"=23,
"Mo17_DAS28"=24, "Mo17_DAS42"=22, "Mo17_DAS56"=21, "Mo17_DAS70"=23)
levels_order <- c("B73_DAS28", "B73_DAS42", "B73_DAS56", "B73_DAS70", "Mo17_DAS28", "Mo17_DAS42", "Mo17_DAS56", "Mo17_DAS70") #定义顺序
points_plsda$Group <- factor(points_plsda$Group, levels = levels_order)
# 使用这些标题在ggplot中
p1_plsda <- ggplot(points_plsda, aes(x = x, y = y, fill = Group, shape = Group)) +
geom_point(alpha = .7, size = 6) +
scale_shape_manual(values = shapes) +
scale_fill_manual(values = colors) +
labs(x = xlab_text, y = ylab_text, title = title_text) + # 使用xlab_text和ylab_text以及title_text
geom_mark_ellipse(aes(fill = Group, label = Group), alpha = 0.1, color = "grey", linetype = 3) +
theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text = element_text(color = "black", size = 12),
axis.title = element_text(size = 16),
legend.text = element_text(size = 14),
legend.title = element_blank(),
plot.title = element_text(size = 16, face = "bold"),
panel.border = element_rect(colour = "black", fill=NA, linewidth=2),
axis.ticks = element_line(linewidth = 2),
legend.key.size = unit(1, "cm")) +
coord_cartesian(xlim = c(-max(abs(points_plsda$x)) * 1.1, max(abs(points_plsda$x)) * 1.1),
ylim = c(-max(abs(points_plsda$y)) * 1.1, max(abs(points_plsda$y)) * 1.1)) +
geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
geom_hline(yintercept = 0, linetype = "dashed", color = "black")
# 显示PLS-DA绘制的图
print(p1_plsda)
# 保存PLS-DA图
ggsave(filename = "PLS-DA_plot.png", plot = p1_plsda, width = 10, height = 8, units = "in", dpi = 600)
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· DeepSeek 开源周回顾「GitHub 热点速览」
· 记一次.NET内存居高不下排查解决与启示
· 物流快递公司核心技术能力-地址解析分单基础技术分享
· .NET 10首个预览版发布:重大改进与新特性概览!
· .NET10 - 预览版1新功能体验(一)