R:PLS-DA模型

# 清除所有变量
rm(list = ls()) 
# 设置工作目录
setwd("C:\\Users\\Administrator\\Desktop\\新建文件夹\\PLS_Pathway") 

# 1. 加载所需的库
library(vegan)
library(tidyverse)
library(ggpubr)
library(patchwork)
library(ggforce)
library(mixOmics)
library(patchwork)



# 2. 读取和处理数据
otu <- read.table("./Path_table_A.txt", row.names = 1, sep = "\t", header = TRUE) %>% as.data.frame()
map <- read.table("./group.txt", sep = "\t", header = TRUE) 
colnames(map)[1] <- "ID" 
row.names(map) <- map$ID 
idx <- rownames(map) %in% colnames(otu) 
map1 <- map[idx,]
otu <- otu[, rownames(map1)]

# 准备PLS-DA分析的数据和分组信息
X <- t(otu) # Transposed OTU data
Y <- map1$Group # Group information


# 进行PLS-DA分析
plsda_result <- plsda(X, Y, ncomp = 2) # ncomp是你希望保留的成分数目

# 可视化PLS-DA结果
# plotIndiv函数用于绘制个体样本的分布,其中包括了样本的分类信息
plotIndiv(plsda_result, 
          group = Y, 
          title = 'PLS-DA', 
          ellipse = TRUE, 
          legend = TRUE)

# 对PLS-DA模型进行性能评估,这次使用正确的参数值'Mfold'
plsda.perf <- perf(plsda_result, validation = "Mfold", folds = 8, progressBar = FALSE)

# 获取第一和第二主成分的X和Y的解释率
R2X_comp1 <- round(plsda_result$prop_expl_var$X[1] * 100, 2)
R2X_comp2 <- round(plsda_result$prop_expl_var$X[2] * 100, 2)
R2Y_comp1 <- round(plsda_result$prop_expl_var$Y[1] * 100, 2)
R2Y_comp2 <- round(plsda_result$prop_expl_var$Y[2] * 100, 2)

# 构造坐标轴标题
xlab_text <- paste("Component 1 (R2X =", R2X_comp1, "%, R2Y =", R2Y_comp1, "%)")
ylab_text <- paste("Component 2 (R2X =", R2X_comp2, "%, R2Y =", R2Y_comp2, "%)")


# 假设已有plsda.perf结果
# 访问错误率,可能需要根据perf对象的具体内容调整
error_rates <- plsda.perf$error.rate

# 选择第一个成分的overall错误率和"max.dist"距离度量
selected_error_rate <- error_rates$overall["comp1", "max.dist"]

# 计算一个近似的Q2值
q2_approx <- 1 - selected_error_rate

# 打印近似的Q2值
print(q2_approx)
title_text <- paste("PLS-DA: Q2 =", round(q2_approx * 100, 2), "%")



# 直接从PLS-DA结果对象中提取X方向的得分
scores_plsda <- plsda_result$variates$X

# 首先将矩阵转换为数据框
scores_plsda_df <- as.data.frame(scores_plsda)

# 然后使用dplyr的rename函数重命名列
points_plsda <- dplyr::rename(scores_plsda_df, x = comp1, y = comp2)

# 将样本的分组信息添加到数据框架 - 确保这一步是在重命名之后执行
points_plsda$Group <- map1$Group[match(rownames(points_plsda), rownames(map1))]

# 定义颜色和形状
colors <- c("B73_DAS28"="#8FC9E2","B73_DAS42"="#8FC9E2","B73_DAS56"="#8FC9E2","B73_DAS70"="#8FC9E2","Mo17_DAS28"="#ECC97F","Mo17_DAS42"="#ECC97F","Mo17_DAS56"="#ECC97F","Mo17_DAS70"="#ECC97F")
shapes <- c("B73_DAS28"=24, "B73_DAS42"=22, "B73_DAS56"=21, "B73_DAS70"=23, 
            "Mo17_DAS28"=24, "Mo17_DAS42"=22, "Mo17_DAS56"=21, "Mo17_DAS70"=23)
levels_order <- c("B73_DAS28", "B73_DAS42", "B73_DAS56", "B73_DAS70", "Mo17_DAS28", "Mo17_DAS42", "Mo17_DAS56", "Mo17_DAS70") #定义顺序
points_plsda$Group <- factor(points_plsda$Group, levels = levels_order)

# 使用这些标题在ggplot中
p1_plsda <- ggplot(points_plsda, aes(x = x, y = y, fill = Group, shape = Group)) + 
  geom_point(alpha = .7, size = 6) + 
  scale_shape_manual(values = shapes) + 
  scale_fill_manual(values = colors) +
  labs(x = xlab_text, y = ylab_text, title = title_text) +  # 使用xlab_text和ylab_text以及title_text
  geom_mark_ellipse(aes(fill = Group, label = Group), alpha = 0.1, color = "grey", linetype = 3) +
  theme_bw() +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text = element_text(color = "black", size = 12),
        axis.title = element_text(size = 16),
        legend.text = element_text(size = 14),
        legend.title = element_blank(), 
        plot.title = element_text(size = 16, face = "bold"),
        panel.border = element_rect(colour = "black", fill=NA, linewidth=2),
        axis.ticks = element_line(linewidth = 2),
        legend.key.size = unit(1, "cm")) +
  coord_cartesian(xlim = c(-max(abs(points_plsda$x)) * 1.1, max(abs(points_plsda$x)) * 1.1), 
                  ylim = c(-max(abs(points_plsda$y)) * 1.1, max(abs(points_plsda$y)) * 1.1)) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
  geom_hline(yintercept = 0, linetype = "dashed", color = "black")

# 显示PLS-DA绘制的图
print(p1_plsda)

# 保存PLS-DA图
ggsave(filename = "PLS-DA_plot.png", plot = p1_plsda, width = 10, height = 8, units = "in", dpi = 600)

 

posted @ 2024-03-27 17:15  王哲MGG_AI  阅读(135)  评论(0编辑  收藏  举报