R:计算均值和标准差并Wilcoxon秩和检验

rm(list = ls()) # 清除所有变量
setwd("C:\\Users\\Administrator\\Desktop\\新建文件夹\\FUNGuild function") # 设置工作目录

# 加载所需的包
library(tidyverse)
library(reshape2)
library(dplyr)

# 读取数据
abundance_file <- "vegan_relative_abundance.txt" # 功能丰度文件路径
group_file <- "metadata.txt" # 样本分组文件路径

abundance_data <- read.csv(abundance_file, sep = "\t", row.names = 1, check.names = FALSE)
group_data <- read.csv(group_file, sep = "\t", header = TRUE)
colnames(group_data) <- c("Sample", "Group")

# 转换丰度数据为长格式
abundance_long <- as.data.frame(t(abundance_data))
abundance_long$Sample <- rownames(abundance_long)
melted_data <- reshape2::melt(abundance_long, id.vars = "Sample")

# 合并分组信息
merged_data <- merge(melted_data, group_data, by = "Sample")

# 对每个功能进行分组统计
group_stats <- merged_data %>%
  group_by(variable, Group) %>%
  summarise(Mean = mean(value), SD = sd(value), .groups = 'drop')

# 准备输出数据框
output_data <- data.frame(Feature = character(),
                          Group1 = character(),
                          Group1_Mean = numeric(),
                          Group1_SD = numeric(),
                          Group2 = character(),
                          Group2_Mean = numeric(),
                          Group2_SD = numeric(),
                          P.value = numeric(),
                          Significance = character())

# 对每个功能进行Wilcoxon秩和检验
for(feature in unique(merged_data$variable)) {
  feature_data <- subset(merged_data, variable == feature)
  groups <- unique(feature_data$Group)
  if(length(groups) == 2) { # 确保只有两组数据
    test_result <- wilcox.test(value ~ Group, data = feature_data)
    p.value <- test_result$p.value
    # 根据P值添加显著性星号
    sig <- ifelse(p.value < 0.001, "***", ifelse(p.value < 0.01, "**", ifelse(p.value < 0.05, "*", "ns")))
    # 获取每组的均值和标准差
    stats1 <- filter(group_stats, variable == feature & Group == groups[1])
    stats2 <- filter(group_stats, variable == feature & Group == groups[2])
    output_data <- rbind(output_data, data.frame(Feature = feature,
                                                 Group1 = groups[1],
                                                 Group1_Mean = stats1$Mean,
                                                 Group1_SD = stats1$SD,
                                                 Group2 = groups[2],
                                                 Group2_Mean = stats2$Mean,
                                                 Group2_SD = stats2$SD,
                                                 P.value = p.value,
                                                 Significance = sig))
  }
}

# 输出结果到TXT文件
write.table(output_data, "output_results_wilcoxon_with_stats.txt", row.names = FALSE, sep = "\t", quote = FALSE)

 

posted @ 2024-03-19 15:45  王哲MGG_AI  阅读(29)  评论(0编辑  收藏  举报