R:贝叶斯线性回归
rm(list = ls())
library(dplyr)
library(broom)
library(ggplot2)
library(brms)
# 设置工作目录
setwd("C:\\Users\\Administrator\\Desktop\\machine learning\\Bayesian linear regression")
# 读取多样性数据
diversity_data <- read.table("alpha_diversity.txt", header = TRUE, sep = "\t")
# 读取分组数据
group_data <- read.table("group.txt", header = TRUE, sep = "\t")
# 合并两个数据集
merged_data <- merge(diversity_data, group_data, by.x = "SampleID", by.y = "SampleID")
# 确保因变量为数值型,自变量为因子型
merged_data$Time <- as.factor(merged_data$Time)
# 生成所有模型(包含时间差异)
diversity_metrics <- c("Shannon", "Simpson", "Pielou")
results <- lapply(diversity_metrics, function(metric) {
# 构建贝叶斯回归模型公式,关注时间(Time)变量的差异
formula <- as.formula(paste(metric, "~ Time"))
# 拟合贝叶斯回归模型
model <- brm(formula, data = merged_data, family = gaussian(),
prior = c(set_prior("normal(0, 5)", class = "b"),
set_prior("normal(0, 5)", class = "Intercept")),
iter = 2000, chains = 4) # 使用 4 条链,2000 次迭代
# 模型结果整理并加上指标名称
tidy(model) %>%
mutate(Metric = metric) # 添加指标名称
})
final_table <- bind_rows(results)
# 合并所有结果
final_table <- bind_rows(results)
# 筛选需要的列并重命名
final_table <- final_table %>%
select(Metric, term, estimate, std.error, conf.low, conf.high) %>% # 使用 conf.low 和 conf.high
rename(Variable = term,
Posterior_Mean = estimate, # 贝叶斯回归中的后验均值
Posterior_SD = std.error, # 贝叶斯回归中的后验标准差
CI_Lower = conf.low, # 置信区间下限
CI_Upper = conf.high) # 置信区间上限
# 输出结果表格
print(final_table)
# 将结果保存为CSV文件
write.csv(final_table, "alpha_regression_results.csv", row.names = TRUE)