R:Beta回归分析

rm(list = ls())
library(readr)      # 读取 CSV 文件
library(dplyr)      # 数据操作
library(tidyr)      # 数据整理
library(betareg)    # Beta 回归
library(tibble) 
setwd("C:\\Users\\Administrator\\Desktop\\machine learning\\Multiple Linear Regression")
# 数据导入
# 读取矩阵和分组文件
bray_matrix <- read.csv("bray.csv", row.names = 1)
jaccard_matrix <- read.csv("jaccard.csv", row.names = 1)
group_info <- read_delim("group_beta.txt", delim = "\t")

# --- 1. 计算组内稳定性 ---
calculate_stability <- function(matrix_data, group_info) {
  long_data <- matrix_data %>%
    as.matrix() %>%
    as.table() %>%
    as.data.frame() %>%
    setNames(c("Sample1", "Sample2", "Distance")) %>%
    left_join(group_info, by = c("Sample1" = "SampleID")) %>%
    rename(Group1 = Gene) %>%
    left_join(group_info, by = c("Sample2" = "SampleID")) %>%
    rename(Group2 = Gene) %>%
    filter(Group1 == Group2, Distance != 0) %>% # 只保留组内非零距离
    mutate(Stability = 1 - Distance)
  
  return(long_data)
}

stability_long <- calculate_stability(bray_matrix, group_info)

# --- 2. 转换距离矩阵为全样本长格式,并引入分组信息 ---
convert_to_long <- function(matrix_data, group_info) {
  long_data <- matrix_data %>%
    as.matrix() %>%
    as.table() %>%
    as.data.frame() %>%
    setNames(c("Sample1", "Sample2", "Distance")) %>%
    left_join(group_info, by = c("Sample1" = "SampleID")) %>%
    rename(Group1 = Gene) %>%
    left_join(group_info, by = c("Sample2" = "SampleID")) %>%
    rename(Group2 = Gene) %>%
    filter(Distance != 0) # 移除自己与自己的比较
  return(long_data)
}

bray_long <- convert_to_long(bray_matrix, group_info)
jaccard_long <- convert_to_long(jaccard_matrix, group_info)

# --- 3. Beta 回归分析函数 ---
perform_beta_regression <- function(data, response, predictors) {
  formula <- as.formula(paste(response, "~", paste(predictors, collapse = "+")))
  model <- betareg(formula, data = data)
  
  # 提取回归结果
  coefficients <- summary(model)$coefficients$mean
  result_table <- data.frame(
    Variable = rownames(coefficients),
    Beta = coefficients[, "Estimate"],
    SE = coefficients[, "Std. Error"],
    p_value = coefficients[, "Pr(>|z|)"]
  )
  return(result_table)
}

# --- 4. 进行回归分析 ---
# (1) 稳定性回归分析
stability_results <- perform_beta_regression(stability_long, "Stability", c("Group1"))

# (2) Bray–Curtis 距离回归分析
bray_results <- perform_beta_regression(bray_long, "Distance", c("Group1"))

# (3) Jaccard 距离回归分析
jaccard_results <- perform_beta_regression(jaccard_long, "Distance", c("Group1"))

# --- 5. 合并结果表 ---
final_table <- bind_rows(
  mutate(stability_results, Metric = "Stability (1 - Bray–Curtis, Within Group)"),
  mutate(bray_results, Metric = "Bray–Curtis distance"),
  mutate(jaccard_results, Metric = "Jaccard Index distance")
) %>%
  select(Metric, everything())

# 打印结果
print(final_table)

write.table(final_table, "Beta_Regression.txt", row.names = TRUE, col.names = TRUE,sep = "\t", quote = FALSE)

######################################################
# --- 6. 根据 Gene 计算均值和标准差 ---
calculate_group_stats <- function(data) {
  stats <- data %>%
    group_by(Group1) %>%
    summarise(
      Mean = mean(Stability, na.rm = TRUE),
      SD = sd(Stability, na.rm = TRUE)
    )
  return(stats)
}

group_stats <- calculate_group_stats(stability_long)

# --- 7. PERMANOVA 检验 ---
library(vegan)  # 加载 vegan 包用于 PERMANOVA

perform_permanova <- function(data, response, grouping_var, n_permutations = 999) {
  # 使用距离矩阵进行 PERMANOVA 检验
  distance_matrix <- dist(data[[response]])  # 计算距离矩阵
  permanova_result <- adonis2(distance_matrix ~ data[[grouping_var]], permutations = n_permutations)
  
  return(permanova_result)
}

# 执行 PERMANOVA 检验
permanova_result <- perform_permanova(stability_long, "Stability", "Group1", n_permutations = 999)

# --- 8. 保存均值和标准差到文件 ---
write.table(group_stats, "Stability_Stats.txt", row.names = FALSE, col.names = TRUE, sep = "\t", quote = FALSE)

# --- 9. 保存 PERMANOVA 检验结果到文件 ---

# 保存 PERMANOVA 检验结果
write.table(permanova_result, "permanova_result.txt", row.names = FALSE, col.names = TRUE, sep = "\t", quote = FALSE)

 

posted @ 2024-11-28 17:25  王哲MGG_AI  阅读(6)  评论(0编辑  收藏  举报