R常用统计 - 相关关系分析
数据格式
输入为一个矩阵:每行对应一个表型或一个基因的表达量,每列对应一个样品(首列为行名,首行为样品名)。假定前9行为phenotype,从第10行起为gene_id,编写简单for循环如下:
script
# Spearman correlation between every phenotype row and every expressed gene
# row of a tab-separated matrix.
#
# Input : first column = row names, remaining columns = samples.
#         Rows 1..n_pheno are phenotypes, all later rows are genes.
# Output: correlation_results.csv with one line per phenotype/gene pair
#         (columns: phenotype, gene_id, r_value, p_value).

# Install lessR only when it is missing; the installer is install.packages().
if (!requireNamespace("lessR", quietly = TRUE)) {
  install.packages("lessR")
}
library(lessR)

# na.strings must be a character vector; "NA" marks missing cells.
data <- read.table("c:/Users/****/Desktop/yourfile.txt",
                   header = TRUE, row.names = 1, na.strings = "NA",
                   sep = "\t") # input your file

n_pheno <- 9        # number of leading phenotype rows
min_median <- 1e-5  # genes with a lower median expression are skipped

if (nrow(data) <= n_pheno) {
  stop("Expected more than ", n_pheno, " rows (phenotypes, then genes), got ",
       nrow(data), call. = FALSE)
}

pheno_rows <- seq_len(n_pheno)
gene_rows <- seq(n_pheno + 1, nrow(data))

# The expression filter depends only on the gene, so evaluate it once per gene
# instead of once per phenotype/gene pair. Missing values are ignored; a gene
# with no usable values at all is dropped.
gene_median <- vapply(
  gene_rows,
  function(j) median(as.numeric(data[j, ]), na.rm = TRUE),
  numeric(1)
)
gene_rows <- gene_rows[!is.na(gene_median) & gene_median >= min_median]

# Preallocate one slot per pair rather than growing a data frame with rbind().
pairs <- vector("list", length(pheno_rows) * length(gene_rows))
k <- 0L

# NOTE(review): Correlation() appears to resolve its arguments by name, so x
# and y are kept as plain top-level numeric vectors and the loops are not
# wrapped in functions -- confirm against the installed lessR version.
for (i in pheno_rows) {
  x <- as.numeric(data[i, ])
  for (j in gene_rows) {
    y <- as.numeric(data[j, ])
    # NOTE(review): `adjt` does not look like a documented argument of
    # Correlation() or cor.test(), and a single test has nothing to adjust.
    # If FDR control is wanted, apply
    # p.adjust(results$p_value, method = "fdr") to the finished table.
    tem_res <- Correlation(x,
                           y,
                           method = "spearman",
                           adjt = "fdr",
                           heat_map = FALSE)
    k <- k + 1L
    pairs[[k]] <- data.frame(phenotype = row.names(data)[i],
                             gene_id = row.names(data)[j],
                             r_value = tem_res$r,
                             p_value = tem_res$pvalue)
  }
}

if (k > 0L) {
  results <- do.call(rbind, pairs)
} else {
  # No gene passed the filter: still write a file with the expected header.
  results <- data.frame(phenotype = character(0),
                        gene_id = character(0),
                        r_value = numeric(0),
                        p_value = numeric(0))
}

write.csv(results, "correlation_results.csv", row.names = FALSE)