生信自学记录

08肿瘤微环境相关性分析

一、准备输入文件

将转录组矩阵作为输入文件

 

二、免疫环境评分

panGenes14.estimate.R

#library(utils)
#rforge <- "http://r-forge.r-project.org"
#install.packages("estimate", repos=rforge, dependencies=TRUE)

#if (!requireNamespace("BiocManager", quietly = TRUE))
#    install.packages("BiocManager")
#BiocManager::install("limma")


#引用包
library(limma)
library(estimate)
#setwd("D:\\biowolf\\panGenes\\14.estimate")        #设置工作目录

#读取目录下的文件
files=dir()
files=grep("^symbol.",files,value=T)

outTab=data.frame()
for(i in files){
  #读取文件
  CancerType=gsub("symbol\\.|\\.txt","",i)
  #CancerType="ACC"
  rt=read.table(i,sep="\t",header=T,check.names=F)
  #rt=read.table("symbol.ACC.txt",sep="\t",header=T,check.names=F)
  
  #如果一个基因占了多行,取均值
  rt=as.matrix(rt)
  rownames(rt)=rt[,1]
  exp=rt[,2:ncol(rt)]
  dimnames=list(rownames(exp),colnames(exp))
  data=matrix(as.numeric(as.matrix(exp)),nrow=nrow(exp),dimnames=dimnames)
  data=avereps(data)
  
  #删除正常,只保留肿瘤样品
  group=sapply(strsplit(colnames(data),"\\-"),"[",4)
  group=sapply(strsplit(group,""),"[",1)
  group=gsub("2","1",group)
  data=data[,group==0]
  out=data[rowMeans(data)>0,]
  out=rbind(ID=colnames(out),out)
  #输出整理后的矩阵文件
  write.table(out,file="uniq.symbol.txt",sep="\t",quote=F,col.names=F)
  
  #运行estimate包
  uniqFile="uniq.symbol.txt"
  inputDs="commonGenes.gct"
  outputDs="estimateScore.gct"
  filterCommonGenes(input.f=uniqFile, output.f=inputDs, id="GeneSymbol")
  estimateScore(input.ds =inputDs ,output.ds=outputDs)
  
  #输出每个样品的打分
  scores=read.table("estimateScore.gct",skip = 2,header = T)
  rownames(scores)=scores[,1]
  scores=t(scores[,3:ncol(scores)])
  rownames(scores)=gsub("\\.","\\-",rownames(scores))
  outTab=rbind(outTab,cbind(scores,CancerType))
  file.remove(uniqFile)
  file.remove(inputDs)
  file.remove(outputDs)
}
out=cbind(ID=row.names(outTab),outTab)
write.table(out,file="estimateScores.txt",sep="\t",quote=F,row.names=F)

三、肿瘤微环境相关性StromalScore和ImmuneScore

panGenes15.estimateCor.R

#install.packages("corrplot")

library(corrplot)                    #引用包
expFile="panGeneExp.txt"             #表达输入文件
scoreFile="estimateScores.txt"       #免疫微环境结果文件
scoreType="StromalScore"             #打分类型
#scoreType="ImmuneScore"             #打分类型

#setwd("D:\\biowolf\\panGenes\\15.estimateCor")     #设置工作目录

#读取表达文件
exp=read.table(expFile, header=T,sep="\t",row.names=1,check.names=F)
exp=exp[(exp[,"Type"]=="Tumor"),]

#读取肿瘤微环境文件
TME=read.table(scoreFile, header=T,sep="\t",row.names=1,check.names=F)

#样品取交集
sameSample=intersect(row.names(TME),row.names(exp))
TME=TME[sameSample,]
exp=exp[sameSample,]

#相关性检验
outTab=data.frame()
pTab=data.frame()
#按肿瘤类型循环
for(i in levels(factor(exp[,"CancerType"]))){
  exp1=exp[(exp[,"CancerType"]==i),]
  TME1=TME[(TME[,"CancerType"]==i),]
  x=as.numeric(TME1[,scoreType])
  pVector=data.frame(i)
  outVector=data.frame(i)
  #按基因循环
  genes=colnames(exp1)[1:(ncol(exp1)-2)]
  for(j in genes){
    y=as.numeric(exp1[,j])
    corT=cor.test(x,y,method="spearman")
    cor=corT$estimate
    pValue=corT$p.value
    pVector=cbind(pVector,pValue)
    outVector=cbind(outVector,cor)
  }
  pTab=rbind(pTab,pVector)
  outTab=rbind(outTab,outVector)
}
#输出相关性的表格
colNames=c("CancerType",colnames(exp1)[1:(ncol(exp1)-2)])
colnames(outTab)=colNames
write.table(outTab,file="estimateCor.cor.txt",sep="\t",row.names=F,quote=F)
#输出相关性检验p值的表格
colnames(pTab)=colNames
write.table(pTab,file="estimateCor.pval.txt",sep="\t",row.names=F,quote=F)

#绘制微环境相关性图形
pdf(file=paste0(scoreType,".pdf"),height=5,width=8)
row.names(outTab)=outTab[,1]
outTab=outTab[,-1]
corrplot(corr=as.matrix(t(outTab)),
         title=paste0("\n\n\n\n",scoreType),
         col=colorRampPalette(c("blue", "white", "red"))(50))
dev.off()

 

 四、肿瘤干细胞相关性(RNAss)

需要使用StemnessScores_RNAexp_20170127.2.tsv

panGenes16.RNAssCor.R

#if (!requireNamespace("BiocManager", quietly = TRUE))
#    install.packages("BiocManager")
#BiocManager::install("limma")

#install.packages("corrplot")


#引用包
library(limma)
library(corrplot)

expFile="panGeneExp.txt"                            #表达输入文件
scoreFile="StemnessScores_RNAexp_20170127.2.tsv"    #RNAss文件
scoreType="RNAss"                                   #干细胞指数
#setwd("D:\\biowolf\\panGenes\\16.RNAssCor")     #设置工作目录

#读取表达文件
exp=read.table(expFile, header=T,sep="\t",row.names=1,check.names=F)
exp=exp[(exp[,"Type"]=="Tumor"),]

#读取RNAss文件
STEM=read.table(scoreFile, header=T,sep="\t",row.names=1,check.names=F)
STEM=t(STEM)

#相关性检验
outTab=data.frame()
pTab=data.frame()
#按肿瘤类型循环
for(i in levels(factor(exp[,"CancerType"]))){
  exp1=exp[(exp[,"CancerType"]==i),]
  exp1=as.matrix(exp1[,1:(ncol(exp1)-2)])
  row.names(exp1)=gsub(".$","",row.names(exp1))
  exp1=avereps(exp1)
  
  #样品取交集
  sameSample=intersect(row.names(STEM),row.names(exp1))
  STEM1=STEM[sameSample,]
  exp1=exp1[sameSample,]
  
  x=as.numeric(STEM1[,scoreType])
  pVector=data.frame(i)
  outVector=data.frame(i)
  #按基因循环
  genes=colnames(exp1)
  for(j in genes){
    y=as.numeric(exp1[,j])
    corT=cor.test(x,y,method="spearman")
    cor=corT$estimate
    pValue=corT$p.value
    pVector=cbind(pVector,pValue)
    outVector=cbind(outVector,cor)
  }
  pTab=rbind(pTab,pVector)
  outTab=rbind(outTab,outVector)
}
#输出相关性的表格
colNames=c("CancerType",colnames(exp)[1:(ncol(exp)-2)])
colnames(outTab)=colNames
write.table(outTab,file="RNAssCor.cor.txt",sep="\t",row.names=F,quote=F)
#输出相关性检验p值的表格
colnames(pTab)=colNames
write.table(pTab,file="RNAssCor.pval.txt",sep="\t",row.names=F,quote=F)

#RNAss相关性图形
pdf("RNAssCor.pdf",height=5,width=8)
row.names(outTab)=outTab[,1]
outTab=outTab[,-1]
corrplot(corr=as.matrix(t(outTab)),
         title=paste0("\n\n\n\n",scoreType),
         col=colorRampPalette(c("blue", "white", "red"))(50))
dev.off()

 

 

五、肿瘤干细胞相关性(DNAss)

需要使用StemnessScores_DNAmeth_20170210.tsv

panGenes17.DNAssCor.R

#if (!requireNamespace("BiocManager", quietly = TRUE))
#    install.packages("BiocManager")
#BiocManager::install("limma")

#install.packages("corrplot")


#引用包
library(limma)
library(corrplot)

expFile="panGeneExp.txt"                            #表达输入文件
scoreFile="StemnessScores_DNAmeth_20170210.tsv"     #DNAss文件
scoreType="DNAss"                                   #干细胞指数
#setwd("D:\\biowolf\\panGenes\\17.DNAssCor")     #设置工作目录

#读取表达文件
exp=read.table(expFile, header=T,sep="\t",row.names=1,check.names=F)
exp=exp[(exp[,"Type"]=="Tumor"),]

#读取DNAss文件
STEM=read.table(scoreFile, header=T,sep="\t",row.names=1,check.names=F)
STEM=t(STEM)

#相关性检验
outTab=data.frame()
pTab=data.frame()
#按肿瘤类型循环
for(i in levels(factor(exp[,"CancerType"]))){
  exp1=exp[(exp[,"CancerType"]==i),]
  exp1=as.matrix(exp1[,1:(ncol(exp1)-2)])
  row.names(exp1)=gsub(".$","",row.names(exp1))
  exp1=avereps(exp1)
  
  #样品取交集
  sameSample=intersect(row.names(STEM),row.names(exp1))
  STEM1=STEM[sameSample,]
  exp1=exp1[sameSample,]
  
  x=as.numeric(STEM1[,scoreType])
  pVector=data.frame(i)
  outVector=data.frame(i)
  #按基因循环
  genes=colnames(exp1)
  for(j in genes){
    y=as.numeric(exp1[,j])
    corT=cor.test(x,y,method="spearman")
    cor=corT$estimate
    pValue=corT$p.value
    pVector=cbind(pVector,pValue)
    outVector=cbind(outVector,cor)
  }
  pTab=rbind(pTab,pVector)
  outTab=rbind(outTab,outVector)
}
#输出相关性的表格
colNames=c("CancerType",colnames(exp)[1:(ncol(exp)-2)])
colnames(outTab)=colNames
write.table(outTab,file="DNAssCor.cor.txt",sep="\t",row.names=F,quote=F)
#输出相关性检验p值的表格
colnames(pTab)=colNames
write.table(pTab,file="DNAssCor.pval.txt",sep="\t",row.names=F,quote=F)

#DNAss相关性图形
pdf("DNAssCor.pdf",height=5,width=8)
row.names(outTab)=outTab[,1]
outTab=outTab[,-1]
corrplot(corr=as.matrix(t(outTab)),
         title=paste0("\n\n\n\n",scoreType),
         col=colorRampPalette(c("blue", "white", "red"))(50))
dev.off()

 

 

posted on 2023-11-12 00:27  古城2022  阅读(86)  评论(0编辑  收藏  举报

导航