R-多元线性回归的建模和验证

问题描述

分析地面采集的光谱和LiDAR结构信息,估计由于病虫害引起的失叶率

  1. 由光谱信息建立模型
  2. 由结构信息建立模型
  3. 由光谱信息和结构信息相结合建立模型
  4. 分别画预测和验证集散点图,计算R2和RMSE

代码实现

train = read.csv("F:/RData/实验一/实验数据/Simplified_Canopy_defoliation_analysis_training-simp.csv")
test = read.csv("F:/RData/实验一/实验数据/Simplified_Canopy_defoliation_analysis_vald-simp.csv")
mydata_train = train[-15,]  #去除有NA值的一行

#library(Hmisc)
r = rcorr(as.matrix(mydata_train[,2:14]), type = 'pearson')
r
#library(corrplot)
corrplot(r$r, tl.pos = 'upper', tl.cex = 0.8, tl.col = 'black', bg = 'gray')
corrplot(r$P, add=TRUE, type="lower", method="number", diag = FALSE, tl.pos = "n", cl.pos="n", bg = 'gray')
#pearson相关系数表明自变量NONPHO_fraction_16和dNONPHO_fraction之间存在强负相关性

#光谱信息
model_1 = lm(Average.Defol..Status.x ~ NONPHO_fraction_15 + GV_fraction_15 + 
               Shade_fraction_15 + Fra_under_15 + 
               NONPHO_fraction_16 + GV_fraction_16 + 
               dFra_UDS, data = mydata_train)
summary(model_1)

#去除不显著的预测变量
model_11 = lm(Average.Defol..Status.x ~ Shade_fraction_15 + GV_fraction_16 + dFra_UDS, data = mydata_train)
summary(model_11)

#结构信息
model_2 = lm(Average.Defol..Status.x ~ b70_16 + dske + dkur + dint_p25, data = mydata_train)
summary(model_2)

model_21 = lm(Average.Defol..Status.x ~ b70_16 + dint_p25, data = mydata_train)
summary(model_21)

#光谱和结构
model_3 = lm(Average.Defol..Status.x ~ NONPHO_fraction_15 + GV_fraction_15 + 
               Shade_fraction_15 + Fra_under_15 + 
               NONPHO_fraction_16 + GV_fraction_16 + 
               dFra_UDS + b70_16 + dske + 
               dkur + dint_p25, data = mydata_train)
summary(model_3)

#去除不显著的预测变量和截距项
model_31 = lm(Average.Defol..Status.x ~  0 + Shade_fraction_15 + GV_fraction_16 + 
                dFra_UDS + b70_16, data = mydata_train)
summary(model_31)

#进行预测和对比
tru = test[,10]
n = length(tru)
predict_data = matrix(0, n, 3)
predict_data[,1] = predict(model_1, test)
predict_data[,2] = predict(model_2, test)
predict_data[,3] = predict(model_31, test)
tit = c('光谱信息','结构信息','光谱信息和结构信息')
for (i in 1:3) {
  plot(x = predict_data[,i], y = tru, xlab = 'Predict', ylab = 'True', xlim = c(-20,100), ylim = c(-20,100), main = tit[i])
  lines(x = c(-20,100), y = c(-20,100), lty = 5)
}

for (i in 1:3) {
  plot(predict_data[,i], col = 'blue', type = 'b', xlab = 'Row', ylab = 'Average.Defol..Status', ylim = c(-20,100), main = tit[i])
  lines(tru, col = 'red', type = 'b')
  legend('bottomleft', legend = c('predict','true'), pch = c(1,1), col = c('blue','red'))
}

#计算R_squre
pre = predict_data
sse = apply((pre-tru)^2, 2, sum)
sst = sum((tru- mean(tru))^2)
R_squre = 1- sse/sst
R_squre

#计算RMSE
RMSE=sqrt((apply((tru-pre)^2, 2, sum))/n)
RMSE

 

  

 

posted @ 2022-06-02 23:45  Khru  阅读(133)  评论(0编辑  收藏  举报