Python|对时序遥感图像实现逐像元线性回归
简介与技术流程
总的来说,就是把多幅影像叠加在一起,逐像元构建一组时间序列,然后对每一个时间序列进行线性回归。比方说,你研究区覆盖的像元是100*100的,时间序列是从2011到2015每年一幅影像,那么你就需要构建10000组时间序列,每组里面包含5个数据。把它放在一个坐标系中,横轴是时间,纵轴是每个时间下的像元值,一组时间序列构建一条折线,比较直观的可以参考下图。
代码
要调用的包
from osgeo import gdal
import numpy as np
from sklearn import linear_model
osgeo里的gdal是用来处理tif数据的。
numpy用来做数据分析统计。
sklearn里的linear_model是用来用直线拟合时间序列求斜率等的。
逐像元一元线性回归代码
逐像元的进行一元线性回归,得到每个像元对应时间序列的系数,截距,P值,R方并将其分别作为波段写入一个新的tif文件中,便于后期绘制专题图进行分析,注释比较详细,直接看代码:
"""
time: 2022-08-02
coder: welt
"""
import numpy as np
import sklearn
from osgeo import gdal
from sklearn.linear_model import LinearRegression
from tqdm import tqdm
from scipy.stats import f
from sklearn.metrics import r2_score
def regression(x, y):
"""
X为n行的numpy数组,为温度
Y为目标:n个元素的numpy数组,为NDVI
"""
regr = sklearn.linear_model.LinearRegression()
regr.fit(x, y)
a = regr.coef_
c = regr.intercept_
return a, c
def erhuigui(ndvipath, temppath, outtif):
dataset = gdal.Open(ndvipath)
projinfo = dataset.GetProjection()
geotransform = dataset.GetGeoTransform()
format = "GTiff"
driver = gdal.GetDriverByName(format)
depth = dataset.RasterCount
rows = int(dataset.RasterYSize)
colmns = int(dataset.RasterXSize)
ndvi = dataset.ReadAsArray()
src2 = gdal.Open(temppath)
temp = src2.ReadAsArray()
dst_ds1 = driver.Create(outtif, colmns, rows, 4,
gdal.GDT_Float32)
y_pre = np.zeros(depth)
p_value = np.zeros((rows, colmns))
r2 = np.zeros((rows, colmns))
dst_ds1.SetGeoTransform(geotransform)
dst_ds1.SetProjection(projinfo)
d0 = ndvi[0]
out_a = d0 * 0 - 2222.0
out_c = d0 * 0 - 2222.0
# 开始计算
for row in tqdm(range(rows)):
for col in range(colmns):
y = ndvi[:, row, col] * 1.0
x1 = temp[:, row, col] * 1.0
y = y.reshape(-1, 1)
x = x1
x = x.reshape(-1, 1)
a, c = -2222, -2222
if (np.array(x) > -9999).all() and (np.array(y) > -9999).all():
try:
a, c = regression(x, y)
except:
pass
out_a[row, col], out_c[row, col] = a, c
for d in range(depth):
y_pre[d] = a * x1[d] + c
# 计算组内样本方差
var1 = np.var(y, ddof = 1)
var2 = np.var(y_pre, ddof = 1)
# 计算统计量F
F = var1 / var2
# 计算自由度
df1 = len(x) - 1
df2 = len(y) - 1
# 计算p值
p_value[row, col] = 1 - 2 * abs(0.5 - f.cdf(F, df1, df2))
r2[row, col] = r2_score(y, y_pre)
'''
将a,c,p值,r2写入图像中
波段顺序为a,c,p值,r2
'''
dst_ds1.GetRasterBand(1).WriteArray(out_a)
dst_ds1.GetRasterBand(2).WriteArray(out_c)
dst_ds1.GetRasterBand(3).WriteArray(p_value)
dst_ds1.GetRasterBand(4).WriteArray(r2)
dst_ds1.GetRasterBand(1).SetNoDataValue(-2222)
dst_ds1.GetRasterBand(2).SetNoDataValue(-2222)
dst_ds1.GetRasterBand(3).SetNoDataValue(-2222)
dst_ds1.GetRasterBand(4).SetNoDataValue(-2222)
if __name__ == "__main__":
# 修改路径
NDVIPath = r"GMMg2000_2005.tif" # 植被覆盖度路径
TempPath = r"MODIS2000_2005.tif" # 气温路径
OutTif = r"out.tif"
erhuigui(NDVIPath, TempPath, OutTif)
逐像元多元线性回归代码
在一元线性回归的基础上加了一个自变量,此时需要对regression函数做出修改,返回两个系数以及截距,比较简单,同样直接看代码:
"""
time: 2022-07-16
coder: welt
"""
import numpy as np
import sklearn
from osgeo import gdal
from sklearn.linear_model import LinearRegression
from tqdm import tqdm
from scipy.stats import f
def regression(x, y):
"""
X为n行2列的numpy数组,n为年份数,第一列为气温,第二列为降水
Y为目标:n个元素的numpy数组,为NDVI
"""
regr = sklearn.linear_model.LinearRegression()
regr.fit(x, y)
a, b = regr.coef_
c = regr.intercept_
return a, b, c
def erhuigui(ndvipath, temppath, rainpath, outtif):
"""
:param ndvipath:NDVI的文件路径
:param temppath:温度的文件路径
:param rainpath:降水量的文件路径
:param outtif:输出结果的文件路径
"""
dataset = gdal.Open(ndvipath)
projinfo = dataset.GetProjection()
geotransform = dataset.GetGeoTransform()
format = "GTiff"
driver = gdal.GetDriverByName(format)
depth = dataset.RasterCount
rows = int(dataset.RasterYSize)
colmns = int(dataset.RasterXSize)
ndvi = dataset.ReadAsArray()
src2 = gdal.Open(temppath)
temp = src2.ReadAsArray()
src3 = gdal.Open(rainpath)
rain = src3.ReadAsArray()
dst_ds1 = driver.Create(outtif, colmns, rows, 3 + depth + 1,
gdal.GDT_Float32)
#predict = np.zeros((depth, rows, colmns))
Residual = np.zeros((depth, rows, colmns))
y_pre = np.zeros(depth)
p_value = np.zeros((rows, colmns))
dst_ds1.SetGeoTransform(geotransform)
dst_ds1.SetProjection(projinfo)
d0 = ndvi[0]
out_a = d0 * 0 - 2222.0
out_b = d0 * 0 - 2222.0
out_c = d0 * 0 - 2222.0
# 开始计算
for row in tqdm(range(rows)):
for col in range(colmns):
y = ndvi[:, row, col] * 1.0
x1 = temp[:, row, col] * 1.0
x2 = rain[:, row, col] * 1.0
x = np.vstack([x1, x2]).T
a, b, c = -2222, -2222, -2222
if (np.array(x) > -9999).all() and (np.array(y) > -9999).all():
try:
a, b, c = regression(x, y)
except:
pass
out_a[row, col], out_b[row, col], out_c[row, col] = a, b, c
for d in range(depth):
# predict[d, row, col] = a * x1[d] + b * x2[d] + c
Residual[d, row, col] = y[d]-(a * x1[d] + b * x2[d] + c)
y_pre[d] = a * x1[d] + b * x2[d] + c
# 计算组内样本方差
var1 = np.var(y, ddof=1)
var2 = np.var(y_pre, ddof=1)
# 计算统计量F
F = var1 / var2
# 计算自由度
df1 = len(x) - 1
df2 = len(y) - 1
# 计算p值
p_value[row, col] = 1 - 2 * abs(0.5 - f.cdf(F, df1, df2))
'''
将a,b,c,预测值,p值写入图像中
波段顺序为a,b,c,残差,p值
'''
dst_ds1.GetRasterBand(1).WriteArray(out_a)
dst_ds1.GetRasterBand(2).WriteArray(out_b)
dst_ds1.GetRasterBand(3).WriteArray(out_c)
'''
求每个时期的预测值
for i in range(depth):
dst_ds1.GetRasterBand(3 + i).WriteArray(predict[i])
'''
for i in range(depth):
dst_ds1.GetRasterBand(3 + i).WriteArray(Residual[i])
dst_ds1.GetRasterBand(3 + depth + 1).WriteArray(p_value)
dst_ds1.GetRasterBand(1).SetNoDataValue(-2222)
dst_ds1.GetRasterBand(2).SetNoDataValue(-2222)
dst_ds1.GetRasterBand(3).SetNoDataValue(-2222)
for i in range(depth):
dst_ds1.GetRasterBand(3 + i).SetNoDataValue(-2222)
dst_ds1.GetRasterBand(3 + depth + 1).SetNoDataValue(-2222)
if __name__ == "__main__":
# 修改路径
NDVIPath = r"jiusan_NDVI.tif" # 植被覆盖度路径
TempPath = r"jiusan_temp.tif" # 气温路径
RainPath = r"jiusan_rain.tif" # 降水路径
OutTif = r"out.tif"
erhuigui(NDVIPath, TempPath, RainPath, OutTif)
结果
以一元线性回归为例,结果如下,可以在Arcgis中分别对每一个波段绘制对应的专题图以便于后期的分析。
参考文章:https://blog.csdn.net/Leaze932822995/article/details/112392111