Python|对时序遥感图像实现逐像元线性回归

简介与技术流程

总的来说,就是把多幅影像叠加在一起,逐像元构建一组时间序列,然后对每一个时间序列进行线性回归。比方说,你研究区覆盖的像元是100*100的,时间序列是从2011到2015每年一幅影像,那么你就需要构建10000组时间序列,每组里面包含5个数据。把它放在一个坐标系中,横轴是时间,纵轴是每个时间下的像元值,一组时间序列构建一条折线,比较直观的可以参考下图。

代码

要调用的包

from osgeo import gdal
import numpy as np
from sklearn import linear_model

osgeo里的gdal是用来处理tif数据的。
numpy用来做数据分析统计。
sklearn里的linear_model是用来用直线拟合时间序列求斜率等的。

逐像元一元线性回归代码

逐像元的进行一元线性回归,得到每个像元对应时间序列的系数,截距,P值,R方并将其分别作为波段写入一个新的tif文件中,便于后期绘制专题图进行分析,注释比较详细,直接看代码:

"""
time: 2022-08-02
coder: welt
"""
import numpy as np
import sklearn
from osgeo import gdal
from sklearn.linear_model import LinearRegression
from tqdm import tqdm
from scipy.stats import f
from sklearn.metrics import r2_score


def regression(x, y):
    """
    X为n行的numpy数组,为温度
    Y为目标:n个元素的numpy数组,为NDVI
    """
    regr = sklearn.linear_model.LinearRegression()
    regr.fit(x, y)
    a = regr.coef_
    c = regr.intercept_
    return a, c


def erhuigui(ndvipath, temppath, outtif):
    dataset = gdal.Open(ndvipath)
    projinfo = dataset.GetProjection()
    geotransform = dataset.GetGeoTransform()

    format = "GTiff"
    driver = gdal.GetDriverByName(format)
    depth = dataset.RasterCount
    rows = int(dataset.RasterYSize)
    colmns = int(dataset.RasterXSize)

    ndvi = dataset.ReadAsArray()
    src2 = gdal.Open(temppath)
    temp = src2.ReadAsArray()


    dst_ds1 = driver.Create(outtif, colmns, rows, 4,
                            gdal.GDT_Float32)
    y_pre = np.zeros(depth)
    p_value = np.zeros((rows, colmns))
    r2 = np.zeros((rows, colmns))
    dst_ds1.SetGeoTransform(geotransform)
    dst_ds1.SetProjection(projinfo)
    d0 = ndvi[0]
    out_a = d0 * 0 - 2222.0
    out_c = d0 * 0 - 2222.0

    # 开始计算
    for row in tqdm(range(rows)):
        for col in range(colmns):
            y = ndvi[:, row, col] * 1.0
            x1 = temp[:, row, col] * 1.0
            y = y.reshape(-1, 1)
            x = x1
            x = x.reshape(-1, 1)
            a, c = -2222, -2222
            if (np.array(x) > -9999).all() and (np.array(y) > -9999).all():
                try:
                    a, c = regression(x, y)
                except:
                    pass
                out_a[row, col], out_c[row, col] = a, c
                for d in range(depth):

                    y_pre[d] = a * x1[d] + c

                # 计算组内样本方差
                var1 = np.var(y, ddof = 1)
                var2 = np.var(y_pre, ddof = 1)
                # 计算统计量F
                F = var1 / var2
                # 计算自由度
                df1 = len(x) - 1
                df2 = len(y) - 1
                # 计算p值
                p_value[row, col] = 1 - 2 * abs(0.5 - f.cdf(F, df1, df2))
                r2[row, col] = r2_score(y, y_pre)


    '''
    将a,c,p值,r2写入图像中
    波段顺序为a,c,p值,r2
    '''
    dst_ds1.GetRasterBand(1).WriteArray(out_a)
    dst_ds1.GetRasterBand(2).WriteArray(out_c)
    dst_ds1.GetRasterBand(3).WriteArray(p_value)
    dst_ds1.GetRasterBand(4).WriteArray(r2)
    dst_ds1.GetRasterBand(1).SetNoDataValue(-2222)
    dst_ds1.GetRasterBand(2).SetNoDataValue(-2222)
    dst_ds1.GetRasterBand(3).SetNoDataValue(-2222)
    dst_ds1.GetRasterBand(4).SetNoDataValue(-2222)

if __name__ == "__main__":
    # 修改路径
    NDVIPath = r"GMMg2000_2005.tif"  # 植被覆盖度路径
    TempPath = r"MODIS2000_2005.tif"  # 气温路径
    OutTif = r"out.tif"
    erhuigui(NDVIPath, TempPath, OutTif)

逐像元多元线性回归代码

在一元线性回归的基础上加了一个自变量,此时需要对regression函数做出修改,返回两个系数以及截距,比较简单,同样直接看代码:

"""
time: 2022-07-16
coder: welt
"""
import numpy as np
import sklearn
from osgeo import gdal
from sklearn.linear_model import LinearRegression
from tqdm import tqdm
from scipy.stats import f



def regression(x, y):
    """
    X为n行2列的numpy数组,n为年份数,第一列为气温,第二列为降水
    Y为目标:n个元素的numpy数组,为NDVI
    """
    regr = sklearn.linear_model.LinearRegression()
    regr.fit(x, y)
    a, b = regr.coef_
    c = regr.intercept_
    return a, b, c


def erhuigui(ndvipath, temppath, rainpath, outtif):
    """
    :param ndvipath:NDVI的文件路径
    :param temppath:温度的文件路径
    :param rainpath:降水量的文件路径
    :param outtif:输出结果的文件路径
    """
    dataset = gdal.Open(ndvipath)
    projinfo = dataset.GetProjection()
    geotransform = dataset.GetGeoTransform()

    format = "GTiff"
    driver = gdal.GetDriverByName(format)
    depth = dataset.RasterCount
    rows = int(dataset.RasterYSize)
    colmns = int(dataset.RasterXSize)

    ndvi = dataset.ReadAsArray()
    src2 = gdal.Open(temppath)
    temp = src2.ReadAsArray()

    src3 = gdal.Open(rainpath)
    rain = src3.ReadAsArray()

    dst_ds1 = driver.Create(outtif, colmns, rows, 3 + depth + 1,
                            gdal.GDT_Float32)
    #predict = np.zeros((depth, rows, colmns))
    Residual = np.zeros((depth, rows, colmns))
    y_pre = np.zeros(depth)
    p_value = np.zeros((rows, colmns))
    dst_ds1.SetGeoTransform(geotransform)
    dst_ds1.SetProjection(projinfo)
    d0 = ndvi[0]
    out_a = d0 * 0 - 2222.0
    out_b = d0 * 0 - 2222.0
    out_c = d0 * 0 - 2222.0
    # 开始计算
    for row in tqdm(range(rows)):
        for col in range(colmns):
            y = ndvi[:, row, col] * 1.0
            x1 = temp[:, row, col] * 1.0
            x2 = rain[:, row, col] * 1.0
            x = np.vstack([x1, x2]).T
            a, b, c = -2222, -2222, -2222
            if (np.array(x) > -9999).all() and (np.array(y) > -9999).all():
                try:
                    a, b, c = regression(x, y)
                except:
                    pass
                out_a[row, col], out_b[row, col], out_c[row, col] = a, b, c
                for d in range(depth):
                    # predict[d, row, col] = a * x1[d] + b * x2[d] + c
                    Residual[d, row, col] = y[d]-(a * x1[d] + b * x2[d] + c)
                    y_pre[d] = a * x1[d] + b * x2[d] + c

                # 计算组内样本方差
                var1 = np.var(y, ddof=1)
                var2 = np.var(y_pre, ddof=1)
                # 计算统计量F
                F = var1 / var2
                # 计算自由度
                df1 = len(x) - 1
                df2 = len(y) - 1
                # 计算p值
                p_value[row, col] = 1 - 2 * abs(0.5 - f.cdf(F, df1, df2))

    '''
    将a,b,c,预测值,p值写入图像中
    波段顺序为a,b,c,残差,p值
    '''
    dst_ds1.GetRasterBand(1).WriteArray(out_a)
    dst_ds1.GetRasterBand(2).WriteArray(out_b)
    dst_ds1.GetRasterBand(3).WriteArray(out_c)
    '''
    求每个时期的预测值
    for i in range(depth):
        dst_ds1.GetRasterBand(3 + i).WriteArray(predict[i])
    '''
    for i in range(depth):
        dst_ds1.GetRasterBand(3 + i).WriteArray(Residual[i])
    dst_ds1.GetRasterBand(3 + depth + 1).WriteArray(p_value)
    dst_ds1.GetRasterBand(1).SetNoDataValue(-2222)
    dst_ds1.GetRasterBand(2).SetNoDataValue(-2222)
    dst_ds1.GetRasterBand(3).SetNoDataValue(-2222)
    for i in range(depth):
        dst_ds1.GetRasterBand(3 + i).SetNoDataValue(-2222)
    dst_ds1.GetRasterBand(3 + depth + 1).SetNoDataValue(-2222)

if __name__ == "__main__":
    # 修改路径
    NDVIPath = r"jiusan_NDVI.tif"  # 植被覆盖度路径
    TempPath = r"jiusan_temp.tif"  # 气温路径
    RainPath = r"jiusan_rain.tif"  # 降水路径
    OutTif = r"out.tif"
    erhuigui(NDVIPath, TempPath, RainPath, OutTif)

结果

以一元线性回归为例,结果如下,可以在Arcgis中分别对每一个波段绘制对应的专题图以便于后期的分析。

参考文章:https://blog.csdn.net/Leaze932822995/article/details/112392111

posted @ 2022-08-02 15:03  Weltㅤ  阅读(2518)  评论(0编辑  收藏  举报