GDAL notes -- Chapter 12
Remote sensing image classification (supervised and unsupervised)
1. K-means clustering (unsupervised classification)
# Unsupervised classification
# Helper function to stack raster bands into a single array
from osgeo import gdal
import numpy as np

def stack_bands(filenames):
    # Read every band of every file and stack them along a third axis
    bands = []
    for fn in filenames:
        ds = gdal.Open(fn)
        for i in range(1, ds.RasterCount + 1):
            bands.append(ds.GetRasterBand(i).ReadAsArray())
    return np.dstack(bands)

# K-means clustering with the Spectral package
import os
import numpy as np
import spectral
from osgeo import gdal
import ospybook as pb

folder = r'E:\桌面文件保存路径\gdal\osgeopy-data\osgeopy-data\Landsat\Utah'
raster_fn = ['LE70380322000181EDC02_60m.tif', 'LE70380322000181EDC02_TIR_60m.tif']
out_fn = 'kmeans_prediction_60m2.tif'

os.chdir(folder)
data = pb.stack_bands(raster_fn)            # stack the bands into one array
classes, centers = spectral.kmeans(data)    # run the model; defaults are 10 clusters, 20 iterations

ds = gdal.Open(raster_fn[0])                # used below for the projection and geotransform
out_ds = pb.make_raster(ds, out_fn, classes, gdal.GDT_Byte)
levels = pb.compute_overview_levels(out_ds.GetRasterBand(1))
out_ds.BuildOverviews('NEAREST', levels)
out_ds.FlushCache()
out_ds.GetRasterBand(1).ComputeStatistics(False)
del out_ds, ds
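The snippet above leans on ospybook's make_raster helper to write the classified array back out with the source image's georeferencing. If ospybook is not installed, a minimal stand-in along these lines should behave similarly; the function name, argument order, and the optional nodata parameter here are my assumptions, not ospybook's documented API:

# Sketch of a make_raster-style helper: copy georeferencing from a template
# dataset and write a single-band array to a new GeoTIFF.
from osgeo import gdal

def make_raster_sketch(template_ds, out_fn, data, data_type, nodata=None):
    driver = gdal.GetDriverByName('GTiff')
    out_ds = driver.Create(out_fn, template_ds.RasterXSize,
                           template_ds.RasterYSize, 1, data_type)
    out_ds.SetProjection(template_ds.GetProjection())      # copy the CRS
    out_ds.SetGeoTransform(template_ds.GetGeoTransform())  # copy the pixel geometry
    band = out_ds.GetRasterBand(1)
    if nodata is not None:
        band.SetNoDataValue(nodata)
    band.WriteArray(data)
    return out_ds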
2. CART decision tree classification with scikit-learn (supervised classification)
# CART map classification (supervised)
import csv
import os
import numpy as np
from sklearn import tree
from osgeo import gdal
import ospybook as pb

folder = r'E:\桌面文件保存路径\gdal\osgeopy-data\osgeopy-data\Landsat\Utah'
raster_fn = ['LE70380322000181EDC02_60m.tif', 'LE70380322000181EDC02_TIR_60m.tif']  # imagery bands
out_fn = r'tree_prediction60.tif'
train_fn = r'E:\桌面文件保存路径\gdal\osgeopy-data\osgeopy-data\Utah\training_data.csv'  # CSV of coordinates and class labels
gap_fn = r'E:\桌面文件保存路径\gdal\osgeopy-data\osgeopy-data\Utah\landcover60.tif'      # raster that supplies the color table

os.chdir(folder)
xys = []
classes = []
with open(train_fn) as fp:
    reader = csv.reader(fp)                      # reader object that iterates over rows
    next(reader)                                 # skip the header row
    for row in reader:                           # loop over the remaining rows
        xys.append([float(n) for n in row[:2]])  # first two columns are coordinates
        classes.append(int(row[2]))              # third column is the class label

ds = gdal.Open(raster_fn[0])
pixel_trans = gdal.Transformer(ds, None, [])     # with dst=None, converts projected coordinates to image coordinates (pixel offsets)
offset, ok = pixel_trans.TransformPoints(True, xys)  # compute the pixel offsets
cols, rows, z = zip(*offset)                     # split into column, row, and z, used to read band values
cols = [int(col) for col in cols]                # offsets must be integers for indexing
rows = [int(row) for row in rows]

data = pb.stack_bands(raster_fn)                 # stack the arrays from the two files
sample = data[rows, cols, :]                     # sample the pixel values at the training points (all bands, fancy indexing by row/column lists)
print(np.shape(sample))                          # one row per training point, one column per band, e.g. (578, 4)

clf = tree.DecisionTreeClassifier(max_depth=5)   # build the model with a maximum tree depth of 5
clf = clf.fit(sample, classes)                   # fit the model

# If memory is tight, predict one row at a time instead:
# prediction = np.empty(data.shape[0:2])         # rows x columns array
# print(np.shape(prediction))                    # (3631, 3996), the original raster dimensions
# for i in range(data.shape[0]):
#     prediction[i, :] = clf.predict(data[i, :, :])  # read the source data row by row and predict

rows, cols, bands = data.shape
data2d = np.reshape(data, (rows * cols, bands))  # reshape so each row is one pixel and each column is one band
prediction = clf.predict(data2d)
prediction = np.reshape(prediction, (rows, cols))  # reshape back to the raster dimensions
prediction[np.sum(data, 2) == 0] = 0             # sum over axis=2; pixels whose bands sum to 0 are nodata

predict_ds = pb.make_raster(ds, out_fn, prediction, gdal.GDT_Byte, 0)
predict_ds.FlushCache()                          # flush the cache
levels = pb.compute_overview_levels(predict_ds.GetRasterBand(1))  # suitable overview levels (e.g. 2, 4, 8, 16)
print(levels)
predict_ds.BuildOverviews('NEAREST', levels)

gap_ds = gdal.Open(gap_fn)
colors = gap_ds.GetRasterBand(1).GetRasterColorTable()   # read the color table from the reference land-cover raster
predict_ds.GetRasterBand(1).SetRasterColorTable(colors)  # apply it to the prediction
del ds
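Before classifying the whole image it can be worth a quick look at how the fitted tree behaves on the training samples. This check is not part of the original note; it only uses standard scikit-learn calls on the clf and sample objects defined above:

from sklearn.tree import export_text

print(clf.score(sample, classes))   # accuracy on the training samples (optimistic, but catches gross errors)
print(export_text(clf))             # dump the learned split rules as plain text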
3. Kappa coefficient and confusion matrix calculation
# Confusion matrix and kappa statistic
# Sample the prediction raster at the accuracy-assessment points, then compare the
# predicted classes with the reference classes to compute kappa and the confusion matrix
import csv
import os
import numpy as np
from sklearn import metrics   # confusion matrix
import skll                   # kappa coefficient; the lower the kappa, the worse the classification
from osgeo import gdal

folder = ''
accuracy_fn = r''      # CSV of accuracy-assessment points and their reference classes
matrix_fn = r''        # output file for the confusion matrix
prediction_fn = r''    # the prediction raster

os.chdir(folder)
xys = []               # projected coordinates of the points
classes = []
with open(accuracy_fn) as fp:
    reader = csv.reader(fp)
    next(reader)       # skip the header row
    for row in reader:
        xys.append([float(n) for n in row[:2]])
        classes.append(int(row[2]))

ds = gdal.Open(prediction_fn)                    # open the prediction raster
pixel_trans = gdal.Transformer(ds, None, [])     # transformer from projected coordinates to image coordinates
offsets, ok = pixel_trans.TransformPoints(True, xys)
cols, rows, z = zip(*offsets)
cols = [int(col) for col in cols]                # offsets must be integers before indexing
rows = [int(row) for row in rows]
data = ds.GetRasterBand(1).ReadAsArray()
sample = data[rows, cols]                        # predicted class at each assessment point
del ds

print('Kappa: ', skll.kappa(classes, sample))    # reference classes vs. predicted classes

labels = np.unique(np.concatenate((classes, sample)))              # every class that appears in either list, used as row/column labels
matrix = metrics.confusion_matrix(classes, sample, labels=labels)  # compute the confusion matrix
matrix = np.insert(matrix, 0, labels, 0)                           # insert the labels as the first row
matrix = np.insert(matrix, 0, np.insert(labels, 0, 0), 1)          # prepend a 0 to the labels, then insert them as the first column
np.savetxt(matrix_fn, matrix, fmt='%1.0f', delimiter=',')
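As far as I know, newer releases of skll no longer ship a standalone kappa function, so if that import fails, scikit-learn's cohen_kappa_score computes the same unweighted Cohen's kappa from the two label lists used above:

from sklearn.metrics import cohen_kappa_score

print('Kappa: ', cohen_kappa_score(classes, sample))   # unweighted Cohen's kappa, reference vs. predicted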