GDAL笔记--chapter11
本章主要介绍了利用numpy和scipy库做地图运算,并讲述了局部分析、焦点分析、区域分析和全局分析几种不同的方法。
1.保存为新栅格
from osgeo import gdal
import numpy as np


def make_raster(in_ds, fn, data, data_type, nodata=None):
    """Write ``data`` to a new single-band GeoTIFF modelled on ``in_ds``.

    Args:
        in_ds: Open dataset whose size, projection and geotransform are
            copied to the new raster.
        fn: Path of the GeoTIFF to create.
        data: Array written to band 1 of the new raster.
        data_type: GDAL data type constant for the new band.
        nodata: Optional NoData value; left unset when ``None``.

    Returns:
        The newly created dataset.
    """
    driver = gdal.GetDriverByName('GTiff')
    out_ds = driver.Create(
        fn, in_ds.RasterXSize, in_ds.RasterYSize, 1, data_type)
    out_ds.SetProjection(in_ds.GetProjection())
    out_ds.SetGeoTransform(in_ds.GetGeoTransform())

    out_band = out_ds.GetRasterBand(1)
    if nodata is not None:
        out_band.SetNoDataValue(nodata)
    out_band.WriteArray(data)
    out_band.FlushCache()
    out_band.ComputeStatistics(False)
    return out_ds
2.局部分析(多栅格运算)
import os

import numpy as np
from osgeo import gdal

import ospybook as pb


def ndvi_replace_invalid(nir, red, out_band):
    """Approach 1: divide everywhere, then replace NaN/Inf cells with -99.

    The result is written to ``out_band`` and -99 is registered as the
    band's NoData value. Returns the NDVI array.
    """
    ndvi = (nir - red) / (nir + red)
    ndvi = np.where(np.isnan(ndvi), -99, ndvi)
    ndvi = np.where(np.isinf(ndvi), -99, ndvi)
    out_band.WriteArray(ndvi)
    out_band.SetNoDataValue(-99)  # -99 must be flagged as NoData
    return ndvi


def ndvi_where(nir, red):
    """Approach 2: keep the ratio only where the denominator is positive."""
    return np.where(nir + red > 0, (nir - red) / (nir + red), -99)


# Approach 3 (full script): compute NDVI for a NAIP image with a masked array.
os.chdir('')
in_fn = ''
out_fn = 'ndvi.tif'

ds = gdal.Open(in_fn)
# Convert to float so the division below is a floating-point division.
# (np.float was removed from NumPy; np.float64 is the same type.)
red = ds.GetRasterBand(1).ReadAsArray().astype(np.float64)
nir = ds.GetRasterBand(4).ReadAsArray()

# Mask the red band wherever nir + red == 0 so those cells are skipped.
red = np.ma.masked_where(nir + red == 0, red)
ndvi = (nir - red) / (nir + red)
ndvi = ndvi.filled(-99)  # fill the cells that were skipped

out_ds = pb.make_raster(ds, out_fn, ndvi, gdal.GDT_Float32, -99)
overviews = pb.compute_overview_levels(out_ds.GetRasterBand(1))
out_ds.BuildOverviews('average', overviews)
del ds, out_ds

# Masking, variant 2: build one stand-alone mask that can be applied to
# several arrays.
mask = np.ma.equal(nir + red, 0)
red = np.ma.masked_array(red, mask)  # mask the red band in the target cells
3.从数组得到任意窗口大小的切片数组(这里的切片是连续读取的,没有步长;与带步长的切片方法不同)
import numpy as np


def make_slices(data, win_size):
    """Return the shifted views of ``data`` that make up a moving window.

    Args:
        data: 2-D array to slice.
        win_size: ``(rows, cols)`` size of the moving window.

    Returns:
        A list of ``rows * cols`` slices, in row-major window order. Each
        slice has shape ``(data.shape[0] - rows + 1,
        data.shape[1] - cols + 1)``.

    Raises:
        ValueError: If the window is empty or larger than ``data``.
    """
    win_rows, win_cols = win_size[0], win_size[1]
    if win_rows < 1 or win_cols < 1:
        raise ValueError('win_size must be at least 1 in each dimension')
    if win_rows > data.shape[0] or win_cols > data.shape[1]:
        # Without this check the slice bounds go negative and the slices
        # no longer share one shape.
        raise ValueError('win_size must not be larger than data')

    rows = data.shape[0] - win_rows + 1
    cols = data.shape[1] - win_cols + 1
    return [
        data[i:rows + i, j:cols + j]
        for i in range(win_rows)
        for j in range(win_cols)
    ]


if __name__ == '__main__':
    # Usage example: 3x3 moving-window mean.
    indata = np.arange(25, dtype=np.float32).reshape(5, 5)
    slices = make_slices(indata, (3, 3))
    # Stack along the third axis to get a 3-D array to average over.
    stacked = np.dstack(slices)
    outdata = np.zeros(indata.shape, np.float32)
    # The slices are smaller than the input: one row/column is lost on
    # every side for a 3x3 window.
    outdata[1:-1, 1:-1] = np.mean(stacked, 2)
4.焦点分析(使用围绕的像素计算,先计算切片,再运算)
4.1 平滑高程数据集
# Smooth an elevation dataset with a 3x3 moving-window mean.
import os

import numpy as np
from osgeo import gdal

import ospybook as pb

in_fn = ''
out_fn = ''

in_ds = gdal.Open(in_fn)
in_band = in_ds.GetRasterBand(1)
in_data = in_band.ReadAsArray()

slices = pb.make_slices(in_data, (3, 3))  # 3x3 window slices
stacked_data = np.ma.dstack(slices)       # stack along the third axis

rows, cols = in_band.YSize, in_band.XSize
# Start from NoData everywhere so the edge cells, which are not covered
# by the slices, stay NoData.
out_data = np.full((rows, cols), -99, np.int32)
out_data[1:-1, 1:-1] = np.mean(stacked_data, 2)  # mean over the stack

pb.make_raster(in_ds, out_fn, out_data, gdal.GDT_Int32, -99)
del in_ds
4.2 坡度计算
# Compute slope from a DEM. The slices are not stacked into a 3-D array
# here because the slope equations reference individual slices.
import os

import numpy as np
from osgeo import gdal

import ospybook as pb

in_fn = ''
out_fn = ''

in_ds = gdal.Open(in_fn)
cell_width = in_ds.GetGeoTransform()[1]   # pixel size in x
cell_height = in_ds.GetGeoTransform()[5]  # pixel size in y
band = in_ds.GetRasterBand(1)
# np.float was removed from NumPy; np.float64 is the same type.
in_data = band.ReadAsArray().astype(np.float64)

out_data = np.full((band.YSize, band.XSize), -99.0)  # initialise to NoData
slices = pb.make_slices(in_data, (3, 3))

# Partial derivatives over the 3x3 window (slices are numbered 0-8 in
# row-major order): bottom row minus top row, right column minus left.
rise = ((slices[6] + 2 * slices[7] + slices[8]) -
        (slices[0] + 2 * slices[1] + slices[2])) / (8 * cell_height)
run = ((slices[2] + 2 * slices[5] + slices[8]) -
       (slices[0] + 2 * slices[3] + slices[6])) / (8 * cell_width)

dist = np.sqrt(np.square(rise) + np.square(run))
out_data[1:-1, 1:-1] = np.arctan(dist) * 180 / np.pi  # radians -> degrees

pb.make_raster(in_ds, out_fn, out_data, gdal.GDT_Float32, -99)
del in_ds
5.scipy具有傅里叶变换、插值、图像处理等功能,可用于焦点分析
import os

import numpy as np
import scipy.ndimage
from osgeo import gdal

import ospybook as pb


# --- Smoothing with SciPy's uniform filter ---------------------------------
in_fn = ''
out_fn = ''

in_ds = gdal.Open(in_fn)
in_data = in_ds.GetRasterBand(1).ReadAsArray()
# size=3 is the moving-window size; mode='nearest' pads the edges with the
# nearest pixel value. The function is called from scipy.ndimage directly
# because the scipy.ndimage.filters namespace is deprecated.
out_data = scipy.ndimage.uniform_filter(in_data, size=3, mode='nearest')
pb.make_raster(in_ds, out_fn, out_data, gdal.GDT_Int32)
del in_ds


# --- Slope with a custom SciPy filter --------------------------------------
in_fn = ''
out_fn = ''


def slope(data, cell_width, cell_height):
    """Return the slope in degrees for one 3x3 window.

    Args:
        data: The 9 window values, indexed 0-8.
        cell_width: Pixel size in x.
        cell_height: Pixel size in y.
    """
    rise = ((data[6] + 2 * data[7] + data[8]) -
            (data[0] + 2 * data[1] + data[2])) / (8 * cell_height)
    run = ((data[2] + 2 * data[5] + data[8]) -
           (data[0] + 2 * data[3] + data[6])) / (8 * cell_width)
    dist = np.sqrt(np.square(rise) + np.square(run))
    return np.arctan(dist) * 180 / np.pi


in_ds = gdal.Open(in_fn)
in_band = in_ds.GetRasterBand(1)
in_data = in_band.ReadAsArray().astype(np.float32)
cell_width = in_ds.GetGeoTransform()[1]
cell_height = in_ds.GetGeoTransform()[5]

# generic_filter calls slope() once per pixel with that pixel's window.
out_data = scipy.ndimage.generic_filter(
    in_data, slope, size=3, mode='nearest',
    extra_arguments=(cell_width, cell_height))
pb.make_raster(in_ds, out_fn, out_data, gdal.GDT_Float32)
del in_ds
6.分块焦点分析(内存不足时,把图像拆成相互重叠的块分别处理)
# Focal analysis in chunks: when the image does not fit in memory, process
# it as overlapping blocks of rows.
import os

import numpy as np
from osgeo import gdal

import ospybook as pb

in_fn = ''
out_fn = ''

in_ds = gdal.Open(in_fn)
in_band = in_ds.GetRasterBand(1)
xsize = in_band.XSize
ysize = in_band.YSize

driver = gdal.GetDriverByName('GTiff')
out_ds = driver.Create(out_fn, xsize, ysize, 1, gdal.GDT_Int32)
out_ds.SetProjection(in_ds.GetProjection())
out_ds.SetGeoTransform(in_ds.GetGeoTransform())
out_band = out_ds.GetRasterBand(1)
out_band.SetNoDataValue(-99)

n = 100  # rows per chunk, not counting the overlap rows
for i in range(0, ysize, n):
    # Start one row early (except for the first chunk) so the 3x3 window
    # has the row above the chunk available.
    yoff = max(0, i - 1)
    # Read up to n + 2 rows (chunk plus overlap), but never past the end
    # of the raster. Counting from yoff, not i, keeps the final raster row
    # in the last chunk.
    rows = min(n + 2, ysize - yoff)
    in_data = in_band.ReadAsArray(0, yoff, xsize, rows)

    out_data = np.full(in_data.shape, -99, np.int32)
    if rows >= 3 and xsize >= 3:
        # A 3x3 window needs at least 3 rows and 3 columns; smaller
        # chunks are left as NoData.
        slices = pb.make_slices(in_data, (3, 3))
        stacked_data = np.ma.dstack(slices)
        out_data[1:-1, 1:-1] = np.mean(stacked_data, 2)

    if yoff == 0:
        # First chunk: write everything.
        out_band.WriteArray(out_data)
    else:
        # Later chunks: skip the first (overlap) row so the last valid row
        # of the previous chunk is not overwritten, and write from row
        # yoff + 1.
        out_band.WriteArray(out_data[1:], 0, yoff + 1)

out_band.FlushCache()
out_band.ComputeStatistics(False)
del out_ds, in_ds
7. 区域分析(histogram2d)
# Zonal analysis with numpy.histogram2d.
import numpy as np
import scipy.stats
from osgeo import gdal

landcover_fn = r'E:\桌面文件保存路径\gdal\osgeopy-data\osgeopy-data\Utah\landcover60.tif'
ecoregion_fn = r'E:\桌面文件保存路径\gdal\osgeopy-data\osgeopy-data\Utah\utah_ecoIII60.tif'
out_fn = 'histogram.csv'


def get_bins(data):
    """Return bin edges for ``data``: its unique values plus ``max + 1``."""
    bins = np.unique(data)  # keep each value once
    # .item() converts to a Python scalar so adding 1 cannot overflow a
    # narrow integer dtype (e.g. 255 + 1 on uint8 data).
    return np.append(bins, max(bins).item() + 1)


# Load both rasters before building the histogram.
eco_ds = gdal.Open(ecoregion_fn)
zones = eco_ds.GetRasterBand(1).ReadAsArray()

lc_ds = gdal.Open(landcover_fn)
lc_band = lc_ds.GetRasterBand(1)
landcover = lc_band.ReadAsArray()

# Histogram rows (zones) correspond to the first array passed in,
# histogram columns (land cover) to the second.
hist, zone_bins, landcover_bins = np.histogram2d(
    zones.flatten(), landcover.flatten(),
    [get_bins(zones), get_bins(landcover)])

# Inspect the land-cover bins, dropping NaN entries before appending the
# closing edge.
lc_data = landcover.flatten()
bins = np.unique(lc_data)
print(bins)
print(np.append(bins[~np.isnan(bins)], max(bins).item() + 1))
8.利用scipy做区域分析(scipy.stats.binned_statistic_2d)
# Zonal analysis with SciPy.
import numpy as np
import scipy.stats
from osgeo import gdal


def get_bins(data):
    """Return bin edges for ``data``: its unique values plus ``max + 1``."""
    bins = np.unique(data)
    # .item() converts to a Python scalar so adding 1 cannot overflow a
    # narrow integer dtype (e.g. 255 + 1 on uint8 data).
    return np.append(bins, max(bins).item() + 1)


landcover_fn = r'E:\桌面文件保存路径\gdal\osgeopy-data\osgeopy-data\Utah\landcover60.tif'
ecoregion_fn = r'E:\桌面文件保存路径\gdal\osgeopy-data\osgeopy-data\Utah\utah_ecoIII60.tif'
out_fn = 'histogram.csv'

eco_ds = gdal.Open(ecoregion_fn)
eco_band = eco_ds.GetRasterBand(1)
eco_data = eco_band.ReadAsArray().flatten()
eco_bins = get_bins(eco_data)  # bin edges for the ecoregions

lc_ds = gdal.Open(landcover_fn)
lc_band = lc_ds.GetRasterBand(1)
lc_data = lc_band.ReadAsArray().flatten()
lc_bins = get_bins(lc_data)

# Inputs: the two datasets, a third array to compute the statistic on, the
# statistic ('count') and the bin edges. Outputs: the histogram, both sets
# of bin edges, and the bin number each sample fell into.
# Rows follow the first array and columns the second; the statistic is
# computed on the third. For example, passing elevation data and 'mean' as
# the third and fourth arguments would give the mean elevation for each
# ecoregion / land-cover combination.
hist, eco_bins2, lc_bins2, bn = scipy.stats.binned_statistic_2d(
    eco_data, lc_data, lc_data, 'count', [eco_bins, lc_bins])

print(eco_bins2)  # row bin edges
print(lc_bins2)   # column bin edges
print(max(bn))    # highest bin number any sample fell into

# Label the table: land-cover values as the first row, ecoregion values as
# the first column (with a 0 in the corner cell).
hist = np.insert(hist, 0, lc_bins[:-1], 0)
row_labels = np.insert(eco_bins[:-1], 0, 0)
hist = np.insert(hist, 0, row_labels, 1)

# '%1.0f': at least one digit, no decimals, float formatting.
np.savetxt(out_fn, hist, fmt='%1.0f', delimiter=',')


# To get only the most common land-cover type per ecoregion (no counts):
def my_mode(data):
    """Return the first element of ``scipy.stats.mode(data)`` (the mode)."""
    return scipy.stats.mode(data)[0]


# Bin by ecoregion only and compute the land-cover mode in each bin.
mode, bins, bn = scipy.stats.binned_statistic(
    eco_data, lc_data, my_mode, eco_bins)
print(mode)  # statistic per ecoregion bin
print(bins)  # ecoregion bin edges
print(bn)    # bin number of each sample
9. 全局分析(用到了gdal.RasterizeLayer和gdal.ComputeProximity)
# Global analysis: proximity (distance-to-roads) inside a wilderness area.
import os
import sys

from osgeo import gdal, ogr

folder = ''             # folder holding the shapefiles
road_ln = ''            # roads layer name
wilderness_ln = ''      # wilderness layer name
road_raster_fn = ''     # output roads raster
proximity_fn = 'proximity.tif'  # output proximity raster
cell_size = 10

shp_ds = ogr.Open(folder)

# Select the wilderness area of interest with an attribute filter.
wild_lyr = shp_ds.GetLayerByName(wilderness_ln)
# NOTE(review): confirm the exact spelling/spacing of the WILD_NM value
# against the data; the text is kept exactly as written in the notes.
wild_lyr.SetAttributeFilter("WILD_NM ='Frank Church -RONR'")

# Envelope of every selected feature -> overall extent of the wilderness.
# GetEnvelope() returns (minx, maxx, miny, maxy).
envelopes = [row.geometry().GetEnvelope() for row in wild_lyr]
coords = list(zip(*envelopes))
minx, maxx = min(coords[0]), max(coords[1])
miny, maxy = min(coords[2]), max(coords[3])

# Restrict the roads layer to that extent with a spatial filter.
road_lyr = shp_ds.GetLayerByName(road_ln)
road_lyr.SetSpatialFilterRect(minx, miny, maxx, maxy)

os.chdir(folder)
tif_driver = gdal.GetDriverByName('GTiff')

# Raster dimensions covering the extent at the chosen cell size.
cols = int((maxx - minx) / cell_size)
rows = int((maxy - miny) / cell_size)

# Roads raster: same spatial reference as the roads layer (exported as WKT,
# which is what SetProjection expects).
road_ds = tif_driver.Create(road_raster_fn, cols, rows)
road_ds.SetProjection(road_lyr.GetSpatialRef().ExportToWkt())
# SetGeoTransform takes ONE sequence of six coefficients.
road_ds.SetGeoTransform((minx, cell_size, 0, maxy, 0, -cell_size))

# Burn roads into band 1 with value 1; [1] is the list of band indexes and
# burn_values the list of values written where features exist.
gdal.RasterizeLayer(
    road_ds, [1], road_lyr, burn_values=[1], callback=gdal.TermProgress)

# Proximity raster on the same grid.
prox_ds = tif_driver.Create(proximity_fn, cols, rows, 1, gdal.GDT_Int32)
prox_ds.SetProjection(road_ds.GetProjection())
prox_ds.SetGeoTransform(road_ds.GetGeoTransform())
# DISTUNITS=GEO reports distances in georeferenced units instead of the
# default pixels.
gdal.ComputeProximity(
    road_ds.GetRasterBand(1), prox_ds.GetRasterBand(1),
    ['DISTUNITS=GEO'], gdal.TermProgress)

# Only statistics inside the wilderness are needed, so rasterize it into an
# in-memory (MEM driver) dataset.
wild_ds = gdal.GetDriverByName('MEM').Create('tmp', cols, rows)
wild_ds.SetProjection(prox_ds.GetProjection())
wild_ds.SetGeoTransform(prox_ds.GetGeoTransform())
gdal.RasterizeLayer(
    wild_ds, [1], wild_lyr, burn_values=[1], callback=gdal.TermProgress)

wild_data = wild_ds.ReadAsArray()
prox_data = prox_ds.ReadAsArray()

# Everything outside the wilderness becomes NoData (-99).
prox_data[wild_data == 0] = -99
prox_ds.GetRasterBand(1).WriteArray(prox_data)
prox_ds.GetRasterBand(1).SetNoDataValue(-99)
prox_ds.FlushCache()

# False -> compute exact (not approximate) statistics.
stats = prox_ds.GetRasterBand(1).ComputeStatistics(False, gdal.TermProgress)
print('Mean distance from roads is', stats[2])

del prox_ds, road_ds, shp_ds
10. 重采样制作分步切片以及获取新像素偏移的坐标
# Resampling by slicing and repeating.
import numpy as np

data = np.reshape(np.arange(24), (4, 6))
data[::2, ::2]                          # stepped slice: larger pixels
np.repeat(data, 2, 1)                   # repeat columns twice: smaller pixels
np.repeat(np.repeat(data, 2, 0), 2, 1)  # repeat rows, then columns


# To resample to pixels four times the original area, average each group of
# four pixels. Unlike moving-window slices, these slices are much smaller
# than the input and have the same size as the output array.
def make_resample_slices(data, win_size):
    """Return stepped slices of ``data`` for block resampling.

    Args:
        data: 2-D array to resample.
        win_size: ``(rows, cols)`` number of input pixels per output pixel.

    Returns:
        A list of ``rows * cols`` slices in row-major window order. Slice
        ``(i, j)`` holds element ``(i, j)`` of every complete block, so all
        slices share the shape of the output array.
    """
    # The array size may not be an exact multiple of the window; trim to
    # the largest multiple so every slice has the same shape.
    row = data.shape[0] // win_size[0] * win_size[0]
    col = data.shape[1] // win_size[1] * win_size[1]
    return [
        data[i:row:win_size[0], j:col:win_size[1]]
        for i in range(win_size[0])
        for j in range(win_size[1])
    ]
import numpy as np


# When the new pixel size is a non-integer multiple of the original, slice
# based resampling shifts the pixel centres. Instead, compute where each
# new pixel centre falls in the old raster, in row/column (image)
# coordinates.
def get_indices(source_ds, target_width, target_height):
    """Return fractional source column/row offsets of the target pixels.

    Args:
        source_ds: Dataset providing ``GetGeoTransform()``, ``RasterXSize``
            and ``RasterYSize``.
        target_width: New pixel width; same sign as the source pixel width.
        target_height: New pixel height; same sign as the source pixel
            height.

    Returns:
        The ``np.meshgrid`` pair ``(x, y)`` of column and row offsets of the
        target pixel centres, measured in source pixels.

    Raises:
        ValueError: If a target size is zero or its sign differs from the
            source pixel size, which would otherwise give empty grids.
    """
    source_geotransform = source_ds.GetGeoTransform()
    source_width = source_geotransform[1]   # pixel size in x
    source_height = source_geotransform[5]  # pixel size in y
    dx = target_width / source_width        # scale factor in x
    dy = target_height / source_height      # scale factor in y
    if dx <= 0 or dy <= 0:
        raise ValueError(
            'target pixel size must be non-zero and have the same sign '
            'as the source pixel size')
    target_x = np.arange(dx / 2, source_ds.RasterXSize, dx)
    target_y = np.arange(dy / 2, source_ds.RasterYSize, dy)
    return np.meshgrid(target_x, target_y)


if __name__ == '__main__':
    from osgeo import gdal

    fn = ''  # path of the raster to resample
    ds = gdal.Open(fn)
    data = ds.ReadAsArray()
    x, y = get_indices(ds, 25, -25)
    # Truncating the offsets to integers gives nearest-neighbour sampling.
    new_data = data[y.astype(int), x.astype(int)]
11. 双线性插值以及重采样
import numpy as np


# Besides nearest neighbour there are bilinear and cubic-convolution
# resampling. Bilinear: locate the four source pixels around each new
# coordinate and combine them with distance-based weights.
def bilinear(in_data, x, y):
    """Bilinearly interpolate ``in_data`` at fractional offsets.

    Args:
        in_data: 2-D source array, indexed ``[row, col]``.
        x: Column offsets in source pixels (pixel edges, so the centre of
            the first pixel is 0.5).
        y: Row offsets in source pixels, same convention as ``x``.

    Returns:
        Array of interpolated values with the shape of ``x``/``y``.

    The inputs are not modified. Neighbours that fall outside the array are
    replaced by the nearest edge pixel instead of wrapping around or
    raising ``IndexError``.
    """
    # Shift by half a pixel so integer offsets land on pixel centres. A new
    # array is created instead of ``x -= 0.5`` so the caller's arrays stay
    # untouched.
    x = np.asarray(x, dtype=float) - 0.5
    y = np.asarray(y, dtype=float) - 0.5

    # The four surrounding pixels.
    x0 = np.floor(x).astype(int)
    x1 = x0 + 1
    y0 = np.floor(y).astype(int)
    y1 = y0 + 1

    # Clamp only the indexes used for lookup; the weights below still use
    # the unclamped neighbours so they always sum to 1.
    max_row = in_data.shape[0] - 1
    max_col = in_data.shape[1] - 1
    c0 = np.clip(x0, 0, max_col)
    c1 = np.clip(x1, 0, max_col)
    r0 = np.clip(y0, 0, max_row)
    r1 = np.clip(y1, 0, max_row)

    # Each pixel is weighted by the distance to the opposite neighbour in
    # both directions.
    ul = in_data[r0, c0] * (y1 - y) * (x1 - x)
    ur = in_data[r0, c1] * (y1 - y) * (x - x0)
    ll = in_data[r1, c0] * (y - y0) * (x1 - x)
    lr = in_data[r1, c1] * (y - y0) * (x - x0)
    return ul + ur + ll + lr  # the weighted sum is the pixel value


if __name__ == '__main__':
    # Resample a raster with bilinear interpolation. Needs get_indices()
    # from the previous section.
    from osgeo import gdal

    in_fn = ''
    out_fn = ''
    cell_size = (0.02, -0.02)  # new pixel size

    in_ds = gdal.Open(in_fn)
    x, y = get_indices(in_ds, *cell_size)          # offsets of the new pixels
    outdata = bilinear(in_ds.ReadAsArray(), x, y)  # resample

    driver = gdal.GetDriverByName('GTiff')
    rows, cols = outdata.shape                     # new raster dimensions
    out_ds = driver.Create(out_fn, cols, rows, 1, gdal.GDT_Int32)
    out_ds.SetProjection(in_ds.GetProjection())

    gt = list(in_ds.GetGeoTransform())  # tuple -> list so it can be edited
    gt[1] = cell_size[0]                # new pixel size in the geotransform
    gt[5] = cell_size[1]
    out_ds.SetGeoTransform(gt)

    out_band = out_ds.GetRasterBand(1)
    out_band.WriteArray(outdata)        # write the resampled data
    out_band.FlushCache()
    out_band.ComputeStatistics(False)
    del out_ds                          # release the dataset so it is closed

    # scipy.ndimage offers further interpolation methods.
12. GDAL warp及python调用
# Resampling with the GDAL command-line tool:
#   gdalwarp -tr 0.02 0.02 -r bilinear first.tif final.tif

# Calling the same command from Python. Passing the arguments as a list
# avoids going through a shell.
import subprocess

result = subprocess.call([
    'gdalwarp', '-tr', '0.02', '0.02', '-r', 'bilinear',
    'first.tif', 'final.tif',
])
总结:
1. 局部分析: 计算像素到像素的基础工作,NDVI
2. 焦点分析: 使用环绕像素计算输出值的移动窗口,如斜率
3. 区域分析: 处理同一区域的像素
4. 全局分析: 邻近度分析等,涉及整个数据集