用Python计算栅格数据的真实面积
用Python计算栅格数据的真实面积
在地理空间分析中,栅格数据的像素值通常代表某种属性,比如土地利用比例、植被覆盖率等。这些数据往往基于经纬度网格表示的比例值,而为了更直观地理解这些数据的空间意义,我们需要将这些比例值转化为实际面积(如平方米或公顷)。
对于高分辨率的大尺寸栅格文件(GeoTIFF),一次性加载整个文件会耗费大量内存,甚至导致处理失败。为了解决这个问题,我们可以通过分块处理 的方式,还可以使用多进程加快处理速度,计算像素的真实面积,确保即使是大文件也能顺利处理。
思路:先将输入tif转换为shp,然后计算shp每个格子(对应输入tif的每个像元面积),最后将shp转换为tif.
一. 应用场景
该方法尤其适用于以下场景:
1. 比例值栅格数据:
当栅格像素值表示某个区域的比例(例如森林覆盖率)时,通过面积计算,可以得出真实的覆盖面积。
2. 土地利用分析:
计算农田、草地、建筑用地等各类土地利用类型的真实面积。
3. 遥感影像处理:
在遥感影像中将像素值的属性映射到真实的地理空间。
二.代码实现
import rasterio
from rasterio.windows import Window
import geopandas as gpd
import numpy as np
from joblib import Parallel, delayed
def process_block(data, window, transform, crs, target_crs, unit_factor):
"""
Process a single raster block for area calculation.
Parameters:
data (numpy array): Raster data block.
window (rasterio.windows.Window): Window corresponding to the data block.
transform (Affine): Affine transformation matrix for the data block.
crs (CRS): Original CRS of the raster.
target_crs (CRS): Target projection CRS for area calculation.
unit_factor (float): Conversion factor for the desired area unit.
Returns:
numpy array: Area calculation results for the block.
"""
rows, cols = data.shape
polygons = []
for row in range(rows):
for col in range(cols):
x_min, y_min = transform * (col, row)
x_max, y_max = transform * (col + 1, row + 1)
polygon = {
'properties': {},
'geometry': {
'type': 'Polygon',
'coordinates': [[
[x_min, y_min],
[x_min, y_max],
[x_max, y_max],
[x_max, y_min],
[x_min, y_min]
]]
}
}
polygons.append(polygon)
gdf = gpd.GeoDataFrame.from_features(polygons, crs=crs)
if str(crs) != target_crs:
gdf = gdf.to_crs(target_crs)
gdf['area'] = gdf['geometry'].area * unit_factor
return np.array(gdf['area']).reshape(rows, cols)
def calculate_pixel_area(input_tif, target_crs='EPSG:3577', area_unit='m', block_size=None, num_jobs=None):
"""
Calculate the area for each raster pixel and save the output as a raster file.
Parameters:
input_tif (str): Input raster file path.
target_crs (str): Target projection CRS for area calculation (default: 'EPSG:3577').
area_unit (str): Desired area unit ('m' for square meters, 'hm' for hectares, 'km' for square kilometers).
block_size (int or None): Block size for processing (e.g., 512). If None, processes the entire raster at once.
num_jobs (int or None): Number of parallel jobs for processing. Effective only when block_size is specified.
"""
output_tif = input_tif.split('.')[0] + '_area.tif'
unit_factors = {
'm': 1, # Square meters
'hm': 1 / 10000, # Hectares
'km': 1 / 1e6 # Square kilometers
}
if area_unit not in unit_factors:
raise ValueError("area_unit must be one of 'm', 'hm', or 'km'.")
with rasterio.open(input_tif) as src:
transform = src.transform
crs = src.crs
nodata = src.nodata if src.nodata is not None else 0
dtype = 'float32'
if block_size is None:
# Process the entire raster without blocking
data = src.read(1)
area_array = process_block(data, None, transform, crs, target_crs, unit_factors[area_unit])
else:
# Process in blocks
height, width = src.height, src.width
windows = []
for row_off in range(0, height, block_size):
for col_off in range(0, width, block_size):
row_count = min(block_size, height - row_off)
col_count = min(block_size, width - col_off)
window = Window(col_off, row_off, col_count, row_count)
windows.append(window)
if num_jobs is None:
# Single-threaded block processing
results = [
process_block(
src.read(1, window=window),
window,
src.window_transform(window),
crs,
target_crs,
unit_factors[area_unit]
) for window in windows
]
else:
# Multi-threaded block processing
results = Parallel(n_jobs=num_jobs)(
delayed(process_block)(
src.read(1, window=window),
window,
src.window_transform(window),
crs,
target_crs,
unit_factors[area_unit]
) for window in windows
)
# Combine block results into a full array
area_array = np.zeros((height, width), dtype=dtype)
for result, window in zip(results, windows):
row_off, col_off = window.row_off, window.col_off
row_count, col_count = result.shape
area_array[row_off:row_off + row_count, col_off:col_off + col_count] = result
# Save the area raster file
with rasterio.open(
output_tif,
'w',
driver='GTiff',
height=src.height,
width=src.width,
count=1,
dtype=dtype,
crs=src.crs, # Preserve the original CRS
transform=transform,
nodata=nodata
) as dst:
dst.write(area_array, 1)
print(f"Area raster file saved to: {output_tif}, unit: {area_unit}²")
if __name__ == '__main__':
# Input raster file path
input_tif = 'rasters/Acrobates_pygmaeus.tif'
# Call the function
calculate_pixel_area(
input_tif=input_tif,
target_crs='EPSG:3577',
area_unit='km',
block_size=512, # Block size in pixels. None for no blocking.
num_jobs=4 # Number of parallel jobs, effective only when block_size is specified.
)
三、注意事项
! 必须根据研究区选择对应的坐标系用于计算,实例中使用的坐标系用于计算澳大利亚面积。
! 不分块和使用多进程就无需输入block_size和num_jobs。