用Python计算栅格数据的真实面积

用Python计算栅格数据的真实面积

在地理空间分析中,栅格数据的像素值通常代表某种属性,比如土地利用比例、植被覆盖率等。这些数据往往基于经纬度网格表示的比例值,而为了更直观地理解这些数据的空间意义,我们需要将这些比例值转化为实际面积(如平方米或公顷)。

对于高分辨率的大尺寸栅格文件(GeoTIFF),一次性加载整个文件会耗费大量内存,甚至导致处理失败。为了解决这个问题,我们可以通过分块处理 的方式,还可以使用多进程加快处理速度,计算像素的真实面积,确保即使是大文件也能顺利处理。

思路:先将输入tif转换为shp,然后计算shp每个格子(对应输入tif的每个像元面积),最后将shp转换为tif.

一. 应用场景

该方法尤其适用于以下场景:

1. 比例值栅格数据:

当栅格像素值表示某个区域的比例(例如森林覆盖率)时,通过面积计算,可以得出真实的覆盖面积。

2. 土地利用分析:

计算农田、草地、建筑用地等各类土地利用类型的真实面积。

3. 遥感影像处理:

在遥感影像中将像素值的属性映射到真实的地理空间。

二.代码实现

import rasterio
from rasterio.windows import Window
import geopandas as gpd
import numpy as np
from joblib import Parallel, delayed


def process_block(data, window, transform, crs, target_crs, unit_factor):
    """
    Process a single raster block for area calculation.

    Parameters:
        data (numpy array): Raster data block.
        window (rasterio.windows.Window): Window corresponding to the data block.
        transform (Affine): Affine transformation matrix for the data block.
        crs (CRS): Original CRS of the raster.
        target_crs (CRS): Target projection CRS for area calculation.
        unit_factor (float): Conversion factor for the desired area unit.

    Returns:
        numpy array: Area calculation results for the block.
    """
    rows, cols = data.shape
    polygons = []
    for row in range(rows):
        for col in range(cols):
            x_min, y_min = transform * (col, row)
            x_max, y_max = transform * (col + 1, row + 1)
            polygon = {
                'properties': {},
                'geometry': {
                    'type': 'Polygon',
                    'coordinates': [[
                        [x_min, y_min],
                        [x_min, y_max],
                        [x_max, y_max],
                        [x_max, y_min],
                        [x_min, y_min]
                    ]]
                }
            }
            polygons.append(polygon)

    gdf = gpd.GeoDataFrame.from_features(polygons, crs=crs)

    if str(crs) != target_crs:
        gdf = gdf.to_crs(target_crs)

    gdf['area'] = gdf['geometry'].area * unit_factor
    return np.array(gdf['area']).reshape(rows, cols)


def calculate_pixel_area(input_tif, target_crs='EPSG:3577', area_unit='m', block_size=None, num_jobs=None):
    """
    Calculate the area for each raster pixel and save the output as a raster file.

    Parameters:
        input_tif (str): Input raster file path.
        target_crs (str): Target projection CRS for area calculation (default: 'EPSG:3577').
        area_unit (str): Desired area unit ('m' for square meters, 'hm' for hectares, 'km' for square kilometers).
        block_size (int or None): Block size for processing (e.g., 512). If None, processes the entire raster at once.
        num_jobs (int or None): Number of parallel jobs for processing. Effective only when block_size is specified.
    """
    output_tif = input_tif.split('.')[0] + '_area.tif'
    unit_factors = {
        'm': 1,           # Square meters
        'hm': 1 / 10000,  # Hectares
        'km': 1 / 1e6     # Square kilometers
    }

    if area_unit not in unit_factors:
        raise ValueError("area_unit must be one of 'm', 'hm', or 'km'.")

    with rasterio.open(input_tif) as src:
        transform = src.transform
        crs = src.crs
        nodata = src.nodata if src.nodata is not None else 0
        dtype = 'float32'

        if block_size is None:
            # Process the entire raster without blocking
            data = src.read(1)
            area_array = process_block(data, None, transform, crs, target_crs, unit_factors[area_unit])
        else:
            # Process in blocks
            height, width = src.height, src.width
            windows = []
            for row_off in range(0, height, block_size):
                for col_off in range(0, width, block_size):
                    row_count = min(block_size, height - row_off)
                    col_count = min(block_size, width - col_off)
                    window = Window(col_off, row_off, col_count, row_count)
                    windows.append(window)

            if num_jobs is None:
                # Single-threaded block processing
                results = [
                    process_block(
                        src.read(1, window=window),
                        window,
                        src.window_transform(window),
                        crs,
                        target_crs,
                        unit_factors[area_unit]
                    ) for window in windows
                ]
            else:
                # Multi-threaded block processing
                results = Parallel(n_jobs=num_jobs)(
                    delayed(process_block)(
                        src.read(1, window=window),
                        window,
                        src.window_transform(window),
                        crs,
                        target_crs,
                        unit_factors[area_unit]
                    ) for window in windows
                )

            # Combine block results into a full array
            area_array = np.zeros((height, width), dtype=dtype)
            for result, window in zip(results, windows):
                row_off, col_off = window.row_off, window.col_off
                row_count, col_count = result.shape
                area_array[row_off:row_off + row_count, col_off:col_off + col_count] = result

        # Save the area raster file
        with rasterio.open(
                output_tif,
                'w',
                driver='GTiff',
                height=src.height,
                width=src.width,
                count=1,
                dtype=dtype,
                crs=src.crs,  # Preserve the original CRS
                transform=transform,
                nodata=nodata
        ) as dst:
            dst.write(area_array, 1)
        print(f"Area raster file saved to: {output_tif}, unit: {area_unit}²")


if __name__ == '__main__':
    # Input raster file path
    input_tif = 'rasters/Acrobates_pygmaeus.tif'

    # Call the function
    calculate_pixel_area(
        input_tif=input_tif,
        target_crs='EPSG:3577',
        area_unit='km',
        block_size=512,   # Block size in pixels. None for no blocking.
        num_jobs=4        # Number of parallel jobs, effective only when block_size is specified.
    )

三、注意事项

! 必须根据研究区选择对应的坐标系用于计算,实例中使用的坐标系用于计算澳大利亚面积。
! 不分块和使用多进程就无需输入block_size和num_jobs。

posted @ 2024-11-11 12:04  skypanxh  阅读(77)  评论(0编辑  收藏  举报