python (1)txt数据批量归一化处理(第一行为时间戳,第二行为数据)

批量处理雨滴谱数据进行归一化,数据格式为一个txt有多个时间戳,一个时间戳的下一行有一个数值,以此类推

公式: x' = (x − x_min) / (x_max − x_min)  (min-max 归一化, 其中 x_min / x_max 为所有文件的全局最小/最大值)

#!usr/bin/env python
# -*- coding:utf-8 -*-
"""
@author: Suyue
@file: KAN_N.py
@time: 2025/04/02
@desc:雨滴谱数据归一化处理
"""
import os
import numpy as np


def _read_raindrop_points(filepath):
    """Parse one raindrop-spectrum txt file into (timestamp, value) pairs.

    The file layout is assumed to be repeating 3-line groups:
    timestamp line, number-concentration line, blank separator line
    — TODO confirm against actual data files (the intro prose only
    mentions timestamp + value).

    Malformed groups (non-numeric value, truncated tail) are skipped.

    Parameters:
        filepath: path of one .txt file

    Returns:
        list of (timestamp_str, concentration_float) tuples
    """
    # Explicit encoding: the data lives under a non-ASCII path and may
    # contain non-ASCII text; relying on the locale default is fragile.
    with open(filepath, 'r', encoding='utf-8') as fh:
        lines = fh.readlines()

    points = []
    for i in range(0, len(lines), 3):
        if i + 1 >= len(lines):
            break  # trailing timestamp without a value line
        try:
            value = float(lines[i + 1].strip())
        except ValueError:
            continue  # skip groups whose value line is not numeric
        points.append((lines[i].strip(), value))
    return points


def process_raindrop_files(folder_path):
    """Min-max normalize raindrop-spectrum concentrations across a folder.

    All .txt files in *folder_path* are parsed, and each concentration is
    normalized with the GLOBAL min/max taken over every file, so values
    from different files remain directly comparable.

    Parameters:
        folder_path: directory containing the raindrop-spectrum .txt files

    Returns:
        dict mapping filename -> {
            'timestamps': list[str],
            'original_concentrations': list[float],
            'normalized_concentrations': list[float],
        }, or None when no valid concentration value was found.
    """
    # Parse each file exactly once (the original implementation read every
    # file twice: once for the global extrema, once for normalization).
    parsed = {}
    for filename in os.listdir(folder_path):
        if filename.endswith('.txt'):
            parsed[filename] = _read_raindrop_points(
                os.path.join(folder_path, filename))

    all_values = [v for points in parsed.values() for _, v in points]
    if not all_values:
        # No usable data anywhere -> signal failure to the caller.
        return None

    global_min = min(all_values)
    global_max = max(all_values)
    span = global_max - global_min  # zero when every value is identical

    all_data = {}
    for filename, points in parsed.items():
        timestamps = [t for t, _ in points]
        concentrations = [v for _, v in points]
        # Guard against division by zero: a constant series maps to 0.0,
        # matching the original behavior.
        normalized = [
            (v - global_min) / span if span else 0.0
            for v in concentrations
        ]

        all_data[filename] = {
            'timestamps': timestamps,
            'original_concentrations': concentrations,
            'normalized_concentrations': normalized,
        }

        # Echo the first few points of each file for quick visual checking.
        for i in range(min(5, len(timestamps))):
            print(f"时间: {timestamps[i]}, 原始值: {concentrations[i]:.4f}, 归一化值: {normalized[i]:.4f}")

    return all_data


# Example usage — runs at import time.
# NOTE(review): hardcoded Windows path; replace with your own folder path.
folder_path = "F:/lianxi/降水强度雨和雪"  # 替换为你的文件夹路径 (replace with your folder path)
# May be None when no valid concentration data was found in the folder.
processed_data = process_raindrop_files(folder_path)


# 如果需要保存归一化后的数据
def save_normalized_data(processed_data, output_folder):
    """Write the normalized series back out, one file per input file.

    Each output file mirrors the input layout: timestamp line, value
    line (6 decimal places), blank separator line.

    Parameters:
        processed_data: dict returned by process_raindrop_files(); may be
            None or empty, in which case nothing is written.
        output_folder: destination directory, created if missing.
    """
    if not processed_data:
        return  # nothing to save (process_raindrop_files may return None)

    # exist_ok avoids the check-then-create race of exists()+makedirs().
    os.makedirs(output_folder, exist_ok=True)

    for filename, data in processed_data.items():
        # The original line lost its placeholder during extraction; the
        # intent is clearly one output file per input file, prefixed.
        output_path = os.path.join(output_folder, f"normalized_{filename}")
        with open(output_path, 'w', encoding='utf-8') as f:
            for timestamp, norm_val in zip(data['timestamps'],
                                           data['normalized_concentrations']):
                f.write(f"{timestamp}\n{norm_val:.6f}\n\n")

# Save the normalized data (optional) — also runs at import time.
output_folder = "F:/lianxi/降水强度雨和雪归一化"
# Guard: process_raindrop_files returns None when no valid data was found;
# calling save_normalized_data(None, ...) would raise AttributeError.
if processed_data:
    save_normalized_data(processed_data, output_folder)

 

posted @ 2025-04-21 17:41  秋刀鱼CCC  Views(15)  Comments(0)    收藏  举报