python (1)txt数据批量归一化处理(第一行为时间戳,第二行为数据)
批量对雨滴谱数据进行归一化处理。数据格式:每个 txt 文件包含多个时间戳,每个时间戳的下一行是对应的数值,其后以一个空行分隔(即每 3 行为一个数据点),以此类推。
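公式(min-max 归一化):$x' = \dfrac{x - x_{\min}}{x_{\max} - x_{\min}}$,其中 $x_{\min}$、$x_{\max}$ 分别为文件夹内所有文件数值的全局最小值和全局最大值;当 $x_{\max} = x_{\min}$ 时,归一化结果取 0。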
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
@author: Suyue
@file: KAN_N.py
@time: 2025/04/02
@desc: Min-max normalization of raindrop-spectrum data files.
"""
import os

import numpy as np


def _read_records(filepath):
    """Parse one data file into parallel timestamp / concentration lists.

    The file is consumed in fixed groups of three lines (timestamp, value,
    blank separator).  A group whose value line is missing or does not parse
    as a float is skipped; the three-line stride is kept regardless.
    """
    with open(filepath, 'r') as file:
        lines = file.readlines()

    timestamps = []
    concentrations = []
    # Stop at len(lines) - 1 so that lines[i + 1] always exists.
    for i in range(0, len(lines) - 1, 3):
        try:
            concentration = float(lines[i + 1].strip())
        except ValueError:
            continue
        timestamps.append(lines[i].strip())
        concentrations.append(concentration)
    return timestamps, concentrations


def process_raindrop_files(folder_path):
    """Min-max normalize the values of every ``.txt`` file in a folder.

    The minimum and maximum are computed over all files together, so the
    normalized values of different files share one common scale.

    Args:
        folder_path: Folder containing the raindrop-spectrum ``.txt`` files.

    Returns:
        A dict mapping each file name to a dict with the keys
        ``'timestamps'``, ``'original_concentrations'`` and
        ``'normalized_concentrations'`` (three parallel lists), or ``None``
        when no valid value was found in any file.

    Errors raised by ``os.listdir`` or ``open`` (for example a missing
    folder) are propagated to the caller.
    """
    # Sorted so the result order does not depend on the file system.
    file_list = sorted(f for f in os.listdir(folder_path) if f.endswith('.txt'))

    # Each file is read and parsed exactly once.
    parsed = {
        filename: _read_records(os.path.join(folder_path, filename))
        for filename in file_list
    }

    all_concentrations = [
        value for _, values in parsed.values() for value in values
    ]
    if not all_concentrations:
        return None

    global_min = min(all_concentrations)
    global_max = max(all_concentrations)
    value_range = global_max - global_min

    all_data = {}
    for filename, (timestamps, concentrations) in parsed.items():
        if global_max != global_min:
            normalized_data = [
                (value - global_min) / value_range for value in concentrations
            ]
        else:
            # All values are identical: avoid dividing by zero.
            normalized_data = [0.0] * len(concentrations)

        all_data[filename] = {
            'timestamps': timestamps,
            'original_concentrations': concentrations,
            'normalized_concentrations': normalized_data
        }

        # Print the first few data points of each file for a quick check.
        preview = zip(timestamps[:5], concentrations[:5], normalized_data[:5])
        for timestamp, original, normalized in preview:
            print(f"时间: {timestamp}, 原始值: {original:.4f}, 归一化值: {normalized:.4f}")

    return all_data


def save_normalized_data(processed_data, output_folder):
    """Write the normalized values to new files in ``output_folder``.

    One file named ``normalized_<original name>`` is written per input file,
    in the same three-line layout as the input (timestamp, value formatted
    with six decimals, blank line).

    Args:
        processed_data: Result of ``process_raindrop_files``.  ``None``
            (no valid data found) is accepted and results in no output.
        output_folder: Destination folder; created if it does not exist.
    """
    if processed_data is None:
        return

    os.makedirs(output_folder, exist_ok=True)

    for filename, data in processed_data.items():
        output_path = os.path.join(output_folder, f"normalized_{filename}")
        with open(output_path, 'w') as f:
            f.writelines(
                f"{timestamp}\n{norm_val:.6f}\n\n"
                for timestamp, norm_val in zip(
                    data['timestamps'], data['normalized_concentrations']
                )
            )


# Example usage: replace these with your own folder paths.
folder_path = "F:/lianxi/降水强度雨和雪"
output_folder = "F:/lianxi/降水强度雨和雪归一化"

if __name__ == "__main__":
    processed_data = process_raindrop_files(folder_path)
    # Saving the normalized data is optional.
    save_normalized_data(processed_data, output_folder)