ADCP 24 小时数据自动化处理：处理由 WinRiver 转码导出的文件

import numpy as np
import pandas as pd
import datetime
import re
import time
import glob
import copy


# Load the input CSV.  The path is relative to the working directory and the
# file name is fixed, so the input has to be named accordingly before running.
df = pd.read_csv(r'.\3.csv')

# Parse the textual 'date' column in a single vectorised call and store the
# result as 'date1'.  This replaces a per-row strptime loop that also relied
# on the frame having a default 0..n-1 index.
df['date1'] = pd.to_datetime(df['date'], format='%Y-%m-%d %H:%M:%S')

# Drop the raw date string together with the 'ens' and 'dist' columns, which
# are not read anywhere else in the script, and shorten 'value' to 'val'.
df = df.drop(columns=['date', 'ens', 'dist']).rename(columns={'value': 'val'})
# Column order of the padded per-hour profile frames assembled later on.
d1 = ['hb', 'h1', 'val', 'date']


def _hourly_clock_times(first_clock):
    """Return 24 ``datetime.time`` values spaced one hour apart.

    The series starts at *first_clock* (an ``HH:MM:SS`` string).  Only the
    time of day is kept, so entries that pass midnight wrap around to 00:xx.
    """
    origin = datetime.datetime.strptime(first_clock, "%H:%M:%S")
    return [(origin + datetime.timedelta(hours=offset)).time() for offset in range(24)]


time_list_1 = _hourly_clock_times("08:55:00")  # start of each sampling window
time_list_2 = _hourly_clock_times("09:05:09")  # end of each sampling window
time_list_3 = _hourly_clock_times("08:00:00")  # the whole hour
time_list_4 = _hourly_clock_times("08:00:09")  # nine seconds past the whole hour

# 24-slot result lists.  The string placeholders only reserve the slots; each
# list is filled by index further down the script.
list_1 = ['df' + str(slot) for slot in range(24)]
list_2 = list(list_1)
list_3 = list(list_1)
list_4 = list(list_1)
list_5 = list(list_1)

print(time_list_1)
print(time_list_2)
print(time_list_3)
def _clock_window_mask(clock, start, end):
    """Return a boolean mask of the entries of *clock* inside ``[start, end]``.

    *clock* is a Series of ``datetime.time`` values.  When *start* is later
    than *end* the window is taken to span midnight (e.g. 23:55 -> 00:05), so
    an entry matches if it is at or after *start* OR at or before *end*; a
    plain ``>= start and <= end`` test can never be true for such a window.
    """
    if start <= end:
        return (clock >= start) & (clock <= end)
    return (clock >= start) | (clock <= end)


# Time of day of every record and the 'variable == v' selector are the same
# for all 24 windows, so compute them once.
clock_times = df['date1'].dt.time
is_v_row = df['variable'] == 'v'

# list_1[k]: the 'v' rows whose time of day falls inside window k, re-indexed
# from 0 so that the first row can be addressed as .loc[0, ...] below.
for x1 in range(24):
    in_window = _clock_window_mask(clock_times, time_list_1[x1], time_list_2[x1])
    list_1[x1] = df[in_window & is_v_row].reset_index(drop=True)

# s1: all selected rows in one table, without the 'variable', 'lon' and 'lat'
# columns.  pd.concat always builds a new frame, so the column drop can no
# longer leak into list_1[0] (which happened when s1 was just another name
# for it), and it replaces DataFrame.append, which pandas 2.0 removed.
filled_windows = [frame for frame in list_1 if len(frame) > 0]
if filled_windows:
    s1 = pd.concat(filled_windows, ignore_index=True)
else:
    s1 = list_1[0].copy()
s1 = s1.drop(columns=['variable', 'lon', 'lat'])

# list_2[k]: per-depth ('hb') means of window k, plus two constant columns:
#   'h1'   - mean of column 'h' over the whole window
#   'date' - the whole hour the window is labelled with
for l1 in range(24):
    window_rows = list_1[l1]
    if len(window_rows) == 0:
        # No data in this window: hand the empty frame through unchanged.
        list_2[l1] = window_rows
        continue

    # Label the window with the whole hour nearest to its first record.
    # Each window runs from five minutes before to just after a whole hour,
    # so adding 30 minutes and truncating lands on that hour whether the
    # first record is before or after it, and rolls over to the next day
    # correctly (the former "hour + 1" string arithmetic produced "24:00"
    # for 23:xx and was one hour late for records just past the hour).
    first_stamp = window_rows.loc[0, 'date1']
    time_3 = (first_stamp + datetime.timedelta(minutes=30)).replace(
        minute=0, second=0, microsecond=0)

    # numeric_only=True keeps the averaging to the numeric columns; without
    # it pandas >= 2.0 raises on the string column 'variable'.
    c2 = window_rows.groupby('hb').mean(numeric_only=True)
    c2['h1'] = window_rows['h'].mean()
    c2['date'] = time_3
    list_2[l1] = c2


# Stack the per-window depth profiles into one long table.
# Only windows that actually hold data take part.  Using an empty first
# window as the accumulator mixed its raw columns into the result and lost
# the 'hb' index name, so the depths ended up in a column called 'index'.
profile_frames = [frame for frame in list_2 if len(frame) != 0]
if not profile_frames:
    raise ValueError('no rows fell inside any of the 24 hourly windows')

# pd.concat replaces DataFrame.append, which pandas 2.0 removed.
pt1 = pd.concat(profile_frames)

# Keep only what the later steps read: the depth index plus 'val', 'h1' and
# 'date'.
rmcols2 = ['lat', 'lon', 'h']
pt1 = pt1.drop(columns=rmcols2)

# Turn the depth index into a regular 'hb' column.  The index is expected to
# be named 'hb' already (it comes from groupby('hb')); rename_axis just makes
# the resulting column name explicit.
pt1 = pt1.rename_axis('hb').reset_index()
# A single all-NaN row with the profile column layout (d1).  It is used as a
# blank first and last row around every hourly profile; those two rows are
# filled in by the next step.
np_1 = np.full([1, 4], np.nan)
pt2 = pd.DataFrame(np_1, columns=d1)

# Time of day of every profile row, computed once for all 24 selections.
profile_clock = pt1['date'].dt.time

# list_3[k]: blank row + the profile rows labelled with whole hour k + blank
# row.  list_4[k] keeps the intermediate frame that has only the leading
# blank row.  pd.concat replaces DataFrame.append (removed in pandas 2.0);
# putting pt2 first fixes the column order to d1.
for op1 in range(24):
    hourly_profile = pt1[(profile_clock >= time_list_3[op1]) & (profile_clock <= time_list_4[op1])]
    list_4[op1] = pd.concat([pt2, hourly_profile], ignore_index=True)
    list_3[op1] = pd.concat([list_4[op1], pt2], ignore_index=True)

# Build, for every hour, a six-row summary holding the values at relative
# depths 0, 0.2, 0.4, 0.6, 0.8 and 1 (rows 0..5 of list_5[k]).
# Column positions follow d1: 0 = 'hb', 1 = 'h1', 2 = 'val', 3 = 'date'.
# Per the original comments, 'hb' is the depth of a profile bin, 'h1' the
# total water depth and 'val' the velocity.

# (summary row, relative depth, lower factor, upper factor): a profile row is
# taken for the layer when lower * h1 < hb < upper * h1.
# NOTE(review): the 0.8 layer starts at 0.79 while the others start 0.02
# below their nominal value - kept as found; confirm this is intended.
_LAYER_BANDS = (
    (1, 0.2, 0.18, 0.21),
    (2, 0.4, 0.38, 0.41),
    (3, 0.6, 0.58, 0.61),
    (4, 0.8, 0.79, 0.81),
)


def _copy_layer(summary, slot, rel_depth, profile, row):
    """Fill summary row *slot*: *rel_depth* plus h1/val/date of profile *row*."""
    summary.iat[slot, 0] = rel_depth
    for col in (1, 2, 3):
        summary.iat[slot, col] = profile.iat[row, col]


# Template summary: all NaN, with 'date' held as a datetime column (NaT) so
# that timestamps can be written into it without changing the column dtype.
ax = np.full([6, 4], np.nan)
ax1 = pd.DataFrame(ax, columns=d1)
ax1['date'] = pd.to_datetime(ax1['date'])
for i in range(24):
    list_5[i] = ax1.copy()

for i in range(24):
    profile = list_3[i]
    len_1 = len(profile)
    if len_1 == 2:
        # Only the two blank padding rows: no data for this hour.
        continue
    bottom = len_1 - 1

    # Fill the leading padding row from the first measured row:
    # depth 0, same total depth and time, velocity scaled by 1.02.
    profile.iat[0, 0] = 0
    profile.iat[0, 1] = profile.iat[1, 1]
    profile.iat[0, 2] = profile.iat[1, 2] * 1.02
    profile.iat[0, 3] = profile.iat[1, 3]

    # Fill the trailing padding row from the last measured row, with its
    # depth column set to 1.
    profile.iat[bottom, 0] = 1
    profile.iat[bottom, 1] = profile.iat[bottom - 1, 1]
    profile.iat[bottom, 2] = profile.iat[bottom - 1, 2]
    profile.iat[bottom, 3] = profile.iat[bottom - 1, 3]

    # The two padding rows are identified by position, not by their value,
    # so a measured bin whose depth happens to be exactly 0 or 1 is still
    # treated as an ordinary bin.
    _copy_layer(list_5[i], 0, 0, profile, 0)
    _copy_layer(list_5[i], 5, 1, profile, bottom)

    # Measured rows: assign each to the layer whose band contains its depth.
    # If several rows fall in one band the last one wins; a layer without a
    # match stays NaN.
    for j in range(1, bottom):
        bin_depth = profile.iat[j, 0]
        total_depth = profile.iat[j, 1]
        for slot, rel_depth, lower, upper in _LAYER_BANDS:
            if lower * total_depth < bin_depth < upper * total_depth:
                _copy_layer(list_5[i], slot, rel_depth, profile, j)
                break




# Stack the 24 hourly summaries (pd.concat replaces DataFrame.append, which
# pandas 2.0 removed), discard every row that still contains a missing value
# and renumber the remainder from 0.
srt = pd.concat(list_5)
srt = srt.dropna()
srt.reset_index(inplace=True, drop=True)

# Print one table per relative depth.  Grouping by the plain column name
# (not a one-element list) keeps the printed key a scalar on every pandas
# version; with a list, pandas >= 2.0 yields 1-tuples instead.
op2 = srt.groupby('hb')
for rel_depth, op3 in op2:
    print(rel_depth)

    print(op3)
注意：运行前需要自己先把数据中的时间改写成24小时制，输入的csv文件也需要自己命名。之后看能否用面向对象的方式重写它，并打包成exe文件。
posted @ 2020-09-08 19:02 云飞01 阅读(486) 评论(0) 编辑收藏举报
刷新页面返回顶部
云飞01

adcp24小时数据自动化处理，从winriver转码的文件中处理

公告