# ADCP 24 小时数据自动化处理:处理由 WinRiver 转码导出的文件

import numpy as np
import pandas as pd
import datetime
import re
import time
import glob
import copy


# Load the exported table; the path is relative to the working directory.
df = pd.read_csv(r'.\3.csv')

# Re-read the textual 'date' column as real timestamps and store them in
# 'date1'.  One vectorised call replaces a Python-level strptime per row and
# does not depend on the frame's index labels.
df['date1'] = pd.to_datetime(df['date'], format='%Y-%m-%d %H:%M:%S')
df.drop('date', inplace=True, axis=1)

# Filtering below is done by date, so drop the columns that are not
# referenced again and shorten 'value' to 'val'.
rmcols = ['ens', 'dist']
df.drop(rmcols, inplace=True, axis=1)
df.rename(columns={'value': 'val'}, inplace=True)
# Column layout of the helper frames built further down; later code addresses
# these columns by position (0 = 'hb', 1 = 'h1', 2 = 'val', 3 = 'date').
d1 = ['hb', 'h1', 'val', 'date']


def _hourly_times(first, count=24):
    """Return ``count`` times of day, one hour apart, starting at ``first``.

    ``first`` is an "%H:%M:%S" string.  The results are ``datetime.time``
    objects, so entries that pass midnight wrap around to the small hours.
    """
    # Parse the literal once instead of once per hour.
    base = datetime.datetime.strptime(first, "%H:%M:%S")
    return [(base + datetime.timedelta(hours=k)).time() for k in range(count)]


time_list_1 = _hourly_times("08:55:00")  # start of each hourly sampling window
time_list_2 = _hourly_times("09:05:09")  # end of each hourly sampling window
time_list_3 = _hourly_times("08:00:00")  # the whole hours themselves
time_list_4 = _hourly_times("08:00:09")  # whole hour + 9 s, upper bound when selecting by whole hour

# One slot per hour.  The strings are placeholders only; each slot is assigned
# a DataFrame by the processing steps that follow.
list_1 = ['df' + str(k) for k in range(24)]
list_2 = ['df' + str(k) for k in range(24)]
list_3 = ['df' + str(k) for k in range(24)]
list_4 = ['df' + str(k) for k in range(24)]
list_5 = ['df' + str(k) for k in range(24)]

print(time_list_1)
print(time_list_2)
print(time_list_3)
# --- Step 1: cut the record into 24 hourly sampling windows ------------------
# list_1[k] receives the 'v' rows whose time of day lies between
# time_list_1[k] and time_list_2[k] (both inclusive).
time_of_day = df['date1'].dt.time
is_v = df['variable'] == 'v'
for x1 in range(24):
    win_start, win_end = time_list_1[x1], time_list_2[x1]
    if win_start <= win_end:
        in_window = (time_of_day >= win_start) & (time_of_day <= win_end)
    else:
        # The window straddles midnight (23:55:00 -> 00:05:09).  A plain
        # "start <= t <= end" test is never true there and used to drop that
        # hour silently, so accept either side of midnight instead.
        in_window = (time_of_day >= win_start) | (time_of_day <= win_end)
    # .copy() gives every window its own frame, so the in-place edits below
    # cannot touch df or raise chained-assignment warnings.
    list_1[x1] = df[in_window & is_v].copy()

# --- Step 2: all non-empty windows stacked into one table (s1) ---------------
# DataFrame.append no longer exists in pandas 2.0; pd.concat is the
# replacement.  It always builds a new frame, so dropping columns from s1 can
# no longer strip them from list_1[0] when only the first window has data.
non_empty = [window for window in list_1 if len(window) != 0]
s1 = pd.concat(non_empty) if non_empty else list_1[0].copy()
rmcols1 = ['variable', 'lon', 'lat']
s1.drop(rmcols1, inplace=True, axis=1)
s1.reset_index(inplace=True, drop=True)

# --- Step 3: per-window mean for every 'hb' value ----------------------------
for l1 in range(24):
    window = list_1[l1]
    if len(window) == 0:
        # Nothing recorded in this window: hand the empty frame on unchanged.
        list_2[l1] = window
        continue

    window.reset_index(inplace=True, drop=True)
    s3 = window.loc[0, 'date1']  # first timestamp of the window

    # Label the window with the whole hour that follows its first timestamp.
    # timedelta arithmetic rolls 23:xx over to 00:00 of the next day; the
    # former string-based "hour + 1" produced "24:00", which strptime rejects.
    # NOTE(review): a window whose first row is already past the hour
    # (e.g. 09:00:03) is still labelled one hour later, as before - confirm
    # whether rounding to the nearest hour is what is wanted.
    time_3 = (datetime.datetime(s3.year, s3.month, s3.day, s3.hour)
              + datetime.timedelta(hours=1))

    i1 = window['h'].mean()  # mean of 'h' over the whole window

    # numeric_only=True skips the text column 'variable'; without it
    # pandas >= 2.0 raises instead of silently ignoring that column.
    c2 = window.groupby('hb').mean(numeric_only=True)
    c2['h1'] = i1
    c2['date'] = time_3
    list_2[l1] = c2


# --- Re-index and order the grouped data -------------------------------------
# Stack the per-hour means into one table.  Only frames that actually hold
# grouped data take part: seeding the table with an empty, ungrouped frame
# (as happened whenever the first window had no rows) loses the 'hb' index
# name, so reset_index() below would no longer recreate the 'hb' column.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
hourly_means = [frame for frame in list_2 if len(frame) != 0]
if not hourly_means:
    raise ValueError("no 'v' rows fall inside any hourly sampling window")
pt1 = pd.concat(hourly_means)
rmcols2 = ['lat', 'lon', 'h']
pt1.drop(rmcols2, inplace=True, axis=1)
pt1 = pt1.reset_index()  # the group key 'hb' becomes an ordinary column again

# --- Add an empty first and last row to every hourly profile -----------------
# list_3[k]: rows labelled with whole hour k, framed by one all-NaN row at the
#            top and one at the bottom (filled in by the next step).
# list_4[k]: the same frame without the trailing NaN row.
# Columns are forced into the order of d1 because the following step reads
# them by position.
label_time = pt1['date'].dt.time
for op1 in range(24):
    on_the_hour = (label_time >= time_list_3[op1]) & (label_time <= time_list_4[op1])
    body = pt1.loc[on_the_hour, d1].reset_index(drop=True)
    count = len(body)
    # Shift the data rows to labels 1..count, then reindex to 0..count+1:
    # the two labels that have no data become the NaN/NaT padding rows and
    # every column keeps its dtype.
    body.index = range(1, count + 1)
    list_3[op1] = body.reindex(range(count + 2))
    list_4[op1] = list_3[op1].iloc[:-1].copy()

# --- Relative-depth layers ---------------------------------------------------
# For every hour build list_5[i]: six rows, one per relative depth
# (0, 0.2, 0.4, 0.6, 0.8 and 1 of the total depth), columns as in d1.
# Column positions used below: 0 = 'hb' (profile depth), 1 = 'h1' (total
# depth), 2 = 'val' (velocity), 3 = 'date' (measurement time).

# (row in list_5[i], relative depth written to 'hb', lower factor, upper factor)
# A measured row is assigned to a layer when lower*h1 < hb < upper*h1.
# NOTE(review): the 0.8 band starts at 0.79 while the others start at x.x8 -
# kept exactly as it was; confirm whether that is intentional.
_LAYER_BANDS = (
    (1, 0.2, 0.18, 0.21),
    (2, 0.4, 0.38, 0.41),
    (3, 0.6, 0.58, 0.61),
    (4, 0.8, 0.79, 0.81),
)

for i in range(24):
    profile = list_3[i]
    len_1 = len(profile['hb'])

    # Layers that never get a value stay all-NaN.  Collecting plain Python
    # rows and building the frame at the end avoids writing timestamps into a
    # float64 column cell by cell, which recent pandas versions reject.
    layers = [[np.nan] * 4 for _ in range(6)]

    if len_1 > 2:  # exactly two rows means "padding only, no data this hour"
        last = len_1 - 1

        # Surface (0 h) row: copy the first measured row, velocity * 1.02.
        profile.iat[0, 0] = 0
        profile.iat[0, 1] = profile.iat[1, 1]
        profile.iat[0, 2] = profile.iat[1, 2] * 1.02
        profile.iat[0, 3] = profile.iat[1, 3]

        # Bottom (1 h) row: copy the last measured row.
        profile.iat[last, 0] = 1
        profile.iat[last, 1] = profile.iat[last - 1, 1]
        profile.iat[last, 2] = profile.iat[last - 1, 2]
        profile.iat[last, 3] = profile.iat[last - 1, 3]

        # The two synthetic rows are taken by POSITION.  Recognising them by
        # value (hb == 0 / hb == 1) mistakes a real measurement at exactly
        # 1.0 for the bottom row and reports its velocity as the bottom one.
        layers[0] = [0, profile.iat[1, 1], profile.iat[0, 2], profile.iat[0, 3]]
        layers[5] = [1, profile.iat[last, 1], profile.iat[last, 2], profile.iat[last, 3]]

        # Measured rows: convert to relative depth.  When several rows fall
        # into the same band the last one wins.
        for j in range(1, last):
            a1 = profile.iat[j, 0]
            h2 = profile.iat[j, 1]
            for row, fraction, lower, upper in _LAYER_BANDS:
                if lower * h2 < a1 < upper * h2:
                    layers[row] = [fraction, h2, profile.iat[j, 2], profile.iat[j, 3]]
                    break

    list_5[i] = pd.DataFrame(layers, columns=d1)




# --- Collect the complete layers of all hours and print them per layer -------
# Rows with any missing value are discarded.  Doing that per hour and skipping
# hours that end up empty gives the same rows in the same order as
# "concatenate everything, then dropna", without feeding all-NaN frames to
# pd.concat.  (pd.concat replaces DataFrame.append, removed in pandas 2.0.)
complete = [frame.dropna() for frame in list_5]
complete = [frame for frame in complete if len(frame) != 0]
if complete:
    srt = pd.concat(complete)
else:
    srt = pd.DataFrame(columns=d1)
srt.reset_index(inplace=True, drop=True)

# Group by the scalar key 'hb' rather than the list ['hb']: with a list key
# pandas >= 2.0 yields 1-tuples, which would print "(0.2,)" instead of "0.2".
op2 = srt.groupby('hb')
for layer, op3 in op2:
    print(layer)
    # NOTE(review): the indentation of the original was lost; printing each
    # group inside the loop is assumed - confirm.
    print(op3)
# 注意:需要先自行把数据中的时间改写成 24 小时制,CSV 文件名也需要自行指定。之后看能否用面向对象的方式重写,并打包成 exe 文件。
posted @ 2020-09-08 19:02  云飞01  阅读(486)  评论(0编辑  收藏  举报