# ADCP 24小时数据自动化处理 1 (automated processing of 24-hour ADCP data, part 1)

import numpy as np
import pandas as pd
import datetime
import re
import time
import glob
import copy
def _next_full_hour(sample_time):
    """Return the start of the hour that follows the hour containing *sample_time*.

    The result is a plain ``datetime.datetime``.  ``timedelta`` arithmetic is
    used so that 23:xx rolls over correctly into the next day, month or year.
    """
    start_of_hour = pd.Timestamp(sample_time).to_pydatetime().replace(
        minute=0, second=0, microsecond=0)
    return start_of_hour + datetime.timedelta(hours=1)


def _empty_window():
    """Return the 4-row all-missing placeholder used for a window with no speed rows."""
    return pd.DataFrame(
        {'h': np.nan, 'value': np.nan, 'dir': np.nan, 'date': pd.NaT},
        index=range(4),
    )


def _average_window(speed_rows, direction_rows):
    """Average one time window per ``hb`` group and label it with a whole hour.

    ``speed_rows`` must be non-empty.  The returned frame is indexed by ``hb``
    and has the columns ``value`` (mean of the 'v' rows), ``h``, ``dir`` (mean
    of the 'd' rows) and ``date``.
    """
    label_time = _next_full_hour(speed_rows['date'].iloc[0])
    mean_h = speed_rows['h'].mean()

    # numeric_only=True keeps the text/datetime columns out of the mean on
    # every pandas version instead of relying on them being dropped silently.
    speed = speed_rows.groupby('hb').mean(numeric_only=True).reset_index()
    direction = (
        direction_rows.groupby('hb').mean(numeric_only=True)
        .rename(columns={'value': 'dir'})
        .reset_index()
    )

    merged = pd.merge(speed, direction, how='outer', on=['hb', 'h']).set_index('hb')
    # Every row of the window carries the same h (mean over the speed rows)
    # and the same hour label.
    merged['h'] = mean_h
    merged['date'] = label_time
    return merged


def getshuju(path_in):
    """Reduce one CSV file to per-``hb`` averages for 72 consecutive hourly windows.

    The file must provide the columns ``date`` (``%Y-%m-%d %H:%M:%S``),
    ``lon``, ``lat``, ``ens``, ``dist``, ``variable``, ``value``, ``h`` and
    ``hb``.  Rows with ``variable == 'v'`` are treated as speed and rows with
    ``variable == 'd'`` as direction.

    Window ``i`` (0..71) covers ``00:55:00 + i h`` to ``01:05:09 + i h``
    (both inclusive), counted from the calendar day of the first row.  For
    each window that contains speed rows, ``value`` and ``dir`` are averaged
    per ``hb``, ``h`` is set to the mean ``h`` of the speed rows, and ``date``
    is set to the start of the hour after the first speed sample.  A window
    without speed rows contributes four all-missing rows.

    Args:
        path_in: Path of the CSV file to read.

    Returns:
        A DataFrame with the columns ``hb``, ``h``, ``value``, ``dir`` and
        ``date`` holding all 72 windows in chronological order.

    Raises:
        ValueError: If the file contains no data rows or a date does not
            match the expected format.
        KeyError: If a required column is missing.
    """
    df = pd.read_csv(path_in, encoding='utf-8')
    df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d %H:%M:%S')
    # Kept from the original implementation: show every column when printing.
    pd.set_option('display.max_columns', None)
    df = df.drop(columns=['lon', 'lat', 'ens', 'dist'])
    if df.empty:
        raise ValueError('no data rows in ' + str(path_in))

    first_day = df['date'].iloc[0].to_pydatetime().replace(
        hour=0, minute=0, second=0, microsecond=0)
    first_start = first_day + datetime.timedelta(minutes=55)
    first_end = first_day + datetime.timedelta(hours=1, minutes=5, seconds=9)

    is_speed = df['variable'] == 'v'
    is_direction = df['variable'] == 'd'

    hourly = []
    for offset in range(72):
        shift = datetime.timedelta(hours=offset)
        in_window = (df['date'] >= first_start + shift) & (df['date'] <= first_end + shift)
        speed_rows = df[in_window & is_speed]
        if speed_rows.empty:
            hourly.append(_empty_window())
        else:
            hourly.append(_average_window(speed_rows, df[in_window & is_direction]))

    combined = pd.concat(hourly).reset_index()
    # The index column is called 'index' unless every window was hb-indexed.
    return combined.rename(columns={'index': 'hb'})
def save_weniian(path_in, path_out):
    """Process *path_in* with ``getshuju`` and write the result to *path_out*.

    Args:
        path_in: Path of the CSV file to process.
        path_out: Path of the CSV file to create; the index is not written.

    Returns:
        None.  Prints a completion message naming *path_in*.
    """
    # getshuju reads the file itself, so no separate file handle is opened,
    # and its result is written directly without copying it into a new frame.
    getshuju(path_in).to_csv(path_out, index=False)
    print('Finished ' + path_in)
if __name__ == "__main__":
    # Convert every *_ASC.csv file in the current directory; the output for
    # "name_ASC.csv" is written next to it as "name_ASCt.csv".
    ff = glob.glob("*_ASC.csv")
    if not ff:
        print('No files to convert.')
        raise SystemExit
    print("Detected ASCII *_ASC.csv files: \n", "\n".join(ff))
    for f in ff:
        # Escape the dot and anchor at the end so only the extension changes.
        path_out = re.sub(r'(?i)\.csv$', 't.csv', f)
        # save_weniian runs getshuju itself, so each file is processed once.
        save_weniian(f, path_out)
# posted @ 2020-12-22 18:14  云飞01  阅读(147)  评论(0)  编辑  收藏  举报