adcp24小时数据自动化处理1

import numpy as np
import pandas as pd
import datetime
import re
import time
import glob
import copy
def _next_full_hour(stamp):
    """Return the start of the hour that follows the hour containing *stamp*.

    Uses real date arithmetic, so a 23:xx sample on the last day of a month
    or year rolls over to 00:00 of the next calendar day.
    """
    hour_start = datetime.datetime(stamp.year, stamp.month, stamp.day, stamp.hour)
    return hour_start + datetime.timedelta(hours=1)


def _summarise_window(speed_rows, dir_rows):
    """Collapse one time window into one row per 'hb' value.

    speed_rows / dir_rows are the 'v' and 'd' rows of the same window;
    speed_rows must be non-empty. The result is indexed by 'hb' and has the
    columns 'h', 'value', 'dir' and 'date'.
    """
    # The label comes from the first velocity sample of the window.
    label = _next_full_hour(speed_rows['date'].iloc[0])
    mean_h = speed_rows['h'].mean()

    # numeric_only=True keeps the 'date' and 'variable' columns out of the
    # mean; recent pandas raises on the string column otherwise.
    speed_by_bin = speed_rows.groupby('hb').mean(numeric_only=True)
    # NOTE(review): directions are averaged arithmetically. If they are
    # compass angles this is wrong across the 0/360 wrap -- confirm.
    dir_by_bin = dir_rows.groupby('hb').mean(numeric_only=True)
    dir_by_bin = dir_by_bin.rename(columns={'value': 'dir'})

    # 'hb' is the index of both frames and 'h' a column; merging on both
    # keeps 'hb' as the index of the result.
    merged = pd.merge(speed_by_bin, dir_by_bin, how='outer', on=['hb', 'h'])
    # Every row of the window carries the window-wide mean of 'h' and the
    # same hour label.
    merged['h'] = mean_h
    merged['date'] = label
    return merged


def getshuju(path_in, n_windows=72):
    """Average the 'v' and 'd' records of a CSV file in hourly windows.

    The file must provide the columns 'date' (formatted
    '%Y-%m-%d %H:%M:%S'), 'lon', 'lat', 'ens', 'dist', 'variable', 'h',
    'hb' and 'value'. Window ``i`` spans HH:55:00 to (HH+1):05:09 with
    HH = i, counted from midnight of the day of the first record.

    For every window that contains 'v' rows, the 'v' values (column
    'value') and 'd' values (column 'dir') are averaged per 'hb', 'h' is
    set to the mean 'h' of the window's 'v' rows, and 'date' is set to the
    full hour following the hour of the first 'v' sample. A window without
    'v' rows contributes four all-NaN placeholder rows.

    Args:
        path_in: Path of the UTF-8 encoded CSV file.
        n_windows: Number of consecutive hourly windows to evaluate; must
            be at least 1. Defaults to 72.

    Returns:
        A DataFrame with the columns 'hb', 'h', 'value', 'dir' and 'date'
        holding the rows of all windows in chronological window order. For
        placeholder rows 'hb' is the row number 0-3.
    """
    df = pd.read_csv(path_in, encoding='utf-8')
    df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d %H:%M:%S')
    # Kept from the original implementation: callers that print the result
    # get every column displayed.
    pd.set_option('display.max_columns', None)
    df = df.drop(columns=['lon', 'lat', 'ens', 'dist'])

    # All windows are anchored to midnight of the first record's day.
    first = df['date'].iloc[0]
    day = datetime.datetime(first.year, first.month, first.day)
    first_start = day + datetime.timedelta(minutes=55)
    first_end = day + datetime.timedelta(hours=1, minutes=5, seconds=9)

    is_speed = df['variable'] == 'v'  # velocity rows
    is_dir = df['variable'] == 'd'  # direction rows

    frames = []
    for i in range(n_windows):
        shift = datetime.timedelta(hours=i)
        in_window = (df['date'] >= first_start + shift) & (df['date'] <= first_end + shift)
        speed_rows = df[in_window & is_speed]
        if len(speed_rows) == 0:
            # No velocity data in this window: emit the fixed-size NaN
            # placeholder so the gap stays visible in the output.
            frames.append(pd.DataFrame(np.full([4, 4], np.nan),
                                       columns=['h', 'value', 'dir', 'date']))
        else:
            frames.append(_summarise_window(speed_rows, df[in_window & is_dir]))

    combined = pd.concat(frames)
    # The index is named 'hb' only if every window had data; otherwise it is
    # unnamed and reset_index() calls the new column 'index'.
    combined = combined.reset_index()
    return combined.rename(columns={'index': 'hb'})
def save_weniian(path_in, path_out):
    """Process one input CSV with getshuju() and write the result as CSV.

    Args:
        path_in: Path of the CSV file handed to getshuju().
        path_out: Path of the CSV file to write; the index is not written.

    Returns:
        None. Prints 'Finished <path_in>' once the file has been written.
    """
    # getshuju() reads the file itself, so no separate file handle is opened
    # here, and its result is written directly instead of being appended to
    # an empty frame (DataFrame.append no longer exists in pandas 2.x).
    result = getshuju(path_in)
    result.to_csv(path_out, index=False)
    print('Finished ' + path_in)
if __name__ == "__main__":
    # Convert every *_ASC.csv file found in the current working directory.
    ff = glob.glob("*_ASC.csv")
    if not ff:
        print('No files to convert.')
    else:
        print("Detected ASCII *_ASC.csv files: \n", "\n".join(ff))
        for path_in in ff:
            # "name_ASC.csv" -> "name_ASCt.csv". The dot is escaped and the
            # pattern anchored so only the trailing extension is rewritten.
            path_out = re.sub(r'(?i)\.csv$', 't.csv', path_in)
            # save_weniian() runs getshuju() itself; calling it here as well
            # would process every file twice.
            save_weniian(path_in, path_out)
posted @ 2020-12-22 18:14 云飞01 阅读(147) 评论(0) 编辑收藏举报
会员力量，点亮园子希望
刷新页面返回顶部
云飞01

adcp24小时数据自动化处理1

公告