数据分析-时间序列、调频、补值
# -*- coding:utf-8 -*-
"""Time-series demo: parse query results, build a Series, change its frequency
and fill the resulting gaps."""
from datetime import datetime
import json

import numpy as np
import pandas as pd

# Sample query result: a JSON array whose elements are themselves JSON-encoded
# rows (device id, integer1, float1, timestamp), listed newest first.
ResultData = (
    "["
    "\"[\\\"asset-8nmfb4hb\\\",0,0,\\\"2022-04-28 15:57:00.000\\\"]\","
    "\"[\\\"asset-8nmfb4hb\\\",0,0,\\\"2022-04-28 15:56:30.000\\\"]\","
    "\"[\\\"asset-8nmfb4hb\\\",0,1,\\\"2022-04-28 15:56:00.000\\\"]\","
    "\"[\\\"asset-8nmfb4hb\\\",0,0.3333333,\\\"2022-04-28 15:55:30.000\\\"]\""
    "]"
)

# Sample schema: a JSON array whose elements are JSON-encoded column
# descriptors, each carrying a "Name" and a "Type".
ResultSchema = (
    "["
    "\"{\\\"Name\\\":\\\"c_deviceid\\\",\\\"Type\\\":\\\"String\\\"}\","
    "\"{\\\"Name\\\":\\\"integer1\\\",\\\"Type\\\":\\\"Int64\\\"}\","
    "\"{\\\"Name\\\":\\\"float1\\\",\\\"Type\\\":\\\"Float32\\\"}\","
    "\"{\\\"Name\\\":\\\"c_time\\\",\\\"Type\\\":\\\"DateTime64(3, 'Asia/Shanghai')\\\"}\""
    "]"
)


def lists_format_to_json(ResultSchema, ResultData):
    """Turn the doubly JSON-encoded schema and rows into a list of dicts.

    :param ResultSchema: JSON array of JSON-encoded column descriptors; the
        ``Name`` entry of each descriptor becomes a dict key.
    :param ResultData: JSON array of JSON-encoded rows; values are matched to
        the schema columns by position.
    :return: one ``{column name: value}`` dict per row, in input order.
    :raises IndexError: if a row has fewer values than the schema has columns.
    """
    result_data = json.loads(ResultData)
    result_schema = json.loads(ResultSchema)

    # Each schema element is itself a JSON string, so it is decoded a second
    # time before the column name is read.
    key_schema = [json.loads(column)['Name'] for column in result_schema]

    # Pair every row value with the column name at the same position.
    data_schema = []
    for raw_row in result_data:
        row = json.loads(raw_row)
        data_schema.append(
            {name: row[position] for position, name in enumerate(key_schema)}
        )
    print('data_schema:', data_schema)
    return data_schema


def generate_sequence_data(data_schema, c_time, key):
    """Build a time-indexed Series from the row dicts.

    The rows are emitted in reverse input order, so newest-first input (as in
    the sample data) yields a chronologically ascending Series.

    :param data_schema: list of row dicts as returned by
        ``lists_format_to_json``.
    :param c_time: name of the column holding ``'%Y-%m-%d %H:%M:%S.%f'``
        timestamp strings.
    :param key: name of the column whose values become the Series values.
    :return: ``pd.Series`` of the ``key`` values indexed by parsed datetimes.
    """
    index = []
    value = []
    for row in reversed(data_schema):
        # Convert the timestamp string to a datetime for the index.
        index.append(datetime.strptime(row[c_time], '%Y-%m-%d %H:%M:%S.%f'))
        value.append(row[key])
    sequence_data = pd.Series(value, index=index)
    print('sequence_data:', sequence_data)
    return sequence_data


def adjust_time_frequent(sequence_data, frequent):
    """Re-sample the Series onto a fixed frequency grid via ``Series.asfreq``.

    Common frequency aliases::

        h      hourly frequency
        min    minutely frequency
        s      secondly frequency

    :param sequence_data: time-indexed Series.
    :param frequent: frequency string, e.g. ``'15s'``.
    :return: Series on the new grid; slots without an original sample are NaN.
    """
    frequent_data = sequence_data.asfreq(frequent)
    print('frequent_data:', frequent_data)
    return frequent_data


def fill_front_data(frequent_data):
    """Forward-fill missing values, print the result and return it.

    :param frequent_data: Series that may contain NaN gaps.
    :return: new Series where each gap carries the previous valid value.
    """
    # Series.ffill() replaces the deprecated fillna(method="ffill").
    front_data = frequent_data.ffill()
    print('front_data: ', front_data)
    return front_data


def fill_back_data(frequent_data):
    """Backward-fill missing values, print the result and return it.

    :param frequent_data: Series that may contain NaN gaps.
    :return: new Series where each gap carries the next valid value.
    """
    # Series.bfill() replaces the deprecated fillna(method="bfill").
    back_data = frequent_data.bfill()
    print('back_data: ', back_data)
    return back_data


def draw_picture():
    """Plot 20 random daily samples together with their 5-day rolling mean."""
    # Imported here so the data helpers above stay importable on machines
    # without matplotlib or a display backend.
    import matplotlib.pyplot as plt

    v = np.random.randn(20)
    tx = pd.Series(v)
    tx.index = pd.date_range('2018-12-01', periods=20, freq="D")
    rm = tx.rolling(window=5, center=False).mean()
    rm.plot()
    tx.plot()
    plt.show()


if __name__ == '__main__':
    data_schema = lists_format_to_json(ResultSchema, ResultData)
    c_time = 'c_time'
    key = 'float1'
    sequence_data = generate_sequence_data(data_schema, c_time, key)
    frequent = '15s'
    frequent_data = adjust_time_frequent(sequence_data, frequent)
    fill_front_data(frequent_data)
    fill_back_data(frequent_data)
    draw_picture()
本文来自博客园,作者:ReluStarry,转载请注明原文链接:https://www.cnblogs.com/relustarry/p/16252520.html