数据分析-时间序列、调频、补值
# -*- coding:utf-8 -*-
"""Time-series demo: decode a JSON result set, re-sample it and fill the gaps.

The script turns a doubly JSON-encoded query result into a pandas Series
indexed by timestamp, changes its frequency with ``asfreq`` and fills the
resulting NaN slots forwards and backwards.
"""
from datetime import datetime
import json

import numpy as np
import pandas as pd

ResultData = "[\"[\\\"asset-8nmfb4hb\\\",0,0,\\\"2022-04-28 15:57:00.000\\\"]\",\"[\\\"asset-8nmfb4hb\\\",0,0,\\\"2022-04-28 15:56:30.000\\\"]\",\"[\\\"asset-8nmfb4hb\\\",0,1,\\\"2022-04-28 15:56:00.000\\\"]\",\"[\\\"asset-8nmfb4hb\\\",0,0.3333333,\\\"2022-04-28 15:55:30.000\\\"]\"]"
ResultSchema = "[\"{\\\"Name\\\":\\\"c_deviceid\\\",\\\"Type\\\":\\\"String\\\"}\",\"{\\\"Name\\\":\\\"integer1\\\",\\\"Type\\\":\\\"Int64\\\"}\",\"{\\\"Name\\\":\\\"float1\\\",\\\"Type\\\":\\\"Float32\\\"}\",\"{\\\"Name\\\":\\\"c_time\\\",\\\"Type\\\":\\\"DateTime64(3, 'Asia/Shanghai')\\\"}\"]"


def lists_format_to_json(ResultSchema, ResultData):
    """Convert a doubly JSON-encoded result set into a list of row dicts.

    :param ResultSchema: JSON array whose items are JSON strings, each
        describing one column as an object with a ``Name`` key.
    :param ResultData: JSON array whose items are JSON strings, each
        encoding one row as an array of values in schema order.
    :return: one dict per row, mapping column name to value.
    :raises IndexError: if a row has fewer values than the schema has columns.
    """
    # Every schema entry is itself a JSON string, hence the second decode.
    column_names = [
        json.loads(column)['Name'] for column in json.loads(ResultSchema)
    ]

    data_schema = []
    for encoded_row in json.loads(ResultData):
        row = json.loads(encoded_row)
        # Index by position so that a short row fails loudly instead of
        # silently producing a dict with missing columns.
        data_schema.append(
            {name: row[position] for position, name in enumerate(column_names)}
        )
    print('data_schema:', data_schema)
    return data_schema


def generate_sequence_data(data_schema, c_time, key):
    """Build a Series of ``key`` values indexed by the parsed ``c_time`` column.

    The rows are reversed before the Series is built, so input that arrives
    newest-first (as the sample data does) ends up in chronological order.

    :param data_schema: list of row dicts as returned by
        ``lists_format_to_json``.
    :param c_time: name of the column holding ``'%Y-%m-%d %H:%M:%S.%f'``
        timestamp strings.
    :param key: name of the column whose values become the Series values.
    :return: ``pd.Series`` of values indexed by ``datetime`` objects.
    """
    index = [
        datetime.strptime(row[c_time], '%Y-%m-%d %H:%M:%S.%f')
        for row in data_schema
    ]
    value = [row[key] for row in data_schema]
    index.reverse()
    value.reverse()
    sequence_data = pd.Series(value, index=index)
    print('sequence_data:', sequence_data)
    return sequence_data


def adjust_time_frequent(sequence_data, frequent):
    """Re-index a time series to a fixed frequency.

    Timestamps introduced by the new frequency that have no original
    observation hold NaN.

    :param sequence_data: time-indexed Series.
    :param frequent: pandas offset alias for the target interval, for
        example ``'h'`` (hourly), ``'min'`` (minutely) or ``'15s'``
        (every 15 seconds).
    :return: the Series conformed to the requested frequency.
    """
    frequent_data = sequence_data.asfreq(frequent)
    print('frequent_data:', frequent_data)
    return frequent_data


def fill_front_data(frequent_data):
    """Forward-fill missing values and return the filled Series.

    :param frequent_data: Series that may contain NaN values.
    :return: a new Series in which each NaN takes the last preceding value.
    """
    # Series.ffill() replaces the deprecated fillna(method="ffill").
    front_data = frequent_data.ffill()
    print('front_data: ', front_data)
    return front_data


def fill_back_data(frequent_data):
    """Backward-fill missing values and return the filled Series.

    :param frequent_data: Series that may contain NaN values.
    :return: a new Series in which each NaN takes the next following value.
    """
    # Series.bfill() replaces the deprecated fillna(method="bfill").
    back_data = frequent_data.bfill()
    print('back_data: ', back_data)
    return back_data


def draw_picture():
    """Plot 20 days of random data together with its 5-day rolling mean."""
    # Imported here because this is the only function that plots; the data
    # helpers above stay importable without a plotting backend.
    import matplotlib.pyplot as plt

    tx = pd.Series(
        np.random.randn(20),
        index=pd.date_range('2018-12-01', periods=20, freq="D"),
    )
    rm = tx.rolling(window=5, center=False).mean()
    rm.plot()
    tx.plot()
    plt.show()


if __name__ == '__main__':
    data_schema = lists_format_to_json(ResultSchema, ResultData)
    c_time = 'c_time'
    key = 'float1'
    sequence_data = generate_sequence_data(data_schema, c_time, key)
    frequent = '15s'
    frequent_data = adjust_time_frequent(sequence_data, frequent)
    fill_front_data(frequent_data)
    fill_back_data(frequent_data)
    draw_picture()
本文来自博客园,作者:ReluStarry,转载请注明原文链接:https://www.cnblogs.com/relustarry/p/16252520.html
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· DeepSeek 开源周回顾「GitHub 热点速览」
· 物流快递公司核心技术能力-地址解析分单基础技术分享
· .NET 10首个预览版发布:重大改进与新特性概览!
· AI与.NET技术实操系列(二):开始使用ML.NET
· 单线程的Redis速度为什么快?