数据分析-时序序列、调频、补值

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# -*- coding:utf-8 -*-
from datetime import datetime
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
 
 
 
# Sample query payload: a JSON array whose elements are themselves JSON-encoded
# rows. Each row holds four values in the column order given by ResultSchema
# below; the sample rows are listed from the latest timestamp to the earliest.
ResultData = "[\"[\\\"asset-8nmfb4hb\\\",0,0,\\\"2022-04-28 15:57:00.000\\\"]\",\"[\\\"asset-8nmfb4hb\\\",0,0,\\\"2022-04-28 15:56:30.000\\\"]\",\"[\\\"asset-8nmfb4hb\\\",0,1,\\\"2022-04-28 15:56:00.000\\\"]\",\"[\\\"asset-8nmfb4hb\\\",0,0.3333333,\\\"2022-04-28 15:55:30.000\\\"]\"]"
# Matching column description: a JSON array of JSON-encoded objects, each with
# a "Name" and a "Type" entry (c_deviceid, integer1, float1, c_time).
ResultSchema = "[\"{\\\"Name\\\":\\\"c_deviceid\\\",\\\"Type\\\":\\\"String\\\"}\",\"{\\\"Name\\\":\\\"integer1\\\",\\\"Type\\\":\\\"Int64\\\"}\",\"{\\\"Name\\\":\\\"float1\\\",\\\"Type\\\":\\\"Float32\\\"}\",\"{\\\"Name\\\":\\\"c_time\\\",\\\"Type\\\":\\\"DateTime64(3, 'Asia/Shanghai')\\\"}\"]"
 
 
def lists_format_to_json(ResultSchema, ResultData):
    """Turn a doubly JSON-encoded schema/rows pair into a list of row dicts.

    Both arguments are JSON text for an array whose elements are themselves
    JSON strings: every schema element decodes to an object carrying a
    ``Name`` key, every data element decodes to an array of column values.

    :param ResultSchema: JSON text describing the columns
    :param ResultData: JSON text holding the rows
    :return: list of dicts mapping each column name to the value found at the
        same position in the row
    """
    # Decode the outer arrays (rows first, then columns).
    encoded_rows = json.loads(ResultData)
    encoded_columns = json.loads(ResultSchema)

    # Every schema entry is JSON text itself; only its 'Name' field is needed.
    column_names = [json.loads(column)['Name'] for column in encoded_columns]

    # Line each column name up with the value at the same index of the row.
    records = []
    for encoded_row in encoded_rows:
        row = json.loads(encoded_row)
        records.append({name: row[pos] for pos, name in enumerate(column_names)})
    print('data_schema:',records)
    return records
 
 
def generate_sequence_data(data_schema, c_time, key):
    """Build a datetime-indexed pandas Series from a list of row dicts.

    :param data_schema: iterable of dicts, one per row
    :param c_time: name of the entry holding the timestamp text, which must
        match the format ``%Y-%m-%d %H:%M:%S.%f``
    :param key: name of the entry whose value becomes the series value
    :return: Series of the ``key`` values indexed by the parsed timestamps,
        in the reverse of the input row order
    """
    samples = []
    for row in data_schema:
        # Parse the timestamp text into a datetime before reading the value.
        moment = datetime.strptime(row[c_time], '%Y-%m-%d %H:%M:%S.%f')
        samples.append((moment, row[key]))

    # The series is emitted in the opposite order from the incoming rows.
    samples.reverse()

    sequence = pd.Series(
        [reading for _, reading in samples],
        index=[moment for moment, _ in samples],
    )
    print('sequence_data:',sequence)
    return sequence
 
 
def adjust_time_frequent(sequence_data, frequent):
    """
    Convert a time series to the given frequency via ``asfreq``.

    Frequency aliases noted by the original author:
        H       hourly frequency
        T, min  minutely frequency
        S       secondly frequency

    :param sequence_data: time-indexed series to convert
    :param frequent: target frequency (interval between points)
    :return: the result of ``sequence_data.asfreq(frequent)``
    """
    regridded = sequence_data.asfreq(frequent)
    print('frequent_data:',regridded)
    return regridded
 
 
def fill_front_data(frequent_data):
    """Forward-fill missing values, print the result and return it.

    Uses ``ffill()`` instead of the deprecated ``fillna(method="ffill")``
    spelling. The input object is not modified.

    :param frequent_data: pandas Series (or DataFrame) that may contain NaN
    :return: a new object in which each missing value is replaced by the last
        valid value before it; leading missing values stay missing
    """
    front_data = frequent_data.ffill()
    print('front_data: ', front_data)
    # Hand the result back so callers can use it, not just read the printout.
    return front_data
 
def fill_back_data(frequent_data):
    """Backward-fill missing values, print the result and return it.

    Uses ``bfill()`` instead of the deprecated ``fillna(method="bfill")``
    spelling. The input object is not modified.

    :param frequent_data: pandas Series (or DataFrame) that may contain NaN
    :return: a new object in which each missing value is replaced by the next
        valid value after it; trailing missing values stay missing
    """
    back_data = frequent_data.bfill()
    print('back_data: ', back_data)
    # Hand the result back so callers can use it, not just read the printout.
    return back_data
 
 
def draw_picture():
    """Plot 20 random daily samples together with their 5-point rolling mean.

    The samples are drawn with ``np.random.randn`` and indexed by 20
    consecutive dates starting at 2018-12-01. The rolling mean is plotted
    first, then the raw samples, and the figure is shown with ``plt.show()``.
    """
    samples = pd.Series(np.random.randn(20))
    samples.index = pd.date_range('2018-12-01', periods=20, freq="d")

    # Trailing (non-centered) mean over a window of five samples.
    smoothed = samples.rolling(window=5, center=False).mean()

    smoothed.plot()
    samples.plot()
    plt.show()
 
if __name__ == '__main__':
    # Demo pipeline: decode the sample payload, build the series of 'float1'
    # values keyed by 'c_time', convert it to a 15-second frequency, print
    # both fill directions, then show the rolling-mean plot.
    records = lists_format_to_json(ResultSchema, ResultData)
    series = generate_sequence_data(records, 'c_time', 'float1')
    regridded = adjust_time_frequent(series, '15S')
    fill_front_data(regridded)
    fill_back_data(regridded)
    draw_picture()

  

posted @   ReluStarry  阅读(23)  评论(0)  编辑  收藏  举报
相关博文:
阅读排行:
· DeepSeek 开源周回顾「GitHub 热点速览」
· 物流快递公司核心技术能力-地址解析分单基础技术分享
· .NET 10首个预览版发布:重大改进与新特性概览!
· AI与.NET技术实操系列(二):开始使用ML.NET
· 单线程的Redis速度为什么快?
点击右上角即可分享
微信分享提示