数据分析:时间序列、调频、补值

# -*- coding:utf-8 -*-
from datetime import datetime
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt



# Sample query result used by the demo at the bottom of the file.
# It is a JSON array whose elements are themselves JSON-encoded arrays, one
# per row, so every row has to be decoded a second time. The rows are listed
# with the newest timestamp first.
ResultData = "[\"[\\\"asset-8nmfb4hb\\\",0,0,\\\"2022-04-28 15:57:00.000\\\"]\",\"[\\\"asset-8nmfb4hb\\\",0,0,\\\"2022-04-28 15:56:30.000\\\"]\",\"[\\\"asset-8nmfb4hb\\\",0,1,\\\"2022-04-28 15:56:00.000\\\"]\",\"[\\\"asset-8nmfb4hb\\\",0,0.3333333,\\\"2022-04-28 15:55:30.000\\\"]\"]"
# Column description for the rows above: a JSON array of JSON-encoded objects,
# each carrying a "Name" and a "Type" entry, in the same order as the values
# inside every row.
ResultSchema = "[\"{\\\"Name\\\":\\\"c_deviceid\\\",\\\"Type\\\":\\\"String\\\"}\",\"{\\\"Name\\\":\\\"integer1\\\",\\\"Type\\\":\\\"Int64\\\"}\",\"{\\\"Name\\\":\\\"float1\\\",\\\"Type\\\":\\\"Float32\\\"}\",\"{\\\"Name\\\":\\\"c_time\\\",\\\"Type\\\":\\\"DateTime64(3, 'Asia/Shanghai')\\\"}\"]"


def lists_format_to_json(ResultSchema, ResultData):
    """Turn a double-encoded schema/data pair into a list of row dicts.

    :param ResultSchema: JSON array (as a string) whose items are JSON-encoded
        objects; the ``Name`` entry of each object is used as a column name.
    :param ResultData: JSON array (as a string) whose items are JSON-encoded
        arrays holding one row of values each, in column order.
    :return: one dict per row, mapping every column name to the value found at
        the same position in that row.
    :raises IndexError: if a row holds fewer values than there are columns.
    """
    # Outer decoding pass: both inputs become lists of still-encoded strings.
    encoded_rows = json.loads(ResultData)
    encoded_columns = json.loads(ResultSchema)

    # Inner decoding pass for the schema: keep only the column names.
    column_names = [json.loads(column)['Name'] for column in encoded_columns]

    # Inner decoding pass for the rows: pair each column name with the value
    # stored at the same position.
    data_schema = [
        {name: row[position] for position, name in enumerate(column_names)}
        for row in map(json.loads, encoded_rows)
    ]
    print('data_schema:', data_schema)
    return data_schema


def generate_sequence_data(data_schema, c_time, key):
    """Build a time-indexed ``pandas.Series`` from a list of row dicts.

    :param data_schema: iterable of dicts, one per row.
    :param c_time: name of the entry holding the timestamp string, formatted
        as ``%Y-%m-%d %H:%M:%S.%f``.
    :param key: name of the entry whose value becomes the series value.
    :return: a Series indexed by the parsed timestamps, with the rows in the
        reverse of their input order (newest-first input becomes oldest-first).
    """
    timestamp_format = '%Y-%m-%d %H:%M:%S.%f'

    # Parse every row in input order, keeping timestamp and value together.
    parsed = [
        (datetime.strptime(record[c_time], timestamp_format), record[key])
        for record in data_schema
    ]

    # Flip the order once, then split the pairs into index and values.
    parsed = parsed[::-1]
    timestamps = [moment for moment, _ in parsed]
    readings = [reading for _, reading in parsed]

    sequence_data = pd.Series(readings, index=timestamps)
    print('sequence_data:', sequence_data)
    return sequence_data


def adjust_time_frequent(sequence_data, frequent):
    """Re-index a time series onto a fixed frequency and print the result.

    The work is delegated to ``Series.asfreq``; timestamps that the new
    frequency introduces and that have no original observation hold NaN.

    Frequency aliases listed by the original author:
        H        hourly frequency
        T, min   minutely frequency
        S        secondly frequency
    NOTE(review): newer pandas releases prefer the lowercase spellings
    ``h``, ``min`` and ``s`` - confirm against the installed version.

    :param sequence_data: time series with a datetime index
    :param frequent: target frequency, e.g. a multiple of one of the aliases
    :return: the series converted to the requested frequency
    """
    resampled = sequence_data.asfreq(freq=frequent)
    print('frequent_data:', resampled)
    return resampled


def fill_front_data(frequent_data):
    """Forward-fill the gaps of a series, print the result and return it.

    Every missing value is replaced by the last valid value that precedes it;
    missing values at the very start have nothing to copy from and stay
    missing. The input series is not modified.

    :param frequent_data: series that may contain NaN gaps
    :return: a new series with the gaps forward-filled
    """
    # Series.ffill() replaces fillna(method="ffill"), whose ``method``
    # argument is deprecated in recent pandas releases.
    front_data = frequent_data.ffill()
    print('front_data: ', front_data)
    return front_data

def fill_back_data(frequent_data):
    """Backward-fill the gaps of a series, print the result and return it.

    Every missing value is replaced by the next valid value that follows it;
    missing values at the very end have nothing to copy from and stay
    missing. The input series is not modified.

    :param frequent_data: series that may contain NaN gaps
    :return: a new series with the gaps backward-filled
    """
    # Series.bfill() replaces fillna(method="bfill"), whose ``method``
    # argument is deprecated in recent pandas releases.
    back_data = frequent_data.bfill()
    print('back_data: ', back_data)
    return back_data


def draw_picture():
    """Plot 20 random daily values together with their 5-point rolling mean.

    The values are drawn from ``np.random.randn`` and indexed by consecutive
    days starting at 2018-12-01. The rolling mean is plotted first, then the
    raw series, and the figure is shown with ``plt.show()``.
    """
    samples = np.random.randn(20)
    # "D" is the canonical day alias; the lowercase spelling is not accepted
    # uniformly across pandas versions.
    days = pd.date_range('2018-12-01', periods=20, freq="D")
    tx = pd.Series(samples, index=days)

    # Trailing window: the first four points have no full window and are NaN.
    rm = tx.rolling(window=5, center=False).mean()
    rm.plot()
    tx.plot()
    plt.show()

if __name__ == '__main__':
    # Demo pipeline: decode the sample payload, build a time series from one
    # column, re-index it onto a finer grid, show both gap-filling strategies
    # and finally draw an unrelated rolling-mean example plot.
    data_schema = lists_format_to_json(ResultSchema, ResultData)
    c_time = 'c_time'
    key = 'float1'
    sequence_data = generate_sequence_data(data_schema, c_time, key)

    # The sample rows are 30 seconds apart, so a 15-second grid inserts one
    # empty slot between each pair. Lowercase 's' is used because the
    # uppercase 'S' alias is deprecated in recent pandas releases.
    frequent = '15s'
    frequent_data = adjust_time_frequent(sequence_data, frequent)
    fill_front_data(frequent_data)
    fill_back_data(frequent_data)
    draw_picture()

  

posted @ 2022-05-10 10:43  ReluStarry  阅读(23)  评论(0)  编辑  收藏  举报