pyculiarity 时间序列(异常流量)异常检测初探——感觉还可以,和Facebook的fbprophet本质上一样
demo:
from pyculiarity import detect_ts
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib

matplotlib.style.use('ggplot')

__author__ = 'willmcginnis'


def main():
    """Run detect_ts on ./raw_data.csv and plot the series with its anomalies.

    The top subplot shows the raw counts as a blue line with the detected
    anomalies overlaid as red dots; the bottom subplot shows the anomaly
    values on their own. Both subplots share the x axis.
    """
    # first run the models
    twitter_example_data = pd.read_csv('./raw_data.csv', usecols=['timestamp', 'count'])
    results = detect_ts(twitter_example_data, max_anoms=0.05, alpha=0.001,
                        direction='both', only_last=None)
    anoms = results['anoms']

    # format the twitter data nicely
    timestamps = pd.to_datetime(twitter_example_data['timestamp'])
    # Select the count series by position rather than by name: the CSV column
    # is 'count', and a 'value' column only exists if detect_ts renamed the
    # input's columns in place (NOTE(review): version dependent - confirm).
    # Positional access works in both cases.
    counts = twitter_example_data.iloc[:, 1]

    # make a nice plot
    f, ax = plt.subplots(2, 1, sharex=True)
    ax[0].plot(timestamps, counts, 'b')
    ax[0].plot(anoms.index, anoms['anoms'], 'ro')
    ax[0].set_title('Detected Anomalies')
    ax[1].set_xlabel('Time Stamp')
    ax[0].set_ylabel('Count')
    ax[1].plot(anoms.index, anoms['anoms'], 'b')
    ax[1].set_ylabel('Anomaly Magnitude')
    plt.show()


if __name__ == '__main__':
    main()
demo2代码如下:
from matplotlib import pyplot as plt
from pyculiarity import detect_ts
import pandas as pd
import numpy as np

# Load the series and draw every observation as a black dot, using the row
# position as the x coordinate.
twitter_example_data = pd.read_csv('raw_data.csv', usecols=['timestamp', 'count'])
plt.plot(range(0, len(twitter_example_data)), twitter_example_data['count'], "k.", label='points')

# Detect anomalies and print a short summary of the result frame.
results = detect_ts(twitter_example_data, max_anoms=0.02, direction='both')
anoms = results['anoms']
print(anoms[0:10])
print(anoms[-10:])
print(len(anoms))

# Map each anomaly timestamp back to its row position(s) in the input so the
# anomalies can be drawn on the same positional x axis as the raw points.
timestamp_column = twitter_example_data["timestamp"]
anomaly_positions = []
anomaly_values = []
for timestamp, anomal_val in zip(anoms['timestamp'], anoms['anoms']):
    print(timestamp, anomal_val)
    # np.where(cond) returns a 1-tuple for 1-D input, so checking the length
    # of its result never validated anything; count the matching rows instead.
    matches = np.flatnonzero((timestamp_column == timestamp).values)
    if matches.size == 0:
        raise ValueError(
            "anomaly timestamp %r was not found in the input data" % (timestamp,)
        )
    anomaly_positions.extend(matches.tolist())
    anomaly_values.extend([anomal_val] * matches.size)

# One plot call for all anomalies keeps a single 'abnormal points' legend
# entry instead of creating one labelled artist per anomaly.
plt.plot(anomaly_positions, anomaly_values, "rX", label='abnormal points')
plt.legend()
plt.show()
效果图:
原始数据图:
红色为检测出来的异常点: