%E5%9B%BE%E7%89%87.png

In [1]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
In [2]:
# Daily timestamps from 2017-12-01 through 2018-01-31, both endpoints included.
dtime = pd.date_range("20171201", "20180131", freq="D")
# Show the container type first, then the index itself, one per line.
print(type(dtime), dtime, sep="\n")
 
<class 'pandas.core.indexes.datetimes.DatetimeIndex'>
DatetimeIndex(['2017-12-01', '2017-12-02', '2017-12-03', '2017-12-04',
               '2017-12-05', '2017-12-06', '2017-12-07', '2017-12-08',
               '2017-12-09', '2017-12-10', '2017-12-11', '2017-12-12',
               '2017-12-13', '2017-12-14', '2017-12-15', '2017-12-16',
               '2017-12-17', '2017-12-18', '2017-12-19', '2017-12-20',
               '2017-12-21', '2017-12-22', '2017-12-23', '2017-12-24',
               '2017-12-25', '2017-12-26', '2017-12-27', '2017-12-28',
               '2017-12-29', '2017-12-30', '2017-12-31', '2018-01-01',
               '2018-01-02', '2018-01-03', '2018-01-04', '2018-01-05',
               '2018-01-06', '2018-01-07', '2018-01-08', '2018-01-09',
               '2018-01-10', '2018-01-11', '2018-01-12', '2018-01-13',
               '2018-01-14', '2018-01-15', '2018-01-16', '2018-01-17',
               '2018-01-18', '2018-01-19', '2018-01-20', '2018-01-21',
               '2018-01-22', '2018-01-23', '2018-01-24', '2018-01-25',
               '2018-01-26', '2018-01-27', '2018-01-28', '2018-01-29',
               '2018-01-30', '2018-01-31'],
              dtype='datetime64[ns]', freq='D')
In [3]:
# Same date span as before, but stepping ten days at a time.
dtime = pd.date_range("20171201", "20180131", freq="10D")
print(dtime)
 
DatetimeIndex(['2017-12-01', '2017-12-11', '2017-12-21', '2017-12-31',
               '2018-01-10', '2018-01-20', '2018-01-30'],
              dtype='datetime64[ns]', freq='10D')
In [4]:
# Ten consecutive month-end timestamps; the first one on or after 2017-12-01
# is 2017-12-31.
dtime = pd.date_range("20171201", periods=10, freq="M")
print(dtime)
 
DatetimeIndex(['2017-12-31', '2018-01-31', '2018-02-28', '2018-03-31',
               '2018-04-30', '2018-05-31', '2018-06-30', '2018-07-31',
               '2018-08-31', '2018-09-30'],
              dtype='datetime64[ns]', freq='M')
In [5]:
# Fifty hourly timestamps; the start's minutes and seconds (10:10) carry over
# to every entry.
dtime = pd.date_range("2017-12-01 10:10:10", periods=50, freq="H")
print(dtime)
 
DatetimeIndex(['2017-12-01 10:10:10', '2017-12-01 11:10:10',
               '2017-12-01 12:10:10', '2017-12-01 13:10:10',
               '2017-12-01 14:10:10', '2017-12-01 15:10:10',
               '2017-12-01 16:10:10', '2017-12-01 17:10:10',
               '2017-12-01 18:10:10', '2017-12-01 19:10:10',
               '2017-12-01 20:10:10', '2017-12-01 21:10:10',
               '2017-12-01 22:10:10', '2017-12-01 23:10:10',
               '2017-12-02 00:10:10', '2017-12-02 01:10:10',
               '2017-12-02 02:10:10', '2017-12-02 03:10:10',
               '2017-12-02 04:10:10', '2017-12-02 05:10:10',
               '2017-12-02 06:10:10', '2017-12-02 07:10:10',
               '2017-12-02 08:10:10', '2017-12-02 09:10:10',
               '2017-12-02 10:10:10', '2017-12-02 11:10:10',
               '2017-12-02 12:10:10', '2017-12-02 13:10:10',
               '2017-12-02 14:10:10', '2017-12-02 15:10:10',
               '2017-12-02 16:10:10', '2017-12-02 17:10:10',
               '2017-12-02 18:10:10', '2017-12-02 19:10:10',
               '2017-12-02 20:10:10', '2017-12-02 21:10:10',
               '2017-12-02 22:10:10', '2017-12-02 23:10:10',
               '2017-12-03 00:10:10', '2017-12-03 01:10:10',
               '2017-12-03 02:10:10', '2017-12-03 03:10:10',
               '2017-12-03 04:10:10', '2017-12-03 05:10:10',
               '2017-12-03 06:10:10', '2017-12-03 07:10:10',
               '2017-12-03 08:10:10', '2017-12-03 09:10:10',
               '2017-12-03 10:10:10', '2017-12-03 11:10:10'],
              dtype='datetime64[ns]', freq='H')
In [6]:
# Build a 50x2 frame holding 0..99 in row-major order, indexed by the hourly
# timestamps created in the previous cell.
values = np.arange(100).reshape(50, 2)
data = pd.DataFrame(values, index=dtime, columns=["A", "B"])
print(data)
 
                      A   B
2017-12-01 10:10:10   0   1
2017-12-01 11:10:10   2   3
2017-12-01 12:10:10   4   5
2017-12-01 13:10:10   6   7
2017-12-01 14:10:10   8   9
2017-12-01 15:10:10  10  11
2017-12-01 16:10:10  12  13
2017-12-01 17:10:10  14  15
2017-12-01 18:10:10  16  17
2017-12-01 19:10:10  18  19
2017-12-01 20:10:10  20  21
2017-12-01 21:10:10  22  23
2017-12-01 22:10:10  24  25
2017-12-01 23:10:10  26  27
2017-12-02 00:10:10  28  29
2017-12-02 01:10:10  30  31
2017-12-02 02:10:10  32  33
2017-12-02 03:10:10  34  35
2017-12-02 04:10:10  36  37
2017-12-02 05:10:10  38  39
2017-12-02 06:10:10  40  41
2017-12-02 07:10:10  42  43
2017-12-02 08:10:10  44  45
2017-12-02 09:10:10  46  47
2017-12-02 10:10:10  48  49
2017-12-02 11:10:10  50  51
2017-12-02 12:10:10  52  53
2017-12-02 13:10:10  54  55
2017-12-02 14:10:10  56  57
2017-12-02 15:10:10  58  59
2017-12-02 16:10:10  60  61
2017-12-02 17:10:10  62  63
2017-12-02 18:10:10  64  65
2017-12-02 19:10:10  66  67
2017-12-02 20:10:10  68  69
2017-12-02 21:10:10  70  71
2017-12-02 22:10:10  72  73
2017-12-02 23:10:10  74  75
2017-12-03 00:10:10  76  77
2017-12-03 01:10:10  78  79
2017-12-03 02:10:10  80  81
2017-12-03 03:10:10  82  83
2017-12-03 04:10:10  84  85
2017-12-03 05:10:10  86  87
2017-12-03 06:10:10  88  89
2017-12-03 07:10:10  90  91
2017-12-03 08:10:10  92  93
2017-12-03 09:10:10  94  95
2017-12-03 10:10:10  96  97
2017-12-03 11:10:10  98  99
In [7]:
# Downsample: bucket the hourly rows by calendar day and count the rows in
# each bucket.
resample_data = data.resample("D").count()
# Print the result's type, then the frame on the following line.
print(type(resample_data), resample_data, sep=" \n ")
 
<class 'pandas.core.frame.DataFrame'> 
              A   B
2017-12-01  14  14
2017-12-02  24  24
2017-12-03  12  12
In [8]:
# Convert the timestamps of the resampled index into "YYYYMMDD" strings.
time_index = resample_data["A"].index
# Vectorised formatting on the index, then materialise as a plain Python list.
time_str = time_index.strftime("%Y%m%d").tolist()
print(time_str)
 
['20171201', '20171202', '20171203']
In [9]:
# Convert the date strings back into timestamps (the reverse of the previous
# cell).
parsed_index = pd.to_datetime(time_str)
print(parsed_index)
 
DatetimeIndex(['2017-12-01', '2017-12-02', '2017-12-03'], dtype='datetime64[ns]', freq=None)
In [10]:
# Load the hourly Beijing PM readings; the CSV is expected next to the notebook.
read_data = pd.read_csv("./BeijingPM20100101_20151231.csv")

# Combine the separate year/month/day/hour columns into a single time index.
# pd.to_datetime assembles timestamps directly from the component columns. This
# replaces the pd.PeriodIndex(year=..., month=..., ...) keyword constructor,
# which recent pandas releases deprecate (as they do resampling on a
# PeriodIndex), and it works on old pandas versions as well.
datetime = pd.DatetimeIndex(
    pd.to_datetime(read_data[["year", "month", "day", "hour"]]),
    name="datetime",
)
read_data = read_data.set_index(datetime)

# 7-day means. mean() skips NaN readings, so missing values do not break the
# average. Restricting to numeric columns first keeps any non-numeric column
# (if the CSV has one) out of the aggregation: newer pandas raises on such
# columns instead of silently dropping them.
resample_data = read_data.select_dtypes(include="number").resample("7D").mean()
pm_CN = resample_data["PM_Dongsi"]
pm_US = resample_data["PM_US Post"]
x_ticks = [i.strftime("%Y%m%d") for i in pm_CN.index]

plt.figure(figsize=(20, 8), dpi=80)
plt.plot(x_ticks, pm_CN, label="PM_CN")
plt.plot(x_ticks, pm_US, label="PM_US")
# Label only every 10th bin so the rotated date strings stay readable.
plt.xticks(x_ticks[::10], rotation=45)
plt.xlabel("Start date of 7-day bin")
plt.ylabel("Mean PM reading")
plt.legend(loc="best")

plt.show()