Time Series / Date functionality
详细内容见官方文档(Time series / date functionality):https://pandas.pydata.org/docs/user_guide/timeseries.html
以下是一些可能会用到的代码(运行前需先执行:import numpy as np、import pandas as pd、from datetime import datetime):
代码1
df = pd.DataFrame({'year': [2015, 2016],'month': [2, 3],'day': [4, 5],'hour': [2, 3]})
print(pd.to_datetime(df))
0 2015-02-04 02:00:00
1 2016-03-05 03:00:00
dtype: datetime64[ns]
print(pd.to_datetime(df[['year', 'month', 'day']]))
0 2015-02-04
1 2016-03-05
dtype: datetime64[ns]
代码2
stamps = pd.date_range('2012-10-08 18:15:05', periods=4, freq='D')
print(stamps)
DatetimeIndex(['2012-10-08 18:15:05', '2012-10-09 18:15:05',
'2012-10-10 18:15:05', '2012-10-11 18:15:05'],
dtype='datetime64[ns]', freq='D')
代码3
dates = [datetime(2012, 5, 1), datetime(2012, 5, 2), datetime(2012, 5, 3)]
idx = pd.DatetimeIndex(dates)
print(idx)
DatetimeIndex(['2012-05-01', '2012-05-02', '2012-05-03'], dtype='datetime64[ns]', freq=None)
index = pd.Index(dates)
print(index)
DatetimeIndex(['2012-05-01', '2012-05-02', '2012-05-03'], dtype='datetime64[ns]', freq=None)
代码4
start = datetime(2011, 1, 1)
end = datetime(2012, 1, 1)
idx = pd.date_range(start, end) # calendar day
print(idx)
DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03', '2011-01-04',
'2011-01-05', '2011-01-06', '2011-01-07', '2011-01-08',
'2011-01-09', '2011-01-10',
...
'2011-12-23', '2011-12-24', '2011-12-25', '2011-12-26',
'2011-12-27', '2011-12-28', '2011-12-29', '2011-12-30',
'2011-12-31', '2012-01-01'],
dtype='datetime64[ns]', length=366, freq='D')
index = pd.bdate_range(start, end) # business day
print(index)
DatetimeIndex(['2011-01-03', '2011-01-04', '2011-01-05', '2011-01-06',
'2011-01-07', '2011-01-10', '2011-01-11', '2011-01-12',
'2011-01-13', '2011-01-14',
...
'2011-12-19', '2011-12-20', '2011-12-21', '2011-12-22',
'2011-12-23', '2011-12-26', '2011-12-27', '2011-12-28',
'2011-12-29', '2011-12-30'],
dtype='datetime64[ns]', length=260, freq='B')
代码5
rng2 = pd.date_range('2011-01-01', '2012-01-01', freq='W')
ts2 = pd.Series(np.random.randn(len(rng2)), index=rng2)
trun = ts2.truncate(before='2011-11', after='2011-12')
print(trun)
2011-11-06 0.754754
2011-11-13 -0.265821
2011-11-20 -0.164999
2011-11-27 1.410138
Freq: W-SUN, dtype: float64
slc = ts2['2011-11':'2011-12']
print(slc)
2011-11-06 0.754754
2011-11-13 -0.265821
2011-11-20 -0.164999
2011-11-27 1.410138
2011-12-04 -0.561173
2011-12-11 0.095135
2011-12-18 -1.553011
2011-12-25 -0.804772
Freq: W-SUN, dtype: float64
代码6
from pandas.tseries.offsets import *
d = datetime(2008, 8, 18, 9, 0)
d + DateOffset(months=4, days=5)
print(d + DateOffset(months=4, days=5))
2008-12-23 09:00:00
# Offset objects provide rollforward() and rollback() methods, which move a date forward or backward to the next or previous "offset date" (for BMonthEnd below: the last business day of a month)
d = datetime(2008, 8, 18, 9, 0)
offset = BMonthEnd()
print(offset.rollforward(d))
2008-08-29 09:00:00
print(offset.rollback(d))
2008-07-31 09:00:00
代码7
d = datetime(2008, 8, 18, 9, 0)
print(d+Week())
2008-08-25 09:00:00
print(d+Week(weekday=4))
2008-08-22 09:00:00
print(d-Week())
2008-08-11 09:00:00
代码8
from pandas.tseries.offsets import CustomBusinessDay
weekmask_egypt = 'Sun Mon Tue Wed Thu'
holidays = ['2012-05-01', datetime(2013, 5, 1), np.datetime64('2014-05-01')]
bday_egypt = CustomBusinessDay(holidays=holidays, weekmask=weekmask_egypt)
dt = datetime(2013, 4, 30)
print(dt + 2 * bday_egypt)
2013-05-05 00:00:00
dts = pd.date_range(dt, periods=5, freq=bday_egypt)
print(dts)
DatetimeIndex(['2013-04-30', '2013-05-02', '2013-05-05', '2013-05-06',
'2013-05-07'],
dtype='datetime64[ns]', freq='C')
print(pd.Series(dts.weekday, dts).map(pd.Series('Mon Tue Wed Thu Fri Sat Sun'.split())))
2013-04-30 Tue
2013-05-02 Thu
2013-05-05 Sun
2013-05-06 Mon
2013-05-07 Tue
Freq: C, dtype: object
代码9
bh = BusinessHour()
print(bh)
<BusinessHour: BH=09:00-17:00>
print(pd.Timestamp('2014-08-01 10:00') + BusinessHour(-3))
2014-07-31 15:00:00
代码10
df = pd.DataFrame(np.random.randn(1000, 3),
index=pd.date_range('1/1/2012', freq='S', periods=1000),
columns=['A', 'B', 'C'])
r = df.resample('3T')
print(r['A'].agg([np.sum, np.mean, np.std]))
sum mean std
2012-01-01 00:00:00 0.731268 0.004063 0.896257
2012-01-01 00:03:00 14.198307 0.078879 1.016851
2012-01-01 00:06:00 10.647889 0.059155 1.033933
2012-01-01 00:09:00 22.267552 0.123709 1.043074
2012-01-01 00:12:00 22.012834 0.122294 1.014366
2012-01-01 00:15:00 0.172829 0.001728 1.021002
print(r.agg({'A' : np.sum, 'B' : lambda x: np.std(x, ddof=1)}))
A B
2012-01-01 00:00:00 -8.869162 1.013999
2012-01-01 00:03:00 -15.543503 0.977319
2012-01-01 00:06:00 -6.948761 0.951148
2012-01-01 00:09:00 -2.672707 1.031540
2012-01-01 00:12:00 -22.573518 0.878950
2012-01-01 00:15:00 -11.068128 1.000327
print(r.agg({'A' : ['sum','std'], 'B' : ['mean','std'] }))
A B
sum std mean std
2012-01-01 00:00:00 12.533505 0.951647 0.008967 1.061614
2012-01-01 00:03:00 -1.539616 0.975275 0.047445 1.067111
2012-01-01 00:06:00 -14.698385 1.046088 -0.099095 1.001694
2012-01-01 00:09:00 4.611009 1.082504 0.070930 0.931013
2012-01-01 00:12:00 0.894418 1.058882 -0.032890 1.032428
2012-01-01 00:15:00 11.446964 0.997862 0.025005 0.801506
代码11
s = pd.Series(pd.date_range('20130101 09:10:12', periods=4))
print(s)
0 2013-01-01 09:10:12
1 2013-01-02 09:10:12
2 2013-01-03 09:10:12
3 2013-01-04 09:10:12
dtype: datetime64[ns]
print(s.dt.hour)
0 9
1 9
2 9
3 9
dtype: int64
print(s.dt.second)
0 12
1 12
2 12
3 12
dtype: int64
print(s.dt.day)
0 1
1 2
2 3
3 4
dtype: int64
print(s[s.dt.day==2])
1 2013-01-02 09:10:12
dtype: datetime64[ns]