pandas 代码

 1 def get_train_data():
 2     df = pd.read_csv('data/train.csv', encoding='utf_8')
 3 
 4 #    df1 = pd.read_csv('data/test.csv', encoding='utf_8')
 5 #    df2 = pd.read_csv('data/count.csv', encoding='utf_8')
 6 #    df1['casual'] = df2['casual']
 7 #    df1['registered'] = df2['registered']
 8 #    df1['count'] = df2['count']
 9 #    df = pd.concat([df, df1])
10     
11     cur_time = pd.to_datetime(df['datetime'])
12       
13     df['year'] = cur_time.map(lambda x:x.year)
14     df['month'] = cur_time.map(lambda x:x.month)
15     df['day'] = cur_time.map(lambda x:x.day)
16     df['hour'] = cur_time.map(lambda x:x.hour)    
17     df['dayOfWeek'] = cur_time.map(lambda x:x.isoweekday())
18     df['segOfDay'] = df['hour'].apply(classfy)  
19     
20     year = pd.get_dummies(data=df.iloc[:,])
21 
22 #    df['spring'] = df['season'].map({1:1})
23 #    df['summer'] = df['season'].map({2:1})
24 #    df['autumn'] = df['season'].map({3:1})
25 #    df['winter'] = df['season'].map({4:1})
26 #    df = df.fillna(value={'spring':0,'summer':0,'autumn':0,'winter':0})
27 #    
28 #    df['weather1'] = df['weather'].map({1:1})
29 #    df['weather2'] = df['weather'].map({2:1})
30 #    df['weather3'] = df['weather'].map({3:1})
31 #    df['weather4'] = df['weather'].map({4:1})
32 #    df = df.fillna(value={'weather1':0,'weather2':0,'weather3':0,'weather4':0})
33 #    
34     df = df.replace({'windspeed':0}, 12.799)
35     
36     casual = df['casual']
37     registered  = df['registered']
38     df = df.drop(['datetime', 'season', 'weather','casual', 'registered', 'count'], axis=1)
39     
40     log_cas = casual.map(lambda x: math.log(x+1))
41     log_reg = registered.map(lambda x: math.log(x+1))
42     
43     train_casual = log_cas.values
44     train_registered = log_reg.values
45     train_data = df.values
46     
47     return train_casual, train_registered, train_data

 

posted @ 2015-01-12 14:53  yyxayz  阅读(342)  评论(0)  编辑  收藏  举报