pandas 代码
def get_train_data():
    """Load the training CSV and build the feature matrix and targets.

    Reads ``data/train.csv``, derives calendar features from the
    ``datetime`` column, substitutes zero ``windspeed`` readings, and
    log-transforms the ``casual`` and ``registered`` columns.

    Returns:
        A 3-tuple ``(train_casual, train_registered, train_data)``:

        * ``train_casual`` -- array of ``log(casual + 1)`` per row.
        * ``train_registered`` -- array of ``log(registered + 1)`` per row.
        * ``train_data`` -- array of the remaining feature columns after
          ``datetime``, ``season``, ``weather``, ``casual``,
          ``registered`` and ``count`` are dropped.
    """
    df = pd.read_csv('data/train.csv', encoding='utf_8')

    # Break the timestamp into separate calendar features.
    timestamps = pd.to_datetime(df['datetime'])
    df['year'] = timestamps.map(lambda ts: ts.year)
    df['month'] = timestamps.map(lambda ts: ts.month)
    df['day'] = timestamps.map(lambda ts: ts.day)
    df['hour'] = timestamps.map(lambda ts: ts.hour)
    # isoweekday(): Monday == 1 ... Sunday == 7.
    df['dayOfWeek'] = timestamps.map(lambda ts: ts.isoweekday())
    # `classfy` is defined elsewhere in this file; it maps an hour to a
    # segment-of-day label.
    df['segOfDay'] = df['hour'].apply(classfy)

    # Replace windspeed readings of exactly 0 with 12.799.
    # NOTE(review): presumably zeros are missing measurements and 12.799
    # is a representative fill value -- confirm the origin of the constant.
    df = df.replace({'windspeed': 0}, 12.799)

    # Pull the targets out before dropping them from the feature frame.
    casual = df['casual']
    registered = df['registered']
    df = df.drop(
        ['datetime', 'season', 'weather', 'casual', 'registered', 'count'],
        axis=1,
    )

    # log(x + 1) keeps zero counts finite.
    train_casual = casual.map(lambda n: math.log(n + 1)).values
    train_registered = registered.map(lambda n: math.log(n + 1)).values
    train_data = df.values

    return train_casual, train_registered, train_data