1. 表相关的函数 Table-wise function application
# One-row demo frame: a single column whose value packs a city name and a
# code into one comma-separated string.
df_p = pd.DataFrame(
    dict(city_and_code=['JiuJiang, 0792']),
)
city_and_code
0 JiuJiang, 0792
def extract_city_name(df: pd.DataFrame) -> pd.DataFrame:
    """Add a ``city_name`` column holding the text before the first comma.

    The frame is modified in place and also returned, so the function can be
    used as a step in a ``DataFrame.pipe`` chain.

    Args:
        df: Frame with a string column named ``city_and_code``.

    Returns:
        The same ``df`` object, now carrying the ``city_name`` column.
    """
    # Split every "city, code" string on commas and keep only the first piece.
    df['city_name'] = df['city_and_code'].str.split(",").str.get(0)
    return df
def add_country_name(df: pd.DataFrame, country_name=None) -> pd.DataFrame:
    """Add a ``city_and_country`` column: ``city_name`` followed by the country.

    The frame is modified in place and also returned, so the function can be
    used as a step in a ``DataFrame.pipe`` chain.

    Args:
        df: Frame with a string column named ``city_name``.
        country_name: Text appended to every city name. ``None`` (the
            default) appends nothing, so the new column simply repeats
            ``city_name``.

    Returns:
        The same ``df`` object, now carrying the ``city_and_country`` column.
    """
    col = 'city_name'
    # The default is None, and a string column cannot be concatenated with
    # None, so map it to an empty suffix instead of failing.
    suffix = "" if country_name is None else country_name
    df['city_and_country'] = df[col] + suffix
    return df
# Chain the two table-wise helpers with DataFrame.pipe. The surrounding
# parentheses are required: without them the continuation line would start
# with a bare ".pipe(...)", which is a syntax error.
(
    df_p.pipe(extract_city_name)
    .pipe(add_country_name, country_name="China")
)
city_and_code city_name city_and_country
0 JiuJiang, 0792 JiuJiang JiuJiangChina
2. 行(列)相关的函数 Row or column-wise function application
# 3x4 frame of random floats with string row labels '1'..'3' and column
# labels 'A'..'D'.
df = pd.DataFrame(
    data=np.random.random(size=(3, 4)),
    index=list('123'),
    columns=list('ABCD'),
)
A B C D
1 0.633890 0.283490 0.951358 0.154136
2 0.562955 0.054361 0.656329 0.419395
3 0.045393 0.878217 0.678498 0.465347
# Reduce column by column: each column is handed to np.mean, so the result
# has one entry per column label. axis=0 is the default, spelled out here.
df.apply(np.mean, axis=0)
A 0.414079
B 0.405356
C 0.762062
D 0.346293
# Reduce row by row: each row is handed to np.mean, so the result has one
# entry per row label.
df.apply(np.mean, axis='columns')
1 0.505719
2 0.423260
3 0.516864
3. 聚合 Aggregation API
# Aggregation examples on a 10-row, 3-column frame of random normal values
# indexed by consecutive dates starting 2000-01-01.
tsdf = pd.DataFrame(
    np.random.randn(10, 3),
    columns=['A', 'B', 'C'],
    index=pd.date_range('1/1/2000', periods=10),
)
# Blank out rows 3..6 so the aggregations below run over missing values.
tsdf.iloc[3:7] = np.nan

# String aliases are used instead of np.sum / np.mean: passing the NumPy
# callables to .agg is deprecated (FutureWarning since pandas 2.1), while the
# aliases give the same results and the same 'sum' / 'mean' labels.
agg1 = tsdf.agg('sum')  # equivalent to tsdf.sum()
agg2 = tsdf['A'].agg('sum')  # single column -> scalar
agg3 = tsdf.agg(['sum', 'mean'])  # several reducers -> one row per reducer
agg4 = tsdf['A'].agg(['sum', 'mean'])
# A lambda can be mixed with aliases; its row is labelled '<lambda>'.
agg5 = tsdf['A'].agg(['sum', lambda x: x.mean()])
# A dict selects a different reducer for each listed column.
agg6 = tsdf.agg({'A': 'mean', 'B': 'sum'})
# Transform API examples. Each call is a bare expression statement, so its
# result is discarded unless the snippet runs in an interactive session.

# One NumPy ufunc applied to a single column.
tsdf['A'].transform(np.abs)
# A list of functions applied to the whole frame.
tsdf.transform([np.abs, lambda x: x + 1])
# The same list of functions applied to a single column.
tsdf['A'].transform([np.abs, lambda x: x + 1])
# A dict mapping column label -> function: only columns 'A' and 'B' are named.
tsdf.transform({'A': np.abs, 'B': lambda x: x + 1})
# A dict value may itself be a list, mixing a lambda with the string 'sqrt'.
tsdf.transform({'A': np.abs, 'B': [lambda x: x + 1, 'sqrt']})
5. 元素相关的函数 Applying elementwise functions
# 5x3 frame of random floats used to demonstrate element-wise application.
df = pd.DataFrame(np.random.random((5, 3)), columns=['one', 'two', 'three'])


def f(x):
    """Return the number of characters in ``str(x)``."""
    return len(str(x))


# Series.map calls f once for every element of column 'one'.
df1 = df['one'].map(f)
# Element-wise application over the whole frame. DataFrame.applymap is
# deprecated in favour of DataFrame.map (pandas 2.1), so prefer map when the
# installed pandas provides it and fall back to applymap otherwise.
df2 = df.map(f) if hasattr(df, 'map') else df.applymap(f)