pandas 的三角恋

1. 表相关的函数 Table-wise function application

# Single-row sample frame whose only column holds a city name and a code
# joined by a comma.
df_p = pd.DataFrame(
    data={
        'city_and_code': ['JiuJiang, 0792'],
    }
)
  city_and_code
0  JiuJiang, 0792
def extract_city_name(df):
    """Add a ``city_name`` column holding the text before the first comma.

    Each value of the ``city_and_code`` column is split on ``","`` and the
    first piece is stored under ``city_name``.

    Args:
        df: DataFrame with a string column named ``city_and_code``.

    Returns:
        The same ``df`` object (it is modified in place), which lets the
        function be chained through ``DataFrame.pipe``.
    """
    pieces = df['city_and_code'].str.split(",")
    first_piece = pieces.str.get(0)
    df['city_name'] = first_piece
    return df
   
def add_country_name(df, country_name=None):
    """Add a ``city_and_country`` column: ``city_name`` followed by the country.

    Args:
        df: DataFrame that already has a string column named ``city_name``.
        country_name: Text appended to every city name. ``None`` (the
            default) appends nothing, so the new column simply repeats
            ``city_name``.

    Returns:
        The same ``df`` object (it is modified in place), which lets the
        function be chained through ``DataFrame.pipe``.
    """
    # A string cannot be concatenated with None, so the default is mapped
    # to an empty suffix instead of being added to the column directly.
    suffix = "" if country_name is None else country_name
    df['city_and_country'] = df['city_name'] + suffix
    return df

# A method chain that spans several lines has to be wrapped in parentheses;
# a bare continuation line starting with ".pipe" is a syntax error.
(
    df_p.pipe(extract_city_name)
    .pipe(add_country_name, country_name="China")
)
  city_and_code city_name city_and_country
0  JiuJiang, 0792  JiuJiang    JiuJiangChina

2. 行(列)相关的函数 Row or column-wise function application

# 3x4 frame of random floats with string row labels '1'..'3' and column
# labels 'A'..'D'.
df = pd.DataFrame(
    data=np.random.random((3, 4)),
    index=list('123'),
    columns=list('ABCD'),
)
          A         B         C         D
1  0.633890  0.283490  0.951358  0.154136
2  0.562955  0.054361  0.656329  0.419395
3  0.045393  0.878217  0.678498  0.465347
 # Reduce column by column (axis=0 is the default): the result is a Series
 # indexed by the column labels.
 df.apply(np.mean, axis=0)
A    0.414079
B    0.405356
C    0.762062
D    0.346293
 # Reduce row by row: the result is a Series indexed by the row labels.
 df.apply(np.mean, axis='columns')
1    0.505719
2    0.423260
3    0.516864

3. 聚合 Aggregation API

# Ten daily rows of three random series; rows 3-6 are set to NaN so the
# aggregations below operate on data with missing values.
tsdf = pd.DataFrame(
    np.random.randn(10, 3),
    columns=['A', 'B', 'C'],
    index=pd.date_range('1/1/2000', periods=10),
)
tsdf.iloc[3:7] = np.nan

# The reductions are named by their string aliases rather than passed as
# np.sum / np.mean: recent pandas versions warn when NumPy callables are
# handed to agg, and the alias unambiguously selects the pandas method.

# One reduction over the whole frame -> Series indexed by column.
agg1 = tsdf.agg('sum')  # same as tsdf.sum()

# One reduction over a single column -> scalar.
agg2 = tsdf['A'].agg('sum')

# Several reductions over the frame -> DataFrame with one row per reduction.
agg3 = tsdf.agg(['sum', 'mean'])

# Several reductions over a single column -> Series with one entry each.
agg4 = tsdf['A'].agg(['sum', 'mean'])

# Aliases and arbitrary callables can be mixed; the lambda's entry is
# labelled '<lambda>'.
agg5 = tsdf['A'].agg(['sum', lambda x: x.mean()])

# A dict picks a different reduction per column.
agg6 = tsdf.agg({'A': 'mean', 'B': 'sum'})

4. 同时变换 Transform API

# One function applied to a single column.
tsdf['A'].transform(func=np.abs)
# A list of functions applied to every column of the frame.
tsdf.transform(func=[np.abs, lambda x: x + 1])
# The same list applied to a single column.
tsdf['A'].transform(func=[np.abs, lambda x: x + 1])
# A dict selects one function per column ...
tsdf.transform(func={
    'A': np.abs,
    'B': lambda x: x + 1,
})
# ... and a dict value may itself be a list of functions or string aliases.
tsdf.transform(func={
    'A': np.abs,
    'B': [lambda x: x + 1, 'sqrt'],
})

5. 元素相关的函数 Applying elementwise functions

# 5x3 frame of random floats used by the element-wise examples.
df = pd.DataFrame(
    data=np.random.random((5, 3)),
    columns=['one', 'two', 'three'],
)

def f(x):
    """Return the number of characters in the ``str()`` form of ``x``."""
    text = str(x)
    return len(text)

# Series.map applies f to every value of one column.
df1 = df['one'].map(f)
# DataFrame.applymap is deprecated in favour of DataFrame.map in newer
# pandas; use the new name when it exists and fall back to the old one
# on pandas versions that predate it.
df2 = df.map(f) if hasattr(pd.DataFrame, 'map') else df.applymap(f)
posted @ 2020-03-13 16:59  人微言轻1  阅读(177)  评论(0)  编辑  收藏  举报