一、交叉表
1 import pandas as pd 2 import numpy as np 3 4 # 加载数据 5 detail = pd.read_excel("../day05/meal_order_detail.xlsx") 6 # print("detail: \n", detail) 7 print("detail的列名称: \n", detail.columns) 8 9 # 构建交叉表 10 # index ——指定某列进行行分组 11 # columns ——指定某列进行列分组 12 # 如果不传其他参数,只传index 与columns, 那么交叉表构建 index 与 columns的相对数量 13 14 res = pd.crosstab( 15 index=detail.loc[:, "dishes_name"], 16 columns=detail.loc[:, "amounts"], 17 ) 18 print("res: \n", res) 19 20 # 以index这一列为行分组,以columns这一列为列分组,对values进行统计aggfunc指标 21 # values 与 aggfunc必须同时存在 22 # 只能按照单列进行行分组,只能按照按照单列进行列分组,只能统计某一列的aggfunc指标 23 res = pd.crosstab( 24 index=detail.loc[:, "dishes_name"], 25 columns=detail.loc[:, "amounts"], 26 values=detail.loc[:, "counts"], 27 aggfunc=np.max 28 ) 29 30 print("res: \n", res)
二、透视表
1 import pandas as pd 2 import numpy as np 3 4 # 创建一个dataframe 5 df = pd.DataFrame( 6 data={ 7 "cls_id": ["A", "B", "C", "A", "B", "C", "A", "B", "C", "A", "B","C"], 8 "group_id": [1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2], 9 "name": ["xixi", "haha", "taotao", "huihui", "ranran", "Island", "Tree" ,"bamao", "simao", "hanhan", "qimao", "sanmao"], 10 "score": [92, 93, 39, 89, 90.5, 80, 91, 92, 65, 73, 34.5, 56], 11 "height": [165, 166, 167, 168, 152, 193, 192, 190, 173, 172, 170, 169] 12 }, 13 ) 14 15 res = df.groupby(by="cls_id")[["score", "height"]].mean() 16 print("groupby-res: \n", res) 17 18 # data ——df 19 # index 指定行分组 ——可以按照多列进行行分组 20 # values ——aggfunc 所针对的主体,可以是多列 21 # aggfunc ——默认为均值 22 23 # 创建透视表 24 res = pd.pivot_table( 25 data=df, 26 # index="cls_id", # 按照行或列分组 27 # columns="cls_id", 28 columns=["cls_id", "group_id"], 29 values=["score", "height"], 30 aggfunc="mean", 31 margins=True, 32 margins_name="xixi" 33 ) 34 35 print("res: \n", res) 36 37 print("~"*60) 38 39 # 加载数据 40 detail = pd.read_excel("../day05/meal_order_detail.xlsx") 41 # print("detail: \n", detail) 42 print("detail的列名称: \n", detail.columns) 43 44 res = pd.pivot_table( 45 data=detail, 46 # index="order_id", 47 # index="cost", 48 columns="dishes_id", 49 # values="amounts", 50 values="cost", 51 aggfunc="max", 52 fill_value=-1 # 将全为空的数据进行填充 53 ) 54 55 print("res: \n", res)