"""Demo script: drop all-empty columns, then group/aggregate with pandas.

Reads the meal-order detail workbook, removes columns that contain no
non-null values, and walks through groupby / agg / apply / transform
examples, printing the intermediate results.
"""
import pandas as pd
import numpy as np

# Load the data.
detail = pd.read_excel("../day05/meal_order_detail.xlsx")
print("detail: \n", detail)
print("detail的列名称: \n", detail.columns)

# --- Drop columns that are entirely empty ---------------------------------
# Series.count() returns the number of non-null entries, so a count of 0
# means the whole column is missing.
drop_list = [
    column for column in detail.columns if detail.loc[:, column].count() == 0
]
print(drop_list)

# Remove the collected columns.
detail = detail.drop(columns=drop_list)
print("删除全部为空列之后的结果: \n", detail.shape)
print("删除全部为空列之后的结果的列名称: \n", detail.columns)
print("^" * 60)

# --- Grouped statistics ---------------------------------------------------
# Group by a single column and take the maximum dishes_id per order.
# The two calls are equivalent ways to name the grouping key (by column
# label, or by passing the column itself); the second overwrites the first.
res_ = detail.groupby(by="order_id")["dishes_id"].max()
res_ = detail.groupby(by=detail["order_id"])["dishes_id"].max()

print("res_: \n", res_)

# Sample data: scores and heights of students, by class and group.
df = pd.DataFrame(
    data={
        "cls_id": ["A", "B", "C", "A", "B", "C", "A", "B", "C", "A", "B", "C"],
        "group_id": [1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2],
        "name": [
            "xixi", "haha", "taotao", "huihui", "ranran", "Island",
            "Tree", "bamao", "simao", "hanhan", "qimao", "sanmao",
        ],
        "score": [92, 93, 39, 89, 90.5, 80, 91, 92, 65, 73, 34.5, 56],
        "height": [165, 166, 167, 168, 152, 193, 192, 190, 173, 172, 170, 169],
    },
)
print("df: \n", df)

# Group by class (single key) and compute the mean score per class.
res = df.groupby(by="cls_id")["score"].mean()
print(res)

# Group by class, then by group, and compute the mean score per (class, group).
res = df.groupby(by=["cls_id", "group_id"])["score"].mean()
print("res: \n", res)

# Group by several keys and average both score and height.
res = df.groupby(by=["cls_id", "group_id"])[["score", "height"]].mean()
print("res: \n", res)

# --- agg: different statistics per column ---------------------------------
# Aggregations are given by string name rather than as NumPy callables:
# recent pandas versions warn when np.sum / np.mean / np.max / np.min are
# passed to agg, and the string form keeps the result labels as plain
# "sum" / "mean" / "max" / "min".
# NOTE: an earlier variant of this demo tried a "max of one column, mean of
# height" aggregation on `detail`; "height" is a column of `df`, so that
# example was left disabled and is not reproduced here.

# A different statistic for each column (result is overwritten below).
res = detail.agg({"counts": "sum", "amounts": "mean"})

# The same set of statistics for several columns (also overwritten below).
res = detail[["counts", "amounts"]].agg(["max", "mean"])

# A different number of statistics for each column.
res = detail.agg({"counts": ["mean", "max"], "amounts": "min"})

print("res :\n", res)

# --- apply / transform: custom element-wise operation ---------------------
# Both calls add 1 to every value of the selected columns; transform's
# result overwrites apply's.
res = detail[["counts", "amounts"]].apply(lambda x: x + 1)
res = detail[["counts", "amounts"]].transform(lambda x: x + 1)
# A two-argument function such as `lambda x, y: x + y` is an error here:
# apply hands the function one column at a time, so it cannot combine
# two columns in a single call.

print('detail[["counts", "amounts"]]: \n', detail[["counts", "amounts"]])
print("res :\n", res)
print(detail["counts"])