Python dataframe 处理记录
1、分组求和
import pandas as pd from datetime import datetime, timedelta data = pd.DataFrame({ "company": ["A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A"], "salary": [10, 12, 13, 36, 12, 10, 12, 13, 36, 12, 36, 12], "age": [3, 1, 1, 1, 1, 2, 2, 1, 3, 1, 11, 4], "coll_time": ['2022-09-03 20:30:00', '2022-09-03 20:30:10', '2022-09-03 20:30:20', '2022-09-03 20:30:30', '2022-09-03 20:30:40', '2022-09-03 20:30:50', '2022-09-03 20:31:00', '2022-09-03 20:31:00', '2022-09-03 20:31:10', '2022-09-03 20:31:20', '2022-09-03 20:32:10', '2022-09-03 20:32:40'] }) # 时间格式转化 data['coll_time'] = data['coll_time'].astype('datetime64') print(data) # 行时间差 data['time_diff'] = data['coll_time'].diff() data["time_diff_seconds"] = data[["time_diff"]].apply(lambda x: x["time_diff"].total_seconds(), axis=1) print(data) # 均匀切片求值 res = [] for i in range(0, int(len(data)) + 1, 6): df_slice = data[i:i + 6] v = df_slice['salary'].max()/df_slice['time_diff_seconds'].sum() res.append(v) # 取众数 print('众数', data["time_diff_seconds"].mode().pop(0)) # 分组求和 dtd = data.groupby(['salary']).agg({'time_diff_seconds': 'sum', 'salary': 'max'}) # 两列相乘 dtd["mul"] = dtd[["salary", 'time_diff_seconds']].apply(lambda x: x["salary"] * x["time_diff_seconds"], axis=1) print(dtd)
控制台输出
company salary age coll_time 0 A 10 3 2022-09-03 20:30:00 1 A 12 1 2022-09-03 20:30:10 2 A 13 1 2022-09-03 20:30:20 3 A 36 1 2022-09-03 20:30:30 4 A 12 1 2022-09-03 20:30:40 5 A 10 2 2022-09-03 20:30:50 6 A 12 2 2022-09-03 20:31:00 7 A 13 1 2022-09-03 20:31:00 8 A 36 3 2022-09-03 20:31:10 9 A 12 1 2022-09-03 20:31:20 10 A 36 11 2022-09-03 20:32:10 11 A 12 4 2022-09-03 20:32:40 company salary age coll_time time_diff time_diff_seconds 0 A 10 3 2022-09-03 20:30:00 NaT NaN 1 A 12 1 2022-09-03 20:30:10 0 days 00:00:10 10.0 2 A 13 1 2022-09-03 20:30:20 0 days 00:00:10 10.0 3 A 36 1 2022-09-03 20:30:30 0 days 00:00:10 10.0 4 A 12 1 2022-09-03 20:30:40 0 days 00:00:10 10.0 5 A 10 2 2022-09-03 20:30:50 0 days 00:00:10 10.0 6 A 12 2 2022-09-03 20:31:00 0 days 00:00:10 10.0 7 A 13 1 2022-09-03 20:31:00 0 days 00:00:00 0.0 8 A 36 3 2022-09-03 20:31:10 0 days 00:00:10 10.0 9 A 12 1 2022-09-03 20:31:20 0 days 00:00:10 10.0 10 A 36 11 2022-09-03 20:32:10 0 days 00:00:50 50.0 11 A 12 4 2022-09-03 20:32:40 0 days 00:00:30 30.0 众数 10.0 time_diff_seconds salary mul salary 10 10.0 10 100.0 12 70.0 12 840.0 13 10.0 13 130.0 36 70.0 36 2520.0