Python dataframe 处理记录
1、分组求和
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 | import pandas as pd from datetime import datetime, timedelta data = pd.DataFrame({ "company" : [ "A" , "A" , "A" , "A" , "A" , "A" , "A" , "A" , "A" , "A" , "A" , "A" ], "salary" : [ 10 , 12 , 13 , 36 , 12 , 10 , 12 , 13 , 36 , 12 , 36 , 12 ], "age" : [ 3 , 1 , 1 , 1 , 1 , 2 , 2 , 1 , 3 , 1 , 11 , 4 ], "coll_time" : [ '2022-09-03 20:30:00' , '2022-09-03 20:30:10' , '2022-09-03 20:30:20' , '2022-09-03 20:30:30' , '2022-09-03 20:30:40' , '2022-09-03 20:30:50' , '2022-09-03 20:31:00' , '2022-09-03 20:31:00' , '2022-09-03 20:31:10' , '2022-09-03 20:31:20' , '2022-09-03 20:32:10' , '2022-09-03 20:32:40' ] }) # 时间格式转化 data[ 'coll_time' ] = data[ 'coll_time' ].astype( 'datetime64' ) print (data) # 行时间差 data[ 'time_diff' ] = data[ 'coll_time' ].diff() data[ "time_diff_seconds" ] = data[[ "time_diff" ]]. apply ( lambda x: x[ "time_diff" ].total_seconds(), axis = 1 ) print (data) # 均匀切片求值 res = [] for i in range ( 0 , int ( len (data)) + 1 , 6 ): df_slice = data[i:i + 6 ] v = df_slice[ 'salary' ]. max () / df_slice[ 'time_diff_seconds' ]. sum () res.append(v) # 取众数 print ( '众数' , data[ "time_diff_seconds" ].mode().pop( 0 )) # 分组求和 dtd = data.groupby([ 'salary' ]).agg({ 'time_diff_seconds' : 'sum' , 'salary' : 'max' }) # 两列相乘 dtd[ "mul" ] = dtd[[ "salary" , 'time_diff_seconds' ]]. apply ( lambda x: x[ "salary" ] * x[ "time_diff_seconds" ], axis = 1 ) print (dtd) |
控制台输出
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 | company salary age coll_time 0 A 10 3 2022 - 09 - 03 20 : 30 : 00 1 A 12 1 2022 - 09 - 03 20 : 30 : 10 2 A 13 1 2022 - 09 - 03 20 : 30 : 20 3 A 36 1 2022 - 09 - 03 20 : 30 : 30 4 A 12 1 2022 - 09 - 03 20 : 30 : 40 5 A 10 2 2022 - 09 - 03 20 : 30 : 50 6 A 12 2 2022 - 09 - 03 20 : 31 : 00 7 A 13 1 2022 - 09 - 03 20 : 31 : 00 8 A 36 3 2022 - 09 - 03 20 : 31 : 10 9 A 12 1 2022 - 09 - 03 20 : 31 : 20 10 A 36 11 2022 - 09 - 03 20 : 32 : 10 11 A 12 4 2022 - 09 - 03 20 : 32 : 40 company salary age coll_time time_diff time_diff_seconds 0 A 10 3 2022 - 09 - 03 20 : 30 : 00 NaT NaN 1 A 12 1 2022 - 09 - 03 20 : 30 : 10 0 days 00 : 00 : 10 10.0 2 A 13 1 2022 - 09 - 03 20 : 30 : 20 0 days 00 : 00 : 10 10.0 3 A 36 1 2022 - 09 - 03 20 : 30 : 30 0 days 00 : 00 : 10 10.0 4 A 12 1 2022 - 09 - 03 20 : 30 : 40 0 days 00 : 00 : 10 10.0 5 A 10 2 2022 - 09 - 03 20 : 30 : 50 0 days 00 : 00 : 10 10.0 6 A 12 2 2022 - 09 - 03 20 : 31 : 00 0 days 00 : 00 : 10 10.0 7 A 13 1 2022 - 09 - 03 20 : 31 : 00 0 days 00 : 00 : 00 0.0 8 A 36 3 2022 - 09 - 03 20 : 31 : 10 0 days 00 : 00 : 10 10.0 9 A 12 1 2022 - 09 - 03 20 : 31 : 20 0 days 00 : 00 : 10 10.0 10 A 36 11 2022 - 09 - 03 20 : 32 : 10 0 days 00 : 00 : 50 50.0 11 A 12 4 2022 - 09 - 03 20 : 32 : 40 0 days 00 : 00 : 30 30.0 众数 10.0 time_diff_seconds salary mul salary 10 10.0 10 100.0 12 70.0 12 840.0 13 10.0 13 130.0 36 70.0 36 2520.0 |
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 全程不用写代码,我用AI程序员写了一个飞机大战
· MongoDB 8.0这个新功能碉堡了,比商业数据库还牛
· 记一次.NET内存居高不下排查解决与启示
· DeepSeek 开源周回顾「GitHub 热点速览」
· 白话解读 Dapr 1.15:你的「微服务管家」又秀新绝活了