分段取得最小值
input
import pandas as pd
import numpy as np
# Sample input: the values to scan, and the flags marking the rows at which
# the running minimum starts over.
A = [17, 18, 21, 15, 18, 19, 22, 16, 30, 50]
cond = [False, True, False, False, False, True, False, False, True, False]
df = pd.DataFrame({'A': A, 'cond': cond})
方法1
def abc(arr):
    """Record the segmented running minimum for the newest row of a window.

    Meant to be driven by ``rolling(2).apply(..., raw=False)``: ``arr`` is the
    two-row window as a Series, and only its index labels are used. The value
    for the window's last row is written into ``df['fx']`` as a side effect.

    Args:
        arr: Window Series whose first and last index labels identify the
            previous and the current row of the module-level ``df``.

    Returns:
        Always 0. ``rolling.apply`` requires a numeric result; it is unused.
    """
    prev, cur = arr.index[0], arr.index[-1]
    if df.at[cur, 'cond']:
        # A flagged row restarts the segment, so it is its own minimum.
        df.at[cur, 'fx'] = df.at[cur, 'A']
    else:
        # Carry the running minimum forward. The fallback must be the previous
        # *minimum* ('fx'), not the previous raw value ('A'); otherwise an
        # increasing run such as 10, 20, 30 would produce 10, 10, 20.
        df.at[cur, 'fx'] = min(df.at[cur, 'A'], df.at[prev, 'fx'])
    return 0


# Seed the first row: it has no predecessor, so rolling(2) never visits it,
# and the second row needs a previous minimum to compare against.
df['fx'] = np.nan
if len(df):
    df.loc[df.index[0], 'fx'] = df['A'].iloc[0]
# Only the window's index labels are read, so rolling over a single column is
# enough. raw=False makes apply pass a Series (with its index), not an ndarray.
df['A'].rolling(2).apply(abc, raw=False)
#6.34 ms ± 557 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
方法2
# Each True in 'cond' bumps the cumulative sum, which gives every segment its
# own group key; the running minimum of 'A' is then taken within each group.
df['expected'] = df['A'].groupby(df['cond'].cumsum()).cummin()
#1.49 ms ± 88.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
方法3
# Positions (not labels) of the rows at which the running minimum restarts, so
# the slicing below does not depend on the frame having a default RangeIndex.
my_ix = np.flatnonzero(df['cond'].to_numpy())
# Rows ahead of the first flagged row form a segment of their own. Without
# this they would be missing from the result, and a frame with no flagged row
# would leave nothing to concatenate.
if len(my_ix) == 0 or my_ix[0] != 0:
    my_ix = np.insert(my_ix, 0, 0)
# Each segment stops where the next one starts; the last runs to the end.
ix_ends = np.append(my_ix[1:], len(df))
# Running minimum of every segment, in row order.
return_list = [
    df['A'].iloc[ix_start:ix_end].cummin()
    for ix_start, ix_end in zip(my_ix, ix_ends)
]
pd.concat(return_list)
#737 µs ± 53.3 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
注意:以上测试用的是小数据集;之后我又用百万行数据测试:
for 循环用了将近 1 分钟,apply 直接卡死,而 groupby 仅用了 0.08 秒。
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 阿里最新开源QwQ-32B,效果媲美deepseek-r1满血版,部署成本又又又降低了!
· 开源Multi-agent AI智能体框架aevatar.ai,欢迎大家贡献代码
· Manus重磅发布:全球首款通用AI代理技术深度解析与实战指南
· 被坑几百块钱后,我竟然真的恢复了删除的微信聊天记录!
· AI技术革命,工作效率10个最佳AI工具