Segment-wise minimum

Input

import pandas as pd
import numpy as np

# 'cond' marks the rows at which the running minimum should restart
A = [17, 18, 21, 15, 18, 19, 22, 16, 30, 50]
cond = [False, True, False, False, False, True, False, False, True, False]
df = pd.DataFrame({'A': A, 'cond': cond})
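
The goal is a running minimum of column A that restarts at every row where cond is True. Worked out by hand for this input (these are the values the 'expected' column in Method 2 below reproduces):

# segments:        [17]  [18, 21, 15, 18]  [19, 22, 16]  [30, 50]
# segment cummin:  [17]  [18, 18, 15, 15]  [19, 19, 16]  [30, 30]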

Method 1

def abc(arr):
    # rolling(2) hands over a length-2 window: use its index to find
    # the previous row (l) and the current row (c)
    c = arr.index[-1]
    l = arr.index[0]
    if df['cond'][c]:
        # a True flag starts a new segment: reset the running minimum
        df.loc[c, 'fx'] = df['A'][c]
    elif df['A'][c] < df['fx'][l]:
        # the current value is a new minimum for this segment
        df.loc[c, 'fx'] = df['A'][c]
    else:
        # otherwise carry the previous running minimum forward
        df.loc[c, 'fx'] = df['fx'][l]
    return 0

# rolling(2).apply() is only used to walk over consecutive index pairs;
# its return value is discarded and the result is written to df['fx'] as a side effect
# (row 0 never forms a full window, so df['fx'][0] stays NaN)
df[['A', 'cond']].rolling(2).apply(abc)
#6.34 ms ± 557 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
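
Method 1 is easier to follow as a plain loop: the rolling window only supplies pairs of consecutive index labels, and all of the work happens through the side effect on df['fx']. A minimal sketch of the equivalent explicit loop (df2 is an illustrative copy, not part of the original post):

df2 = df.copy()
df2['fx'] = np.nan
df2.loc[0, 'fx'] = df2.loc[0, 'A']   # the first row opens its own segment
for c in range(1, len(df2)):
    if df2.loc[c, 'cond']:
        # reset at a True flag
        df2.loc[c, 'fx'] = df2.loc[c, 'A']
    else:
        # carry the running minimum forward
        df2.loc[c, 'fx'] = min(df2.loc[c, 'A'], df2.loc[c - 1, 'fx'])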

Method 2

# cond.cumsum() gives every segment its own group label; cummin() then runs per segment
df['expected'] = df.groupby(df.cond.cumsum())['A'].cummin()
#1.49 ms ± 88.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
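
The trick is the group key: a cumulative sum of the boolean flag increments at every True, so each reset opens a new group and cummin() restarts inside it. For this input:

print(df.cond.cumsum().tolist())
# [0, 1, 1, 1, 1, 2, 2, 2, 3, 3]  (rows sharing a label form one segment)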

Method 3

# First get all the index labels at which the cummin() should reset
my_ix = df[df['cond']].index
# (note: rows before the first True flag, index 0 here, will not appear in the result)
return_list = []
# Loop over the segments between consecutive reset indexes
for i in range(len(my_ix)):
    ix_start = my_ix[i]
    try:
        ix_end = my_ix[i + 1]
    except IndexError:
        # This happens on the last segment, which runs to the end of the frame
        ix_end = None

    cummin_df = df[ix_start:ix_end]['A'].cummin()
    return_list.append(cummin_df)
pd.concat(return_list)
#737 µs ± 53.3 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
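
As a quick sanity check (not part of the original post, and assuming Method 2's 'expected' column has already been computed), the loop result matches Method 2 on the rows it covers, but row 0 is missing from the concatenated output:

method3 = pd.concat(return_list)
print(method3.index.tolist())                                # starts at 1, row 0 is absent
print((method3 == df.loc[method3.index, 'expected']).all())  # True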

Note: the timings above are on this tiny dataset. I then reran the test with a million rows:

The for loop took almost a full minute, apply effectively froze the machine, and only groupby finished, in about 0.08 seconds.
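
For anyone who wants to reproduce the comparison, here is a minimal sketch of a large-scale setup (the random data below is an assumption, not the author's actual test set):

import numpy as np
import pandas as pd

n = 1_000_000
rng = np.random.default_rng(0)
big = pd.DataFrame({
    'A': rng.integers(0, 1000, n),
    'cond': rng.random(n) < 0.01,   # roughly 1% of rows reset the minimum
})

# time this (e.g. with %timeit) against the rolling-apply and for-loop versions
big['expected'] = big.groupby(big.cond.cumsum())['A'].cummin()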
