【LeetCode 2701. 连续递增交易】[MySQL 用户变量/Pandas]面向过程编程得到严格递增连续子序列的开始位置,结束位置,长度等统计信息
题目地址
https://leetcode.cn/problems/consecutive-transactions-with-increasing-amounts/
MySQL代码
# Write your MySQL query statement below
with t1 as(
select
*
# --------------------------只需要改动这里的逻辑,其他不要动。注意里面的语句是“顺序执行的”----------------------
-- 如果customer_id切换了
,(case when (@prev_customer_id is null or @prev_customer_id<>customer_id) then @interval_id := @interval_id +1 end) -- 这句看你需要,如果跨顾客需要interval_id额外间隔1,你应该保留这一句。这句注释或者不注释都能AC
,(case when @prev_customer_id is null or @prev_customer_id<>customer_id then @reg:=0 end)
,(case when @prev_customer_id is null or @prev_customer_id<>customer_id then @prev_amount:=0 end)
,(case when @prev_customer_id is null or @prev_customer_id<>customer_id then @consecutive_start:=transaction_date end)
,(case when @prev_customer_id is null or @prev_customer_id<>customer_id then @consecutive_end:=transaction_date end)
,(case when @prev_customer_id is null or @prev_customer_id<>customer_id then @prev_transaction_date:= transaction_date - interval 1 day end) -- 当前日的上一日,哨兵
, @prev_transaction_date as prev_transaction_date -- 把这个用户变量@prev_transaction_date存到列里。
-- 这是维护一个【严格递增连续子序列的长度】
,(case when amount>@prev_amount and @prev_transaction_date + interval 1 day =transaction_date then @reg:=@reg+1 else @reg:=1 end)
,@reg as reg -- 把这个用户变量@reg存到列里。
-- 更新一下consecutive_start
,(case when customer_id=@prev_customer_id and amount>@prev_amount and @prev_transaction_date + interval 1 day =transaction_date
then @consecutive_start:=@consecutive_start
else @consecutive_start:=transaction_date end) as consecutive_start
-- 不管什么情况,这么更新consecutive_end总是对的
,@consecutive_end:=transaction_date as consecutive_end
-- 更新一下interval_id
,(case when customer_id=@prev_customer_id and amount>@prev_amount and @prev_transaction_date + interval 1 day =transaction_date
then @interval_id:=@interval_id
else @interval_id:=@interval_id+1 end) as interval_id
-- 准备好处理下一行了,一些@prev_*用户变量更新一下
,@prev_amount:=amount
,@prev_customer_id:=customer_id
,@prev_transaction_date:=transaction_date
,@prev_amount:=amount
# --------------------------只需要改动这里的逻辑,其他不要动。注意里面的语句是“顺序执行的”----------------------
from
(select * from Transactions order by customer_id asc, transaction_date asc) sorted_ticks,
(select @reg:=0 -- 某用户每日购买量严格增加的天数(连续日)
,@prev_amount:=null, @prev_customer_id:=null, @prev_transaction_date:=null,@prev_amount:=null
,@consecutive_start:=null , @consecutive_end:=null,@interval_id := 0)vars
)
-- 对于每一个interval_id,找到reg最大的那一行,就是该区间最终的统计信息
select customer_id,consecutive_start,consecutive_end
from (select *, row_number() over(partition by interval_id order by reg desc) as rn from t1) tmp1
where rn=1 and reg>=3
order by customer_id,consecutive_start,consecutive_end
-- 下面注释掉的代码是用来调试的
# select
# customer_id,transaction_date,reg,amount,prev_transaction_date,interval_id
# ,consecutive_start,consecutive_end
# from t1
# order by customer_id asc, transaction_date asc
等效pandas代码
import pandas as pd
def consecutive_increasing_transactions(transactions: pd.DataFrame) -> pd.DataFrame:
# Ensure the DataFrame is sorted appropriately
transactions = transactions.sort_values(by=['customer_id', 'transaction_date'])
# Create the 'interval_id' and 'reg' columns
transactions['interval_id'] = 0
transactions['reg'] = 1
# Initialize variables to track the previous row values
prev_customer_id = None
prev_amount = None
prev_transaction_date = None
interval_id = 0
# Lists to hold the processed column data
consecutive_starts = []
consecutive_ends = []
regs = []
interval_ids = []
for index, row in transactions.iterrows():
customer_id = row['customer_id']
amount = row['amount']
transaction_date = row['transaction_date']
# Check if it's a new customer or a new increasing transaction sequence
if prev_customer_id != customer_id or amount <= prev_amount or \
prev_transaction_date + pd.Timedelta(days=1) != transaction_date:
interval_id += 1
reg = 1
consecutive_start = transaction_date
else:
reg += 1
# Append the new values to the lists
consecutive_starts.append(consecutive_start)
consecutive_ends.append(transaction_date)
regs.append(reg)
interval_ids.append(interval_id)
# Update the previous row values
prev_customer_id = customer_id
prev_amount = amount
prev_transaction_date = transaction_date
# Assign the processed lists to the DataFrame
transactions['consecutive_start'] = consecutive_starts
transactions['consecutive_end'] = consecutive_ends
transactions['reg'] = regs
transactions['interval_id'] = interval_ids
# Filter the DataFrame to only include sequences with a 'reg' of at least 3
transactions = transactions[transactions['reg'] >= 3]
# Group by 'interval_id' and take the row with the maximum 'reg' in each group
transactions = transactions.loc[transactions.groupby('interval_id')['reg'].idxmax()]
# Select the required columns and return
return transactions[['customer_id', 'consecutive_start', 'consecutive_end']].reset_index(drop=True)