【LeetCode1651. Hopper 公司查询 III 】over(ORDER BY str_to_date(`month`, '%Y-%m') rows between CURRENT ROW AND 2 following)实现【当月和接下来2个月】滑动窗口
题目地址
https://leetcode.cn/problems/hopper-company-queries-iii/description/
MySQL代码
-- CTE生成2020年每月的最后一天
WITH RECURSIVE months AS (
SELECT LAST_DAY('2019-12-01') AS month_end -- 初始日期的前一个月的最后一天
UNION ALL
SELECT LAST_DAY(month_end + INTERVAL 1 DAY) -- 下一个月的最后一天
FROM months
WHERE month_end < '2020-12-01' -- 递归结束条件
)
,
months2020 as (
SELECT *, DATE_FORMAT(month_end, '%Y-%m') AS `month` -- 格式化日期,只保留年月
FROM months
where DATE_FORMAT(month_end, '%Y')="2020"
)
,
t1 as(
SELECT months2020.`month`
, ifnull(sum( ar.ride_distance) , 0 ) as ride_distance
, ifnull(sum( ar.ride_duration) ,0) as ride_duration
from months2020
left join Rides r
on months2020.`month`=DATE_FORMAT(r.requested_at,"%Y-%m")
left join AcceptedRides ar
on ar.ride_id=r.ride_id
group by months2020.`month`
)
SELECT * from (
SELECT
MONTH(str_to_date(`month`, '%Y-%m')) AS `month`,
round(AVG(ride_distance) OVER (
ORDER BY str_to_date(`month`, '%Y-%m')
ROWS BETWEEN CURRENT ROW AND 2 following
),2) AS average_ride_distance,
round(AVG(ride_duration) OVER (
ORDER BY str_to_date(`month`, '%Y-%m')
ROWS BETWEEN CURRENT ROW AND 2 following
),2) AS average_ride_duration
FROM
t1
) result_to_be_truncated
where `month`<=10
等效pandas代码
import pandas as pd
import numpy as np
def hopper_company_queries(drivers: pd.DataFrame, rides: pd.DataFrame, accepted_rides: pd.DataFrame) -> pd.DataFrame:
# Generate the last day of each month in 2020
month_ends = pd.date_range(start='2020-01-31', end='2020-12-31', freq='M')
months2020 = pd.DataFrame({'month_end': month_ends})
months2020['month'] = months2020['month_end'].dt.to_period('M').astype(str)
# Ensure the 'requested_at' in rides is a datetime
rides['requested_at'] = pd.to_datetime(rides['requested_at'])
rides['month'] = rides['requested_at'].dt.to_period('M').astype(str)
# Merge the months2020 DataFrame with rides and accepted_rides DataFrames
merged = pd.merge(months2020, rides, how='left', on='month')
merged = pd.merge(merged, accepted_rides, how='left', on='ride_id')
# Group by month and calculate the sum of ride_distance and ride_duration
grouped = merged.groupby('month').agg({
'ride_distance': 'sum',
'ride_duration': 'sum'
}).fillna(0).reset_index()
# Calculate the 3-month moving average for ride_distance and ride_duration
# Shift the result by 2 to align with the SQL output (current row + 2 following rows)
grouped['average_ride_distance'] = grouped['ride_distance'].rolling(window=3, min_periods=1).mean().round(2).shift(-2)
grouped['average_ride_duration'] = grouped['ride_duration'].rolling(window=3, min_periods=1).mean().round(2).shift(-2)
# Filter the DataFrame to only include months up to October
# Extract the month number from the 'month' column before filtering to get the correct month numbers
grouped['month_num'] = grouped['month'].apply(lambda x: int(x.split('-')[1]))
grouped = grouped[grouped['month_num'] <= 10]
# Select the required columns and rename them as in the expected output
result_to_be_truncated = grouped[['month_num', 'average_ride_distance', 'average_ride_duration']]
result_to_be_truncated.columns = ['month', 'average_ride_distance', 'average_ride_duration']
# Drop the rows with NaN values that are a result of the shift operation
result_to_be_truncated = result_to_be_truncated.dropna()
return result_to_be_truncated