【LeetCode1225. 报告系统状态的连续日期】[MySQL/PostgreSQL/Oracle/T-SQL]使用lag,lead得到连续段的:开始标志,结束标志,分组号,长度
题目地址
https://leetcode.cn/problems/report-contiguous-dates/description/
题目描述
A system is running one task every day. Every task is independent of the previous tasks. The tasks can fail or succeed.
Write a solution to report the period_state
for each continuous interval of days in the period from 2019-01-01
to 2019-12-31
.
period_state
is 'failed'
if tasks in this interval failed or 'succeeded'
if tasks in this interval succeeded. Interval of days are retrieved as start_date
and end_date.
Return the result table ordered by start_date
.
代码
MySQL
# Write your MySQL query statement below
with t1 as(
select "failed" as status, fail_date as task_date from Failed
union all
select "succeeded" as status, success_date as task_date from Succeeded
)
,
t2 as (
select * from t1
where task_date between date("2019-01-01") and date("2019-12-31")
order by task_date asc
)
,
t3 as (
select *,
lag(task_date, 1, task_date -interval 2 day) over(order by task_date) as lag_task_date,
lag(status, 1) over(order by task_date) as lag_status,
lead(task_date, 1, task_date + interval 2 day) over(order by task_date) as lead_task_date,
lead(status, 1) over(order by task_date) as lead_status
from t2
)
,
Segments as(
select *,
(case when datediff(task_date, lag_task_date) >1 or status<>lag_status then 1 else 0 end) as is_start ,
(case when datediff(lead_task_date, task_date)>1 or status<>lead_status then 1 else 0 end) as is_end
from t3
)
,
GroupsWithNumber AS (
SELECT *,
SUM(is_start) OVER (ORDER BY task_date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS group_num
FROM Segments
)
,
comprehensive_statistic as(
SELECT *,
COUNT(task_date) OVER (PARTITION BY group_num) AS segment_length
FROM GroupsWithNumber
ORDER BY task_date
)
# select * from comprehensive_statistic
select period_state , start_date , end_date from
(select status as period_state, group_num, min(case when is_start then task_date end) as start_date,
max(case when is_end then task_date end) as end_date
from comprehensive_statistic
group by status,group_num) tmp1
order by start_date asc
PostgreSQL
pgsql用的是这个语法 interval '1 day'
with t1 as(
select 'failed' as status, fail_date as task_date from Failed
union all
select 'succeeded' as status, success_date as task_date from Succeeded
),
t2 as (
select * from t1
where task_date between '2019-01-01'::date and '2019-12-31'::date
order by task_date asc
),
t3 as (
select *,
lag(task_date, 1, task_date - interval '2 days') over(order by task_date) as lag_task_date,
lag(status, 1) over(order by task_date) as lag_status,
lead(task_date, 1, task_date + interval '2 days') over(order by task_date) as lead_task_date,
lead(status, 1) over(order by task_date) as lead_status
from t2
),
Segments as(
select *,
(case when task_date - lag_task_date > interval '1 day' or status <> lag_status then 1 else 0 end) as is_start,
(case when lead_task_date - task_date > interval '1 day' or status <> lead_status then 1 else 0 end) as is_end
from t3
),
GroupsWithNumber AS (
SELECT *,
SUM(is_start) OVER (ORDER BY task_date) AS group_num
FROM Segments
),
comprehensive_statistic as(
SELECT *,
COUNT(task_date) OVER (PARTITION BY group_num) AS segment_length
FROM GroupsWithNumber
ORDER BY task_date
)
select period_state, start_date, end_date from (
select status as period_state, group_num, min(case when is_start = 1 then task_date end) as start_date,
max(case when is_end = 1 then task_date end) as end_date
from comprehensive_statistic
group by status, group_num
) tmp1
order by start_date asc;
Oracle
/* Write your PL/SQL query statement below */
WITH t1 AS (
SELECT 'failed' AS status, fail_date AS task_date FROM Failed
UNION ALL
SELECT 'succeeded' AS status, success_date AS task_date FROM Succeeded
),
t2 AS (
SELECT * FROM t1
WHERE task_date BETWEEN DATE '2019-01-01' AND DATE '2019-12-31'
),
t3 AS (
SELECT t2.*,
LAG(task_date, 1, task_date - NUMTODSINTERVAL(2, 'DAY')) OVER (ORDER BY task_date) AS lag_task_date,
LAG(status, 1) OVER (ORDER BY task_date) AS lag_status,
LEAD(task_date, 1, task_date + NUMTODSINTERVAL(2, 'DAY')) OVER (ORDER BY task_date) AS lead_task_date,
LEAD(status, 1) OVER (ORDER BY task_date) AS lead_status
FROM t2
),
Segments AS (
SELECT t3.*,
CASE WHEN task_date - lag_task_date > 1 OR status != lag_status THEN 1 ELSE 0 END AS is_start,
CASE WHEN lead_task_date - task_date > 1 OR status != lead_status THEN 1 ELSE 0 END AS is_end
FROM t3
),
GroupsWithNumber AS (
SELECT Segments.*, SUM(is_start) OVER (ORDER BY task_date RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS group_num
FROM Segments
),
comprehensive_statistic AS (
SELECT GroupsWithNumber.*, COUNT(task_date) OVER (PARTITION BY group_num) AS segment_length
FROM GroupsWithNumber
)
SELECT period_state,
TO_CHAR(start_date, 'YYYY-MM-DD') as start_date,
TO_CHAR(end_date, 'YYYY-MM-DD') as end_date
FROM (
SELECT status AS period_state, group_num, MIN(CASE WHEN is_start = 1 THEN task_date END) AS start_date,
MAX(CASE WHEN is_end = 1 THEN task_date END) AS end_date
FROM comprehensive_statistic
GROUP BY status, group_num
) tmp1
ORDER BY start_date ASC;
T-SQL
/* Write your T-SQL query statement below */
WITH t1 AS (
SELECT 'failed' AS status, fail_date AS task_date FROM Failed
UNION ALL
SELECT 'succeeded' AS status, success_date AS task_date FROM Succeeded
),
t2 AS (
SELECT *
FROM t1
WHERE task_date BETWEEN '2019-01-01' AND '2019-12-31'
),
t3 AS (
SELECT
*,
LAG(task_date, 1, DATEADD(day, -2, task_date)) OVER(ORDER BY task_date) AS lag_task_date,
LAG(status, 1) OVER(ORDER BY task_date) AS lag_status,
LEAD(task_date, 1, DATEADD(day, 2, task_date)) OVER(ORDER BY task_date) AS lead_task_date,
LEAD(status, 1) OVER(ORDER BY task_date) AS lead_status
FROM t2
),
Segments AS (
SELECT *,
CASE
WHEN DATEDIFF(day, lag_task_date, task_date) > 1 OR status <> lag_status THEN 1
ELSE 0
END AS is_start,
CASE
WHEN DATEDIFF(day, task_date, lead_task_date) > 1 OR status <> lead_status THEN 1
ELSE 0
END AS is_end
FROM t3
),
GroupsWithNumber AS (
SELECT *,
SUM(is_start) OVER (ORDER BY task_date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS group_num
FROM Segments
),
comprehensive_statistic AS (
SELECT *,
COUNT(task_date) OVER (PARTITION BY group_num) AS segment_length
FROM GroupsWithNumber
-- ORDER BY task_date -- In a CTE, ORDER BY is typically unnecessary and not allowed unless TOP is used
)
-- SELECT * FROM comprehensive_statistic -- This line is commented out, but you would remove the comment to execute this SELECT statement.
SELECT
period_state,
start_date,
end_date
FROM (
SELECT
status AS period_state,
group_num,
MIN(CASE WHEN is_start = 1 THEN task_date END) AS start_date,
MAX(CASE WHEN is_end = 1 THEN task_date END) AS end_date
FROM comprehensive_statistic
GROUP BY status, group_num
) AS tmp1
ORDER BY start_date ASC
Pandas
import pandas as pd
def report_contiguous_dates(failed: pd.DataFrame, succeeded: pd.DataFrame) -> pd.DataFrame:
# Concatenate the failed and succeeded dataframes with appropriate labels
failed['status'] = 'failed'
succeeded['status'] = 'succeeded'
t1 = pd.concat([
failed.rename(columns={'fail_date': 'task_date'}),
succeeded.rename(columns={'success_date': 'task_date'})
], ignore_index=True)
# Filter the rows for the year 2019
t1['task_date'] = pd.to_datetime(t1['task_date'])
t2 = t1[(t1['task_date'] >= '2019-01-01') & (t1['task_date'] <= '2019-12-31')].sort_values('task_date')
# Add lag and lead columns
t2['lag_task_date'] = t2['task_date'].shift(1)
t2['lag_status'] = t2['status'].shift(1)
t2['lead_task_date'] = t2['task_date'].shift(-1)
t2['lead_status'] = t2['status'].shift(-1)
# Define the start and end of segments
t2['is_start'] = ((t2['task_date'] - t2['lag_task_date']).dt.days > 1) | (t2['status'] != t2['lag_status'])
t2['is_end'] = ((t2['lead_task_date'] - t2['task_date']).dt.days > 1) | (t2['status'] != t2['lead_status'])
# Calculate the group number for each segment
t2['group_num'] = t2['is_start'].cumsum()
# Determine the start and end dates of each contiguous segment
segment_starts = t2[t2['is_start']].groupby(['status', 'group_num'])['task_date'].first().reset_index()
segment_ends = t2[t2['is_end']].groupby(['status', 'group_num'])['task_date'].last().reset_index()
# Merge the start and end dates based on status and group number
segments = pd.merge(segment_starts, segment_ends, on=['status', 'group_num'])
# Select and rename columns
final_report = segments[['status', 'task_date_x', 'task_date_y']].rename(columns={
'status': 'period_state',
'task_date_x': 'start_date',
'task_date_y': 'end_date'
})
# Sort by start_date
final_report.sort_values('start_date', inplace=True)
return final_report.reset_index(drop=True)