【LeetCode2199. 找到每篇文章的主题】[MySQL group_concat/T-SQL coalesce/Oracle listagg/PostgreSQL STRING_AGG]按照group拼接字符串
题目地址
https://leetcode.cn/problems/finding-the-topic-of-each-post/description/
要点
- [MySQL group_concat/T-SQL coalesce/Oracle listagg/PostgreSQL STRING_AGG]按照group拼接字符串
- [MySQL LOCATE/T-SQL CHARINDEX/Oracle INSTR/PostgreSQL POSITION]判断某字符串是否出现在另一个字符串中
代码
MySQL
with t1 as(
select p.*, k.*
from Posts p
left join Keywords k
on LOCATE(LOWER(CONCAT(' ', word, ' ')), LOWER(CONCAT(' ', content, ' '))) >0
)
select post_id,
ifnull(group_concat(distinct topic_id order by topic_id asc separator "," ),"Ambiguous!") as topic
from t1
group by post_id
order by post_id asc
# select * from t1
PostgreSQL
WITH t1 AS (
SELECT p.*, k.*
FROM Posts p
LEFT JOIN Keywords k ON POSITION(LOWER(CONCAT(' ', word, ' ')) IN LOWER(CONCAT(' ', content, ' '))) > 0
)
SELECT post_id,
COALESCE(STRING_AGG(topic_id::TEXT, ',' ORDER BY topic_id), 'Ambiguous!') AS topic
FROM (
SELECT DISTINCT post_id, topic_id
FROM t1
) subquery
GROUP BY post_id
ORDER BY post_id ASC;
T-SQL
WITH t1 AS (
SELECT p.*, k.*
FROM Posts p
LEFT JOIN Keywords k ON CHARINDEX(' ' + LOWER(k.word) + ' ', ' ' + LOWER(p.content) + ' ') > 0
)
SELECT
post_id,
COALESCE(STUFF((SELECT ',' + CAST(topic_id AS VARCHAR(MAX))
FROM t1 t2
WHERE t1.post_id = t2.post_id
GROUP BY topic_id
ORDER BY topic_id ASC
FOR XML PATH('')), 1, 1, ''), 'Ambiguous!') AS topic
FROM t1
GROUP BY post_id
ORDER BY post_id ASC;
Oracle
WITH t1 AS (
SELECT p.*, k.*
FROM Posts p
LEFT JOIN Keywords k ON INSTR( (LOWER(' ' || content || ' ')), (LOWER(' ' || word || ' ')) ) > 0
)
SELECT
post_id,
COALESCE(LISTAGG(topic_id, ',') WITHIN GROUP (ORDER BY topic_id), 'Ambiguous!') AS topic
FROM (
SELECT DISTINCT
post_id,
topic_id
FROM t1
) t
GROUP BY post_id
ORDER BY post_id;
pandas
import pandas as pd
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
def find_topic(keywords, posts):
# Function to check if the keyword is in the post content
def check_keyword_in_content(row):
return ' '+row['word'].lower()+' ' in\
' '+row['content'].lower()+' '
# Merge posts and keywords with a left join on the condition
# using apply to check if the keyword is in the content
merged = posts.assign(key='_').merge(keywords.assign(key='_'), on='key', how='left')
merged = merged[merged.apply(check_keyword_in_content, axis=1)]
# Group by post_id and concatenate distinct topic_ids separated by commas
result = merged.groupby('post_id')['topic_id'].apply(lambda x: ','.join(sorted(set(x.dropna().astype(str)), key=lambda x:int(x))))
# print(f"result=\n{result}")
# Replace empty strings with "Ambiguous!"
result = result.reindex(posts['post_id']).fillna("Ambiguous!")
# Reset index to turn the Series into a DataFrame
result = result.reset_index()
# Rename the column to match the SQL output
result.columns = ['post_id', 'topic']
return result.sort_values('post_id').reset_index(drop=True)