# %% [markdown]
# 根据关键词提取相关句子

# %% [markdown]
# Step1 提取出前1轮的关键词

# %%
import pandas as pd
# Load both workbooks in order: `head` from the first file, `tail` from the
# second. The paths are relative to the current working directory.
head, tail = (
    pd.read_excel(workbook)
    for workbook in ('前1轮相同电器.xlsx', '后1轮相同电器.xlsx')
)

# %%
# Select the third column (positional index 2) of `head`; the later cells
# treat its cleaned contents as the keyword list.
keyword_column_position = 2
key1 = head.iloc[:, keyword_column_position]

# %%
# Bare expression: shows the selected column when run as a notebook cell.
key1

# %%
# Clean the keyword column: remove the word "燃气灶" and the "、" separator
# from every cell, then keep only the cells that still contain text.
import re

# The previous pattern r'[燃气灶]|[、]' was a character class, so it deleted
# the characters 燃, 气 and 灶 one by one wherever they occurred and mangled
# unrelated keywords (e.g. "空气净化器" became "空净化器"). An alternation
# removes the whole word only.
# NOTE(review): deleting "、" glues multi-keyword cells into one string;
# confirm whether such cells should be split into separate keywords instead.
_unwanted = re.compile(r'燃气灶|、')

list1 = []
for cell in key1:
    # pandas represents blank Excel cells as NaN (a float); re.sub would
    # raise TypeError on it, so anything that is not text is skipped.
    if not isinstance(cell, str):
        continue
    cleaned = _unwanted.sub("", cell)
    # Cells that become empty after cleaning carry no keyword.
    if cleaned:
        list1.append(cleaned)

# Bare expression: shows the cleaned keywords when run as a notebook cell.
list1

# %% [markdown]
# Step2 提取出后1轮的数据(不包含意图和关键词)

# %%
# Keep every column from the fourth one (positional index 3) to the end.
# According to the step heading, the columns left out are the intent and
# keyword columns.
first_data_column = 3
value1 = tail.iloc[:, first_data_column:]

# %%
# Bare expression: shows the selected columns when run as a notebook cell.
value1

# %%
# Turn the DataFrame into a 2-D NumPy array (rows x selected columns).
import numpy as np
data_array = np.array(value1)
# The nested-list form is only displayed by the notebook; it is not stored.
data_array.tolist()

# %%
# Flatten the 2-D array row by row into one list, leaving out missing cells.
#
# The earlier `while np.nan in list2: list2.remove(np.nan)` loop only matched
# entries that were the very same object as `np.nan`: NaN never compares equal
# to itself, so list membership falls back to identity. NaN values boxed from
# float columns are distinct objects and were left behind, which later broke
# the substring test on the sentences. `pd.isna` detects every missing value
# regardless of identity, and filtering while building avoids the repeated
# O(n) `remove` calls.
list2 = [cell for row in data_array for cell in row if not pd.isna(cell)]



# %%
# Bare expression: shows the flattened cells when run as a notebook cell.
list2

# %% [markdown]
# Step3 若句子中含有关键词,则提取出该句子

# %%
# Print every sentence that contains at least one of the cleaned keywords.
keywords = list1
for sentence in list2:
    # Cells read from Excel can be numbers or NaN rather than text; the
    # substring test below raises TypeError on those, so they are skipped.
    if not isinstance(sentence, str):
        continue
    if any(keyword in sentence for keyword in keywords):
        print(sentence)
# posted @ 2022-09-07 10:07  tiansz  阅读(19)  评论(0)  编辑  收藏  举报