对相似的句子两两配对

from itertools import combinations
import pandas as pd

# %% Build the nested list of sentence groups (one inner list per intent)
single_sentence_list = []
same_sentence_list = []

# The context manager closes the file even if reading or decoding raises.
with open("same_sentence.txt", encoding="utf-8") as f:
    for line in f:
        line = line.rstrip("\n")
        if not line:
            # A blank line would otherwise be stored as an empty "sentence"
            # and later be paired with every stand-alone sentence.
            continue
        if "----" in line:
            # Sentences sharing a line are separated by "----" and form
            # one group.
            same_sentence_list.append(line.split("----"))
        else:
            single_sentence_list.append(line)

# The stand-alone sentences are collected into one final group.
same_sentence_list.append(single_sentence_list)


# %% Generate every unordered pair of sentences within each group
# combinations(group, 2) yields nothing for a group with fewer than two
# sentences, so such a group contributes an empty list.
combination_list = [
    list(combinations(group, 2)) for group in same_sentence_list
]


# %% Flatten the per-group pairs and export them to an Excel workbook
csv_list = []
for pair_group in combination_list:
    # Each element of pair_group is one 2-tuple of sentences; it becomes
    # one row of the exported sheet.
    csv_list.extend(pair_group)

# index=False is the documented way to omit the row-index column
# (the previous index=None only worked because None is falsy).
pd.DataFrame(data=csv_list).to_excel("combination.xlsx", index=False)

posted @ 2023-03-06 11:28  tiansz  阅读(14)  评论(0)  编辑  收藏  举报