# 具体代码
from jieba import analyse
import pandas as pd
import numpy as np
# Extract TF-IDF keywords from the text column of a CSV file and store them,
# one joined string per row, in the keyword column of a copy of that file.

# Source CSV; it is read as UTF-8 (adjust the encoding in main() if the text
# comes out garbled).
INPUT_CSV = "D:/12140/Desktops/111/222/test002.csv"
# Destination CSV: the source rows with the keyword column filled in.
OUTPUT_CSV = "D:/12140/Desktops/111/222/test003.csv"
# Column holding the text to analyse.
TEXT_COLUMN = "成果简介_y"
# Column that receives the extracted keywords.
KEYWORD_COLUMN = "关键词"
# Delimiter placed between the keywords of one row so they stay separable.
KEYWORD_SEPARATOR = ","


def keywords_for_texts(texts, extract_tags=None, separator=KEYWORD_SEPARATOR):
    """Return one joined keyword string for every entry of *texts*.

    Args:
        texts: Iterable of scalar cell values (strings, or missing values
            such as ``None``/``NaN``).
        extract_tags: Callable mapping a string to a sequence of keywords.
            Defaults to ``analyse.extract_tags`` from jieba.
        separator: String inserted between the keywords of one entry.

    Returns:
        A list with exactly one string per input entry, in input order.
        Missing entries, and entries for which the extractor returns nothing,
        yield ``""``.
    """
    if extract_tags is None:
        # Resolved lazily so a caller-supplied extractor never touches jieba.
        extract_tags = analyse.extract_tags

    joined = []
    # Iterate over every entry, starting with the first one, so the result
    # has the same length and order as the input and lines up row by row.
    for text in texts:
        if pd.isna(text):
            joined.append("")
            continue
        # Extract once per entry; fall back to an empty list if the extractor
        # returns None.
        tags = extract_tags(str(text)) or []
        # Keep every keyword, including the first, and delimit them.
        joined.append(separator.join(str(tag) for tag in tags))
    return joined


def main():
    """Read the input CSV, fill the keyword column, and write the output CSV."""
    df = pd.read_csv(INPUT_CSV, encoding="utf-8")

    keywords = keywords_for_texts(df[TEXT_COLUMN])
    print(len(keywords))

    # One keyword string per row, assigned positionally, so each row gets the
    # keywords extracted from its own text.
    df[KEYWORD_COLUMN] = keywords
    print(df[KEYWORD_COLUMN])

    df.to_csv(OUTPUT_CSV, index=False)


if __name__ == "__main__":
    main()
# 效果展示