python实现词云的具体步骤
1、在 Python 环境中安装所需的依赖包 jieba(例如执行 `pip install jieba`)
实现中文分词和词频计算:
import csv
import jieba
# Read the whole source text. A raw string keeps the Windows backslashes
# literal without relying on unrecognized escape sequences such as "\D".
with open(r'E:\Data\Code\pythonProject\hlm.txt', encoding='utf-8') as fp:
    text = fp.read()
print(text)

ls = jieba.lcut(text)  # segment the text into a list of tokens with jieba
print(ls)

# Count occurrences of every token longer than one character
# (single-character tokens are skipped).
counts = {}
for word in ls:
    if len(word) > 1:
        counts[word] = counts.get(word, 0) + 1

# Sort the (word, count) pairs by count, highest first.
ls1 = sorted(counts.items(), key=lambda x: x[1], reverse=True)
print(ls1[:20])

# Store the frequency table in a CSV file. The with-block guarantees the file
# is flushed and closed; newline='' lets the csv module control line endings
# so no blank row is inserted between records on Windows.
with open('rrr.csv', 'w', encoding='utf-8', newline='') as f:
    csv_writer = csv.writer(f)
    csv_writer.writerow(['词语', '词频'])
    csv_writer.writerows(ls1)
2、使用代码生成词云
需要安装并导入的依赖:
jieba、numpy、Pillow(PIL)、wordcloud、matplotlib(下面的代码并未用到 pandas,可以不安装)
import jieba
import numpy as np
# 1、首先进行分词
from PIL import Image
from matplotlib import pyplot as plt
from wordcloud import WordCloud
path = 'hlm.txt'


def tcg(texts):
    """Segment *texts* with jieba and return the tokens joined by single spaces."""
    return ' '.join(jieba.cut(texts))


# Read the source text inside a with-block so the file handle is closed
# promptly instead of being left open for the rest of the script.
with open(path, 'r', encoding='utf-8') as fp:
    text = fp.read()
string = tcg(text)
# 2. Draw the word cloud
# Load the mask image; the with-block releases the underlying file once the
# pixel data has been copied into the array.
with Image.open('bear.jpg') as img:
    img_array = np.array(img)  # convert the image into a NumPy array

# NOTE(review): per the wordcloud docs, width/height are ignored when a mask
# is supplied (the mask's shape is used instead) — confirm for the installed
# version.
# NOTE(review): the input is Chinese text; if words render as empty boxes,
# pass font_path=<a CJK-capable font file> — the default font may lack those
# glyphs. Verify on the target machine.
wc = WordCloud(
    background_color='white',
    width=1000,
    height=800,
    mask=img_array,
)
wc.generate_from_text(string)  # build the word cloud from the space-joined text

plt.imshow(wc)
plt.axis('off')  # hide the axes
plt.show()  # display the figure
wc.to_file('getIt.jpg')  # save the rendered image