python实现词云的具体步骤

1、在 Python 环境中安装所需的依赖包 jieba(例如:pip install jieba)

实现中文分词和词频计算:

import csv

import jieba

# Read the whole source text. The path is a raw string so the Windows
# backslashes stay literal: '\D', '\C', '\p' and '\h' are invalid escape
# sequences that newer Python versions warn about.
with open(r'E:\Data\Code\pythonProject\hlm.txt', encoding='utf-8') as fp:
    text = fp.read()

print(text)

ls = jieba.lcut(text)  # segment the text into a list of tokens with jieba

print(ls)

# Count word frequencies, skipping single-character tokens.
counts = {}
for word in ls:
    if len(word) > 1:
        counts[word] = counts.get(word, 0) + 1


# Sort the (word, count) pairs by count, highest first.
ls1 = sorted(counts.items(), key=lambda item: item[1], reverse=True)

print(ls1[:20])

# Store the frequency table in a CSV file.
# The context manager guarantees the file is flushed and closed, and
# newline='' lets the csv module control line endings (otherwise an extra
# blank line appears after every row on Windows).
with open('rrr.csv', 'w', encoding='utf-8', newline='') as f:
    csv_writer = csv.writer(f)
    csv_writer.writerow(['词语', '词频'])
    # Each item of ls1 is already a (word, count) pair, i.e. one CSV row.
    csv_writer.writerows(ls1)


2、使用代码生成词云

安装相关依赖:
numpy、pandas、wordcloud、matplotlib、Pillow(即代码中的 PIL),以及上一步已安装的 jieba(注:下面的代码实际并未用到 pandas)

import jieba
import numpy as np


# 1、首先进行分词
from PIL import Image
from matplotlib import pyplot as plt
from wordcloud import WordCloud

path = 'hlm.txt'  # input text file, relative to the working directory


def tcg(texts):
    """Segment *texts* with jieba and return the tokens joined by spaces."""
    return ' '.join(jieba.cut(texts))

# Read the input inside a context manager so the file handle is closed
# as soon as the text has been read, instead of being left open.
with open(path, 'r', encoding='utf-8') as fp:
    text = fp.read()
string = tcg(text)

# 2. Draw the word cloud
# Load the mask image inside a context manager so the underlying file is
# closed once its pixels have been copied into the array.
with Image.open('bear.jpg') as img:
    img_array = np.array(img)  # convert the image to an array

# NOTE(review): per the wordcloud documentation, width/height are ignored
# when a mask is supplied (the mask's shape is used) - confirm and drop them
# if so.
# NOTE(review): the default WordCloud font may lack CJK glyphs; if Chinese
# words render as empty boxes, pass font_path=<a font that supports Chinese>.
wc = WordCloud(
    background_color='white',
    width=1000,
    height=800,
    mask=img_array,
)

wc.generate_from_text(string)  # lay out the word cloud from the segmented text

# Save first: with an interactive backend plt.show() blocks until the window
# is closed, so saving afterwards would delay the file (or lose it entirely
# if the script is interrupted while the figure is open).
wc.to_file('getIt.jpg')  # save the image

plt.imshow(wc)
plt.axis('off')  # hide the axes
plt.show()  # display the image
posted @ 2023-09-21 22:06  yesyes1  阅读(67)  评论(0)  编辑  收藏  举报