#!/usr/bin/env python
# _*_ coding: utf-8 _*_
# @Time : 2017/4/18 15:22
# @Author : otfsenter
# @File : strip_extr.py
import pprint
import jieba
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import PIL
from wordcloud import WordCloud
def wordcloudplot(txt, font_path=r'C:\Windows\Fonts\verdana.ttf',
                  mask_path='she.jpg', output_path='she2.jpg'):
    """Render *txt* as a masked word cloud, save it, and display it.

    Args:
        txt: Text whose words are drawn in the cloud.
        font_path: Font file handed to ``WordCloud`` for rendering.
            NOTE(review): the default Verdana font presumably lacks CJK
            glyphs; pass a CJK-capable font for Chinese text -- confirm.
        mask_path: Image file whose pixel array is used as the cloud mask.
        output_path: File the rendered cloud image is written to.
    """
    # Import the submodule explicitly: a bare ``import PIL`` does not load
    # ``PIL.Image``, so ``PIL.Image.open`` only works when some other module
    # happens to have imported it already.
    from PIL import Image

    # np.array() copies the pixel data, so the image file can be closed as
    # soon as the mask array has been built.
    with Image.open(mask_path) as mask_image:
        mask = np.array(mask_image)

    # The font path is passed through as-is: re-encoding it via ``unicode``
    # is a no-op for an ASCII path and raises NameError on Python 3.
    cloud = WordCloud(
        font_path=font_path,
        background_color='white',
        margin=1,
        # WordCloud documents width/height as ignored when a mask is given;
        # they are kept only to mirror the original configuration.
        width=10,
        height=2,
        mask=mask,
        max_words=200,
        max_font_size=1000,
        random_state=42,  # fixed seed so repeated runs give the same layout
    )
    cloud.generate(txt)
    cloud.to_file(output_path)

    plt.imshow(cloud)
    plt.axis('off')
    plt.show()
def main():
    """Segment ``nms.txt`` with jieba and draw a word cloud of its words.

    Only tokens longer than one character are kept; they are joined with
    single spaces and handed to ``wordcloudplot``.
    """
    # The context manager closes the file even if reading fails.
    # NOTE(review): on Python 3 this decodes with the platform default
    # encoding -- confirm it matches the encoding of nms.txt.
    with open('nms.txt', 'r') as source:
        content = source.read()

    # Keep the tokens as text: encoding each one to UTF-8 bytes makes the
    # ``' '.join`` below raise TypeError on Python 3.
    words = [word for word in jieba.cut(content) if len(word) > 1]
    wordcloudplot(' '.join(words))
# Run the word-cloud pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()