python 爬取微信好友列表和个性签名,绘制个性签名云图
python爬取微信好友列表和个性签名,绘制个性签名云图
1. 简要介绍
本次实验主要用到下面几个库 :
1)itchat---微信接口库,可生成二维码(QR码),供微信扫码登录
2)re(正则表达式)---由于微信好友个性签名含有中英文,本次只提取中文,需要使用re模块去除其他无关字符
3)wordcloud(云图)---使用该模块生成中文云图
4)jieba(中文分词)--- 号称最好的中文分词工具
2. 安装库
1 pip install jieba 2 pip install itchat 3 pip install wordcloud(re 是 Python 标准库,无需安装;代码中还用到 pandas、matplotlib、numpy 和 pillow,如未安装可同样通过 pip 安装)
3. 实验代码
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time    : 2018/1/19 14:37
# @Author  : Z.C.Wang
# @Email   :
# @File    : spider_wechat.py
# @Software: PyCharm Community Edition
"""Export WeChat friend profiles and draw a word cloud of their signatures.

Running this file as a script logs in through itchat, writes the nickname,
sex, province, city and signature of every friend to ``data.txt`` (pickle)
and ``info.csv``, then segments the Chinese text of all signatures with
jieba and shows it as a word cloud masked and coloured by an image.
"""
import pickle
import re

# Matches every character outside U+4E00..U+9FA5. Compiled once at import
# time instead of once per signature.
NON_CHINESE = re.compile('[^\u4e00-\u9fa5]')

# Defaults used by main(); override them through its keyword arguments.
MASK_IMAGE = 'alice.png'
FONT_PATH = r'C:\Windows\Fonts\simhei.ttf'

# Fallback record list for get_var() when no list is passed explicitly.
friends = []


def get_var(var, friend_list=None):
    """Collect the field *var* from every friend record.

    Args:
        var: Key to read from each record, e.g. ``'NickName'``.
        friend_list: Records to read from. When omitted, the module-level
            ``friends`` list is used.

    Returns:
        A list holding ``record[var]`` for each record, in input order.

    Raises:
        Whatever ``record[var]`` raises for a missing key (``KeyError``
        for plain dicts).
    """
    records = friends if friend_list is None else friend_list
    return [record[var] for record in records]


def list2str(wordlist):
    """Join the words of *wordlist* into one space-separated string.

    Args:
        wordlist: Any iterable of strings (a list or a generator).

    Returns:
        The words separated by single spaces; ``''`` for an empty iterable.
    """
    return ' '.join(wordlist)


def clean_signature(signature):
    """Return only the Chinese characters of *signature*.

    Everything outside U+4E00..U+9FA5 is dropped, which also removes
    whitespace and ASCII markup fragments such as ``span``, ``class`` and
    ``emoji``, so no separate stripping step is needed.

    Args:
        signature: Raw signature text; ``None`` or ``''`` is accepted.

    Returns:
        The filtered text, possibly empty.
    """
    if not signature:
        return ''
    return NON_CHINESE.sub('', signature)


def count_sex(friend_list):
    """Count friend records by their ``'Sex'`` field.

    Args:
        friend_list: Iterable of records that each have a ``'Sex'`` key.

    Returns:
        A ``(male, female, other)`` tuple, where ``Sex == 1`` counts as
        male, ``Sex == 2`` as female and any other value as other.
    """
    male = female = other = 0
    for record in friend_list:
        sex = record['Sex']
        if sex == 1:
            male += 1
        elif sex == 2:
            female += 1
        else:
            other += 1
    return male, female, other


def main(mask_path=MASK_IMAGE, font_path=FONT_PATH):
    """Log in, export the friend data and show the signature word cloud.

    Args:
        mask_path: Image used both as the word-cloud mask and as the
            colour source.
        font_path: Font file handed to WordCloud; it must contain Chinese
            glyphs for the words to render.
    """
    # Third-party packages are imported here so that the text helpers above
    # can be imported without the WeChat and plotting stack installed.
    import itchat
    import jieba
    import matplotlib.pyplot as plt
    import numpy as np
    import PIL.Image as Image
    from pandas import DataFrame
    from wordcloud import WordCloud, ImageColorGenerator

    itchat.login()
    all_friends = itchat.get_friends(update=True)

    # The statistics skip the first entry.
    # NOTE(review): in itchat this is presumably the logged-in account
    # itself -- confirm against the itchat documentation.
    contacts = all_friends[1:]
    male, female, other = count_sex(contacts)
    total = len(contacts)
    if total:  # avoid dividing by zero for an account without friends
        print('男性好友:%.2f%%' % (male / total * 100))
        print('女性好友:%.2f%%' % (female / total * 100))
        print('不明性别好友:%.2f%%' % (other / total * 100))

    data = {
        'Nickname': get_var('NickName', all_friends),
        'Sex': get_var('Sex', all_friends),
        'Province': get_var('Province', all_friends),
        'City': get_var('City', all_friends),
        'Signature': get_var('Signature', all_friends),
    }
    print(data['Province'])

    # Context manager so the pickle file is flushed and closed.
    with open('data.txt', 'wb') as handle:
        pickle.dump(data, handle)
    DataFrame(data).to_csv('info.csv', index=True, encoding='utf-8-sig')

    text = ''.join(clean_signature(sig) for sig in data['Signature'])
    words = list2str(jieba.cut(text, cut_all=True))

    with Image.open(mask_path) as mask_image:
        coloring = np.array(mask_image)
    cloud = WordCloud(background_color='white', max_words=2000,
                      mask=coloring, max_font_size=55, random_state=42,
                      scale=2, font_path=font_path).generate(words)
    # Recolour once and draw once; the original's second imshow call is
    # dropped on the assumption that recolor() updates the cloud in place.
    # NOTE(review): confirm against the wordcloud documentation.
    cloud.recolor(color_func=ImageColorGenerator(coloring))
    plt.imshow(cloud)
    plt.axis('off')
    plt.show()


if __name__ == '__main__':
    main()
4. 实验结果