#!/usr/bin/python
#-*- coding: utf-8 -*-
"""Data analysis of a WeChat account's friend list.

Logs in through itchat, then charts the friends' sex distribution,
province distribution, signature keywords / sentiment and avatar usage,
and dumps the location data to ``location.csv``.
"""

import os
import re
import csv
import time
import json
import base64
import sys
from collections import Counter

import jieba
from jieba import analyse
import pandas as pd
import itchat
from snownlp import SnowNLP
import requests
import matplotlib.pyplot as plt
# NOTE: the star import is what puts ``mpl`` in scope for the rcParams line
# below. It is kept ahead of the remaining imports, as in the original file,
# so that names imported later are never shadowed by it.
from pylab import *
from faceApi import FaceAPI
from PIL import Image
import numpy as np
from wordcloud import WordCloud
from pyecharts import Pie, Map, Style, Page, Bar

# Use a font with CJK glyphs so the Chinese labels render in matplotlib.
mpl.rcParams['font.sans-serif'] = ['SimHei']

# Emoji in signatures arrive as HTML tags such as
# <span class="emoji emoji1f604"></span> (presumed from the cleanup the
# original code performed on 'span' / 'class' / 'emoji' -- confirm against
# itchat output).
_EMOJI_TAG = re.compile(r'<span\s+class="emoji[^"]*">\s*</span>')
# Fallback: a bare emoji code point ("1f" + digit) and everything after it.
_EMOJI_CODE = re.compile(r'1f\d.+')


def _show_pie(counts, labels, colors, title, pctdistance):
    """Draw and display one labelled pie chart."""
    plt.figure(figsize=(8, 5), dpi=80)
    plt.axes(aspect=1)
    plt.pie(counts,
            labels=labels,
            colors=colors,
            labeldistance=1.1,       # distance of the labels from the centre
            autopct='%3.1f%%',       # percentage format inside each wedge
            shadow=False,
            startangle=90,
            pctdistance=pctdistance  # distance of the percentages from the centre
            )
    plt.legend(loc='upper right')
    plt.title(title)
    plt.show()


def _show_wordcloud(text, mask_file, max_font_size, random_state,
                    width, height, output_file=None):
    """Render *text* as a word cloud shaped by *mask_file* and display it.

    The image is also saved to *output_file* when one is given.
    """
    if not text.strip():
        # WordCloud.generate raises on input without any words.
        return
    back_coloring = np.array(Image.open(mask_file))
    wordcloud = WordCloud(
        font_path='simfang.ttf',
        background_color="white",
        max_words=1200,
        mask=back_coloring,
        max_font_size=max_font_size,
        random_state=random_state,
        width=width,
        height=height,
        margin=15
    )
    wordcloud.generate(text)
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.show()
    if output_file:
        wordcloud.to_file(output_file)


def analyseSex(friends):
    """Show a pie chart of the friends' sex distribution.

    Args:
        friends: list returned by ``itchat.get_friends``; element 0 is the
            logged-in account itself and is excluded from the statistics.
    """
    sexs = [friend['Sex'] for friend in friends[1:]]
    if not sexs:
        return
    counter = Counter(sexs)
    # Look every category up explicitly so the counts always line up with the
    # three labels, even when a category has no members.
    # Presumably Sex is 0 = unknown, 1 = male, 2 = female -- this is the order
    # the original ascending sort paired with the labels.
    counts = [counter.get(code, 0) for code in (0, 1, 2)]
    labels = ['不明', '男性', '女性']
    colors = ['red', 'yellow', 'blue']
    _show_pie(counts, labels, colors,
              u'%s的微信好友性别组成' % friends[0]['NickName'],
              pctdistance=0.6)


def analyseLocation(friends):
    """Write each friend's nickname, province and city to ``location.csv``.

    Args:
        friends: list returned by ``itchat.get_friends``; element 0 (the
            logged-in account) is skipped.

    Returns:
        dict mapping each province to the number of friends located there.
    """
    headers = ['NickName', 'Province', 'City']
    freqs = Counter()
    with open('location.csv', 'w', encoding='utf-8', newline='') as csvFile:
        writer = csv.DictWriter(csvFile, headers)
        writer.writeheader()
        for friend in friends[1:]:
            writer.writerow({key: friend[key] for key in headers})
            province = friend['Province']
            if province is not None:
                freqs[province] += 1
    return dict(freqs)


def analyseHeadImage(friends):
    """Analyse the friends' avatars: face usage pie chart and tag word cloud.

    Avatars are cached under ``./HeadImages``; an avatar is downloaded only
    when its file is not already on disk.

    Args:
        friends: list returned by ``itchat.get_friends``; element 0 (the
            logged-in account) is skipped.
    """
    # Init path
    baseFolder = os.path.join(os.path.abspath('.'), 'HeadImages')
    os.makedirs(baseFolder, exist_ok=True)

    # Analyse images
    faceApi = FaceAPI()
    use_face = 0
    not_use_face = 0
    tag_names = []
    for index, friend in enumerate(friends[1:], start=1):
        # Save head image (skip the download when it is already cached)
        imgFile = os.path.join(baseFolder, 'Image%s.jpg' % index)
        if not os.path.exists(imgFile):
            imgData = itchat.get_head_img(userName=friend['UserName'])
            with open(imgFile, 'wb') as file:
                file.write(imgData)

        # Detect faces
        time.sleep(1)  # presumably throttles calls to the face API -- confirm
        result = faceApi.detectFace(imgFile)
        # NOTE(review): kept as an explicit comparison because the return type
        # of FaceAPI.detectFace is not visible here.
        if result == True:
            use_face += 1
        else:
            not_use_face += 1

        # Extract tags
        result = faceApi.extractTags(imgFile)
        tag_names.extend(tag['tag_name'] for tag in result)

    if use_face + not_use_face > 0:
        labels = [u'使用人脸头像', u'不使用人脸头像']
        counts = [use_face, not_use_face]
        colors = ['red', 'yellow']
        _show_pie(counts, labels, colors,
                  u'%s的微信好友使用人脸头像情况' % friends[0]['NickName'],
                  pctdistance=0.5)

    # A single join keeps a separator between the tags of different friends.
    image_tags = ','.join(tag_names)
    try:
        # Presumably the API text is UTF-8 that was mis-decoded as Latin-1;
        # confirm against FaceAPI.
        image_tags = image_tags.encode('iso8859-1').decode('utf-8')
    except (UnicodeEncodeError, UnicodeDecodeError):
        # The text was not Latin-1 mojibake; use it unchanged.
        pass
    _show_wordcloud(image_tags, 'face.jpg', max_font_size=85,
                    random_state=75, width=800, height=480)


def analyseSignature(friends):
    """Analyse the friends' signatures: keyword word cloud and sentiment bars.

    Keywords are written to ``signatures.txt`` and the word cloud is saved as
    ``signatures.jpg``.

    Args:
        friends: list returned by ``itchat.get_friends``; element 0 (the
            logged-in account) supplies the chart title and is excluded from
            the statistics, as in the other analyses.
    """
    keywords = []
    emotions = []
    for friend in friends[1:]:
        signature = friend['Signature']
        if signature is None:
            continue
        # Drop complete emoji tags first so the text after an emoji survives,
        # then apply the original cleanup to whatever markup is left.
        signature = _EMOJI_TAG.sub('', signature)
        signature = signature.strip().replace('span', '').replace('class', '').replace('emoji', '')
        signature = _EMOJI_CODE.sub('', signature)
        if len(signature) > 0:
            emotions.append(SnowNLP(signature).sentiments)
            keywords.extend(jieba.analyse.extract_tags(signature, 5))

    # Joining once keeps a space between the keywords of different friends.
    signatures = ' '.join(keywords)
    print(signatures)
    with open('signatures.txt', 'wt', encoding='utf-8') as file:
        file.write(signatures)

    # Signature word cloud
    _show_wordcloud(signatures, 'flower.jpg', max_font_size=75,
                    random_state=45, width=960, height=720,
                    output_file='signatures.jpg')

    # Signature sentiment: > 0.66 positive, < 0.33 negative, else neutral
    count_good = len([x for x in emotions if x > 0.66])
    count_normal = len([x for x in emotions if 0.33 <= x <= 0.66])
    count_bad = len([x for x in emotions if x < 0.33])
    total = len(emotions)
    if total == 0:
        # No usable signature: nothing to chart and no percentages to compute.
        return
    print(count_good * 100 / total)
    print(count_normal * 100 / total)
    print(count_bad * 100 / total)
    print(count_good)
    print(count_normal)
    print(count_bad)
    labels = [u'负面消极', u'中性', u'正面积极']
    values = (count_bad, count_normal, count_good)
    plt.rcParams['font.sans-serif'] = ['simHei']
    plt.rcParams['axes.unicode_minus'] = False
    plt.xlabel(u'情感判断')
    plt.ylabel(u'频数')
    plt.xticks(range(3), labels)
    # An explicit list: the string 'rgb' is not accepted as a colour sequence
    # by current matplotlib.
    plt.bar(range(3), values, color=['r', 'g', 'b'])
    plt.title(u'%s的微信好友签名信息情感分析' % friends[0]['NickName'])
    plt.show()


def create_charts(users=None):
    """Render a China map of the friends' province distribution.

    Args:
        users: friend list to chart; fetched with ``itchat.get_friends()``
            when omitted.
    """
    if users is None:
        users = itchat.get_friends()
    page = Page()
    style = Style(width=1100, height=600)
    attr, value = prov_stats(users)
    chart = Map('中国地图', **style.init_style)
    chart.add('', attr, value, is_label_show=True, is_visualmap=True, visual_text_color='#000')
    page.add(chart)
    page.render()


def prov_stats(users):
    """Count users per province.

    Args:
        users: list of friend records, each with a ``'Province'`` key.

    Returns:
        ``(names, counts)`` sorted by ascending count; an empty province name
        is reported as ``'未知'``. Both lists are empty when there are no
        users.
    """
    prv = pd.DataFrame(users)
    if 'Province' not in prv.columns:
        return [], []
    prv_cnt = prv.groupby('Province', as_index=True)['Province'].count().sort_values()
    attr = [name if name != '' else '未知' for name in prv_cnt.index]
    # tolist() yields plain Python ints rather than numpy scalars.
    return attr, prv_cnt.tolist()


def main():
    """Log in to WeChat, fetch the friend list and run every analysis."""
    # login wechat and extract friends
    itchat.auto_login(hotReload=True)
    friends = itchat.get_friends(update=True)
    create_charts(friends)
    analyseSex(friends)
    analyseSignature(friends)
    analyseHeadImage(friends)
    analyseLocation(friends)


if __name__ == '__main__':
    main()