# Python 分析拉勾网职位信息 - Python 职位信息

import pandas as pd
import matplotlib.pyplot as plt
import jieba,wordcloud
from PIL import Image
import numpy as np

# Data cleaning
df = pd.read_excel(r'C:\Users\xxx\Desktop\out/lagou_1.xlsx')

# Salary ranges are assumed to look like "15k-25k": drop the k/K unit, split
# on the first dash and average the two bounds. A cell that does not yield two
# integer bounds makes astype(int) raise instead of being silently skipped.
salary_bounds = (
    df['工资']
    .str.replace('k', '', regex=False)
    .str.replace('K', '', regex=False)
    .str.split('-', n=1, expand=True)
)
df['工资平均'] = (salary_bounds[0].astype(int) + salary_bounds[1].astype(int)) / 2

# Per-experience-label summary; not used by the rest of the script.
df_1 = df.groupby(by='经验').agg({'经验': ['nunique']})

# Representative number of years substituted for each experience label.
# Replacements are literal (regex=False) and applied in this order.
EXPERIENCE_YEARS = {
    '1-3年': '2',
    '10年以上': '10',
    '1年以下': '1',
    '3-5年': '4',
    '5-10年': '8',
    '不限': '0',
    '应届毕业生': '0',
}
experience = df['经验']
for label, years in EXPERIENCE_YEARS.items():
    experience = experience.str.replace(label, years, regex=False)
# Any label missing from the table leaves non-numeric text and raises here.
df['平均经验'] = experience.astype(int)

# Characters left over from a stringified Python list: brackets, quotes,
# spaces, stray backslashes, commas and '#'.
_LIST_ARTIFACT_CHARS = "[]' \\,#"


def _strip_list_artifacts(column, extra_chars=''):
    """Return *column* with stringified-list residue removed.

    The literal two-character sequence backslash + "n" is removed first, as
    a unit, so that ordinary letters "n" in the text (e.g. in "python") are
    preserved. Afterwards every character in ``_LIST_ARTIFACT_CHARS`` plus
    *extra_chars* is deleted in a single pass.

    Args:
        column: pandas Series of strings (missing values stay missing).
        extra_chars: additional single characters to delete.

    Returns:
        A new pandas Series with the cleaned text.
    """
    without_newlines = column.str.replace('\\n', '', regex=False)
    drop_table = str.maketrans('', '', _LIST_ARTIFACT_CHARS + extra_chars)
    return without_newlines.str.translate(drop_table)


df['职位描述'] = _strip_list_artifacts(df['职位描述'])
df['工作地点'] = _strip_list_artifacts(df['工作地点'], extra_chars='-')
print(df['工作地点'])
# Optional checkpoint of the cleaned data:
#   df.to_excel(r'C:\Users\xxx\Desktop\out/lagou_2.xlsx')
print(df)
# Select the SimHei font for the Chinese labels used below and disable the
# Unicode minus sign, which that font setup may not render.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False


def _save_histogram(values, xlabel, title, filename):
    """Draw, save and show one histogram on its own figure.

    A fresh figure is opened before drawing and closed afterwards so that
    nothing from this chart leaks into the next plot (previously the second
    histogram stayed open and the following pie chart was drawn on top of it).

    Args:
        values: numeric pandas Series (or array-like) to bin.
        xlabel: label for the x axis.
        title: chart title.
        filename: path the image is written to.
    """
    plt.figure()
    plt.hist(values, alpha=0.8, color='steelblue')
    plt.xlabel(xlabel)
    plt.ylabel('频数')
    plt.title(title)
    plt.savefig(filename)  # save before show(); show() may discard the figure
    plt.show()
    plt.close()  # no-op if show() already destroyed the figure


# Visualization: histograms
_save_histogram(df['工资平均'], '工资/千元', "python平均工资直方图", 'lagou-gongzi.jpg')
_save_histogram(df['平均经验'], '经验/年', "python平均经验直方图", 'lagou-jingyan.jpg')

# Visualization: pie chart of the share of each value in the '学历' column.
count = df['学历'].value_counts()
# Open a dedicated figure: without it the pie is drawn into whatever figure
# is currently active, i.e. on top of a previously drawn chart that was
# saved but never closed.
plt.figure()
plt.pie(count, labels=count.index, shadow=True, autopct='%2.2f%%')
plt.savefig('lagou_xueli.jpg')  # save before show(); show() may discard the figure
plt.show()

# Visualization: word cloud built from the '福利' column.
# Load the mask image; the context manager closes the file once the pixel
# data has been copied into the array.
with Image.open('壁纸.jpg') as pic:
    mang_mask = np.array(pic)

# Concatenate all entries into one text. Missing cells are dropped so a NaN
# cannot break the concatenation, and join() avoids repeated string copies.
strs = ''.join(df['福利'].dropna().astype(str))
print(strs)

# jieba segments the text into words; WordCloud expects them space-separated.
cut_strs = ' '.join(jieba.cut(strs))

# NOTE(review): font_path is a machine-specific absolute path — adjust it to
# a font file that exists on the machine running this script.
word_cloud = wordcloud.WordCloud(
    font_path='/home/shen/Downloads/fonts/msyh.ttc',
    mask=mang_mask,
    background_color='white',
).generate(cut_strs)
word_cloud.to_file('lagou_wordcloud.jpg')

# Show the cloud on its own figure rather than on a leftover one.
plt.figure()
plt.imshow(word_cloud)
plt.show()


# posted @ 2020-09-09 10:58  Merge_1126  阅读(123)  评论(0)  编辑  收藏  举报