# -*- coding:utf-8 -*-
# 1,一个四维的数组,以后两维度为单位,计算它们的和,比如一个1*2*3*4的数组,合并后两列,输出合并后的轴的sum
# 实例,输入:[[[[2 7 9 7] [6 6 8 2] [0 0 9 3]][[5 4 1 4][5 7 9 7] [8 4 1 4]] ]] 输出[[59 59]]
# import numpy as np
# a=np.array([[[[2,7,9,7],[6,6,8,2],[0,0,9,3]],[[5,4,1,4],[5,7,9,7],[8,4,1,4]]]])
# b=a.reshape(2,12)
# c=np.array([[sum(b[0]),sum(b[1])]])
#
# print(c)
# 2,在数组[1,2,3,4,5]中相邻两个数字中间插入三个0
# 输入:[1,2,3,4,5] 输出:[1,0,0,0,2,0,0,0,3,0,0,0,4,0,0,0,5]
# import numpy as np
# arr01=np.arange(1,6)
# arr02=np.zeros(17)
# arr02[::4]=arr01
# print(arr02)
# 3,二维矩阵与三维矩阵如何相乘?
# import numpy as np
# A=np.ones((3,3,2,))
# print(A)
# print('-'*50)
# B=np.zeros((3,3))
# B[::]=2
# print(B)
# print('-'*50)
#
# print(B.dot(A))
# 4,怎么交换矩阵的其中两行?比如交换第一二行
# import numpy as np
# arr01=np.arange(0,25).reshape(5,5)
#
# print(arr01)
# print('-'*50)
#
# arr01[[0]],arr01[[1]]=arr01[[1]],arr01[[0]]
#
# print(arr01)
# 5,创建一个10*10的数组,并且边框是1,里面是0
# import numpy as np
# arr01=np.ones((10,10))
# arr01[1:9,1:9]=0
# print(arr01)
# 6,将数组s3中的A和dog替换成XXX;
# s3=pd.Series(['A','B','C','Aaba','Baca','',np.nan,'CABA','dog','cat'])
# import pandas as pd
# import numpy as np
# s3=pd.Series(np.array(['A','B','C','Aaba','Baca','',np.nan,'CABA','dog','cat']))
# s3[0]='XXX'
# s3[8]='XXX'
# print(s3)
# 7,pattern=r'[a-z][0-9]'
# s=pd.Series(['1','b2','3a','3b','c2c'])
# s.str.contains(pattern)
# 请在不运行代码的状态下,输出以上代码运行的结果.
# import pandas as pd
# import re
#
# pattern=r'[a-z][0-9]'
# s=pd.Series(['1','b2','3a','3b','c2c'])
# print(s.str.contains(pattern))
# 运行结果:
# 0 False
# 1 True
# 2 False
# 3 False
# 4 True
# dtype: bool
# 8,将附件中的数据导入DataFrame中,实现以下操作:
# 1,对异常数据(数值为0)进行值替换,替换为当前列的平均值;
# 2,对zwyx列的数据进行平均值统计,其他列做计数统计(提示使用value_counts());
# 3,得到zwmc字段的唯一值列表;
# 4,通过groupby函数,实现对于dd字段的分组,并按照城市计算每个城市的最大薪资,使用折线图,显示Top10城市(选做)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
def replace_zeros_with_column_mean(frame, exclude=('Id',)):
    """Return a copy of *frame* with zeros replaced by each column's own mean.

    Only numeric columns are touched. A zero is treated as an abnormal value
    and replaced by the mean of the column it sits in (computed over the
    column as read, zeros included), not by the mean of some other column.

    Args:
        frame: Input DataFrame; it is not modified.
        exclude: Column names to leave untouched. Defaults to the ``Id``
            column, which is assumed to be an identifier rather than a
            measurement -- TODO confirm against the CSV schema.

    Returns:
        A new DataFrame. Columns that contained zeros become float.
    """
    cleaned = frame.copy()
    for name in cleaned.select_dtypes(include='number').columns:
        if name in exclude:
            continue
        column = cleaned[name]
        is_zero = column == 0
        if is_zero.any():
            # Cast first so an integer column can hold a fractional mean.
            cleaned[name] = column.astype(float).mask(is_zero, column.mean())
    return cleaned


def count_values(frame, columns):
    """Stack the ``value_counts()`` of every column in *columns*.

    Args:
        frame: DataFrame to read the columns from.
        columns: Iterable of column names to count.

    Returns:
        A DataFrame with a single ``values`` column holding the counts and
        the counted values as its index, in the order of *columns*. Empty
        (but with the ``values`` column) when *columns* is empty.
    """
    counts = [frame[name].value_counts().to_frame('values') for name in columns]
    if not counts:
        return pd.DataFrame(columns=['values'])
    # A single concat avoids re-copying the accumulated frame per column.
    return pd.concat(counts)


def top_max_salary_by_city(frame, n=10):
    """Return the *n* largest per-city maxima of ``zwyx``, highest first.

    Args:
        frame: DataFrame with a ``dd`` column to group by and a ``zwyx``
            column to take the maximum of.
        n: Number of groups to keep.

    Returns:
        A Series indexed by ``dd`` value (index named ``地区``), sorted in
        descending order and truncated to *n* entries.
    """
    top = frame.groupby('dd')['zwyx'].max().sort_values(ascending=False).head(n)
    top.index.name = '地区'
    return top


def main():
    """Load the CSV, clean it, print the statistics and plot the top 10."""
    raw = pd.read_csv('ca_list_copy(2).csv')
    cleaned = replace_zeros_with_column_mean(raw)

    # Report the mean of the cleaned data so it agrees with the other stats.
    print('zwyx:平均值:', cleaned['zwyx'].mean())

    # Every column except the identifier and the averaged one gets counted.
    stat_columns = [name for name in cleaned.columns
                    if name not in ('Id', 'zwyx')]
    print(count_values(cleaned, stat_columns))

    only_value = list(cleaned['zwmc'].unique())
    print('唯一值列表:%s' % only_value)

    # Use a CJK-capable font and keep the minus sign renderable with it.
    mpl.rcParams['font.sans-serif'] = ['SimHei']
    mpl.rcParams['axes.unicode_minus'] = False

    data = top_max_salary_by_city(cleaned)
    print(data)
    data.plot()
    plt.show()


if __name__ == '__main__':
    main()