数据分析4-numpy
1、Numpy创建数组 np.array([])
import numpy as np

# Define a NumPy structured dtype describing one student record.
# 'S32' stores the name as a fixed-width byte string (up to 32 bytes),
# 'i' is a C int and 'f' is a 32-bit float.
persontype = np.dtype({
    'names': ['name', 'age', 'chinese', 'math', 'english'],
    'formats': ['S32', 'i', 'i', 'i', 'f'],
})

peoples = np.array(
    [
        ("ZhangFei", 32, 75, 100, 90),
        ("GuanYu", 24, 85, 96, 88.5),
        ("ZhaoYun", 28, 85, 92, 96.5),
        ("HuangZhong", 29, 65, 85, 100),
    ],
    dtype=persontype,
)

# Indexing a structured array by field name returns that column.
ages = peoples['age']
chineses = peoples['chinese']
maths = peoples['math']
englishs = peoples['english']

# Average of each numeric field across all students.
print(np.mean(ages))
print(np.mean(chineses))
print(np.mean(maths))
print(np.mean(englishs))
2、连续数组创建
# arange(start, stop, step): the stop value is exclusive.
x1 = np.arange(1, 11, 2)
# linspace(start, stop, num): the stop value is inclusive. The result is
# floating point by default, so dtype=int is passed to get integers.
x2 = np.linspace(1, 9, 5, dtype=int)
print(x1, x2)
运行结果
[1 3 5 7 9] [1 3 5 7 9]
3、算术运算
# Element-wise arithmetic on two arrays: addition, subtraction,
# multiplication, division, power and remainder.
# x1 and x2 are the arrays created in the previous section.
print(np.add(x1, x2))
print(np.subtract(x1, x2))
print(np.multiply(x1, x2))
print(np.divide(x1, x2))     # true division: the result is floating point
print(np.power(x1, x2))      # x1 ** x2, element by element
print(np.remainder(x1, x2))
print(np.mod(x1, x2))        # mod is an alias of remainder
运行结果
[ 2  6 10 14 18]
[0 0 0 0 0]
[ 1  9 25 49 81]
[1. 1. 1. 1. 1.]
[        1        27      3125    823543 387420489]
[0 0 0 0 0]
[0 0 0 0 0]
4、统计函数
# Minimum and maximum of an array/matrix.
a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(np.amin(a))     # minimum over all elements
print(np.amin(a, 0))  # minimum along axis=0 (one value per column)
print(np.amin(a, 1))  # minimum along axis=1 (one value per row)
print(np.amax(a))     # maximum over all elements
print(np.amax(a, 0))  # maximum along axis=0
print(np.amax(a, 1))  # maximum along axis=1
运行结果
1
[1 2 3]
[1 4 7]
9
[7 8 9]
[3 6 9]
# Peak-to-peak range (maximum minus minimum) with ptp().
# `a` is the 3x3 array defined in the min/max example above.
print(np.ptp(a))     # over all elements
print(np.ptp(a, 0))  # along axis=0 (per column)
print(np.ptp(a, 1))  # along axis=1 (per row)
运行结果
8
[6 6 6]
[2 2 2]
# Percentiles with percentile().
# `a` is the 3x3 array defined in the min/max example above.

# Over the flattened array the range is 9 - 1 = 8. Split into 100 parts,
# each part is 0.08, so the q-th percentile is y = 0.08 * q + 1.
print(np.percentile(a, 100))

# With axis=0 each column ([1, 4, 7], [2, 5, 8], [3, 6, 9]) is treated as
# its own array; the percentile is computed per column and the results
# form a new array.
print(np.percentile(a, 1, axis=0))

# With axis=1 the same is done per row; the 50th percentile is the median.
print(np.percentile(a, 50, axis=1))
运行结果
9.0
[1.06 2.06 3.06]
[2. 5. 8.]
# Median with median() and arithmetic mean with mean().
# `a` is the 3x3 array defined in the min/max example above.
print(np.median(a))          # over all elements
print(np.median(a, axis=0))  # per column
print(np.median(a, axis=1))  # per row
print(np.mean(a))            # over all elements
print(np.mean(a, axis=0))    # per column
print(np.mean(a, axis=1))    # per row
运行结果
5.0
[4. 5. 6.]
[2. 5. 8.]
5.0
[4. 5. 6.]
[2. 5. 8.]
# Weighted average with average().
b = np.array([1, 2, 3, 4])
wet = [1, 2, 3, 4]
print(np.average(b))               # no weights: same as the plain mean
print(np.average(b, weights=wet))  # sum(b * wet) / sum(wet)
# Output:
# 2.5
# 3.0
# Standard deviation with std() and variance with var().
# `b` is the array [1, 2, 3, 4] defined in the weighted-average example.
print(np.std(b))
print(np.var(b))
# Output:
# 1.118033988749895
# 1.25
# Sorting with sort(). np.sort returns a sorted copy; `c` is not modified.
c = np.array([[4, 2, 3], [5, 6, 1]])
print(np.sort(c))  # default axis=-1: sort within each row
# kind selects the algorithm: 'quicksort' (quick sort), 'mergesort'
# (merge sort) or 'heapsort' (heap sort); NumPy also accepts 'stable'.
print(np.sort(c, axis=0, kind='mergesort'))  # sort down each column
# Output:
# [[2 3 4]
#  [1 5 6]]
# [[4 2 1]
#  [5 6 3]]