numpy模块

numpy属性：

ndim--维度，shape--行数和列数，size--元素个数

import numpy as np



# Creating NumPy arrays
a = np.array([2, 3, 4], dtype=np.int32)  # 1-D array with an explicit dtype: [2 3 4]
print(a)
b = np.array([[1, 2, 3], [2, 3, 4]])  # 2-D array from nested lists: [[1 2 3] [2 3 4]]
print(b)
c = np.zeros((2, 4))  # all zeros, float by default: [[0. 0. 0. 0.] [0. 0. 0. 0.]]
print(c)
# The np.int alias was deprecated in NumPy 1.20 and removed in 1.24; it was
# only ever the builtin int, so pass int to get the default integer dtype.
d = np.ones((3, 4), dtype=int)  # all ones: [[1 1 1 1] [1 1 1 1] [1 1 1 1]]
print(d)
e = np.empty((3, 4))  # uninitialized array: the values are arbitrary, not guaranteed to be 0
print(e)
f = np.arange(1, 10, 2)  # from 1 up to (excluding) 10 in steps of 2: [1 3 5 7 9]
print(f)

# Basic attributes of a 1-D array: number of dimensions, shape, element count.
g = np.array([2, 3, 4], dtype=np.int32)
# Printed one per line: 1, then (3,), then 3.
print(g.ndim, g.shape, g.size, sep="\n")

# Element-wise arithmetic on two 1-D arrays of equal length.
a = np.array([1, 2, 3, 4])
b = np.arange(4)  # [0 1 2 3]
c1 = np.subtract(a, b)  # element-wise difference
c2 = np.multiply(a, b)  # element-wise product
c3 = a @ b  # inner product: multiply element-wise, then sum
c4 = np.square(b)  # each element squared
c5 = np.sin(a)  # sine of each element

# Reductions over a 2x4 matrix of uniform random numbers in [0, 1).
a = np.random.random(size=(2, 4))
print(a)
print(a.sum())  # sum of every element
print(a.min())  # smallest element
print(a.mean())  # mean of every element
print(a.sum(axis=0))  # column sums (collapses the rows)
print(a.sum(axis=1))  # row sums (collapses the columns)

# Basic statistics on a matrix
A = np.arange(2, 14).reshape(3, 4)  # 3x4 matrix holding 2..13
print(A)
print(A.argmin())  # flat index of the minimum
print(A.argmax())  # flat index of the maximum
print(A.mean())  # mean of the whole matrix
print(np.average(A))  # (unweighted) average, same value as the mean
print(np.mean(A))  # mean again, function form
print(np.median(A))  # median
print(A.cumsum())  # running sum over the flattened matrix
print(np.diff(A, axis=-1))  # differences between neighbours within each row

# Sorting, transposing and clipping a descending 3x4 matrix.
A = np.arange(14, 2, -1).reshape(3, 4)
print(A)  # [[14 13 12 11] [10 9 8 7] [6 5 4 3]]
print(np.sort(A, axis=-1))  # each row sorted ascending: [[11 12 13 14] [7 8 9 10] [3 4 5 6]]
print(A.transpose())  # transpose, method form
print(A.T)  # transpose, attribute shorthand
# Clamp every value into [5, 9]: below 5 becomes 5, above 9 becomes 9.
# [[9 9 9 9] [9 9 8 7] [6 5 5 5]]
print(A.clip(5, 9))

# Indexing and slicing
A = np.arange(3, 15)  # [3 4 5 6 7 8 9 10 11 12 13 14]
B = np.reshape(A, (3, 4))  # [[ 3  4  5  6] [ 7  8  9 10] [11 12 13 14]]
print(B[2, :])  # row at index 2: [11 12 13 14]
print(B[0, 2])  # row 0, column 2: 5
print(B[1, 1:3])  # row 1, columns 1 and 2: [8 9]
print(B[:2, 1:3])  # rows 0-1, columns 1-2: [[4 5] [8 9]]

# Iterating over a 2-D array yields its rows.
for row in B:
    print(row)  # [3 4 5 6] [7 8 9 10] [11 12 13 14]
# Iterating over the transpose yields the columns of B.
for columns in np.transpose(B):
    print(columns)  # [3 7 11] [4 8 12] [5 9 13] [6 10 14]

# Collapsing a multi-dimensional array into one dimension
C = np.reshape(np.arange(3, 15), (3, 4))
print(C)  # [[ 3  4  5  6] [ 7  8  9 10] [11 12 13 14]]
D = C.flatten(order="C")  # row-major copy of the data
print(D)  # [ 3  4  5  6  7  8  9 10 11 12 13 14]

# Merging and splitting arrays
A = np.reshape(np.arange(1, 16), (3, 5))
print(A)
# Split: 3 is the number of equal sections, axis is the direction to split
# along (axis=0 cuts the three rows apart).
print(np.split(A, indices_or_sections=3, axis=0))


B = np.array([1] * 3)
C = np.array([2] * 3)
print(np.vstack([B, C]))  # stacked vertically: [[1 1 1] [2 2 2]]
print(np.hstack([B, C]))  # joined horizontally: [1 1 1 2 2 2]

D = np.array([[1, 2, 3, 4, 5],
              [6, 7, 8, 9, 10]])
print(D)  # [[ 1  2  3  4  5] [ 6  7  8  9 10]]
E = 2 * D
print(E)  # [[ 2  4  6  8 10] [12 14 16 18 20]]
# Depth-wise stack: pairs up elements at the same position.
# [[[1  2] [2  4] [3  6] [4  8] [5 10]] [[6 12] [7 14] [8 16] [9 18] [10 20]]]
print(np.dstack([D, E]))

数组合并与拆分

# Concatenating several arrays
# Indexing with np.newaxis (None) adds a trailing axis, turning each 1-D
# array of shape (3,) into a column vector of shape (3, 1).
A = np.array([1, 1, 1])[:, None]  # [[1] [1] [1]]
B = np.array([2, 2, 2])[:, None]  # [[2] [2] [2]]
# Vertical concatenation (along rows)
C1 = np.concatenate([A, B], axis=0)  # [[1] [1] [1] [2] [2] [2]]
# Horizontal concatenation (along columns)
C2 = np.concatenate([A, B], axis=1)  # [[1 2] [1 2] [1 2]]

# Depth-wise stacking
D = np.array([[0, 1, 2, 3, 4, 5],
              [6, 7, 8, 9, 10, 11]])
E = 2 * D
arr_dstack = np.dstack([D, E])
# arr_dstack has shape (2, 6, 2):
# [[[ 0  0][ 1  2][ 2  4][ 3  6][ 4  8][ 5 10]]
#  [[ 6 12][ 7 14][ 8 16][ 9 18][10 20][11 22]]]
# Depth-wise split into 2 equal parts along the third axis
F = np.dsplit(arr_dstack, indices_or_sections=2)
# Prints a list of two arrays of shape (2, 6, 1):
# [array([[[ 0],[ 1],[ 2],[ 3],[ 4],[ 5]],
#         [[ 6],[ 7],[ 8],[ 9],[10],[11]]]),
#  array([[[ 0],[ 2],[ 4],[ 6],[ 8],[10]],
#         [[12],[14],[16],[18],[20],[22]]])]
print(F)

copy()

# Plain assignment binds a second name to the SAME array object.
a = b = np.arange(4)
a[0] = 11  # a write through a is visible through b
print(a, b, sep="\n")  # both print [11 1 2 3]
b[1:3] = (12, 13)  # a write through b is visible through a
print(a, b, sep="\n")  # both print [11 12 13 3]


# A copy owns its own data, so later writes to the source do not affect it.
c = np.arange(4)
d = np.copy(c)
c[-1] = 44  # change the last element of c only
print(c)  # [0 1 2 44]
print(d)  # [0 1 2 3]

广播机制：

当两个形状不同的数组（或数组与标量）进行运算时，NumPy 会把较小的一方沿缺失的维度或长度为 1 的维度进行扩展，使两者形状一致后再逐元素运算。该扩展过程称为“广播”

# Broadcasting
a = np.array([[0, 0, 0],
              [10, 10, 10],
              [20, 20, 20],
              [30, 30, 30]])
b = np.array([0, 1, 2])
# a is (4, 3) and b is (3,): b is broadcast across every row of a before adding.
print(np.add(a, b))

# Equivalent to repeating b explicitly:
b = np.tile([0, 1, 2], reps=(4, 1))  # 4 copies along rows, 1 along columns
print(np.add(a, b))

常用API：

bincount():计算索引出现次数

 #bincount()
x = np.array([1, 2, 3, 3, 0, 1, 4, 0])
# Unweighted, np.bincount(x) would give [2 2 1 2 1]:
# value 0 occurs twice, 1 twice, 2 once, 3 twice and 4 once.
# The largest value in x is 4, so the result has one bin for each of 0..4.

w = np.array([0.3, 0.5, 0.7, 0.6, 0.1, -0.9, 1, 0.2])
# With weights, each bin holds the SUM of the weights of the elements that
# fall into it (e.g. bin 1 = 0.3 + (-0.9) = -0.6) rather than a count.
print(np.bincount(x, w))  # [ 0.3 -0.6  0.5  1.3  1. ]

# minlength pads the result with zero bins: 5 bins are needed here, so
# asking for 7 appends two zeros.
print(np.bincount(x, w, 7))  # [ 0.3 -0.6  0.5  1.3  1.   0.   0. ]

argmax():返回最大元素的索引位置

# argmax()
x = [[1, 3, 3], [7, 5, 2]]
print(x)
# axis=None: index of the maximum in the flattened input
# axis=0:    per column, the row index of the maximum -> [1 1 0]
# axis=1:    per row, the column index of the maximum; ties return the
#            first occurrence -> [1 0]
print(*(np.argmax(x, axis=ax) for ax in (None, 0, 1)), sep="\n")

around():近似取整

floor():向下取整

ceil():向上取整

# decimals is the number of decimal places to round to.
print(np.around([-0.6, 1.274, 2.35, 9.67, 13], 0))  # [-1.  1.  2. 10. 13.]
print(np.around([1.2798, 2.357, 9.67, 13], 1))  # [ 1.3  2.4  9.7 13. ]
print(np.around([1.2798, 2.357, 6.67, 13], 2))  # [ 1.28  2.36  6.67 13. ]
# A negative decimals rounds to the left of the decimal point: -1 rounds to
# the nearest ten. Exact halves go to the even neighbour, so 5 becomes 0.
print(np.around([1, 2, 5, 6, 56], -1))  # [ 0  0  0 10 60]

# -2 rounds to the nearest hundred.
print(np.around([1, 2, 5, 6, 56], -2))  # [  0   0   0   0 100]

# Round down (towards negative infinity)
print(np.floor([-0.6, -1.4, -0.1, -1.8, 0, 1.4, 1.7]))  # [-1. -2. -1. -2.  0.  1.  1.]

# Round up (towards positive infinity)
print(np.ceil([-0.6, -1.4, -0.1, -1.8, 0, 1.4, 1.7]))  # [-0. -1. -0. -1.  0.  2.  2.]

numpy.flatnonzero():

　　该函数输入一个矩阵，返回扁平化后矩阵中非零元素的位置（index）

这是官方文档给出的用法，非常正规，输入一个矩阵，返回了其中非零元素的位置.

>>> x = np.arange(-2, 3)
>>> x
array([-2, -1,  0,  1,  2])
>>> np.flatnonzero(x)
array([0, 1, 3, 4])
import numpy as np
d = np.array([1, 2, 3, 4, 4, 3, 5, 3, 6])
# The comparison gives a boolean mask; flatnonzero returns the positions
# where the mask is True, i.e. the indices at which d equals 3.
haa = np.flatnonzero(np.equal(d, 3))
print(haa)

[2 5 7]

对向量元素的判断 d == 3 会返回一个与向量等长的布尔数组（True/False，相当于 1/0）；再调用 np.flatnonzero，返回其中为 True（非零）的位置，也就是要找的元素在向量中的位置。

# Visualize some examples from the dataset.
# Draws a grid with one column per class and one row per sampled image.
# NOTE(review): plt, X_train and y_train are not defined in this snippet;
# presumably matplotlib.pyplot, the training images and their integer labels.
classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']  # class names
num_classes = len(classes)  # number of classes
samples_per_class = 7  # images drawn per class
for y, cls in enumerate(classes):  # y is the class index, cls its name
    # Positions in y_train whose label equals y, then a draw of
    # samples_per_class distinct positions from them.
    idxs = np.random.choice(
        np.flatnonzero(y_train == y), size=samples_per_class, replace=False
    )
    for i, idx in enumerate(idxs):  # i is the grid row, idx the index into X_train
        # Subplot numbers are 1-based and run row by row.
        plt_idx = i * num_classes + y + 1
        plt.subplot(samples_per_class, num_classes, plt_idx)
        plt.imshow(X_train[idx].astype('uint8'))
        plt.axis('off')
        if not i:
            plt.title(cls)  # label each column once, above its first row
plt.show()

np.random.choice()

# np.random.choice(a, size, replace, p): draw `size` samples from `a` (an
# integer a means np.arange(a)), where element i is picked with probability
# p[i]. With p=None every element is equally likely.
a1 = np.random.choice(a=5, size=3, replace=False, p=None)
print(a1)
# Non-uniform distribution: p gives the probability of drawing each element.
a2 = np.random.choice(a=5, size=3, replace=False, p=[0.2, 0.1, 0.3, 0.4, 0.0])
print(a2)
# `replace` says whether a drawn element is put back before the next draw.
# With replace=False the three results are all different; with replace=True
# duplicates can occur because earlier draws are returned to the pool.

np.reshape(a, (x, -1))#-1表示该维度由NumPy自动推断，相当于元素总个数除以reshape的第一个维度的值（a.size // x）

np.mean() 函数定义：
numpy.mean(a, axis=None, dtype=None, out=None, keepdims=False)

mean()函数功能：求取均值
经常操作的参数为axis，以m * n矩阵举例：

axis 不设置值，对 m*n 个数求均值，返回一个实数

axis = 0：压缩行，对各列求均值，返回长度为 n 的一维数组（设置 keepdims=True 时形状为 1 * n）

axis = 1：压缩列，对各行求均值，返回长度为 m 的一维数组（设置 keepdims=True 时形状为 m * 1）

posted on 2019-06-01 12:41 Manuel 阅读(144) 评论(0) 编辑收藏举报