NumPy数组是一个多维数组,称为ndarray,由两个部分组成:
·实际的数据
·描述这些数据的元数据
>>> import numpy as np
>>> ar = np.array([1,2,3,4,5])
>>> print(ar)
[1 2 3 4 5]
>>> print(ar.ndim) # 输出数组维度的个数(轴数或秩)
1
>>> print(ar.shape) # 输出数组的形状(各维度的大小),n行m列数组的shape为(n,m)
(5,)
>>> print(ar.size) # 输出元素的个数,n行m列的数组,元素总个数为n*m
5
>>> print(ar.dtype) # 数组元素的类型
int32
>>> print(ar.itemsize) # 每个元素的字节大小,int32为4,float64为8
4
>>> print(ar.data) # 包含实际数组元素的缓冲区
<memory at 0x000001A4FE5C4588>
>>> ar
array([1, 2, 3, 4, 5])
>>> # 创建数组:array()函数,参数可以是列表、元组、数组、range等序列(生成器不会被展开,需先转换为列表)
>>> ar1 = np.array(range(10))
>>> ar1
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
>>> ar2 = np.array([1,2,3,4])
>>> ar2
array([1, 2, 3, 4])
>>> ar3 = np.array([1.1,2.1,3,4]) # 元素都是浮点型
>>> ar3
array([1.1, 2.1, 3. , 4. ])
>>> ar4 = np.array([[1,2,3],('a','b','c')])
>>> ar4
array([['1', '2', '3'],
['a', 'b', 'c']], dtype='<U11')
>>> ar5 = np.array([[1,2,3],('a','b','c','d')]) # 嵌套的数量不一致,得到object类型的一维数组(NumPy 1.24及以上需显式指定dtype=object,否则抛出ValueError)
>>> ar5
array([list([1, 2, 3]), ('a', 'b', 'c', 'd')], dtype=object)
>>> a1 = np.array([1,2,3])
>>> a2 = np.array([4,5,6])
>>> a3 = np.array([a1,a2])
>>> a3
array([[1, 2, 3],
[4, 5, 6]])
# 创建数组 arange()函数
>>> print(np.arange(10))
[0 1 2 3 4 5 6 7 8 9]
>>> print(np.arange(10.0)) # 元素都是浮点型
[0. 1. 2. 3. 4. 5. 6. 7. 8. 9.]
>>> print(np.arange(2,5))
[2 3 4]
>>> print(np.arange(2,5,2))
[2 4]
>>> ar = np.arange(16).reshape(4,4)
>>> ar
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11],
[12, 13, 14, 15]])
>>> # 创建数组:linspace()返回在指定区间上计算的num个均匀间隔的样本
>>> ar1 = np.linspace(2.0,3.0,num=5) # 区间内均匀取5个元素,默认num为50
>>> ar1
array([2. , 2.25, 2.5 , 2.75, 3. ])
>>> ar2 = np.linspace(2.0,3.0,num=5,endpoint=False) # endpoint=False表示右端点值不取
>>> print(ar2)
[2. 2.2 2.4 2.6 2.8]
>>> ar3 = np.linspace(2.0,3.0,num=5,retstep=True) # retstep=True表示同时返回步长
>>> print(ar3) # 返回array和步长
(array([2. , 2.25, 2.5 , 2.75, 3. ]), 0.25)
# 创建数组 zeros()/zeros_like()/ones()/ones_like()
# numpy.zeros(shape,dtype=float,order='C')
# shape:数组形状,一维可直接用整数,二维及以上需用元组(),元组中的元素为整数
# dtype:数据类型,默认numpy.float64
# order:是否在存储器中以C或Fortran连续(按行或列方式)存储多维数据
>>> ar1 = np.zeros(5)
>>> ar1
array([0., 0., 0., 0., 0.])
>>> ar2 = np.zeros((2,2),dtype = np.int) # 注:np.int已在NumPy 1.24中移除,新版本请改用内置int
>>> ar2
array([[0, 0],
[0, 0]])
>>> ar3 = np.array([[1,2,3],[4,5,6]])
# zeros_like() 返回与指定数组形状类型相同的零数组
>>> ar3
array([[1, 2, 3],
[4, 5, 6]])
>>> ar4 = np.zeros_like(ar3)
>>> ar4
array([[0, 0, 0],
[0, 0, 0]])
# 创建单位矩阵
>>> ar1 = np.eye(5)
>>> ar1
array([[1., 0., 0., 0., 0.],
[0., 1., 0., 0., 0.],
[0., 0., 1., 0., 0.],
[0., 0., 0., 1., 0.],
[0., 0., 0., 0., 1.]])
# 数组切片
>>> ar = np.arange(16).reshape(4,4)
>>> ar
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11],
[12, 13, 14, 15]])
>>> ar[2] # 截取第三行
array([ 8, 9, 10, 11])
>>> ar[2][1] # 截取第三行第二个
9
>>> ar[2,1] # 截取第三行第二个
9
>>> ar[1:3] # 截取第二到三行
array([[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
>>> ar[:2,1:] # 截取1、2行,2、3、4列的二维数组
array([[1, 2, 3],
[5, 6, 7]])
以下内容在Jupyter Notebook中运行
[In]:
# 生成随机数
import numpy as np
samples = np.random.normal(size=(4,4)) # 生成标准正态分布的4*4的样本值(二维数组)
print(samples)
[Out]:
[[ 0.873187 -0.79028849 -1.34830254 0.20502978]
[ 0.173149 2.27029103 -0.17089641 -1.58288984]
[ 1.284522 -1.50668781 -0.16048419 -1.41927718]
[ 0.41458879 0.43483858 -0.51511941 -0.02804412]]
[In]:
import numpy as np
samples = np.random.normal(size=(1000)) # 生成一维随机数组
import matplotlib.pyplot as plt
plt.hist(samples) # 绘制直方图
[Out]:
(array([ 2., 2., 36., 112., 218., 276., 224., 105., 20., 5.]),
array([-3.94221935, -3.21586594, -2.48951252, -1.76315911, -1.0368057 ,
-0.31045229, 0.41590113, 1.14225454, 1.86860795, 2.59496137,
3.32131478]),
<a list of 10 Patch objects>)
[In]:
import numpy as np
samples = np.random.normal(size=(1000))
import matplotlib.pyplot as plt
plt.hist(samples,bins = 50) # 绘制直方图,指定划分为50个区间(bins)
[Out]:
(array([ 1., 1., 1., 0., 2., 2., 2., 2., 5., 9., 17., 8., 5.,
25., 17., 26., 34., 35., 34., 50., 42., 45., 50., 39., 63., 47.,
50., 42., 41., 44., 33., 39., 33., 36., 29., 19., 15., 13., 10.,
8., 5., 4., 3., 5., 5., 2., 0., 0., 1., 1.]),
array([-3.10211851, -2.97671477, -2.85131103, -2.72590729, -2.60050355,
-2.4750998 , -2.34969606, -2.22429232, -2.09888858, -1.97348484,
-1.8480811 , -1.72267735, -1.59727361, -1.47186987, -1.34646613,
-1.22106239, -1.09565864, -0.9702549 , -0.84485116, -0.71944742,
-0.59404368, -0.46863993, -0.34323619, -0.21783245, -0.09242871,
0.03297503, 0.15837878, 0.28378252, 0.40918626, 0.53459 ,
0.65999374, 0.78539748, 0.91080123, 1.03620497, 1.16160871,
1.28701245, 1.41241619, 1.53781994, 1.66322368, 1.78862742,
1.91403116, 2.0394349 , 2.16483865, 2.29024239, 2.41564613,
2.54104987, 2.66645361, 2.79185736, 2.9172611 , 3.04266484,
3.16806858]),
<a list of 50 Patch objects>)
[In]:
# numpy.random.rand(d0,d1,……,dn)生成一个[0,1)的随机浮点数或n维随机数组(均匀分布)
a1 = np.random.rand() # 生成随机浮点数
print(a1,type(a1))
[Out]:
0.600512724429105 <class 'float'>
[In]:
a2 = np.random.rand(4) # 生成形状为4的一维数组
print(a2,type(a2))
[Out]:
[0.91518171 0.09333734 0.4020652 0.83582766] <class 'numpy.ndarray'>
[In]:
a3 = np.random.rand(2,3) # 生成形状为2*3的二维数组
print(a3,type(a3))
[Out]:
[[0.72906892 0.32699937 0.86482124]
[0.11489558 0.22683691 0.64697615]] <class 'numpy.ndarray'>
[In]:
sample1 = np.random.rand(1000) # 生成形状为1000的一维数组
sample2 = np.random.rand(1000)
plt.scatter(sample1,sample2)
[Out]:
[In]:
# numpy.random.randn(d0,d1,……,dn)生成一个随机浮点数或n维随机数组(正态分布)
sample1 = np.random.randn(1000)
sample2 = np.random.randn(1000)
plt.scatter(sample1,sample2)
[Out]:
[In]:
# numpy.random.randint(low,high=None,size=None,dtype='l')生成一个整数或n维整数数组
# high不为None时,取[low,high)之间的随机数,否则取值[0,low)之间的随机整数
# dtype参数只能是int类型
a1 = np.random.randint(2) #low=2,生成一个[0,2)之间的随机整数
print(a1)
[Out]:
1
[In]:
a2 = np.random.randint(2,size=5) #low=2,size=5,生成5个[0,2)之间的随机整数
print(a2)
[Out]:
[0 0 0 1 0]
[In]:
a3 = np.random.randint(2,8,size=5) #low=2,high=8,size=5,生成5个[2,8)之间的随机整数
print(a3)
[Out]:
[7 4 4 5 2]
[In]:
#low=2,size=(2,3)生成一个2*3的整数数组,取值[0,2)随机整数
a4 = np.random.randint(2,size=(2,3))
print(a4)
[Out]:
[[1 0 0]
[1 1 0]]
[In]:
#low=2,high=8,size=(2,3)生成一个2*3的整数数组,取值[2,8)随机整数
a5 = np.random.randint(2,8,size=(2,3))
print(a5)
[Out]:
[[4 2 2]
[3 3 6]]
[In]:
#随机数种子
rng = np.random.RandomState(1) # 随机种子,种子确定不变的随机数
xtrain = 10 * rng.rand(30)
ytrain = 8 + 4 * xtrain + rng.rand(30) # 样本关系y=8+4x,加个随机数产生浮动
fig = plt.figure(figsize = (12,3))
ax1 = fig.add_subplot(1,2,1)
plt.scatter(xtrain,ytrain,marker = '.',color = 'k')
plt.grid()
plt.title('样本数据散点图')
[Out]:
[In]:
# 通用函数
# .T属性:转置
ar1 = np.arange(10) # 生成0-9的一维数组
ar2 = np.ones((5,2)) # 生成5行2列的全1矩阵
print(ar1,'\n',ar1.T) # 一维数组转置之后不变
print(ar2,'\n',ar2.T) # 二维数组转置之后行列互换
[Out]:
[0 1 2 3 4 5 6 7 8 9]
[0 1 2 3 4 5 6 7 8 9]
[[1. 1.]
[1. 1.]
[1. 1.]
[1. 1.]
[1. 1.]]
[[1. 1. 1. 1. 1.]
[1. 1. 1. 1. 1.]]
[In]:
# reshape()方法
ar3 = ar1.reshape(2,5) # 直接将已有数组改变形状
ar4 = np.zeros((4,6)).reshape(3,8) # 生成数组后直接改变形状
ar5 = np.reshape(np.arange(12),(3,4)) # 参数内添加数组和目标形状
print(ar1,'\n',ar3)
print(ar4)
print(ar5)
[Out]:
[0 1 2 3 4 5 6 7 8 9]
[[0 1 2 3 4]
[5 6 7 8 9]]
[[0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0.]]
[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]]
[In]:
# resize() 方法
ar6 = np.resize(np.arange(5),(3,4))
# np.arange(5)只有5个元素,而3*4的目标形状需要12个元素,不足的部分会循环重复这5个元素来填充
print(ar6)
[Out]:
[[0 1 2 3]
[4 0 1 2]
[3 4 0 1]]
[In]:
# 数组的复制
ar1 = np.arange(10)
ar2 = ar1
print(ar2 is ar1)
ar1[2] = 9 # ar2 = ar1只是给同一个数组对象起了另一个名字(并未复制),修改ar1后ar2也随之改变
print(ar1,ar2)
[Out]:
True
[0 1 9 3 4 5 6 7 8 9] [0 1 9 3 4 5 6 7 8 9]
[In]:
ar3 = ar1.copy()
print(ar3 is ar1)
ar1[0] = 9
print(ar1,ar3)
print(id(ar1),id(ar2))
[Out]:
False
[9 1 9 3 4 5 6 7 8 9] [0 1 9 3 4 5 6 7 8 9]
2635020270208 2635020270208
[In]:
# 数组类型的转换 astype()
ar1 = np.arange(10,dtype=float)
print(ar1,ar1.dtype)
[Out]:
[0. 1. 2. 3. 4. 5. 6. 7. 8. 9.] float64
[In]:
ar2 = ar1.astype(np.int32)
print(ar2,ar2.dtype)
print(ar1,ar1.dtype)
[Out]:
[0 1 2 3 4 5 6 7 8 9] int32
[0. 1. 2. 3. 4. 5. 6. 7. 8. 9.] float64
[In]:
# 数组的堆叠
a = np.arange(5) # 生成0-4的一维数组
b = np.arange(5,9) # 生成5-8的一维数组
ar1 = np.hstack((a,b)) # 将a、b两个数组横向堆叠(另vstack是纵向堆叠)
print(a,a.shape)
print(b,b.shape)
print(ar1,ar1.shape)
[Out]:
[0 1 2 3 4] (5,)
[5 6 7 8] (4,)
[0 1 2 3 4 5 6 7 8] (9,)
[In]:
a = np.array([[1],[2],[3]]) # 生成3行1列的数组
b = np.array([['a'],['b'],['c']]) # 生成3行1列的数组
ar2 = np.hstack((a,b)) #将两个数组横向堆叠(数字数组与字符数组统一成字符型数组)
print(a,a.shape)
print(b,b.shape)
print(ar2,ar2.shape)
[Out]:
[[1]
[2]
[3]] (3, 1)
[['a']
['b']
['c']] (3, 1)
[['1' 'a']
['2' 'b']
['3' 'c']] (3, 2)
[In]:
# numpy.stack(arrays,axis=0) 沿着新轴连接数组的序列,形状必须一样
# 如[1 2 3]和[4 5 6],shape均为(3,)
# axis=0:[[1 2 3] [4 5 6]],shape为(2,3)
# axis=1:[[1 4] [2 5] [3 6]],shape为(3,2)
a = np.arange(5)
b = np.arange(5,10)
ar1 = np.stack((a,b))
ar2 = np.stack((a,b),axis = 1)
print(a,a.shape)
print(b,b.shape)
print(ar1,ar1.shape)
print(ar2,ar2.shape)
[Out]:
[0 1 2 3 4] (5,)
[5 6 7 8 9] (5,)
[[0 1 2 3 4]
[5 6 7 8 9]] (2, 5)
[[0 5]
[1 6]
[2 7]
[3 8]
[4 9]] (5, 2)
[In]:
# 数组的拆分
# numpy.hsplit(ary,indices_or_sections):将数组沿水平方向(逐列)拆分为多个子数组(按列拆分)
# 输出结果为列表,列表中元素为数组
ar = np.arange(16).reshape(4,4)
ar1 = np.hsplit(ar,2) # 按列等分为2个子数组,每个子数组包含两列
print(ar)
print(ar1,type(ar1))
[Out]:
[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]
[12 13 14 15]]
[array([[ 0, 1],
[ 4, 5],
[ 8, 9],
[12, 13]]), array([[ 2, 3],
[ 6, 7],
[10, 11],
[14, 15]])] <class 'list'>
[In]:
# numpy.vsplit(ary,indices_or_sections):将数组垂直(行方向)拆分为多个子数组(按行拆分)
ar2 = np.vsplit(ar,4)
print(ar2,type(ar2))
[Out]:
[array([[0, 1, 2, 3]]), array([[4, 5, 6, 7]]), array([[ 8, 9, 10, 11]]), array([[12, 13, 14, 15]])] <class 'list'>
[In]:
# 数组的简单运算
ar = np.arange(6).reshape(2,3)
print(ar + 10)
print(ar * 2)
print(1 / (ar+1))
print(ar ** 0.5)
[Out]:
[[10 11 12]
[13 14 15]]
[[ 0 2 4]
[ 6 8 10]]
[[1. 0.5 0.33333333]
[0.25 0.2 0.16666667]]
[[0. 1. 1.41421356]
[1.73205081 2. 2.23606798]]
[In]:
print(ar.mean()) # 均值
print(ar.max())
print(ar.min())
print(ar.std())
print(ar.var()) # 方差
print(ar.sum(),np.sum(ar,axis = 0)) # axis为0,按列求和;axis为1,按行求和
print(np.sort(np.array([1,5,2,3,7]))) #排序
[Out]:
2.5
5
0
1.707825127659933
2.9166666666666665
15 [3 5 7]
[1 2 3 5 7]