numpy模块
numpy模块学习
前言
NumPy 库支持大量的高维数组与矩阵运算,同时也为数组运算提供了大量的数学函数;它在大规模数值计算上运行效率极高,是许多机器学习框架的基础库
常用属性与方法
>>> import numpy as np
# 生成行向量(一维数组)
>>> A = np.array([1, 2, 3, 4])
>>> print(A)
[1 2 3 4]
# 生成二维数组(3行 x 4列)
>>> B = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [4, 3, 2, 1]])
>>> print(B)
[[1 2 3 4]
[5 6 7 8]
[4 3 2 1]]
# 生成矩阵,对矩阵求逆(注意:此处 A 为 2行 x 3列 的非方阵,.I 返回的是伪逆)
>>> A = np.mat([[1, 2, 3], [1, 2, 3]])
>>> A
matrix([[1, 2, 3],
[1, 2, 3]])
>>> A.I
matrix([[0.03571429, 0.03571429],
[0.07142857, 0.07142857],
[0.10714286, 0.10714286]])
# shape 代表各维度的大小, size 代表元素总数(注意:此处的 A 指前面定义的一维数组 np.array([1, 2, 3, 4]),而非上面的矩阵)
>>> print(A.shape)
(4,)
>>> print(B.shape)
(3, 4)
>>> print(A.size)
4
# 判断对应位置是否能被 2 整除,成立则用 nan(缺省值)代替,否则保留原值
np.where(a % 2 == 0, np.nan, a)
>>> A
matrix([[1, 2, 3],
[1, 2, 3],
[1, 2, 3]])
>>> A.ndim # 维度
2
>>> np.where(A % 2 == 0, np.nan, A)
array([[ 1., nan, 3.],
[ 1., nan, 3.],
[ 1., nan, 3.]])
# 正余弦
>>> d = np.array([3.14/2, 3.14])
>>> np.sin(d)
array([0.99999968, 0.00159265])
>>> np.cos(d)
array([ 7.96326711e-04, -9.99998732e-01])
# exp log
>>> d2 = np.array([1, 2, 3])
>>> np.exp(d2)
array([ 2.71828183, 7.3890561 , 20.08553692])
>>> np.log(d2)
array([0. , 0.69314718, 1.09861229])
# dtype 属性表示数组中元素的数据类型
>>> print(A.dtype)
int32
>>> C = np.array([1, 2, 3.0, 4])
>>> print(C.dtype)
float64
>>> D = np.array([1, 2, 3.0, '4'])
>>> print(D.dtype)
<U32
# 改变数据类型
>>> E = D.astype(float)
>>> E.dtype
dtype('float64')
# 生成一个一维数组,并转化为 2行 x 5列 的数组
>>> a = np.arange(10)
>>> print(a)
[0 1 2 3 4 5 6 7 8 9]
>>> b = a.reshape(2, 5)
>>> print(b)
[[0 1 2 3 4]
[5 6 7 8 9]]
# reshape
>>> x
array([0.23976742, 0.03450459, 0.14859311, 0.81086617, 0.65310538,
0.54446869, 0.04119477, 0.46816625, 0.88874673, 0.47625856])
>>> x.reshape(-1, 1)
array([[0.23976742],
[0.03450459],
[0.14859311],
[0.81086617],
[0.65310538],
[0.54446869],
[0.04119477],
[0.46816625],
[0.88874673],
[0.47625856]])
# 初始化一个全零的 3行 x 4列 的数组
>>> c = np.zeros((3, 4), dtype=int)
>>> print(c)
[[0 0 0 0]
[0 0 0 0]
[0 0 0 0]]
# 初始化一个全一的 3行 x 4列 的数组
>>> c2 = np.ones((3, 4), dtype=int)
>>> print(c2)
[[1 1 1 1]
[1 1 1 1]
[1 1 1 1]]
# 初始化一个单位矩阵
>>> c3 = np.eye(3)
>>> print(c3)
[[1. 0. 0.]
[0. 1. 0.]
[0. 0. 1.]]
# 在 [10, 25) 内以5为步长生成一维数组
>>> d = np.arange(10, 25, 5)
>>> print(d)
[10 15 20]
# 生成随机的 2行 x 3列 数组
>>> e = np.random.rand(2, 3)
>>> print(e)
[[0.84489957 0.05814669 0.68395074]
[0.99549967 0.40004318 0.06304403]]
# 在 [0, 20] 内生成含 5 个元素的等差数列(一维数组,包含两个端点)
>>> f = np.linspace(0, 20, 5)
>>> print(f)
[ 0. 5. 10. 15. 20.]
# 返回一个大小为 10、取值区间为 [1, 3)(即 1 或 2)的随机整数数组
>>> o = np.random.randint(1, 3, 10, dtype=int)
>>> print(o)
[1 2 1 2 2 2 1 2 2 1]
# 排序 0表示按列排序,1表示按行排序
>>> g = np.random.rand(3, 4)
>>> print(np.sort(g, axis=1))
[[0.2781411 0.3905857 0.49844233 0.87326903]
[0.20858033 0.38660401 0.49121198 0.78279587]
[0.04344476 0.64196408 0.66657039 0.89382273]]
>>> print(np.sort(g, axis=0))
[[0.2781411 0.20858033 0.38660401 0.04344476]
[0.64196408 0.49844233 0.66657039 0.3905857 ]
[0.78279587 0.89382273 0.87326903 0.49121198]]
# 从小到大各个数的索引位置
>>> print(np.argsort(g))
[[0 3 1 2]
[1 2 3 0]
[3 0 2 1]]
>>> print(g)
[[0.2781411 0.49844233 0.87326903 0.3905857 ]
[0.78279587 0.20858033 0.38660401 0.49121198]
[0.64196408 0.89382273 0.66657039 0.04344476]]
# 数组对应元素相乘 相加
>>> h = np.array([[1, 2, 3], [4, 5, 6]])
>>> h2 = np.array([[1, 2, 3], [4, 5, 6]])
>>> h3 = h * h2
>>> print(h3)
[[ 1 4 9]
[16 25 36]]
>>> h4 = h + h2
>>> print(h4)
[[ 2 4 6]
[ 8 10 12]]
# 矩阵做内积(注意:此处的 h2 为 3行 x 2列,例如前面 h2 的转置 h2.T,这样才能与 2行 x 3列 的 h 相乘)
>>> print(h)
[[1 2 3]
[4 5 6]]
>>> print(h2)
[[1 4]
[2 5]
[3 6]]
>>> print(np.dot(h, h2))
[[14 32]
[32 77]]
>>> a = np.array([1,1])
>>> b = np.array([2,2])
>>> a.T.dot(b)
4
>>> np.dot(a,b)
4
# 矩阵拼接
>>> print(k)
[[1 2 3]
[4 5 6]]
>>> print(k2)
[[1 2 3]
[4 5 6]]
>>> k3 = np.hstack((k, k2)) # 列拼接
>>> print(k3)
[[1 2 3 1 2 3]
[4 5 6 4 5 6]]
>>> k3 = np.vstack((k, k2)) # 行拼接
>>> print(k3)
[[1 2 3]
[4 5 6]
[1 2 3]
[4 5 6]]
# insert(目标数组, 插入位置, 插入值, axis=轴):在指定位置之前插入值,返回新数组
>>> a
array([[1., 2.],
[3., 4.]])
>>> a1 = np.insert(a, 0, 1, axis=1)
>>> a1
array([[1., 1., 2.],
[1., 3., 4.]])
>>> a2 = np.insert(a, 1, 0, axis=1)
>>> a2
array([[1., 0., 2.],
[3., 0., 4.]])
>>> a3 = np.insert(a, 1, 0, axis=0)
>>> a3
array([[1., 2.],
[0., 0.],
[3., 4.]])
>>> a4 = np.insert(a, 2, [9, 9], axis=0)
>>> a4
array([[1., 2.],
[3., 4.],
[9., 9.]])
# 矩阵转向量
>>> l = np.array([[1, 2, 3, 4], [1, 2, 3, 4]])
>>> l2 = np.ravel(l)
>>> print(l2)
[1 2 3 4 1 2 3 4]
# 拆分矩阵
>>> print(l)
[[1 2 3 4]
[1 2 3 4]]
>>> print(np.vsplit(l, 2)) # 按行拆分
[array([[1, 2, 3, 4]]), array([[1, 2, 3, 4]])]
>>> print(np.hsplit(l, 2)) # 按列拆分
[array([[1, 2],
[1, 2]]),
array([[3, 4],
[3, 4]])]
# 读取文件 转换成矩阵或向量
# genfromtxt()
# delimiter 指的是用什么分割
# dtype 指定当前数据类型
# skip_header 跳过文件开头的行数
D:\Document\LearningNotes>type test.txt
this a test page
this a test page
this a test page
this a test page
this a test page
this a test page
this a test page
this a test page
D:\Document\LearningNotes>python
Python 3.9.4 (tags/v3.9.4:1f2e308, Apr 6 2021, 13:40:21) [MSC v.1928 64 bit (AMD64)] on win32
Type "help", "copyright", "credits" or "license" for more information.
>>> import numpy as np
>>> text = np.genfromtxt('test.txt', delimiter='.', dtype=str, skip_header=1)
>>> print(text)
['this a test page' 'this a test page' 'this a test page'
'this a test page' 'this a test page' 'this a test page'
'this a test page']
>>> text = np.genfromtxt('test.txt', delimiter=' ', dtype=str, skip_header=1)
>>> print(text)
[['this' 'a' 'test' 'page']
['this' 'a' 'test' 'page']
['this' 'a' 'test' 'page']
['this' 'a' 'test' 'page']
['this' 'a' 'test' 'page']
['this' 'a' 'test' 'page']
['this' 'a' 'test' 'page']]
# 矩阵数据操作
>>> print(text[2, 0])
this
>>> print(text[3, 2])
test
>>> print(text[3, 1:3])
['a' 'test']
>>> print(text[1:3, 1:3])
[['a' 'test']
['a' 'test']]
>>> print(text.T) # 转置
[['this' 'this' 'this' 'this' 'this' 'this' 'this']
['a' 'a' 'a' 'a' 'a' 'a' 'a']
['test' 'test' 'test' 'test' 'test' 'test' 'test']
['page' 'page' 'page' 'page' 'page' 'page' 'page']]
# 求最大值,最小值,平均值
>>> nums = np.array([[1, 0, 3], [1, 2, 7]])
>>> print(nums.max())
7
>>> print(nums.min())
0
>>> print(nums.mean())
2.3333333333333335
>>> print(nums[0:1, 1:2].max())
0
# 求和、行和、列和
>>> print(nums)
[[1 0 3]
[1 2 7]]
>>> print(nums.sum())
14
>>> print(nums.sum(axis=1))
[ 4 10]
>>> print(nums.sum(axis=0))
[ 2 2 10]
# 种子
# 如果设置相同的种子数,可以得到相同的随机数排列
np.random.seed(123)
# 正态分布
# loc(float):正态分布的均值,对应着这个分布的中心
# scale(float):正态分布的标准差,对应分布的宽度,scale越大,正态分布的曲线越矮胖,scale越小,曲线越高瘦
# size(int 或者整数元组):输出数组的形状(shape),默认为 None(此时返回单个数值)
x = np.random.normal(loc=0, scale=1e-2, size=shape)
实例代码
import numpy as np
import matplotlib.pyplot as plt
# Random scatter-plot demos.

# Example 1: ten points scattered around the line y = 3x + 4.
# Seeding the global generator makes the drawn values repeatable.
np.random.seed(555)
x = np.random.random(10)
y = 3 * x + 4
y += np.random.normal(size=10)  # add standard-normal noise to the line
plt.scatter(x, y)
plt.show()

# Example 2: evenly spaced x values from -2 up to (not including) 2 in
# steps of 0.1, with uniform noise added on top of the line y = 2x.
# The generator is not re-seeded here; it continues from example 1's state.
x = np.arange(-2, 2, 0.1)
y = 2 * x
y += np.random.random(x.size)
plt.scatter(x, y)
plt.show()