NumPy Essentials 带注释源码 五、NumPy 中的线性代数

# 来源:NumPy Essentials ch5

矩阵

import numpy as np 
ndArray = np.arange(9).reshape(3,3) 
# matrix 可以从 ndarray 直接构建
x = np.matrix(ndArray) 
# identity 用于构建单位矩阵
y = np.mat(np.identity(3)) 
x 
'''
matrix([[0, 1, 2], 
        [3, 4, 5], 
        [6, 7, 8]]) 
'''
y 
''' 
matrix([[1., 0., 0.], 
        [0., 1., 0.], 
        [0., 0., 1.]]) 
'''

# 矩阵的相加是逐元素的
x + y
'''
matrix([[ 1.,  1.,  2.], 
        [ 3.,  5.,  5.], 
        [ 6.,  7.,  9.]]) 
'''

# 矩阵乘法是一行乘一列
x * x 
'''
matrix([[ 15,  18,  21], 
        [ 42,  54,  66], 
        [ 69,  90, 111]]) 
'''
# np.dot 效果相同
np.dot(ndArray, ndArray) 
'''
array([[ 15,  18,  21], 
       [ 42,  54,  66], 
       [ 69,  90, 111]]) 
'''

# 矩阵的乘方
x ** 3 
'''
matrix([[ 180,  234,  288], 
        [ 558,  720,  882], 
        [ 936, 1206, 1476]])
'''

z = np.matrix(np.random.random_integers(1, 50, 9).reshape(3,3)) 
z
'''
matrix([[32, 21, 28], 
        [ 2, 24, 22], 
        [32, 20, 22]]) 
'''

# I 属性求矩阵的逆,不可逆时报错
z.I 
'''
matrix( [[-0.0237 -0.0264  0.0566] 
         [-0.178   0.0518  0.1748] 
         [ 0.1963 -0.0086 -0.1958]]) 
'''

# H 属性求共轭转置
z.H
'''
matrix([[32  2 32] 
        [21 24 20] 
        [28 22 22]]) 
'''

# 求解线性方程组
# Ax = b => x = A.I b
# 可以从这种字符串创建矩阵
# 空格分隔行,分号分隔列
A = np.mat('3 1 4; 1 5 9; 2 6 5') 
b = np.mat([[1],[2],[3]]) 
x = A.I * b
x 
'''
matrix([[ 0.2667], 
        [ 0.4667], 
        [-0.0667]]) 
'''

# allclose 验证是否等价
np.allclose(A * x, b) 
# True 

# matrix 使用矩阵的方法来计算转置
x = np.arange(25000000).reshape(5000,5000) 
y = np.mat(x) 
'''
%timeit x.T 
10000000 loops, best of 3: 176 ns per loop 

%timeit y.T 
1000000 loops, best of 3: 1.36 µs per loop
'''

# A 属性返回等价的 ndarray
# A1 属性返回展开的等价 ndarray
A.A 
'''
array([[3, 1, 4], 
       [1, 5, 9], 
       [2, 6, 5]]) 
'''
A.A1 
# array([3, 1, 4, 1, 5, 9, 2, 6, 5]) 

线性代数

x = np.array([[1, 2], [3, 4]]) 
y = np.array([[10, 20], [30, 40]]) 
# dot 对一维数组计算内积
# 对二维数组计算矩阵乘法
np.dot(x, y) 
'''
[[1*10+2*30, 1*20+2*40],
 [3*10+4*30, 3*20+4*40]]

=>

[[ 70, 100],
 [150, 220]] 
'''

# vdot 将数组展开计算内积
# 等价于 np.dot(x.ravel(), y.ravel())
np.vdot(x, y) 
# 1*10+2*20+3*30+4*40=300

# outer计算外积
# 返回一个二维数组,每个元素是 x[i]*y[j]
np.outer(x,y) 
'''
[[1*10, 1*20, 1*30, 1*40],
 [2*10, 2*20, 2*30, 2*40],
 [3*10, 3*20, 3*30, 3*40],
 [4*10, 4*20, 4*30, 4*40]]

=>
 
[[ 10,  20,  30,  40], 
 [ 20,  40,  60,  80], 
 [ 30,  60,  90, 120], 
 [ 40,  80, 120, 160]]
'''

# cross 计算叉积
# 接受一维二元或三元向量

a = np.array([1,0,0]) 
b = np.array([0,1,0]) 
np.cross(a,b) 
'''
det([[i, j, k],
     [1, 0, 0],
     [0, 1, 0]])
     
=> 

[0, 0, 1]
'''
np.cross(b,a) 
# array([ 0,  0, -1]) 

# linalg.det 计算行列式
# 对于二阶方阵来说
# det = m[0,0]*m[1,1] - m[0,1]*m[1,0]
x = np.array([[4,8],[7,9]]) 
np.linalg.det(x) 
# -20.000000000000007 

# linalg.inv 求逆
np.linalg.inv(x) 
'''
array([[-0.45,  0.4 ], 
       [ 0.35, -0.2 ]]) 
'''
np.mat(x).I 
'''
matrix([[-0.45,  0.4 ], 
        [ 0.35, -0.2 ]]) 
'''

# linalg.solve 解方程组
# 原理是 dot(inv(A), b)
# 不可逆时会报错
x = np.linalg.solve(A,b) 
x 
'''
matrix([[ 0.2667], 
        [ 0.4667], 
        [-0.0667]]) 
'''

分解


x = np.random.randint(0, 10, 9).reshape(3,3) 
x
'''
array([[ 1,  5,  0] 
       [ 7,  4,  0] 
       [ 2,  9,  8]]) 
'''

# 特征值分解
# https://en.wikipedia.org/wiki/Eigenvalues_and_eigenvectors
# w 为特征值,v 的列向量为特征向量
w, v = np.linalg.eig(x) 
w
# array([ 8.,  8.6033,  -3.6033]) 
v
'''
array([[ 0.,  0.0384,  0.6834] 
       [ 0.,  0.0583, -0.6292] 
       [ 1.,  0.9976,  0.3702]] 
) 
'''

# 复矩阵的分解
y = np.array([[1, 2j],[-3j, 4]]) 
np.linalg.eig(y) 
'''
(array([ -0.3723+0.j,  5.3723+0.j]), 
 array([[0.8246+0.j    ,  0.0000+0.416j     ], 
        [-0.0000+0.5658j,  0.9094+0.j    ]])) 
'''

# 可能存在舍入错误
# 这里的特征值应该是 1 +/- 1e-10
z = np.array([[1 + 1e-10, -1e-10],[1e-10, 1 - 1e-10]]) 
np.linalg.eig(z) 
'''
(array([ 1.,  1.]), array([[0.70710678,  0.707106], 
        [0.70710678,  0.70710757]])) 
'''

# 奇异值分解可以看做特征值分解的扩展
# 可用于非方阵
# 如果 A 是 mxn 矩阵,可分解为 A = u * sigma * v
# u 是 mxmin(m,n) 矩阵,列向量为左奇异向量,也就是 A A^T 的特征向量
# sigma 是 min(m,n) 阶奇异值的对角阵,奇异值是 A A^T 和 A^T A 的特征值平方根
# v 是 min(m,n)xn 矩阵,列向量为右奇异向量,也就是 A^T A 的特征向量
np.set_printoptions(precision = 4) 
A = np.array([3,1,4,1,5,9,2,6,5]).reshape(3,3) 
# 这里没有设置 full_matrices=False
# u 是 mxm 阶,v 是 nxn 阶
u, sigma, vh = np.linalg.svd(A) 
u 
'''
array([[-0.3246,  0.799 ,  0.5062], 
       [-0.7531,  0.1055, -0.6494], 
       [-0.5723, -0.592 ,  0.5675]]) 
'''
vh 
'''
array([[-0.2114, -0.5539, -0.8053], 
       [ 0.4633, -0.7822,  0.4164], 
       [ 0.8606,  0.2851, -0.422 ]]) 
'''
sigma 
# array([ 13.5824,   2.8455,   2.3287]) 

# 如果设置了 full_matrices=False
# 就能将其乘起来
u, sigma, v = np.linalg.svd(A, full_matrices=False) 
u * np.diag(s) * v
'''
array([[3, 1, 4],
       [1, 5, 9],
       [2, 6, 5]])
'''

# QR 分解
# Ax = b, x = A^(-1) b
# A = QR
# x = R^(-1) Q^(-1) b
#   = R^(-1) Q.T b
b = np.array([1,2,3]).reshape(3,1) 
q, r = np.linalg.qr(A) 
x = np.dot(np.linalg.inv(r), np.dot(q.T, b)) 
x 
'''
array([[ 0.2667], 
       [ 0.4667], 
       [-0.0667]]) 
'''

多项式

# root 表示多项式的根为 1,2,3,4
root = np.array([1,2,3,4]) 
# poly 把根转化成系数数组
# 高次项在前
np.poly(root) 
# array([  1, -10,  35, -50,  24]) 
# 也就是 x ** 4 - 10 * x ** 3 + 35 * x ** 2 - 50 * x + 24

# roots 求多项式的根
np.roots([1,-10,35,-50,24]) 
# array([ 4.,  3.,  2.,  1.]) 

# polyval 计算多项式的值
# 接受多项式系数和 x 值
np.polyval([1,-10,35,-50,24], 5) 
# 24 

coef = np.array([1,-10,35,-50,24])
# polyint 求多项式函数的不定积分
# ∫(x ** n)dx = x ** (n + 1) / (n + 1) + C
# ∫(u + v)dx = ∫udx + ∫vdx
# 这里的常数项 C 一律为 0
integral = np.polyint(coef)  
integral
# array([  0.2 ,  -2.5 ,  11.6667, -25.  ,  24.  ,  0.  ]) 

# polyder 对多项式求导
# (x ** n)' = n * x ** (n - 1)
# (u + v)' + u' + v'
np.polyder(integral) == coef
# array([ True,  True,  True,  True,  True], dtype=bool) 
# 还可以直接算五阶导数
np.polyder(coef, 5)
# array([], dtype=int32) 

# 构造 Polynomial 对象
from numpy.polynomial import polynomial 
p = polynomial.Polynomial(coef) 
p 
# Polynomial([  1., -10.,  35., -50.,  24.], [-1,  1], [-1,  1]) 
# 取系数
p.coef 
# array([  1., -10.,  35., -50.,  24.]) 
# 取根
p.roots() 
# array([ 0.25  ,  0.3333,  0.5   ,  1.    ]) 
# 求函数值
polynomial.polyval(p, 5) 
# Polynomial([ 5.], [-1.,  1.], [-1.,  1.]) 
# 积分
p.integ() 
Polynomial([  0.    ,   1.    ,  -5.    ,  11.6667, -12.5   ,   4.8   ], [-1.,  1.], [-1.,  1.]) 
# 微分
p.integ().deriv() == p 
# True 

曲线拟合

# 我们的分组为
# <20: x7
# 21 ~ 30: x24
# 31 ~ 40: x21
# 41 ~ 50: x19
# 51 ~ 60: x17
# >60: x12
groups = [7, 24, 21, 19, 17, 12] 

# 我们模拟生成样本年龄,并将其连接起来
# concatenate 沿轴 0 连接数组
age = np.concatenate([np.random.randint((ind + 1)*10, (ind + 2)*10, group) for ind, group in enumerate(groups)]) 

age
'''
array( 
[11, 15, 12, 17, 17, 18, 12, 26, 29, 24, 28, 25, 27, 25, 26, 24, 23,  27, 26, 24, 27, 20, 28, 20, 22, 21, 23, 25, 27, 24, 25, 35, 39, 33, 35, 30, 32, 32, 36, 38, 31, 35, 38, 31, 37, 36, 39, 30, 36, 33, 36, 37, 45, 41, 44, 48, 45, 40, 44, 42, 47, 46, 47, 42, 42, 42, 44, 40, 40, 47, 47, 57, 56, 53, 53, 57, 54, 55, 53, 52, 54, 57, 53, 58, 58, 54, 57, 55, 64, 67, 60, 63, 68, 65, 66, 63, 67, 64, 68, 66] 
) 
'''

# 生成样本的分数
scores = [5.5, 5.7, 5.4, 4.9, 4.6, 4.4] 
sim_scores = np.concatenate([.01 * np.random.rand(group) + scores[ind] for ind, group in enumerate(groups)] ) 

sim_scores 
'''
array([ 
5.5089,  5.5015,  5.5024,  5.5   ,  5.5033,  5.5019,  5.5012, 
5.7068,  5.703 ,  5.702 ,  5.7002,  5.7084,  5.7004,  5.7036, 
5.7055,  5.7024,  5.7099,  5.7009,  5.7013,  5.7093,  5.7076, 
5.7029,  5.702 ,  5.7067,  5.7007,  5.7004,  5.7   ,  5.7017, 
5.702 ,  5.7031,  5.7087,  5.4079,  5.4082,  5.4083,  5.4025, 
5.4008,  5.4069,  5.402 ,  5.4071,  5.4059,  5.4037,  5.4004, 
5.4024,  5.4058,  5.403 ,  5.4041,  5.4075,  5.4062,  5.4014, 
5.4089,  5.4003,  5.4058,  4.909 ,  4.9062,  4.9097,  4.9014, 
4.9097,  4.9023,  4.9   ,  4.9002,  4.903 ,  4.9062,  4.9026, 
4.9094,  4.9099,  4.9071,  4.9058,  4.9067,  4.9005,  4.9016, 
4.9093,  4.6041,  4.6031,  4.6016,  4.6021,  4.6079,  4.6046, 
4.6055,  4.609 ,  4.6052,  4.6005,  4.6017,  4.6091,  4.6073, 
4.6029,  4.6012,  4.6062,  4.6098,  4.4014,  4.4043,  4.4013, 
4.4091,  4.4087,  4.4087,  4.4027,  4.4017,  4.4067,  4.4003, 
4.4021,  4.4061]) 
'''

# 现在我们需要构造数据集
# 行为记录,列为属性
# 第一个属性是年龄,第二个属性恒为 1
# 因为 y = k0 * x0 + k1 * x1
# 我们如果想得到常数项,就需要让 x0 恒为 1
AGE = np.vstack([age, np.ones(len(age))]).T 

# linalg.lstsq 使用最小二乘法做多元线性回归
# 原理是 x = (A.T * A).I * A.T * b
# 其中 x 是系数数组,A 是自变量数据集,b 是因变量数组
# 结果为属性对应的系数,score ~= m * age + c
m, c = np.linalg.lstsq(AGE, sim_scores)[0] 
m 
# -0.029435313781 
c 
# 6.30307651938 

year = np.arange(1,11) 
price = np.array([129000, 133000, 138000, 144000, 142000, 150000, 135000, 134000, 139000]). 
year 
# array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10]) 

# polyfit 执行一元多项式回归
# 参数为自变量、因变量和最高项次数
# 原理是转换为多元线性回归
# 特征为 1, x, x ** 2, ..., x ** n
# 结果为系数数组,高次在前
# year ~= a * price ** 2 + b * price + ca*11**2 + b*11 + c 
a, b, c = np.polyfit(year, price, 2) 
a 
# -549.242424242 
b 
# 6641.66666667 
c 
# 123116.666667 
a*11**2 + b*11 + c 
# 129716.66666666642 

posted @ 2017-06-15 09:34  绝不原创的飞龙  阅读(15)  评论(0编辑  收藏  举报  来源