day 18 numpy模块/matplotlib模块/pandas模块
numpy模块
numpy模块:用来做数据分析
numpy数组
import numpy as np
# Two 1-D arrays of equal length; `*` multiplies them element by element
# (it is not a dot product).
arr1, arr2 = np.array([1, 2, 3]), np.array([4, 5, 6])
print(arr2 * arr1)
[ 4 10 18]
# One-dimensional array: built from a flat sequence of values.
arr = np.array((1, 2, 4))
print(type(arr), arr)
<class 'numpy.ndarray'> [1 2 4]
# Two-dimensional array: a list of equal-length rows.
arr = np.array([[1, 2, 3], [4, 5, 6]])
print(arr)
[[1 2 3]
[4 5 6]]
# Three-dimensional array (not covered further in these notes):
# two 2x3 layers stacked along a leading axis.
arr3 = np.array([
    [[1, 2, 3], [4, 5, 6]],
    [[1, 2, 3], [4, 5, 6]],
])
arr = np.array([[1, 2, 3], [4, 5, 6]])

# T: the transpose of the array (rows and columns swapped).
# The separator reproduces a lone '\n' argument surrounded by print's
# default single spaces.
print(arr, arr.T, sep=' \n ')

# dtype: the type of the array's elements. It is a NumPy type such as int32
# or float64 rather than a built-in Python type; the default integer width
# depends on the platform and NumPy version.
print(arr.dtype)
[[1 2 3]
[4 5 6]]
[[1 4]
[2 5]
[3 6]]
int32
arr = np.array([[1, 2, 3], [4, 5, 6]])

# size:  total number of elements
# ndim:  number of dimensions
# shape: length of each dimension, as a tuple
print(arr.size, arr.ndim, arr.shape, sep='\n')

# astype: produce an array with the elements converted to another dtype.
arr = arr.astype(np.float64)
print(arr)
6
2
(2, 3)
[[1. 2. 3.]
[4. 5. 6.]]
arr = np.array([[1, 2, 3], [4, 5, 6]])

# Slicing: the index is written as [rows, columns].
print(
    arr[:, :],       # every row, every column
    arr[0, 0],       # a single element
    arr[0, :],       # the whole first row
    arr[:2, -2:],    # first two rows, last two columns
    arr[arr > 4],    # boolean mask: only the elements greater than 4
    sep='\n',
)
[[1 2 3]
[4 5 6]]
1
[1 2 3]
[[2 3]
[5 6]]
[5 6]
arr = np.array([[1, 2, 3], [4, 5, 6]])

# Assigning through an index or slice changes the array in place;
# a scalar on the right-hand side is written to every selected element.
arr[0, 0] = 0  # one element
print(arr)
arr[0, :] = 0  # the whole first row
print(arr)
arr[:, :] = 0  # every element
print(arr)
[[0 2 3]
[4 5 6]]
[[0 0 0]
[4 5 6]]
[[0 0 0]
[0 0 0]]
# Joining arrays
arr1 = np.array([[1, 2, 3], [4, 5, 6]])
# Mixing numbers and strings in one array makes NumPy store every element
# as a string.
arr2 = np.array([[7, 8, 9], ['a', 'b', 'c']])

# hstack joins the arrays side by side (the result gains columns).
# The arrays are passed as a single sequence; a tuple is used here.
print(np.hstack((arr1, arr2)))

# vstack joins the arrays one on top of the other (the result gains rows).
print(np.vstack((arr1, arr2)))

# concatenate joins along `axis`. The default is axis=0, which stacks rows
# like vstack; axis=1 appends columns like hstack.
print(np.concatenate((arr1, arr2), axis=1))
[['1' '2' '3' '7' '8' '9']
['4' '5' '6' 'a' 'b' 'c']]
[['1' '2' '3']
['4' '5' '6']
['7' '8' '9']
['a' 'b' 'c']]
[['1' '2' '3' '7' '8' '9']
['4' '5' '6' 'a' 'b' 'c']]
通过函数创建numpy数组
# Creating arrays with constructor functions
print(np.ones((2, 3)))          # 2x3 array filled with 1.0
print(np.zeros((3, 3)))         # 3x3 array filled with 0.0
print(np.eye(3, 3))             # 3x3 identity matrix
print(np.linspace(1, 100, 10))  # 10 evenly spaced values, 1 and 100 included
print(np.arange(2, 10))         # integers from 2 up to, but excluding, 10

# reshape: the same 9 values laid out as 1 row of 9 columns
arr1 = np.zeros((3, 3))
print(arr1.reshape(1, 9))
[[1. 1. 1.]
[1. 1. 1.]]
[[0. 0. 0.]
[0. 0. 0.]
[0. 0. 0.]]
[[1. 0. 0.]
[0. 1. 0.]
[0. 0. 1.]]
[ 1. 12. 23. 34. 45. 56. 67. 78. 89. 100.]
[2 3 4 5 6 7 8 9]
[[0. 0. 0. 0. 0. 0. 0. 0. 0.]]
numpy数组的运算
# Arithmetic (+, -, *) with a scalar is applied to every element.
arr1 = 4 * np.ones((3, 4))
print(arr1)

# NumPy math functions are applied element by element as well.
arr1 = np.sin(arr1)
print(arr1)

# Matrix product: a (2x3) array times a (3x2) array gives a (2x2) array.
arr1 = np.array([[1, 2, 3], [4, 5, 6]])
arr2 = np.array([[1, 2], [4, 5], [6, 7]])
print(np.dot(arr1, arr2))

# Inverse of a square matrix
arr = np.array([[1, 2, 3], [4, 5, 6], [9, 8, 9]])
print(np.linalg.inv(arr))

# Statistical helpers: here, the sum of the first row
print(np.sum(arr[0, :]))
[[4. 4. 4. 4.]
[4. 4. 4. 4.]
[4. 4. 4. 4.]]
[[-0.7568025 -0.7568025 -0.7568025 -0.7568025]
[-0.7568025 -0.7568025 -0.7568025 -0.7568025]
[-0.7568025 -0.7568025 -0.7568025 -0.7568025]]
[[27 33]
[60 75]]
[[ 0.5 -1. 0.5 ]
[-3. 3. -1. ]
[ 2.16666667 -1.66666667 0.5 ]]
6
numpy.random
# Generating random numbers with numpy.random.
# The statements below draw from NumPy's global random state in sequence,
# so the values printed after seed(1) depend on this exact order.
# rand: uniform samples in [0, 1); the shape is given as separate arguments.
print(np.random.rand(3, 4))
# random: uniform samples in [0, 1) as well; the shape is given as one tuple.
print(np.random.random((3, 4)))
# Fixing the seed makes the draws that follow reproducible from run to run.
np.random.seed(1)
print(np.random.random((3, 4)))
# s = np.random.RandomState(1) # same effect as seed, but on a separate generator object
# print(s.random((3, 4)))
arr = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
# shuffle reorders the array in place along its first axis (whole rows move).
np.random.shuffle(arr)
print(arr)
# choice: pick the requested number of items (here 1) from a 1-D sequence.
print(np.random.choice([1, 2, 3], 1))
# randint: integers from 1 (inclusive) to 100 (exclusive) in a 3x4 array.
print(np.random.randint(1, 100, (3, 4)))
[[0.53896726 0.71229709 0.92507313 0.31876484]
[0.83072795 0.63231691 0.15914402 0.63281235]
[0.75497099 0.00880939 0.2655119 0.34494942]]
[[0.734392 0.93710219 0.70851098 0.03865121]
[0.4247206 0.64120213 0.47434356 0.32331907]
[0.23769872 0.96864964 0.60257089 0.01608933]]
[[4.17022005e-01 7.20324493e-01 1.14374817e-04 3.02332573e-01]
[1.46755891e-01 9.23385948e-02 1.86260211e-01 3.45560727e-01]
[3.96767474e-01 5.38816734e-01 4.19194514e-01 6.85219500e-01]]
[[7 8 9]
[1 2 3]
[4 5 6]]
[1]
[[95 23 67 66]
[93 13 43 81]
[50 72 46 84]]
matplotlib模块
matplotlib模块:画图
条形图
# Plotting setup for the bar-chart example.
# The plotting submodule is `pyplot`; importing `pylot` raises ImportError.
from matplotlib import pyplot as plt
# Also keep the alias the original import line bound, so code that refers
# to either name keeps working.
pit = plt
from matplotlib.font_manager import FontProperties  # used to choose the font
# Raw string: the backslashes in the Windows path are kept literally without
# relying on unrecognised escape sequences (\W, \F, \s).
# NOTE(review): this is the SimSun font file, presumably selected so that
# Chinese text renders in the plots; the path exists only on Windows.
font = FontProperties(fname=r'C:\Windows\Fonts\simsun.ttc')