一. NumPy介绍
1. 介绍
| NumPy(Numerical Python)是Python的一个开源的数值计算库。 |
| |
| 可用来存储和处理大型矩阵,比Python自身的嵌套列表结构要高效得多, |
| 支持大量的维度数组与矩阵运算,此外也针对数组运算提供大量的数学函数库, |
| 包括数学、逻辑、形状操作、排序、选择、输入输出、离散傅立叶变换、基本线性代数、基本统计运算和随机模拟等等。 |
| |
| 几乎所有从事Python工作的数据分析师都利用NumPy的强大功能。 |
2. 安装第三方库
| !pip install numpy |
| |
| |
| import numpy as np |
| np.__version__ |
3. 数据分析'三剑客'
| import numpy as np |
| import pandas as pd |
| import matplotlib.pyplot as plt |
| |
| |
| img_data = plt.imread('123.jpg') |
| print(img_data) |
| """ |
| 图片: 其实是数字组成的,三维数组 |
| RGB: 红Red,绿Green,蓝Blue |
| RGB 范围: 0-255 |
| """ |
| |
| print(type(img_data)) |
| """ |
| numpy.ndarray:多维数组 类型 |
| nd: n维度,多维 |
| array: 数组 |
| """ |
| |
| |
| print(img_data.shape) |
| """ |
| 高度: 300行 |
| 宽度: 750列 |
| 3 表示的是RGB(red,green,blue)的值 |
| """ |
| |
| |
| print(plt.imshow(img_data)) |
| |
| |
| |
| |
| |
| |
| |
二. 创建NumPy数组(ndarray)
1. 使用np.array()由python list创建
| |
| |
| |
| l = [1,4,2,3,5] |
| n = np.array(l) |
| print(n) |
| print(type(n)) |
| print(n.shape) |
| |
| |
| n=np.array([3.14,2,'hello']) |
| print(n) |
| |
| |
| ''' |
| ①. numpy默认ndarray的所有元素的类型是相同的。 |
| ②. 如果传进来的列表中包含不同的类型,则统一为同一类型,优先级:str>float>int |
| ③. ndarray的常见数据类型: |
| int: int8、uint8、int16、int32、int64 |
| float: float16、float32、float64 |
| str: 字符串 |
| ''' |
2. 使用np的常规函数创建
| |
| |
| |
| |
| |
| n = np.ones(shape=(3,4)) |
| print(n) |
| |
| n = np.ones(shape=(3,4,5),dtype=np.int16) |
| print(n) |
| |
| |
| ''' |
| ①. shape: 形状 |
| ②. dtype=None: 元素类型 |
| ③. order: {'C','F'},可选,默认值:C,是否在内存中以行主(C-风格) 或列主(Fortran-风格),一般默认即可 |
| ''' |
| |
| |
| |
| |
| |
| n = np.zeros((5,5),dtype=np.int16) |
| print(n) |
| |
| |
| ''' |
| ①. shape: 形状 |
| ②. dtype=None: 元素类型 |
| ''' |
| |
| |
| |
| |
| |
| n = np.full(shape=(3,4),fill_value=8) |
| print(n) |
| |
| |
| ''' |
| ①. shape: 形状 |
| ②. fill_value: 填充值 |
| ③. dtype=None: 元素类型 |
| ''' |
| |
| |
| |
| |
| |
| |
| n = np.eye(6,6,dtype=np.int8) |
| |
| |
| n = np.eye(6,6,k=2,dtype=np.int8) |
| |
| |
| n = np.eye(6,6,k=-2,dtype=np.int8) |
| |
| print(n) |
| |
| |
| ''' |
| ①. N: 行数 |
| ②. M: 列数,默认为None,表示和行数一样 |
| ③. k=0: 对角线的偏移量,0表示主对角线,正数向右上方偏移,负数向左下方偏移 |
| ④. dtype=None: 元素类型 |
| ''' |
| |
| |
| |
| |
| |
| |
| n = np.linspace(0,100,num=51,dtype=np.int16) |
| print(n) |
| |
| |
| ''' |
| ①. start: 开始值 |
| ②. stop: 结束值 |
| ③. num=50: 等差数列中默认有50个数 |
| ④. endpoint=True: 是否包含结束值 |
| ⑤. retstep=False: 是否返回等差值(步长) |
| ⑥. dtype=None: 元素类型 |
| ''' |
| |
| |
| |
| |
| |
| n = np.arange(10) |
| print(n) |
| |
| |
| ''' |
| ①. start: 开始值(可选) |
| ②. stop: 结束值(不包含) |
| ③. step: 步长(可选) |
| ④. dtype=None: 元素类型 |
| ''' |
| |
| |
| |
| |
| |
| n = np.random.randint(3,10) |
| print(n) |
| |
| n = np.random.randint(3,10,size=6) |
| print(n) |
| |
| n = np.random.randint(3,10,size=(3,4)) |
| print(n) |
| |
| n = np.random.randint(0,256,size=(20,40,3)) |
| print(n) |
| |
| plt.imshow(n) |
| |
| |
| ''' |
| ①. low: 最小值 |
| ②. high=None: 最大值 |
| high=None时,生成的数值在[0,low)区间内(不包含low) |
| 如果使用high这个值,则生成的数值在[low,high)区间(不包含high) |
| ③. size=None: 数组形状,默认只输出一个随机值 |
| ④. dtype=None: 元素类型 |
| ''' |
| |
| |
| |
| |
| |
| |
| |
| |
| n = np.random.randn() |
| print(n) |
| |
| n = np.random.randn(10) |
| print(n) |
| |
| n = np.random.randn(3,4) |
| print(n) |
| |
| |
| ''' |
| ①. d0,d1,...,dn: 各个维度的长度,不传参数时返回单个随机数 |
| ''' |
| |
| |
| |
| |
| |
| n = np.random.normal() |
| print(n) |
| |
| n = np.random.normal(loc=100) |
| print(n) |
| |
| n = np.random.normal(loc=100,scale=10,size=(3,4)) |
| print(n) |
| |
| |
| ''' |
| ①. loc=0.0: 均值,对应着正态分布的中心 |
| ②. scale: 标准差,对应分布的宽度,scale越大,正态分布的曲线越矮胖,scale越小,曲线越瘦高 |
| ③. size=None: 数组形状 |
| ''' |
| |
| |
| |
| |
| |
| n = np.random.random() |
| print(n) |
| |
| n = np.random.random(size=(3,4)) |
| print(n) |
| |
| |
| |
| ''' |
| ①. size=None: 数组形状 |
| ''' |
| |
| |
| |
| |
| |
| |
| n = np.random.rand() |
| print(n) |
| |
| n = np.random.rand(3,4) |
| print(n) |
| |
| |
| |
| ''' |
| ①. d0,d1,...,dn: 各个维度的长度,不传参数时返回单个随机数 |
| ''' |
三. NumPy数组常用属性
| ndim:维度 |
| shape: 形状(各维度的长度) |
| size:总长度 |
| dtype: 元素类型 |
| |
| img_data = plt.imread('123.jpg') |
| |
| |
| img_data.shape |
| """ |
| 第一个维度: 421 |
| 第二个维度: 725 |
| 第三个维度: 3 |
| 有几个数字就表示 几维 |
| """ |
| |
| |
| img_data.ndim |
| |
| |
| img_data.size |
| |
| |
| img_data.dtype |
四. NumPy数组基本操作
1. 索引
| |
| |
| |
| n = np.array([1,2,3,4,5]) |
| n[0],n[-1] |
| |
| |
| n = np.random.randint(0,10,size=(4,5)) |
| n[3][4], n[-1][-1] |
| |
| n[3,4], n[-1,-1] |
| |
| |
| n = np.random.randint(0,100,size=(4,5,6)) |
| n[1,2,-1] |
| |
| |
| n[1,2,-1] = 886 |
| n[1,2] = [1,2,3,4,5,6] |
| n[1,2] = 100 |
2. 切片
| |
| |
| |
| n=np.array([1,2,3,4,5]) |
| print(n[2:6],n[::-1]) |
| |
| |
| n = np.random.randint(0,10,size=(6,8)) |
| |
| |
| print(n[0]) |
| |
| print(n[1:4]) |
| |
| print(n[[1,2,4]]) |
| |
| |
| |
| print(n[1:4,0]) |
| |
| print(n[:, 2:5]) |
| |
| print(n[:,[1,2,4]]) |
3. 练习:将图片反转
| |
| import numpy as np |
| import pandas as pd |
| import matplotlib.pyplot as plt |
| |
| |
| n = np.random.randint(0,100,size=(6,8)) |
| print(n) |
| n[::-1] |
| n[:,::-1] |
| |
| |
| dog = plt.imread('123.jpg') |
| dog.shape |
| plt.imshow(dog) |
| plt.imshow(dog[::-1]) |
| plt.imshow(dog[:,::-1]) |
| plt.imshow(dog[:,:,::-1]) |
| plt.imshow(dog[::10,::10,::-1]) |
4. 变形-reshape()
| import numpy as np |
| import pandas as pd |
| import matplotlib.pyplot as plt |
| |
| n = np.arange(1,21) |
| n |
| n.shape |
| |
| |
| n2 = np.reshape(n,(4,5)) |
| n2 |
| n2.shape |
| |
| n2.reshape((5,4)) |
| n2.reshape(5,4) |
| |
| |
| |
| n2.reshape(20) |
| n2.reshape(-1) |
| |
| n2.reshape(4,-1) |
| n2.reshape(5,-1) |
| n2.reshape(-1,2) |
| n2.reshape(-1,1) |
| n2.reshape(2,-1,2) |
5. 级联合并-concatenate()
| |
| |
| |
| n1 = np.random.randint(0,100,size=(3,5)) |
| n2 = np.random.randint(0,100,size=(3,5)) |
| |
| |
| |
| np.concatenate((n1,n2)) |
| np.concatenate((n1,n2),axis=0) |
| |
| np.concatenate((n1,n2),axis=1) |
| |
| |
| |
| np.hstack((n1,n2)) |
| |
| |
| |
| np.vstack((n1,n2)) |
6. 拆分-split()
| n = np.random.randint(0,100,size=(6,4)) |
| n |
| |
| |
| |
| |
| |
| np.split(n,2) |
| np.split(n,2,axis=0) |
| np.split(n,2,axis=1) |
| |
| |
| |
| np.vsplit(n,3) |
| |
| np.vsplit(n,(1,2,4)) |
| |
| |
| |
| np.hsplit(n,2) |
| |
| |
| |
| dog = plt.imread('123.jpg') |
| dog.shape |
| |
| |
| |
| dog2=dog[:,:-1] |
| dog2.shape |
| |
| |
| dog3 = np.split(dog,2) |
| dog3[0] |
| dog3[1] |
| plt.imshow(dog3[1]) |
| |
| |
| dog4 = np.split(dog2,5,axis=1) |
| plt.imshow(dog4[2]) |
7. 复制和深拷贝-copy()
| |
| n = np.arange(10) |
| n2=n |
| n2[0]=100 |
| display(n,n2) |
| |
| |
| n1 = np.arange(10) |
| n2 = n1.copy() |
| n2[0]=100 |
| display(n1,n2) |
| |
| |
| n = np.random.randint(0,10,size=(2,3)) |
| n2=n.copy() |
| n2[0][0]=100 |
| display(n,n2) |
8. 聚合函数
| |
| n = np.arange(10) |
| np.sum(n) |
| |
| n = np.random.randint(0,10,size = (3,4)) |
| np.sum(n) |
| |
| |
| np.sum(n,axis=0) |
| |
| |
| np.sum(n,axis=1) |
| |
| |
| n = np.random.randint(0,10,size = (3,4)) |
| np.max(n) |
| np.min(n) |
| |
| np.mean(n) |
| np.average(n) |
| |
| np.median(n) |
| np.percentile(n,q=50) |
| |
| n = n.reshape(-1) |
| display(n) |
| np.argmax(n) |
| np.argmin(n) |
| |
| np.argwhere(n==np.max(n)) |
| |
| np.power(n,3) |
| |
| np.std(n) |
| np.var(n) |
| |
| |
| |
| |
| n = np.array([1,2,3,np.nan]) |
| np.sum(n) |
| np.nansum(n) |
五. 矩阵操作
1. 基本矩阵操作
| |
| n = np.random.randint(0,10,size=(4,5)) |
| |
| n+10 |
| n-10 |
| n*10 |
| n/10 |
| n//2 |
| n**2 |
| n%2 |
| |
| n1 = np.random.randint(0,10,size=(4,5)) |
| n2 = np.random.randint(0,10,size=(4,5)) |
| display(n1,n2) |
| |
| n1 + n2 |
| n1 - n2 |
| n1 * n2 |
| ... |
2. 线性代数
| |
| |
| n1 = np.random.randint(0,5,size=(2,3)) |
| n2 = np.random.randint(0,5,size=(3,2)) |
| display(n1,n2) |
| np.dot(n1,n2) |
| |
| |
| ''' |
| [3, 0, 4], |
| [2, 1, 3] |
| |
| [0, 2], |
| [1, 3], |
| [1, 1] |
| |
| = [3*0+0*1+ 4*1, 3*2+0*3+4*1] |
| [2*0+1*1+ 3*1, 2*2+1*3+3*1] |
| |
| =[4,10] |
| [4,10] |
| ''' |

| |
| n = np.array([[1,2,3], |
| [2,5,4], |
| [4,5,8]]) |
| |
| np.linalg.inv(n) |
| |
| |
| np.round(np.linalg.det(n)) |
| |
| |
| |
| |
| np.linalg.matrix_rank(n) |
3. 广播机制
| |
| |
| |
| |
| |
| m = np.ones((2,3),dtype=np.int8) |
| a = np.arange(3) |
| |
| display(m,a) |
| m+a |
| |
| |
| a = np.arange(3).reshape(3,1) |
| b = np.arange(3) |
| |
| display(a,b) |
| a+b |
| |
| |
| a = np.ones((4,1),dtype=np.int8) |
| b = np.arange(4) |
| |
| display(a,b) |
| a+b |
4. 其他数学操作
| |
| |
| n =np.array([1,4,8,9,16,25,64]) |
| |
| np.abs(n) |
| np.sqrt(n) |
| np.square(n) |
| np.exp(n) |
| np.log(n) |
| np.log(np.e) |
| np.log(1) |
| np.log2(n) |
| np.log10(n) |
| |
| np.sin(n) |
| np.cos(n) |
| np.tan(n) |
| np.round(n,2) |
| np.ceil(n) |
| np.floor(n) |
| |
| np.cumsum(n) |
5. 排序
| |
| n1 = np.random.randint(0,10,size=6) |
| n2 = np.sort(n1) |
| |
| |
| |
| n3 = np.random.randint(0,10,size=6) |
| n3.sort() |
6. 文件IO操作
| |
| |
| |
| x = np.arange(5) |
| y = np.arange(10,20) |
| |
| |
| np.save('x',x) |
| |
| |
| np.savez('arr.npz',xarr=x,yarr=y ) |
| |
| |
| |
| np.load('x.npy') |
| |
| |
| np.load('arr.npz')["xarr"] |
| |
| |
| n = np.random.randint(0,10,size=(3,4)) |
| |
| |
| np.savetxt('arr.csv',n,delimiter=',') |
| |
| |
| np.loadtxt('arr.csv',delimiter=',',dtype=np.int16,converters=float) |
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 被坑几百块钱后,我竟然真的恢复了删除的微信聊天记录!
· 【自荐】一款简洁、开源的在线白板工具 Drawnix
· 没有Manus邀请码?试试免邀请码的MGX或者开源的OpenManus吧
· 园子的第一款AI主题卫衣上架——"HELLO! HOW CAN I ASSIST YOU TODAY
· 无需6万激活码!GitHub神秘组织3小时极速复刻Manus,手把手教你使用OpenManus搭建本