numpy入门简介
numpy入门简介
1. numpy 优点
- 底层为 C,效率远远高于普通的 python(效率)
- 内置并行运算功能(效率)
- 大量强大成熟的函数库(生态)
- 功能强大的 ndarray(生态)
2. 下载与导入
conda install numpy
conda install pandas
import numpy as np
import pandas as pd
3. numpy 的基本属性
\begin{equation}
\begin{aligned}
a.dtype &\Leftrightarrow \text{type of elements} \\
a.ndim &\Leftrightarrow \text{number of dimensions} \\
a.size &\Leftrightarrow \text{number of elements} \\
a.shape &\Leftrightarrow \text{shape of ndarray}
\end{aligned}
\end{equation}
dtype:
np.int8, np.int16, np.int32, np.int64,
np.float16, np.float32, np.float64  (注意:numpy 中不存在 np.float8)
>>> import numpy as np
>>> x = np.arange(0, 15).reshape(3, 5)
>>> type(x)
<class 'numpy.ndarray'>
>>> x
array([[ 0, 1, 2, 3, 4],
[ 5, 6, 7, 8, 9],
[10, 11, 12, 13, 14]])
>>> x.dtype
dtype('int64')
>>> x.size
15
>>> x.shape
(3, 5)
>>> x.ndim
2
>>>
4. 创建 array
4.1 np.array()
>>> a = np.array([2, 23, 4], dtype=np.float32)
>>> a
array([ 2., 23., 4.], dtype=float32)
4.2 np.arange(st, ed, step)
注意上面这个 ed 不可达(区间为左闭右开 [st, ed),例如下面例子中的 30 不会出现在结果里)。
reshape 之后,虽然 id 不同,但是仍然共用同一块存储单元。
st 可以 > ed ,但是 step 有要求,否则就是空 ndarray
>>> x = np.arange(0, 30, 2, dtype=np.float32)
>>> x
array([ 0., 2., 4., 6., 8., 10., 12., 14., 16., 18., 20., 22., 24.,
26., 28.], dtype=float32)
>>> id(x)
140042028793744
>>> y = x.reshape(3, 5)
>>> id(y)
140042046645136 # id(x) 和 id(y) 是不同的
>>> y
array([[ 0., 2., 4., 6., 8.],
[10., 12., 14., 16., 18.],
[20., 22., 24., 26., 28.]], dtype=float32)
>>> x[0]=-123
>>> y
array([[-123., 2., 4., 6., 8.], # id(x) 和 id(y) 共用一个存储单元
[ 10., 12., 14., 16., 18.],
[ 20., 22., 24., 26., 28.]], dtype=float32)
>>>
>>>
>>> np.arange(5, 0, -2)
array([5, 3, 1])
>>> np.arange(5, 0, 2)
array([], dtype=int64)
4.3 np.linspace(x, y, num)
注意这个 x, y 两个端点都是可达的(默认 endpoint=True),并且 x, y 之间不存在大小的必要关系。
>>> x = np.linspace(1, 2, 5) # x <= y
>>> x
array([1. , 1.25, 1.5 , 1.75, 2. ])
>>> x = np.linspace(2, 1, 5) # x >= y
>>> x
array([2. , 1.75, 1.5 , 1.25, 1. ])
4.4 np.zeros() & np.ones()
np.zeros(size,dtype)
np.ones(size,dtype)
>>> a=np.zeros((3, 4), dtype=np.int32)
>>> a
array([[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]], dtype=int32)
>>> b=np.ones((4, 3), dtype=np.float64)
>>> b
array([[1., 1., 1.],
[1., 1., 1.],
[1., 1., 1.],
[1., 1., 1.]])
4.5 np.empty()
empty(shape, dtype=float, order='C', *, like=None)
Return a new array of given shape and type, without initializing entries. # 未初始化的 array
Parameters
----------
shape : int or tuple of int
Shape of the empty array, e.g., ``(2, 3)`` or ``2``.
dtype : data-type, optional
Desired output data-type for the array, e.g, `numpy.int8`. Default is
`numpy.float64`.
order : {'C', 'F'}, optional, default: 'C' # 行列存储,是影响效率(内存读取 cache 等问题)
Whether to store multi-dimensional data in row-major
(C-style) or column-major (Fortran-style) order in
memory.
like : array_like
Reference object to allow the creation of arrays which are not
NumPy arrays. If an array-like passed in as ``like`` supports
the ``__array_function__`` protocol, the result will be defined
by it. In this case, it ensures the creation of an array object
compatible with that passed in via this argument.
4.6 np.ones_like & np.zeros_like()
np.zeros_like(m) 等价于: np.zeros(m.shape, dtype=m.dtype)(形状和 dtype 都与 m 相同)
>>> m = np.random.random((3, 4, 5))
>>> m.shape
(3, 4, 5)
>>> x = np.ones_like(m); x.shape
(3, 4, 5)
>>> x = np.zeros_like(m); x.shape
(3, 4, 5)
>>> x = np.zeros(m.shape); x.shape
(3, 4, 5)
5. array 基础运算 1
5.1 +、-、*、/、**、//
- 对应元素进行运算
- 存在广播机制(broadcasting)
- 形状不同但兼容的数组,会先自动广播到相同形状再进行运算
广播机制简单介绍:
- It starts with the trailing (i.e. rightmost) dimensions and works its way left. Two dimensions are compatible when they are equal, or one of them is 1
A (4d array): 8 x 1 x 6 x 1
B (3d array): 7 x 1 x 5
Result (4d array): 8 x 7 x 6 x 5
>>> a = np.array([0.0, 10.0, 20.0, 30.0])
>>> b = np.array([1.0, 2.0, 3.0])
>>> a[:, np.newaxis] + b
array([[ 1., 2., 3.],
[11., 12., 13.],
[21., 22., 23.],
[31., 32., 33.]])
5.2 三角、指数、对数函数
np.sin(a), np.cos(a), np.tan(a)
np.pi
np.log(a), np.exp(a)
其他函数可以自行查阅官方 API
>>> a = np.arange(12).reshape(3, 4)
>>> np.sin(a)
array([[ 0. , 0.84147098, 0.90929743, 0.14112001],
[-0.7568025 , -0.95892427, -0.2794155 , 0.6569866 ],
[ 0.98935825, 0.41211849, -0.54402111, -0.99999021]])
>>> np.log(a)
<stdin>:1: RuntimeWarning: divide by zero encountered in log
array([[ -inf, 0. , 0.69314718, 1.09861229],
[1.38629436, 1.60943791, 1.79175947, 1.94591015],
[2.07944154, 2.19722458, 2.30258509, 2.39789527]])
>>> np.exp(a)
array([[1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01],
[5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03],
[2.98095799e+03, 8.10308393e+03, 2.20264658e+04, 5.98741417e+04]])
>>> np.pi
3.141592653589793
5.3 逻辑运算
>>> a
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
>>> a == 3
array([[False, False, False, True],
[False, False, False, False],
[False, False, False, False]])
>>> a > 3
array([[False, False, False, False],
[ True, True, True, True],
[ True, True, True, True]])
>>> a < 3
array([[ True, True, True, False],
[False, False, False, False],
[False, False, False, False]])
5.4 a.sum(), np.sum()
输入help(np.sum)
查看函数介绍,或者是在numpy
官方文档中查看
np.sum(a, axis=(0, 1), keepdims=True)
>>> a = np.arange(10).reshape(2, 5)
>>> np.sum(a)
45
>>> np.sum(a, axis=[0])
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "<__array_function__ internals>", line 180, in sum
File "/home/xyg/.conda/envs/xyg_ml/lib/python3.8/site-packages/numpy/core/fromnumeric.py", line 2296, in sum
return _wrapreduction(a, np.add, 'sum', axis, dtype, out, keepdims=keepdims,
File "/home/xyg/.conda/envs/xyg_ml/lib/python3.8/site-packages/numpy/core/fromnumeric.py", line 86, in _wrapreduction
return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
TypeError: 'list' object cannot be interpreted as an integer
>>> a
array([[0, 1, 2, 3, 4],
[5, 6, 7, 8, 9]])
>>> np.sum(a, axis=(0))
array([ 5, 7, 9, 11, 13])
>>> np.sum(a, axis=(0,1))
45 # tuple
>>> np.sum(a, axis=(0,1), keepdims=True)
array([[45]])
5.5 a.max(), np.max() & a.min(), np.min()
np.max(a, axis=None, out=None, keepdims=<no value>, initial=<no value>, where=<no value>)
当 axis=None
时候,返回一个 scalar
。
>>> import numpy as np
>>> x = np.random.random((3, 4))
>>> x
array([[0.62713328, 0.44203626, 0.50106157, 0.57240821],
[0.69146405, 0.12020252, 0.28365231, 0.93345497],
[0.66842843, 0.62938339, 0.66450048, 0.02996865]])
>>> x.max()
0.9334549671760228
>>> x.max((0))
array([0.69146405, 0.62938339, 0.66450048, 0.93345497])
>>> x.max(axis=0, keepdims=True)
array([[0.69146405, 0.62938339, 0.66450048, 0.93345497]])
5.6 np.dot(X,Y), X.dot(Y) , matmul
- a, b 中有一个 scalar: numpy.multiply(a, b) == a * b
- a, b 全是 vector: np.dot(a, b)
- a, b 是矩阵: np.matmul(a, b) == a @ b
dot(...)
dot(a, b, out=None)
Dot product of two arrays. Specifically,
- If both `a` and `b` are 1-D arrays, it is inner product of vectors
(without complex conjugation).
# 二维
- If both `a` and `b` are 2-D arrays, it is matrix multiplication,
but using :func:`matmul` or ``a @ b`` is preferred.
- If either `a` or `b` is 0-D (scalar), it is equivalent to :func:`multiply`
and using ``numpy.multiply(a, b)`` or ``a * b`` is preferred.
- If `a` is an N-D array and `b` is a 1-D array, it is a sum product over
the last axis of `a` and `b`.
# 最后一个计算方法蛮有意思的,不过也是矩阵乘法的扩展,行乘以列。
- If `a` is an N-D array and `b` is an M-D array (where ``M>=2``), it is a
sum product over the last axis of `a` and the second-to-last axis of `b`::
dot(a, b)[i,j,k,m] = sum(a[i,j,:] * b[k,:,m])
Examples
--------
>>> np.dot(3, 4)
12
Neither argument is complex-conjugated:
>>> np.dot([2j, 3j], [2j, 3j])
(-13+0j)
For 2-D arrays it is the matrix product:
>>> a = [[1, 0], [0, 1]]
>>> b = [[4, 1], [2, 2]]
>>> np.dot(a, b)
array([[4, 1],
[2, 2]])
>>> a = np.arange(3*4*5*6).reshape((3,4,5,6))
>>> b = np.arange(3*4*5*6)[::-1].reshape((5,4,6,3))
>>> np.dot(a, b)[2,3,2,1,2,2]
499128
>>> sum(a[2,3,2,:] * b[1,2,:,2])
499128
5.7 np.random random&randn&normal
np.random.random(size)
在半开区间 [0.0, 1.0) 中生成形状为 size 的随机矩阵。需要注意的是 size 往往是 tuple,有时候 list 也是可以的,但是对于不需要修改的序列,还是使用 tuple 更为规范。
>>> np.random.random((3, 4, 5, 6)).shape
(3, 4, 5, 6)
np.random.randn(d0, d1, ..., dn)  # 注意:各维度作为独立的位置参数传入,而不是一个 tuple
"normal" (Gaussian) distribution of mean 0 and variance 1
>>> np.random.randn(3, 4) * 2.5 + 3
array([[ 1.00632845, 1.75902885, -1.56125322, -2.16328798],
[ 2.13001675, 2.13981641, 0.05208978, 1.47386297],
[ 3.96550105, 1.74589195, 0.0784211 , 2.54371841]])
均值为3,方差为6.25,标准差为2.5的 3X4 数组
See Also
--------
standard_normal : Similar, but takes a tuple as its argument.
normal : Also accepts mu and sigma arguments.
np.random.normal()
\begin{equation}
\begin{aligned}
p(x) = \frac{1}{\sqrt{ 2 \pi \sigma^2 }} e^{ - \frac{ (x - \mu)^2 } {2 \sigma^2} }
\end{aligned}
\end{equation}
p(x) = \frac{1}{\sqrt{ 2 \pi \sigma^2 }} e^{ - \frac{ (x - \mu)^2 } {2 \sigma^2} }
loc : float or array_like of floats Mean (centre) of the distribution.
scale : float or array_like of floats Standard deviation (spread or width) of the distribution. Must be non-negative.
size : int or tuple of ints, optional
>>> mu, sigma = 0, 0.1
>>> np.random.normal(loc=mu, scale=sigma, size=(3, 4))
array([[-0.13928084, 0.00699425, 0.03469589, 0.10115385],
[-0.18465651, 0.05148013, 0.0391767 , 0.0492551 ],
[ 0.12339438, -0.08609649, -0.1076069 , -0.18704943]])
6. array 基础运算 2
6.1 argmax, argmin
def argmax(a, axis=None, out=None, *, keepdims=<no value>):
"""
Returns the indices of the maximum values along an axis.
args:
a : array_like
Input array.
axis : int, optional
By default, the index is into the flattened array, otherwise
along the specified axis.
keepdims : bool, optional
"""
pass
# Examples
# --------
>>> np.random.normal(loc=5, scale=4, size=(3, 4))
array([[10.51915676, -1.12393074, 1.62767543, 5.9267231 ],
[-0.370581 , 4.88070428, 16.82943086, 12.92744001],
[-6.55832997, -2.52102516, 2.97808364, 12.65201042]])
>>> a = np.random.normal(loc=5, scale=4, size=(3, 4))
>>> a.argmax()
7
>>> a.argmax(axis=0)
array([1, 0, 1, 1])
>>> a.argmax(axis=0, keepdims=True)
array([[1, 0, 1, 1]])
6.2 np.median, np.average
median(a, axis=None, out=None, overwrite_input=False, keepdims=False)
def median(a, axis=None, out=None, overwrite_input=False, keepdims=False):
"""
Compute the median along the specified axis.
Returns the median of the array elements.
Args:
a : array_like
Input array or object that can be converted to an array.
axis : {int, sequence of int, None}, optional
Axis or axes along which the medians are computed. The default
is to compute the median along a flattened version of the array.
A sequence of axes is supported since version 1.9.0.
out : ndarray, optional
Alternative output array in which to **place the result**. It must
have the same shape and buffer length as the expected output,
but the type (of the output) will be cast if necessary.
overwrite_input : bool, optional
If True, then allow use of memory of input array `a` for
calculations. The input array will be modified by the call to
`median`. **This will save memory when you do not need to preserve
the contents of the input array**. **Treat the input as undefined,
but it will probably be fully or partially sorted**. Default is
False. If `overwrite_input` is ``True`` and `a` is not already an
`ndarray`, an error will be raised.
keepdims : bool, optional
If this is set to True, the axes which are reduced are left
in the result as dimensions with size one. With this option,
the result will broadcast correctly against the original `arr`.
"""
pass
# Examples
# --------
>>> a = np.array([[10, 7, 4], [3, 2, 1]])
>>> a
array([[10, 7, 4],
[ 3, 2, 1]])
>>> np.median(a)
3.5
>>> np.median(a, axis=0)
array([6.5, 4.5, 2.5])
>>> b = a.copy()
>>> m = np.zeros(3)
>>> np.median(a, axis=0, out=m) # out=m
array([6.5, 4.5, 2.5])
>>> m
array([6.5, 4.5, 2.5])
>>> t = np.median(a, axis=0, out=m)
>>> t
array([6.5, 4.5, 2.5])
>>> m
array([6.5, 4.5, 2.5])
>>> np.median(b, axis=0, keepdims=True, overwrite_input=True) # overwrite_input=True, b 被原地(部分)排序了,a 不受影响
array([[6.5, 4.5, 2.5]])
>>> b
array([[ 3, 2, 1],
[10, 7, 4]])
>>> a
array([[10, 7, 4],
[ 3, 2, 1]])
average(a, axis=None, weights=None, returned=False)
def average(a, axis=None, weights=None, returned=False):
"""
Compute the weighted average along the specified axis.
Args:
a : array_like
Array containing data to be averaged. If `a` is not an array, a
conversion is attempted.
axis : None or int or tuple of ints, optional
Axis or axes along which to average `a`. The default,
axis=None, will average over all of the elements of the input array.
If axis is negative it counts from the last to the first axis.
.. versionadded:: 1.7.0
If axis is a tuple of ints, averaging is performed on all of the axes
specified in the tuple instead of a single axis or all the axes as
before.
**weights** : array_like, optional
An array of weights associated with the values in `a`. Each value in
`a` contributes to the average according to its associated weight.
The weights array can either be 1-D (in which case its length must be
the size of `a` along the given axis) or of the same shape as `a`.
If `weights=None`, then all data in `a` are assumed to have a
weight equal to one. The 1-D calculation is::
**avg = sum(a * weights) / sum(weights)**
The only constraint on `weights` is that `sum(weights)` must not be 0.
returned : bool, optional
Default is `False`. If `True`, the tuple (`average`, `sum_of_weights`)
is returned, otherwise only the average is returned.
If `weights=None`, `sum_of_weights` is equivalent to the number of
elements over which the average is taken.
"""
pass
# Examples
# --------
>>> import numpy as np
>>> a = np.arange(10).reshape(2, 5)
>>> weight = np.random.random((2, 5))
>>> np.average(a, axis=(1), weights=weight) # all value weighted
array([2.05282935, 7.30380309])
>>> np.average(a, axis=(1), weights=(1, 2, 3, 4, 5)) # just along the axis weighted
array([2.66666667, 7.66666667])
6.3 np.cumsum()
cumulate -> cumulative
cumulative sum
def cumsum(a, axis=None, dtype=None, out=None):
"""
Return the cumulative sum of the elements along a given axis.
Args:
a : array_like
Input array.
axis : int, optional
Axis along which the cumulative sum is computed. The default
(None) is to compute the cumsum over the flattened array.
dtype : dtype, optional
Type of the returned array and of the accumulator in which the
elements are summed. If `dtype` is not specified, it defaults
to the dtype of `a`, unless `a` has an integer dtype with a
precision less than that of the default platform integer. In
that case, the default platform integer is used.
out : ndarray, optional
Alternative output array in which to place the result. It must
have the same shape and buffer length as the expected output
but the type will be cast if necessary. See :ref:`ufuncs-output-type` for
more details
"""
pass
# Examples
# --------
>>> a = np.arange(12).reshape(3, 4)
>>> a.cumsum()
array([ 0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 66])
>>> a.cumsum(axis=0)
array([[ 0, 1, 2, 3],
[ 4, 6, 8, 10],
[12, 15, 18, 21]])
>>> a
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
6.4 np.diff()
def diff(a, n=1, axis=-1, prepend=<no value>, append=<no value>):
"""
Calculate the n-th discrete difference along the given axis.
The first difference is given by ``out[i] = a[i+1] - a[i]`` along
the given axis, higher differences are calculated by using `diff`
recursively.
Args:
a : array_like
Input array
n : int, optional
The number of times values are differenced. If zero, the input
is returned as-is.
axis : int, optional
The axis along which the difference is taken, **default is the
last axis.**
prepend, append : array_like, optional
Values to prepend or append to `a` along axis prior to
performing the difference. Scalar values are expanded to
arrays with length 1 in the direction of axis and the shape
of the input array in along all other axes. Otherwise the
dimension and shape must match `a` except along axis.
.. versionadded:: 1.16.0
Returns
-------
diff : ndarray
The n-th differences. The shape of the output is the same as `a`
**except along `axis` where the dimension is smaller by `n`**. The
type of the output is the same as the type of the difference
between any two elements of `a`. This is the same as the type of
`a` in most cases. A notable exception is `datetime64`, which
results in a `timedelta64` output array.
"""
pass
# Examples
# --------
>>> a
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
>>> a.diff()
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
AttributeError: 'numpy.ndarray' object has no attribute 'diff'
>>> np.diff(a)
array([[1, 1, 1],
[1, 1, 1],
[1, 1, 1]])
>>> np.diff(a, axis=0)
array([[4, 4, 4, 4],
[4, 4, 4, 4]])
>>> np.diff(a, axis=0, n=2) # 二阶差分
array([[0, 0, 0, 0]])
6.5 np.sort()
a.sort() is an in-place operation
np.sort(a) does not change the value of array a
create a structured array
def sort(a, axis=-1, kind=None, order=None):
"""
Return a sorted copy of an array.
Args:
a : array_like
Array to be sorted.
axis : int or None, optional
Axis along which to sort. If None, the array is flattened before
sorting. The default is -1, which sorts along the last axis (sorting along the last axis is fastest).
kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, optional
Sorting algorithm. The default is 'quicksort'. Note that both 'stable'
and 'mergesort' use timsort or radix sort under the covers and, in general,
the actual implementation will vary with data type. The 'mergesort' option
is retained for backwards compatibility.
.. versionchanged:: 1.15.0.
The 'stable' option was added.
order : str or list of str, optional
When `a` is an array with fields defined, this argument specifies
which fields to compare first, second, etc. A single field can
be specified as a string, and not all fields need be specified,
but unspecified fields will still be used, in the order in which
they come up in the dtype, to break ties.
Returns
-------
sorted_array : ndarray
Array of the same type and shape as `a`.
"""
# Examples
# --------
# a.sort() inplace
>>> a = np.random.normal(loc=0, scale=1, size=(3, 4))
>>> a
array([[ 0.30628017, 0.02845933, -0.00328401, -1.26124011],
[ 1.03614003, -1.37437447, 0.92636578, -0.98164326],
[ 0.50566727, 0.75049298, 0.04638059, -0.29860511]])
>>> a.sort(axis=-1)
>>> a
array([[-1.26124011, -0.00328401, 0.02845933, 0.30628017],
[-1.37437447, -0.98164326, 0.92636578, 1.03614003],
[-0.29860511, 0.04638059, 0.50566727, 0.75049298]])
# np.sort()
>>> a = np.random.normal(loc=0, scale=1, size=(3, 4))
>>> a
array([[-1.619563 , -0.97776701, 0.61543684, 0.11918334],
[ 0.25938769, -0.2363774 , 0.64845272, -0.15611945],
[-0.5919179 , 0.00845824, -1.18084254, 0.92008597]])
>>> np.sort(a, axis=-1)
array([[-1.619563 , -0.97776701, 0.11918334, 0.61543684],
[-0.2363774 , -0.15611945, 0.25938769, 0.64845272],
[-1.18084254, -0.5919179 , 0.00845824, 0.92008597]])
# create a structured array
>>> dtype_list = [('name', 's10'), ('age', int), ('height', float))
File "<stdin>", line 1
dtype_list = [('name', 's10'), ('age', int), ('height', float))
^
SyntaxError: closing parenthesis ')' does not match opening parenthesis '['
>>> dtype_list = [('name', 's10'), ('age', int), ('height', float)]
>>> value_list = [('xyg', 22, 174.0), ('msz', 20, 183.0), ('wy', 21, 160.0)]
>>> a = np.array(value_list, dtype=dtype_list)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: data type 's10' not understood
>>> dtype_list = [('name', 'S10'), ('age', int), ('height', float)]
>>> a = np.array(value_list, dtype=dtype_list)
>>> a
array([(b'xyg', 22, 174.), (b'msz', 20, 183.), (b'wy', 21, 160.)],
dtype=[('name', 'S10'), ('age', '<i8'), ('height', '<f8')])
>>> np.sort(a, kind='quicksort', order=['age', 'height'])
array([(b'msz', 20, 183.), (b'wy', 21, 160.), (b'xyg', 22, 174.)],
dtype=[('name', 'S10'), ('age', '<i8'), ('height', '<f8')])
6.6 np.transpose(), np.clip()
transpose
的顺序,按照转换前的顺序走就好了
比如 a.transpose(2, 1, 0),那么 dim 转换关系为 (0<-2, 1<-1, 2<-0)
也就是说,按照原来的顺序走最后一个维 2,相当于走现在的第一维 0。
有点类似于 C++ 数组的下标变换。
def transpose(a, axes=None):
"""
Reverse or permute the axes of an array; returns the modified array.
For an array a with two axes, transpose(a) gives the matrix transpose.
Refer to `numpy.ndarray.transpose` for full documentation.
Args:
a : array_like
Input array.
axes : **tuple or list of ints**, optional
If specified, it must be a tuple or list which contains a permutation of
[0,1,..,N-1] where N is the number of axes of a. The i'th axis of the
returned array will correspond to the axis numbered ``axes[i]`` of the
input. If not specified, defaults to ``range(a.ndim)[::-1]``, which
reverses the order of the axes.
"""
pass
# Examples
# --------
>>> np.ones((2, 3, 4)).transpose().shape
(4, 3, 2)
>>> np.ones((2, 3, 4, 5)).transpose([1, 3, 2, 0]).shape
(3, 5, 4, 2)
>>> x = np.arange(18).reshape(2, 3, 3)
>>> x
array([[[ 0, 1, 2],
[ 3, 4, 5],
[ 6, 7, 8]],
[[ 9, 10, 11],
[12, 13, 14],
[15, 16, 17]]])
>>> x.transpose([2, 1, 0]) # 0->2, 1->1, 2->0, 如果还是按照原来的顺序走,你会发现一模一样
array([[[ 0, 9],
[ 3, 12],
[ 6, 15]],
[[ 1, 10],
[ 4, 13],
[ 7, 16]],
[[ 2, 11],
[ 5, 14],
[ 8, 17]]])
np.clip()
注意 a_min, a_max 的 broadcasting
def clip(a, a_min, a_max, out=None, **kwargs):
"""
Clip (limit) the values in an array.
Given an interval, values outside the interval are clipped to
the interval edges. For example, if an interval of ``[0, 1]``
is specified, values smaller than 0 become 0, and values larger
than 1 become 1.
Equivalent to but faster than ``np.minimum(a_max, np.maximum(a, a_min))``.
No check is performed to ensure ``a_min < a_max``.
Args:
a : array_like
Array containing elements to clip.
a_min, a_max : array_like or None
Minimum and maximum value. If ``None``, clipping is not performed on
the corresponding edge. Only one of `a_min` and `a_max` may be
``None``. Both are broadcast against `a`.
out : ndarray, optional
The results will be placed in this array. It may be the input
array for in-place clipping. `out` must be of the right shape
to hold the output. Its type is preserved.
"""
pass
# Examples
# --------
>>> a
array([[ -5.80884808, 0.66381746, -4.09271404, 2.95109915],
[ 2.48026049, -2.70753274, -2.92750608, 5.55364944],
[ 1.15303987, -3.29454651, 0.76547096, -10.20975501],
[ 1.85208623, 6.58854311, -6.15662336, 2.34428651],
[ 0.31991729, 0.05057278, 0.08164345, -9.88275984],
[ -5.25614517, -4.56251041, 0.54267897, -6.27214703],
[ -2.31286108, 4.2978882 , 9.08731054, -3.64983076],
[ -2.28896367, 2.21334178, -3.14408534, -0.79578716],
[ 0.47911737, -2.08568072, 0.87718787, -5.79884939],
[ 3.29014118, -0.43004713, -2.40442335, 6.62272816]])
>>> np.clip(a, a_min=np.array([-0.5, 0, 0.5, 1.0]), a_max=np.array([4.0, 5.0, 5.5, 6])) # broadcasting
array([[-0.5 , 0.66381746, 0.5 , 2.95109915],
[ 2.48026049, 0. , 0.5 , 5.55364944],
[ 1.15303987, 0. , 0.76547096, 1. ],
[ 1.85208623, 5. , 0.5 , 2.34428651],
[ 0.31991729, 0.05057278, 0.5 , 1. ],
[-0.5 , 0. , 0.54267897, 1. ],
[-0.5 , 4.2978882 , 5.5 , 1. ],
[-0.5 , 2.21334178, 0.5 , 1. ],
[ 0.47911737, 0. , 0.87718787, 1. ],
[ 3.29014118, 0. , 0.5 , 6. ]])
7. array的索引
索引写在同一个 [] 内和写在 [] 外(连续多个 [])是有区别的,后者相当于对前一个 [] 的结果再次操作
A[1, 2] == A[1][2]
A[:], A[::-1][::2]==A[::-2]
A[:,2] != A[:][2]
for row in A:
for col in A.T:
for item in A.flatten():
for item in A.flat:
# Examples
# --------
>>> a = np.random.randn(3, 3, 4)
>>> a
array([[[ 0.11162199, 0.50899669, 1.43875657, -0.18631797],
[-0.2353997 , 0.04718415, -0.59639872, -1.80883905],
[-0.3713679 , 1.61261631, 1.12475733, 0.05203711]],
[[ 1.90756754, -0.38983929, 1.1408755 , 0.71429065],
[ 0.15868841, 0.00850406, 1.87853773, 0.27239486],
[-0.18476569, -1.35824722, 0.4410609 , -0.16518748]],
[[ 1.18479233, -0.72387571, -0.64624958, -1.0462974 ],
[-0.85524974, -0.63754905, 0.08601103, -0.15196145],
[ 0.06569163, -0.13758958, 0.08397635, -0.00511833]]])
>>> a[:, :, 1]
array([[ 0.50899669, 0.04718415, 1.61261631],
[-0.38983929, 0.00850406, -1.35824722],
[-0.72387571, -0.63754905, -0.13758958]])
>>> b = a[:, :, 0]
>>> for row in b:
... print(row)
...
[ 0.11162199 -0.2353997 -0.3713679 ]
[ 1.90756754 0.15868841 -0.18476569]
[ 1.18479233 -0.85524974 0.06569163]
>>> for col in b.T:
... print(col)
...
[0.11162199 1.90756754 1.18479233]
[-0.2353997 0.15868841 -0.85524974]
[-0.3713679 -0.18476569 0.06569163]
>>> for item in b.flat:
... print(item)
...
0.11162198964726565
-0.23539969530784108
-0.37136789566884293
1.9075675435746606
0.15868840708629267
-0.184765694338286
1.1847923307596742
-0.8552497441876045
0.06569162775697603
8. array的合并
8.1 np.vstack() & np.hstack()
def vstack(tup):
"""
Stack arrays in sequence vertically (row wise).
This is equivalent to concatenation along the first axis after 1-D arrays
of shape `(N,)` have been reshaped to `(1,N)`. Rebuilds arrays divided by
`vsplit`.
This function makes most sense for arrays with up to 3 dimensions. For
instance, for pixel-data with a height (first axis), width (second axis),
and r/g/b channels (third axis). The functions `concatenate`, `stack` and
`block` provide more general stacking and concatenation operations.
Parameters
----------
tup : sequence of ndarrays
The arrays must have the same shape along all but the first axis.
1-D arrays must have the same length.
Returns
-------
stacked : ndarray
The array formed by stacking the given arrays, will be at least 2-D.
"""
pass
# Examples
# --------
>>> a = np.array([3, 2, 1]) # (3,)
>>> b = np.array([6, -1, 8]) # (3,)
>>> np.vstack((a, b)).shape
(2, 3)
>>>
>>> np.vstack((a, b)).shape
(2, 3)
>>> np.vstack((a, b))
array([[ 3, 2, 1],
[ 6, -1, 8]])
>>> a = np.arange(0, 3).reshape(1, 3); b = np.arange(-3, 0).reshape(1, 3)
>>> a
array([[0, 1, 2]])
>>> b
array([[-3, -2, -1]])
>>> np.vstack((a, b))
array([[ 0, 1, 2],
[-3, -2, -1]])
>>> a = np.array([[1], [2], [3]]) # (3, 1)
>>> b = np.array([[4], [5], [6]]) # (3, 1)
>>> a.shape
(3, 1)
>>> b
array([[4],
[5],
[6]])
>>> np.vstack((a, b)) # (6, 1)
array([[1],
[2],
[3],
[4],
[5],
[6]])
def hstack(tup):
"""
Stack arrays in sequence horizontally (column wise).
This is equivalent to concatenation along the **second axis**, except for 1-D
arrays where it concatenates along the first axis. Rebuilds arrays divided
by `hsplit`.
This function makes most sense for arrays with up to 3 dimensions. For
instance, for pixel-data with a height (first axis), width (second axis),
and r/g/b channels (third axis). The functions `concatenate`, `stack` and
`block` provide more general stacking and concatenation operations.
Parameters
----------
tup : sequence of ndarrays
The arrays must have the same shape along all but the second axis,
except 1-D arrays which can be any length.
Returns
-------
stacked : ndarray
The array formed by stacking the given arrays.
"""
pass
# Examples
# --------
>>> a = np.array((1,2,3))
>>> b = np.array((4,5,6))
>>> np.hstack((a,b))
array([1, 2, 3, 4, 5, 6])
>>> a = np.array([[1],[2],[3]])
>>> b = np.array([[4],[5],[6]])
>>> np.hstack((a,b))
array([[1, 4],
[2, 5],
[3, 6]])
8.2 np.concatenate((A, B), axis=0)
def concatenate(...):
concatenate((a1, a2, ...), axis=0, out=None, dtype=None, casting="same_kind")
Join a sequence of arrays along an existing axis.
Parameters
----------
a1, a2, ... : sequence of array_like
The arrays must have the same shape, except in the dimension
corresponding to `axis` (the first, by default).
axis : int, optional
The axis along which the arrays will be joined. If axis is None,
arrays are flattened before use. Default is 0.
out : ndarray, optional
If provided, the destination to place the result. The shape must be
correct, matching that of what concatenate would have returned if no
out argument were specified.
dtype : str or dtype
If provided, the destination array will have this dtype. Cannot be
provided together with `out`.
.. versionadded:: 1.20.0
casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
Controls what kind of data casting may occur. Defaults to 'same_kind'.
.. versionadded:: 1.20.0
Returns
-------
res : ndarray
The concatenated array
# Examples
# --------
>>> a = np.random.randn(3, 4, 5)
>>> b = np.random.randn(1, 4, 5)
>>> c = np.random.randn(5, 4, 5)
>>> d = np.random.randn(10, 1, 5)
>>> np.concatenate((a, b, c), axis=0).shape
(9, 4, 5)
>>> np.concatenate((a, b, c, d), axis=0).shape
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "<__array_function__ internals>", line 180, in concatenate
ValueError: all the input array dimensions for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 4 and the array at index 3 has size 1
>>> np.concatenate((a, b, c, d), axis=0, casting='unsafe').shape
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "<__array_function__ internals>", line 180, in concatenate
ValueError: all the input array dimensions for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 4 and the array at index 3 has size 1
>>> np.concatenate((a, b, c, d), axis=0, casting='safe').shape
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "<__array_function__ internals>", line 180, in concatenate
ValueError: all the input array dimensions for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 4 and the array at index 3 has size 1
9. array的分割
9.1 np.split()
必须整除
def split(ary, indices_or_sections, axis=0):
"""
Split an array into multiple sub-arrays as views into `ary`.
Parameters
----------
ary : ndarray
Array to be divided into sub-arrays.
indices_or_sections : int or 1-D array
If `indices_or_sections` is an integer, N, the array will be divided
into N equal arrays along `axis`. If such a split is not possible,
an error is raised.
If `indices_or_sections` is **a 1-D array of sorted integers**, the entries
indicate where along `axis` the array is split. For example,
``[2, 3]`` would, for ``axis=0``, result in
- ary[:2]
- ary[2:3]
- ary[3:] # ------------------
If an index exceeds the dimension of the array along `axis`,
an empty sub-array is returned correspondingly.
axis : int, optional
The axis along which to split, default is 0.
Returns
-------
sub-arrays : list of ndarrays
A list of sub-arrays as views into `ary`.
"""
pass
# Examples
# --------
>>> a = np.arange(3 * 4).reshape(3, 4)
>>> np.split(a, 2, axis=1)
[array([[0, 1],
[4, 5],
[8, 9]]), array([[ 2, 3],
[ 6, 7],
[10, 11]])]
>>> np.split(a, 2, axis=0)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "<__array_function__ internals>", line 180, in split
File "/home/xyg/.conda/envs/xyg_ml/lib/python3.8/site-packages/numpy/lib/shape_base.py", line 872, in split
raise ValueError(
ValueError: array split does not result in an equal division
9.2 np.array_split()
相比于 np.split(), np.array_split 更为常用,不要求均分
Help on function array_split in module numpy:
def array_split(ary, indices_or_sections, axis=0):
"""
Split an array into multiple sub-arrays.
Please refer to the ``split`` documentation. The only difference
between these functions is that ``array_split`` allows
`indices_or_sections` to be an integer that does *not* equally
divide the axis. For an array of length l that should be split
into n sections, it returns l % n sub-arrays of size l//n + 1
and the rest of size l//n.
See Also
--------
split : Split array into multiple sub-arrays of equal size.
"""
pass
# Examples
# --------
>>> a = np.arange(3 * 4).reshape(3, 4)
>>> np.array_split(a, 2, axis=0)
[array([[0, 1, 2, 3],
[4, 5, 6, 7]]), array([[ 8, 9, 10, 11]])]
9.3 np.vsplit()
和 np.split 一样,传入整数时必须是均分(传入索引数组时按索引位置切分)
def vsplit(ary, indices_or_sections):
"""
Split an array into multiple sub-arrays vertically (row-wise).
Please refer to the ``split`` documentation. ``vsplit`` is **equivalent**
to ``split`` with `axis=0` (default), the array is always split along the
**first axis** regardless of the array dimension.
See Also
--------
split : Split an array into multiple sub-arrays of equal size.
"""
# Examples
# --------
>>> x = np.arange(16.0).reshape(4, 4)
>>> x
array([[ 0., 1., 2., 3.],
[ 4., 5., 6., 7.],
[ 8., 9., 10., 11.],
[12., 13., 14., 15.]])
>>> np.vsplit(x, 2)
[array([[0., 1., 2., 3.],
[4., 5., 6., 7.]]), array([[ 8., 9., 10., 11.],
[12., 13., 14., 15.]])]
>>> np.vsplit(x, np.array([3, 6]))
[array([[ 0., 1., 2., 3.],
[ 4., 5., 6., 7.],
[ 8., 9., 10., 11.]]), array([[12., 13., 14., 15.]]), array([], shape=(0, 4), dtype=float64)]
9.4 np.hsplit()
和 np.split 一样,传入整数时必须是均分(传入索引数组时按索引位置切分)
def hsplit(ary, indices_or_sections):
"""
Split an array into multiple sub-arrays horizontally (column-wise).
Please refer to the `split` documentation. `hsplit` is equivalent
to `split` with **``axis=1``**, the array is always split along the second
axis regardless of the array dimension.
See Also
--------
split : Split an array into multiple sub-arrays of equal size.
"""
pass
# Examples
# --------
>>> x = np.arange(16.0).reshape(4, 4)
>>> x
array([[ 0., 1., 2., 3.],
[ 4., 5., 6., 7.],
[ 8., 9., 10., 11.],
[12., 13., 14., 15.]])
>>> np.hsplit(x, 2)
[array([[ 0., 1.],
[ 4., 5.],
[ 8., 9.],
[12., 13.]]),
array([[ 2., 3.],
[ 6., 7.],
[10., 11.],
[14., 15.]])]
>>> np.hsplit(x, np.array([3, 6]))
[array([[ 0., 1., 2.],
[ 4., 5., 6.],
[ 8., 9., 10.],
[12., 13., 14.]]),
array([[ 3.],
[ 7.],
[11.],
[15.]]),
array([], shape=(4, 0), dtype=float64)] # attention this empty ndarray
10. array的copy
a[:] 得到的只是视图(view),与原数组共用数据;a.copy() 才会真正开辟新的内存。
这一点和列表不一样(列表的 [:] 会创建一个新的浅拷贝列表)。
# ######### 使用 : 简单复制 #########
>>> a = np.arange(3 * 4).reshape(3, 4)
>>> a
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
>>> b = a[:]
>>> b
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
>>> b[0, 0] = -8
>>> a
array([[-8, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
>>> id(a), id(b)
(140429245775472, 140429245774896)
>>> id(a[0, 0]), id(b[0, 0]) # id 一样并不能说明共享内存:a[0, 0] 每次都会新建临时标量对象,释放后地址被复用;应使用 np.shares_memory(a, b) 判断
(140429481610864, 140429481610864)
# ######### 使用 copy 复制 #########
>>> a
array([[-8, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
>>> a = np.arange(3 * 4).reshape(3, 4)
>>> a
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
>>> b = a.copy()
>>> b[0, 0] = -8
>>> a
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
>>> id(a), id(b)
(140429245774896, 140429246035408)
>>> id(a[0, 0]), id(b[0, 0]) # id 一样只是临时标量对象的地址被复用,与是否共享内存无关;此处 np.shares_memory(a, b) 为 False
(140429452734128, 140429452734128)
>>> a[0, 0], b[0, 0]
(0, -8)