萌新向Python数据分析及数据挖掘 第二章 pandas 第四节 NumPy Basics: Arrays and Vectorized Computation
NumPy Basics: Arrays and Vectorized Computation
In [1]:
import numpy as np
np.random.seed(12345)
import matplotlib.pyplot as plt
plt.rc('figure', figsize=(10, 6))
np.set_printoptions(precision=4, suppress=True)
In [2]:
import numpy as np
my_arr = np.arange(1000000)
my_list = list(range(1000000))
In [3]:
%time for _ in range(10): my_arr2 = my_arr * 2 #数组计算快很多
%time for _ in range(10): my_list2 = [x * 2 for x in my_list]
The NumPy ndarray: A Multidimensional Array Object
In [4]:
import numpy as np
# Generate some random data
data = np.random.randn(2, 3)
data
Out[4]:
In [5]:
data * 10
data + data
Out[5]:
In [7]:
data.shape
Out[7]:
In [8]:
data.dtype
Out[8]:
Creating ndarrays
In [9]:
data1 = [6, 7.5, 8, 0, 1]
arr1 = np.array(data1)
arr1
Out[9]:
In [10]:
data2 = [[1, 2, 3, 4], [5, 6, 7, 8]]
arr2 = np.array(data2)
arr2
Out[10]:
In [13]:
arr2.ndim  # number of dimensions (axes) of the array
Out[13]:
In [17]:
arr2.shape
Out[17]:
In [15]:
arr1.dtype
Out[15]:
In [16]:
arr2.dtype
Out[16]:
In [20]:
np.zeros(10)
Out[20]:
In [23]:
np.empty((2, 3, 2))
Out[23]:
Docstring:
empty(shape, dtype=float, order='C')
Return a new array of given shape and type, without initializing entries.
Parameters
----------
shape : int or tuple of int
Shape of the empty array
dtype : data-type, optional
Desired output data-type.
order : {'C', 'F'}, optional
Whether to store multi-dimensional data in row-major
(C-style) or column-major (Fortran-style) order in
memory.
Returns
-------
out : ndarray
Array of uninitialized (arbitrary) data of the given shape, dtype, and
order. Object arrays will be initialized to None.
See Also
--------
empty_like, zeros, ones
Notes
-----
`empty`, unlike `zeros`, does not set the array values to zero,
and may therefore be marginally faster. On the other hand, it requires
the user to manually set all the values in the array, and should be
used with caution.
Examples
--------
>>> np.empty([2, 2])
array([[ -9.74499359e+001, 6.69583040e-309],
[ 2.13182611e-314, 3.06959433e-309]]) #random
>>> np.empty([2, 2], dtype=int)
array([[-1073741821, -1067949133],
[ 496041986, 19249760]]) #random
Type: builtin_function_or_method
In [24]:
np.zeros((3, 6))
Out[24]:
Docstring:
zeros(shape, dtype=float, order='C')
Return a new array of given shape and type, filled with zeros.
Parameters
In [25]:
np.arange(15)
Out[25]:
Docstring:
arange([start,] stop[, step,], dtype=None)
Return evenly spaced values within a given interval.
Parameters
Data Types for ndarrays
In [26]:
arr1 = np.array([1, 2, 3], dtype=np.float64)
arr2 = np.array([1, 2, 3], dtype=np.int32)
arr1.dtype
Out[26]:
In [27]:
arr2.dtype
Out[27]:
In [28]:
arr = np.array([1, 2, 3, 4, 5])
arr.dtype
Out[28]:
In [29]:
float_arr = arr.astype(np.float64)
float_arr.dtype
Out[29]:
In [30]:
arr = np.array([3.7, -1.2, -2.6, 0.5, 12.9, 10.1])
arr
Out[30]:
In [31]:
arr.astype(np.int32)
Out[31]:
In [32]:
# np.string_ was removed in NumPy 2.0; np.bytes_ names the same fixed-width
# byte-string dtype and is also available on older NumPy releases.
numeric_strings = np.array(['1.25', '-9.6', '42'], dtype=np.bytes_)
# Parse each byte string as a number; the result is a float64 array.
numeric_strings.astype(float)
Out[32]:
In [33]:
int_array = np.arange(10)
calibers = np.array([.22, .270, .357, .380, .44, .50], dtype=np.float64)
# Cast using another array's dtype as the target type (here float64).
int_array.astype(calibers.dtype)
Out[33]:
In [34]:
empty_uint32 = np.empty(8, dtype='u4')
empty_uint32
Out[34]:
Arithmetic with NumPy Arrays
In [37]:
arr = np.array([[1., 2., 3.], [4., 5., 6.]])
arr
Out[37]:
In [38]:
arr * arr
Out[38]:
In [39]:
arr - arr
Out[39]:
In [40]:
1 / arr
Out[40]:
In [41]:
arr ** 0.5  # element-wise square root
Out[41]:
In [ ]:
arr2 = np.array([[0., 4., 1.], [7., 2., 12.]])
arr2
arr2 > arr
Basic Indexing and Slicing
In [45]:
arr = np.arange(10)
arr
Out[45]:
In [46]:
arr[5]
Out[46]:
In [47]:
arr[5:8]
Out[47]:
In [48]:
arr[5:8] = 12
In [49]:
arr
Out[49]:
In [50]:
arr_slice = arr[5:8]
arr_slice
Out[50]:
In [51]:
arr_slice[1] = 12345
arr
Out[51]:
In [ ]:
arr_slice[:] = 64
arr
In [52]:
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
arr2d[2]
Out[52]:
In [55]:
arr2d[0][2]
Out[55]:
In [58]:
arr2d[2, 2]
Out[58]:
In [59]:
arr3d = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])
arr3d
Out[59]:
In [64]:
arr3d[0,1,2]
Out[64]:
In [66]:
old_values = arr3d[0].copy()
arr3d[0] = 42
arr3d
Out[66]:
In [67]:
arr3d[0] = old_values
arr3d
Out[67]:
In [68]:
arr3d[1, 0]
Out[68]:
In [69]:
x = arr3d[1]
x
x[0]
Out[69]:
Indexing with slices
In [70]:
arr
arr[1:6]
Out[70]:
In [71]:
arr2d
arr2d[:2]
Out[71]:
In [72]:
arr2d[:2, 1:]
Out[72]:
In [73]:
arr2d[1, :2]
Out[73]:
In [74]:
arr2d[:2, 2]
Out[74]:
In [75]:
arr2d[:, :1]
Out[75]:
In [76]:
arr2d[:2, 1:] = 0
arr2d
Out[76]:
Boolean Indexing
In [78]:
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
data = np.random.randn(7, 4)
names
Out[78]:
In [79]:
data
Out[79]:
In [80]:
names == 'Bob'
Out[80]:
In [81]:
data[names == 'Bob']  # filter: keep only the rows whose matching name is 'Bob'
Out[81]:
In [82]:
data[names == 'Bob', 2:]
data[names == 'Bob', 3]
Out[82]:
In [84]:
names != 'Bob'
Out[84]:
In [85]:
data[~(names == 'Bob')]  # inverse selection: ~ negates the boolean mask
Out[85]:
In [ ]:
In [86]:
cond = names == 'Bob'
data[~cond]
Out[86]:
In [87]:
mask = (names == 'Bob') | (names == 'Will')
mask
data[mask]
Out[87]:
In [88]:
data[data < 0] = 0  # select every negative entry with a boolean mask and overwrite it with 0
data
Out[88]:
In [89]:
data[names != 'Joe'] = 7
data
Out[89]:
Fancy Indexing
In [96]:
arr = np.empty((8, 4))
In [97]:
# Fill row i with the value i so each row is recognisable when it is
# picked out by index in the cells that follow.
for i in range(8):
    arr[i] = i  # the scalar is broadcast across the whole row
arr
Out[97]:
In [ ]:
In [98]:
arr[[4, 3, 0, 6]]  # fancy indexing: select rows 4, 3, 0 and 6, in that order
Out[98]:
In [99]:
arr[[-3, -5, -7]]
Out[99]:
In [101]:
arr = np.arange(32).reshape((8, 4))
arr
Out[101]:
In [102]:
arr[[1, 5, 7, 2], [0, 3, 1, 2]]  # pick the elements at (1, 0), (5, 3), (7, 1), (2, 2) into a new 1-D array
Out[102]:
In [105]:
arr[[1, 5, 7, 2]][:, [2, 0, 1]]  # take rows 1, 5, 7, 2, then columns 2, 0, 1 of that result, in the given order
Out[105]:
Transposing Arrays and Swapping Axes
In [107]:
arr = np.arange(15).reshape((3, 5))
arr
Out[107]:
In [108]:
arr.T
Out[108]:
In [109]:
arr = np.random.randn(6, 3)
arr
Out[109]:
In [111]:
np.dot(arr.T, arr)
Out[111]:
Docstring:
dot(a, b, out=None)
Dot product of two arrays. Specifically,
- If both `a` and `b` are 1-D arrays, it is inner product of vectors
(without complex conjugation).
- If both `a` and `b` are 2-D arrays, it is matrix multiplication,
but using :func:`matmul` or ``a @ b`` is preferred.
- If either `a` or `b` is 0-D (scalar), it is equivalent to :func:`multiply`
and using ``numpy.multiply(a, b)`` or ``a * b`` is preferred.
- If `a` is an N-D array and `b` is a 1-D array, it is a sum product over
the last axis of `a` and `b`.
- If `a` is an N-D array and `b` is an M-D array (where ``M>=2``), it is a
sum product over the last axis of `a` and the second-to-last axis of `b`::
dot(a, b)[i,j,k,m] = sum(a[i,j,:] * b[k,:,m])
Parameters
----------
a : array_like
First argument.
b : array_like
Second argument.
out : ndarray, optional
Output argument. This must have the exact kind that would be returned
if it was not used. In particular, it must have the right type, must be
C-contiguous, and its dtype must be the dtype that would be returned
for `dot(a,b)`. This is a performance feature. Therefore, if these
conditions are not met, an exception is raised, instead of attempting
to be flexible.
Returns
-------
output : ndarray
Returns the dot product of `a` and `b`. If `a` and `b` are both
scalars or both 1-D arrays then a scalar is returned; otherwise
an array is returned.
If `out` is given, then it is returned.
Raises
------
ValueError
If the last dimension of `a` is not the same size as
the second-to-last dimension of `b`.
See Also
--------
vdot : Complex-conjugating dot product.
tensordot : Sum products over arbitrary axes.
einsum : Einstein summation convention.
matmul : '@' operator as method with out parameter.
Examples
--------
>>> np.dot(3, 4)
12
Neither argument is complex-conjugated:
>>> np.dot([2j, 3j], [2j, 3j])
(-13+0j)
For 2-D arrays it is the matrix product:
>>> a = [[1, 0], [0, 1]]
>>> b = [[4, 1], [2, 2]]
>>> np.dot(a, b)
array([[4, 1],
[2, 2]])
>>> a = np.arange(3*4*5*6).reshape((3,4,5,6))
>>> b = np.arange(3*4*5*6)[::-1].reshape((5,4,6,3))
>>> np.dot(a, b)[2,3,2,1,2,2]
499128
>>> sum(a[2,3,2,:] * b[1,2,:,2])
499128
In [123]:
arr = np.arange(16).reshape((2, 2, 4))
arr
Out[123]:
In [120]:
arr.transpose((1, 2, 0))
Out[120]:
In [121]:
arr.transpose((1, 2, 0)).shape
Out[121]:
In [124]:
arr.transpose((2, 1, 0))
Out[124]:
In [125]:
arr.transpose((2, 1, 0)).shape
Out[125]:
In [127]:
arr.transpose((1, 0, 2))
Out[127]:
In [128]:
arr.transpose((1, 0, 2)).shape
Out[128]:
Docstring:
a.transpose(*axes)
Returns a view of the array with axes transposed.
For a 1-D array, this has no effect. (To change between column and
row vectors, first cast the 1-D array into a matrix object.)
For a 2-D array, this is the usual matrix transpose.
For an n-D array, if axes are given, their order indicates how the
axes are permuted (see Examples). If axes are not provided and
``a.shape = (i[0], i[1], ... i[n-2], i[n-1])``, then
``a.transpose().shape = (i[n-1], i[n-2], ... i[1], i[0])``.
Parameters
----------
axes : None, tuple of ints, or `n` ints
* None or no argument: reverses the order of the axes.
* tuple of ints: `i` in the `j`-th place in the tuple means `a`'s
`i`-th axis becomes `a.transpose()`'s `j`-th axis.
* `n` ints: same as an n-tuple of the same ints (this form is
intended simply as a "convenience" alternative to the tuple form)
Returns
-------
out : ndarray
View of `a`, with axes suitably permuted.
See Also
--------
ndarray.T : Array property returning the array transposed.
Examples
--------
>>> a = np.array([[1, 2], [3, 4]])
>>> a
array([[1, 2],
[3, 4]])
>>> a.transpose()
array([[1, 3],
[2, 4]])
>>> a.transpose((1, 0))
array([[1, 3],
[2, 4]])
>>> a.transpose(1, 0)
array([[1, 3],
[2, 4]])
In [116]:
arr
arr.swapaxes(1, 2)
Out[116]:
Docstring:
a.swapaxes(axis1, axis2)
Return a view of the array with `axis1` and `axis2` interchanged.
Refer to `numpy.swapaxes` for full documentation.
See Also
--------
numpy.swapaxes : equivalent function
Type: builtin_function_or_method
Universal Functions: Fast Element-Wise Array Functions
In [129]:
arr = np.arange(10)
arr
Out[129]:
In [130]:
np.sqrt(arr)
Out[130]:
In [131]:
np.exp(arr)
Out[131]:
In [132]:
x = np.random.randn(8)
y = np.random.randn(8)
x
Out[132]:
In [133]:
y
Out[133]:
In [134]:
np.maximum(x, y)
Out[134]:
In [143]:
arr = np.random.randn(7) * 5
remainder, whole_part = np.modf(arr)
arr
Out[143]:
Call signature: np.modf(*args, **kwargs)
Type: ufunc
String form: <ufunc 'modf'>
File: c:\users\qq123\anaconda3\lib\site-packages\numpy\__init__.py
Docstring:
modf(x[, out1, out2], / [, out=(None, None)], *, where=True, casting='same_kind', order='K', dtype=None, subok=True[, signature, extobj])
Return the fractional and integral parts of an array, element-wise.
The fractional and integral parts are negative if the given number is
negative.
Parameters
----------
x : array_like
Input array.
out : ndarray, None, or tuple of ndarray and None, optional
A location into which the result is stored. If provided, it must have
a shape that the inputs broadcast to. If not provided or `None`,
a freshly-allocated array is returned. A tuple (possible only as a
keyword argument) must have length equal to the number of outputs.
where : array_like, optional
Values of True indicate to calculate the ufunc at that position, values
of False indicate to leave the value in the output alone.
**kwargs
For other keyword-only arguments, see the
:ref:`ufunc docs <ufuncs.kwargs>`.
Returns
-------
y1 : ndarray
Fractional part of `x`.
y2 : ndarray
Integral part of `x`.
Notes
-----
For integer input the return values are floats.
See Also
--------
divmod : ``divmod(x, 1)`` is equivalent to ``modf`` with the return values
switched, except it always has a positive remainder.
Examples
--------
>>> np.modf([0, 3.5])
(array([ 0. , 0.5]), array([ 0., 3.]))
>>> np.modf(-0.5)
(-0.5, -0)
Class docstring:
Functions that operate element by element on whole arrays.
To see the documentation for a specific ufunc, use `info`. For
example, ``np.info(np.sin)``. Because ufuncs are written in C
(for speed) and linked into Python with NumPy's ufunc facility,
Python's help() function finds this page whenever help() is called
on a ufunc.
A detailed explanation of ufuncs can be found in the docs for :ref:`ufuncs`.
Calling ufuncs:
===============
op(*x[, out], where=True, **kwargs)
Apply `op` to the arguments `*x` elementwise, broadcasting the arguments.
The broadcasting rules are:
* Dimensions of length 1 may be prepended to either array.
* Arrays may be repeated along dimensions of length 1.
Parameters
----------
*x : array_like
Input arrays.
out : ndarray, None, or tuple of ndarray and None, optional
Alternate array object(s) in which to put the result; if provided, it
must have a shape that the inputs broadcast to. A tuple of arrays
(possible only as a keyword argument) must have length equal to the
number of outputs; use `None` for outputs to be allocated by the ufunc.
where : array_like, optional
Values of True indicate to calculate the ufunc at that position, values
of False indicate to leave the value in the output alone.
**kwargs
For other keyword-only arguments, see the :ref:`ufunc docs <ufuncs.kwargs>`.
Returns
-------
r : ndarray or tuple of ndarray
`r` will have the shape that the arrays in `x` broadcast to; if `out` is
provided, `r` will be equal to `out`. If the function has more than one
output, then the result will be a tuple of arrays.
In [141]:
remainder
Out[141]:
In [142]:
whole_part
Out[142]:
In [144]:
arr
np.sqrt(arr)
np.sqrt(arr, arr)
arr
Out[144]:
In [ ]:
Array-Oriented Programming with Arrays
In [145]:
points = np.arange(-5, 5, 0.01) # 1000 equally spaced points
In [158]:
xs, ys = np.meshgrid(points, points)
ys
Out[158]:
In [159]:
xs
Out[159]:
In [155]:
ys.shape
Out[155]:
In [164]:
z = np.sqrt(xs ** 2 + ys ** 2)
z
Out[164]:
In [166]:
import matplotlib.pyplot as plt

# Show the grid of values as a grayscale image with a colour scale beside it.
plt.imshow(z, cmap=plt.cm.gray)
plt.colorbar()
# Raw string: "\s" in "\sqrt" is not a valid escape sequence in a normal
# string literal and triggers a warning on recent Python versions.
plt.title(r"Image plot of $\sqrt{x^2 + y^2}$ for a grid of values")
Out[166]:
In [167]:
%matplotlib inline
In [168]:
plt.draw()
In [ ]:
In [153]:
plt.close('all')
Expressing Conditional Logic as Array Operations
In [170]:
xarr = np.array([1.1, 1.2, 1.3, 1.4, 1.5])
yarr = np.array([2.1, 2.2, 2.3, 2.4, 2.5])
cond = np.array([True, False, True, True, False])
In [171]:
result = [(x if c else y)
for x, y, c in zip(xarr, yarr, cond)]
result
Out[171]:
In [172]:
result = np.where(cond, xarr, yarr)
result
Out[172]:
In [173]:
arr = np.random.randn(4, 4)
arr
arr > 0
np.where(arr > 0, 2, -2)
Out[173]:
In [174]:
np.where(arr > 0, 2, arr) # set only positive values to 2
Out[174]:
Mathematical and Statistical Methods
In [177]:
arr = np.random.randn(5, 4)
print(arr)
print(arr.mean())
print(np.mean(arr))
print(arr.sum())
In [178]:
print(arr.mean(axis=1))
print(arr.sum(axis=0))
In [180]:
arr = np.array([0, 1, 2, 3, 4, 5, 6, 7])
arr.cumsum()  # cumulative (running) sum
Out[180]:
In [ ]:
Docstring:
a.cumsum(axis=None, dtype=None, out=None)
Return the cumulative sum of the elements along the given axis.
Refer to `numpy.cumsum` for full documentation.
See Also
--------
numpy.cumsum : equivalent function
Type: builtin_function_or_method
In [184]:
arr = np.array([[0, 1, 2], [3, 10, 5], [6, 7, 8]])
print(arr)
print(arr.cumsum(axis=0))
print(arr.cumprod(axis=0))
Docstring:
a.cumprod(axis=None, dtype=None, out=None)
Return the cumulative product of the elements along the given axis.
Refer to `numpy.cumprod` for full documentation.
See Also
--------
numpy.cumprod : equivalent function
Type: builtin_function_or_method
Methods for Boolean Arrays
In [185]:
arr = np.random.randn(100)
(arr > 0).sum() # Number of positive values
Out[185]:
In [187]:
bools = np.array([False, False, True, False])
bools.any()
Out[187]:
In [188]:
bools.all()
Out[188]:
In [ ]:
Sorting
In [189]:
arr = np.random.randn(6)
arr
Out[189]:
In [190]:
arr.sort()
arr
Out[190]:
In [191]:
arr = np.random.randn(5, 3)
arr
Out[191]:
In [197]:
arr.sort(-1)
arr
Out[197]:
In [ ]:
large_arr = np.random.randn(1000)
large_arr.sort()
large_arr[int(0.05 * len(large_arr))] # 5% quantile
Unique and Other Set Logic
In [198]:
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
np.unique(names)
Out[198]:
In [199]:
ints = np.array([3, 3, 3, 2, 2, 1, 1, 4, 4])
np.unique(ints)
Out[199]:
In [200]:
sorted(set(names))
Out[200]:
In [202]:
values = np.array([6, 0, 0, 3, 2, 5, 6])
# np.isin is the recommended replacement for the deprecated np.in1d; for a
# 1-D input it returns the same boolean membership mask.
np.isin(values, [2, 3, 6])
Out[202]:
Signature: np.in1d(ar1, ar2, assume_unique=False, invert=False)
Docstring:
Test whether each element of a 1-D array is also present in a second array.
Returns a boolean array the same length as `ar1` that is True
where an element of `ar1` is in `ar2` and False otherwise.
We recommend using :func:`isin` instead of `in1d` for new code.
Parameters
----------
ar1 : (M,) array_like
Input array.
ar2 : array_like
The values against which to test each value of `ar1`.
assume_unique : bool, optional
If True, the input arrays are both assumed to be unique, which
can speed up the calculation. Default is False.
invert : bool, optional
If True, the values in the returned array are inverted (that is,
False where an element of `ar1` is in `ar2` and True otherwise).
Default is False. ``np.in1d(a, b, invert=True)`` is equivalent
to (but is faster than) ``np.invert(in1d(a, b))``.
.. versionadded:: 1.8.0
Returns
-------
in1d : (M,) ndarray, bool
The values `ar1[in1d]` are in `ar2`.
See Also
--------
isin : Version of this function that preserves the
shape of ar1.
numpy.lib.arraysetops : Module with a number of other functions for
performing set operations on arrays.
Notes
-----
`in1d` can be considered as an element-wise function version of the
python keyword `in`, for 1-D sequences. ``in1d(a, b)`` is roughly
equivalent to ``np.array([item in b for item in a])``.
However, this idea fails if `ar2` is a set, or similar (non-sequence)
container: As ``ar2`` is converted to an array, in those cases
``asarray(ar2)`` is an object array rather than the expected array of
contained values.
.. versionadded:: 1.4.0
Examples
--------
>>> test = np.array([0, 1, 2, 5, 0])
>>> states = [0, 2]
>>> mask = np.in1d(test, states)
>>> mask
array([ True, False, True, False, True])
>>> test[mask]
array([0, 2, 0])
>>> mask = np.in1d(test, states, invert=True)
>>> mask
array([False, True, False, True, False])
>>> test[mask]
array([1, 5])
File Input and Output with Arrays
In [206]:
arr = np.arange(10)
np.save('some_array', arr)
Signature: np.savez(file, *args, **kwds)
Docstring:
Save several arrays into a single file in uncompressed ``.npz`` format.
If arguments are passed in with no keywords, the corresponding variable
names, in the ``.npz`` file, are 'arr_0', 'arr_1', etc. If keyword
arguments are given, the corresponding variable names, in the ``.npz``
file will match the keyword names.
Parameters
----------
file : str or file
Either the file name (string) or an open file (file-like object)
where the data will be saved. If file is a string or a Path, the
``.npz`` extension will be appended to the file name if it is not
already there.
args : Arguments, optional
Arrays to save to the file. Since it is not possible for Python to
know the names of the arrays outside `savez`, the arrays will be saved
with names "arr_0", "arr_1", and so on. These arguments can be any
expression.
kwds : Keyword arguments, optional
Arrays to save to the file. Arrays will be saved in the file with the
keyword names.
Returns
-------
None
See Also
--------
save : Save a single array to a binary file in NumPy format.
savetxt : Save an array to a file as plain text.
savez_compressed : Save several arrays into a compressed ``.npz`` archive
Notes
-----
The ``.npz`` file format is a zipped archive of files named after the
variables they contain. The archive is not compressed and each file
in the archive contains one variable in ``.npy`` format. For a
description of the ``.npy`` format, see `numpy.lib.format` or the
NumPy Enhancement Proposal
http://docs.scipy.org/doc/numpy/neps/npy-format.html
When opening the saved ``.npz`` file with `load` a `NpzFile` object is
returned. This is a dictionary-like object which can be queried for
its list of arrays (with the ``.files`` attribute), and for the arrays
themselves.
Examples
--------
>>> from tempfile import TemporaryFile
>>> outfile = TemporaryFile()
>>> x = np.arange(10)
>>> y = np.sin(x)
Using `savez` with \*args, the arrays are saved with default names.
>>> np.savez(outfile, x, y)
>>> outfile.seek(0) # Only needed here to simulate closing & reopening file
>>> npzfile = np.load(outfile)
>>> npzfile.files
['arr_1', 'arr_0']
>>> npzfile['arr_0']
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
Using `savez` with \**kwds, the arrays are saved with the keyword names.
>>> outfile = TemporaryFile()
>>> np.savez(outfile, x=x, y=y)
>>> outfile.seek(0)
>>> npzfile = np.load(outfile)
>>> npzfile.files
['y', 'x']
>>> npzfile['x']
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
In [204]:
np.load('some_array.npy')
Out[204]:
In [207]:
np.savez('array_archive.npz', a=arr, b=arr)
In [209]:
arch = np.load('array_archive.npz')
arch['b']
Out[209]:
In [210]:
np.savez_compressed('arrays_compressed.npz', a=arr, b=arr)
In [211]:
import os

# Remove the demo files written by the np.save / np.savez /
# np.savez_compressed cells. os.remove works on every platform, whereas the
# shell command `rm` is not available in a default Windows shell.
# NOTE(review): on Windows an NpzFile still open from np.load may block the
# removal of its archive; confirm, and close it first if needed.
for filename in ('some_array.npy', 'array_archive.npz', 'arrays_compressed.npz'):
    # Skip files that are already gone so the cell can be re-run safely.
    if os.path.exists(filename):
        os.remove(filename)
Linear Algebra
In [213]:
x = np.array([[1., 2., 3.], [4., 5., 6.]])
y = np.array([[6., 23.], [-1, 7], [8, 9]])
print(x)
print(y)
x.dot(y)
Out[213]:
In [ ]:
np.dot(x, y)
In [ ]:
np.dot(x, np.ones(3))
In [ ]:
x @ np.ones(3)
In [215]:
from numpy.linalg import inv, qr
X = np.random.randn(5, 5)
mat = X.T.dot(X)
mat
Out[215]:
In [217]:
inv(mat)
Out[217]:
Signature: inv(a)
Docstring:
Compute the (multiplicative) inverse of a matrix.
Given a square matrix `a`, return the matrix `ainv` satisfying
``dot(a, ainv) = dot(ainv, a) = eye(a.shape[0])``.
Parameters
----------
a : (..., M, M) array_like
Matrix to be inverted.
Returns
-------
ainv : (..., M, M) ndarray or matrix
(Multiplicative) inverse of the matrix `a`.
Raises
------
LinAlgError
If `a` is not square or inversion fails.
Notes
-----
.. versionadded:: 1.8.0
Broadcasting rules apply, see the `numpy.linalg` documentation for
details.
Examples
--------
>>> from numpy.linalg import inv
>>> a = np.array([[1., 2.], [3., 4.]])
>>> ainv = inv(a)
>>> np.allclose(np.dot(a, ainv), np.eye(2))
True
>>> np.allclose(np.dot(ainv, a), np.eye(2))
True
If a is a matrix object, then the return value is a matrix as well:
>>> ainv = inv(np.matrix(a))
>>> ainv
matrix([[-2. , 1. ],
[ 1.5, -0.5]])
Inverses of several matrices can be computed at once:
>>> a = np.array([[[1., 2.], [3., 4.]], [[1, 3], [3, 5]]])
>>> inv(a)
array([[[-2. , 1. ],
[ 1.5, -0.5]],
[[-5. , 2. ],
[ 3. , -1. ]]])
In [219]:
mat.dot(inv(mat))
Out[219]:
In [221]:
q, r = qr(mat)
r
Out[221]:
In [222]:
q
Out[222]:
Pseudorandom Number Generation
In [223]:
samples = np.random.normal(size=(4, 4))
samples
Out[223]:
In [224]:
from random import normalvariate
N = 1000000
%timeit samples = [normalvariate(0, 1) for _ in range(N)]
%timeit np.random.normal(size=N)
In [225]:
np.random.seed(1234)
In [226]:
rng = np.random.RandomState(1234)
rng.randn(10)
Out[226]:
Example: Random Walks
In [227]:
import random

# Seed the standard-library generator as well: np.random.seed does not affect
# the `random` module, so without this the walk differs on every run.
random.seed(12345)

position = 0
walk = [position]
steps = 1000
for i in range(steps):
    # Fair coin flip: move one unit up or one unit down.
    step = 1 if random.randint(0, 1) else -1
    position += step
    walk.append(position)
In [228]:
plt.figure()
Out[228]:
In [229]:
plt.plot(walk[:100])
Out[229]:
In [230]:
np.random.seed(12345)
In [231]:
nsteps = 1000
# One coin flip (0 or 1) per step of the walk.
draws = np.random.randint(low=0, high=2, size=nsteps)
# A draw of 0 becomes a step of -1; a draw of 1 becomes a step of +1.
steps = np.where(draws <= 0, -1, 1)
# The running total of the steps is the walker's position after each step.
walk = np.cumsum(steps)
In [232]:
walk.min()
walk.max()
Out[232]:
In [233]:
(np.abs(walk) >= 10).argmax()
Out[233]:
Simulating Many Random Walks at Once
In [234]:
nwalks = 5000
nsteps = 1000
# One row per walk, one column per step; every entry is a coin flip (0 or 1).
draws = np.random.randint(low=0, high=2, size=(nwalks, nsteps))
# A draw of 0 becomes a step of -1; a draw of 1 becomes a step of +1.
steps = np.where(draws <= 0, -1, 1)
# Accumulate along axis 1 so each row holds that walk's position over time.
walks = np.cumsum(steps, axis=1)
walks
Out[234]:
In [235]:
walks.max()
walks.min()
Out[235]:
In [236]:
hits30 = (np.abs(walks) >= 30).any(1)
hits30
hits30.sum() # Number that hit 30 or -30
Out[236]:
In [237]:
crossing_times = (np.abs(walks[hits30]) >= 30).argmax(1)
crossing_times.mean()
Out[237]:
In [238]:
steps = np.random.normal(loc=0, scale=0.25,
size=(nwalks, nsteps))