萌新向Python数据分析及数据挖掘 第二章 pandas 第四节 NumPy Basics: Arrays and Vectorized Computation¶

NumPy Basics: Arrays and Vectorized Computation

In [1]:
 
 
 
 
 
import numpy as np
np.random.seed(12345)
import matplotlib.pyplot as plt
plt.rc('figure', figsize=(10, 6))
np.set_printoptions(precision=4, suppress=True)
 
 
In [2]:
 
 
 
 
 
import numpy as np
my_arr = np.arange(1000000)
my_list = list(range(1000000))
 
 
In [3]:
 
 
 
 
 
%time for _ in range(10): my_arr2 = my_arr * 2 #数组计算快很多
%time for _ in range(10): my_list2 = [x * 2 for x in my_list]
 
 
 
Wall time: 32 ms
Wall time: 1.18 s
 

The NumPy ndarray: A Multidimensional Array Object

In [4]:
 
 
 
 
 
import numpy as np
# Generate some random data
data = np.random.randn(2, 3)
data
 
 
Out[4]:
array([[-0.2047,  0.4789, -0.5194],
       [-0.5557,  1.9658,  1.3934]])
In [5]:
 
 
 
 
 
data * 10
data + data
 
 
Out[5]:
array([[-0.4094,  0.9579, -1.0389],
       [-1.1115,  3.9316,  2.7868]])
In [7]:
 
 
 
 
 
data.shape
 
 
Out[7]:
(2, 3)
In [8]:
 
 
 
 
 
data.dtype
 
 
Out[8]:
dtype('float64')
 

Creating ndarrays

In [9]:
 
 
 
 
 
data1 = [6, 7.5, 8, 0, 1]
arr1 = np.array(data1)
arr1
 
 
Out[9]:
array([6. , 7.5, 8. , 0. , 1. ])
In [10]:
 
 
 
 
 
data2 = [[1, 2, 3, 4], [5, 6, 7, 8]]
arr2 = np.array(data2)
arr2
 
 
Out[10]:
array([[1, 2, 3, 4],
       [5, 6, 7, 8]])
In [13]:
 
 
 
 
 
arr2.ndim#维度
 
 
Out[13]:
2
In [17]:
 
 
 
 
 
arr2.shape
 
 
Out[17]:
(2, 4)
In [15]:
 
 
 
 
 
arr1.dtype
 
 
Out[15]:
dtype('float64')
In [16]:
 
 
 
 
 
arr2.dtype
 
 
Out[16]:
dtype('int32')
In [20]:
 
 
 
 
 
np.zeros(10)
 
 
Out[20]:
array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
In [23]:
 
 
 
 
 
np.empty((2, 3, 2))
 
 
Out[23]:
array([[[1.4437e-311, 3.1620e-322],
        [0.0000e+000, 0.0000e+000],
        [0.0000e+000, 4.5459e+174]],

       [[3.6964e-033, 1.1431e-071],
        [1.7259e-047, 5.3531e-038],
        [1.0054e-070, 2.5278e-052]]])
 
 
 
 
 
 
Docstring:
empty(shape, dtype=float, order='C')
Return a new array of given shape and type, without initializing entries.
Parameters
----------
shape : int or tuple of int
    Shape of the empty array
dtype : data-type, optional
    Desired output data-type.
order : {'C', 'F'}, optional
    Whether to store multi-dimensional data in row-major
    (C-style) or column-major (Fortran-style) order in
    memory.
Returns
-------
out : ndarray
    Array of uninitialized (arbitrary) data of the given shape, dtype, and
    order.  Object arrays will be initialized to None.
See Also
--------
empty_like, zeros, ones
Notes
-----
`empty`, unlike `zeros`, does not set the array values to zero,
and may therefore be marginally faster.  On the other hand, it requires
the user to manually set all the values in the array, and should be
used with caution.
Examples
--------
>>> np.empty([2, 2])
array([[ -9.74499359e+001,   6.69583040e-309],
       [  2.13182611e-314,   3.06959433e-309]])         #random
>>> np.empty([2, 2], dtype=int)
array([[-1073741821, -1067949133],
       [  496041986,    19249760]])                     #random
Type:      builtin_function_or_method
 
In [24]:
 
 
 
 
 
np.zeros((3, 6))
 
 
Out[24]:
array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]])
 
 
 
 
 
 
Docstring:
zeros(shape, dtype=float, order='C')
Return a new array of given shape and type, filled with zeros.
Parameters
 
In [25]:
 
 
 
 
 
np.arange(15)
 
 
Out[25]:
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])
 
 
 
 
 
 
Docstring:
zeros(shape, dtype=float, order='C')
Return a new array of given shape and type, filled with zeros.
Parameters
 
 

Data Types for ndarrays

In [26]:
 
 
 
 
 
arr1 = np.array([1, 2, 3], dtype=np.float64)
arr2 = np.array([1, 2, 3], dtype=np.int32)
arr1.dtype
 
 
Out[26]:
dtype('float64')
In [27]:
 
 
 
 
 
arr2.dtype
 
 
Out[27]:
dtype('int32')
In [28]:
 
 
 
 
 
arr = np.array([1, 2, 3, 4, 5])
arr.dtype
 
 
Out[28]:
dtype('int32')
In [29]:
 
 
 
 
 
float_arr = arr.astype(np.float64)
float_arr.dtype
 
 
Out[29]:
dtype('float64')
In [30]:
 
 
 
 
 
arr = np.array([3.7, -1.2, -2.6, 0.5, 12.9, 10.1])
arr
 
 
Out[30]:
array([ 3.7, -1.2, -2.6,  0.5, 12.9, 10.1])
In [31]:
 
 
 
 
 
arr.astype(np.int32)
 
 
Out[31]:
array([ 3, -1, -2,  0, 12, 10])
In [32]:
 
 
 
 
 
numeric_strings = np.array(['1.25', '-9.6', '42'], dtype=np.string_)
numeric_strings.astype(float)
 
 
Out[32]:
array([ 1.25, -9.6 , 42.  ])
In [33]:
 
 
 
 
 
int_array = np.arange(10)
calibers = np.array([.22, .270, .357, .380, .44, .50], dtype=np.float64)
int_array.astype(calibers.dtype#引用数据类型
 
 
Out[33]:
array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])
In [34]:
 
 
 
 
 
empty_uint32 = np.empty(8, dtype='u4')
empty_uint32
 
 
Out[34]:
array([         0, 1075314688,          0, 1075707904,          0,
       1075838976,          0, 1072693248], dtype=uint32)
 

Arithmetic with NumPy Arrays

In [37]:
 
 
 
 
 
arr = np.array([[1., 2., 3.], [4., 5., 6.]])
arr
 
 
Out[37]:
array([[1., 2., 3.],
       [4., 5., 6.]])
In [38]:
 
 
 
 
 
arr * arr
 
 
Out[38]:
array([[ 1.,  4.,  9.],
       [16., 25., 36.]])
In [39]:
 
 
 
 
 
arr - arr
 
 
Out[39]:
array([[0., 0., 0.],
       [0., 0., 0.]])
In [40]:
 
 
 
 
 
1 / arr
 
 
Out[40]:
array([[1.    , 0.5   , 0.3333],
       [0.25  , 0.2   , 0.1667]])
In [41]:
 
 
 
 
 
arr ** 0.5#开方
 
 
Out[41]:
array([[1.    , 1.4142, 1.7321],
       [2.    , 2.2361, 2.4495]])
In [ ]:
 
 
 
 
 
arr2 = np.array([[0., 4., 1.], [7., 2., 12.]])
arr2
arr2 > arr
 
 
 

Basic Indexing and Slicing

In [45]:
 
 
 
 
 
arr = np.arange(10)
arr
 
 
Out[45]:
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
In [46]:
 
 
 
 
 
arr[5]
 
 
Out[46]:
5
In [47]:
 
 
 
 
 
arr[5:8]
 
 
Out[47]:
array([5, 6, 7])
In [48]:
 
 
 
 
 
arr[5:8] = 12
 
 
In [49]:
 
 
 
 
 
arr
 
 
Out[49]:
array([ 0,  1,  2,  3,  4, 12, 12, 12,  8,  9])
In [50]:
 
 
 
 
 
arr_slice = arr[5:8]
arr_slice
 
 
Out[50]:
array([12, 12, 12])
In [51]:
 
 
 
 
 
arr_slice[1] = 12345
arr
 
 
Out[51]:
array([    0,     1,     2,     3,     4,    12, 12345,    12,     8,
           9])
In [ ]:
 
 
 
 
 
arr_slice[:] = 64
arr
 
 
In [52]:
 
 
 
 
 
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
arr2d[2]
 
 
Out[52]:
array([7, 8, 9])
In [55]:
 
 
 
 
 
arr2d[0][2]
 
 
Out[55]:
3
In [58]:
 
 
 
 
 
arr2d[2, 2]
 
 
Out[58]:
9
In [59]:
 
 
 
 
 
arr3d = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])
arr3d
 
 
Out[59]:
array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])
In [64]:
 
 
 
 
 
arr3d[0,1,2]
 
 
Out[64]:
6
In [66]:
 
 
 
 
 
old_values = arr3d[0].copy()
arr3d[0] = 42
arr3d
 
 
Out[66]:
array([[[42, 42, 42],
        [42, 42, 42]],

       [[ 7,  8,  9],
        [10, 11, 12]]])
In [67]:
 
 
 
 
 
arr3d[0] = old_values
arr3d
 
 
Out[67]:
array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])
In [68]:
 
 
 
 
 
arr3d[1, 0]
 
 
Out[68]:
array([7, 8, 9])
In [69]:
 
 
 
 
 
x = arr3d[1]
x
x[0]
 
 
Out[69]:
array([7, 8, 9])
 

Indexing with slices

In [70]:
 
 
 
 
 
arr
arr[1:6]
 
 
Out[70]:
array([ 1,  2,  3,  4, 12])
In [71]:
 
 
 
 
 
arr2d
arr2d[:2]
 
 
Out[71]:
array([[1, 2, 3],
       [4, 5, 6]])
In [72]:
 
 
 
 
 
arr2d[:2, 1:]
 
 
Out[72]:
array([[2, 3],
       [5, 6]])
In [73]:
 
 
 
 
 
arr2d[1, :2]
 
 
Out[73]:
array([4, 5])
In [74]:
 
 
 
 
 
arr2d[:2, 2]
 
 
Out[74]:
array([3, 6])
In [75]:
 
 
 
 
 
arr2d[:, :1]
 
 
Out[75]:
array([[1],
       [4],
       [7]])
In [76]:
 
 
 
 
 
arr2d[:2, 1:] = 0
arr2d
 
 
Out[76]:
array([[1, 0, 0],
       [4, 0, 0],
       [7, 8, 9]])
 

Boolean Indexing

In [78]:
 
 
 
 
 
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
data = np.random.randn(7, 4)
names
 
 
Out[78]:
array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'], dtype='<U4')
In [79]:
 
 
 
 
 
data
 
 
Out[79]:
array([[-0.8608,  0.5601, -1.2659,  0.1198],
       [-1.0635,  0.3329, -2.3594, -0.1995],
       [-1.542 , -0.9707, -1.307 ,  0.2863],
       [ 0.378 , -0.7539,  0.3313,  1.3497],
       [ 0.0699,  0.2467, -0.0119,  1.0048],
       [ 1.3272, -0.9193, -1.5491,  0.0222],
       [ 0.7584, -0.6605,  0.8626, -0.01  ]])
In [80]:
 
 
 
 
 
names == 'Bob'
 
 
Out[80]:
array([ True, False, False,  True, False, False, False])
In [81]:
 
 
 
 
 
data[names == 'Bob'] #数据筛选
 
 
Out[81]:
array([[-0.8608,  0.5601, -1.2659,  0.1198],
       [ 0.378 , -0.7539,  0.3313,  1.3497]])
In [82]:
 
 
 
 
 
data[names == 'Bob', 2:]
data[names == 'Bob', 3]
 
 
Out[82]:
array([0.1198, 1.3497])
In [84]:
 
 
 
 
 
names != 'Bob'
 
 
Out[84]:
array([False,  True,  True, False,  True,  True,  True])
In [85]:
 
 
 
 
 
data[~(names == 'Bob')]#反向选择
 
 
Out[85]:
array([[-1.0635,  0.3329, -2.3594, -0.1995],
       [-1.542 , -0.9707, -1.307 ,  0.2863],
       [ 0.0699,  0.2467, -0.0119,  1.0048],
       [ 1.3272, -0.9193, -1.5491,  0.0222],
       [ 0.7584, -0.6605,  0.8626, -0.01  ]])
In [ ]:
 
 
 
 
 
 
 
In [86]:
 
 
 
 
 
cond = names == 'Bob'
data[~cond]
 
 
Out[86]:
array([[-1.0635,  0.3329, -2.3594, -0.1995],
       [-1.542 , -0.9707, -1.307 ,  0.2863],
       [ 0.0699,  0.2467, -0.0119,  1.0048],
       [ 1.3272, -0.9193, -1.5491,  0.0222],
       [ 0.7584, -0.6605,  0.8626, -0.01  ]])
In [87]:
 
 
 
 
 
mask = (names == 'Bob') | (names == 'Will')
mask
data[mask]
 
 
Out[87]:
array([[-0.8608,  0.5601, -1.2659,  0.1198],
       [-1.542 , -0.9707, -1.307 ,  0.2863],
       [ 0.378 , -0.7539,  0.3313,  1.3497],
       [ 0.0699,  0.2467, -0.0119,  1.0048]])
In [88]:
 
 
 
 
 
data[data < 0] = 0 #筛选并修改数组
data
 
 
Out[88]:
array([[0.    , 0.5601, 0.    , 0.1198],
       [0.    , 0.3329, 0.    , 0.    ],
       [0.    , 0.    , 0.    , 0.2863],
       [0.378 , 0.    , 0.3313, 1.3497],
       [0.0699, 0.2467, 0.    , 1.0048],
       [1.3272, 0.    , 0.    , 0.0222],
       [0.7584, 0.    , 0.8626, 0.    ]])
In [89]:
 
 
 
 
 
data[names != 'Joe'] = 7
data
 
 
Out[89]:
array([[7.    , 7.    , 7.    , 7.    ],
       [0.    , 0.3329, 0.    , 0.    ],
       [7.    , 7.    , 7.    , 7.    ],
       [7.    , 7.    , 7.    , 7.    ],
       [7.    , 7.    , 7.    , 7.    ],
       [1.3272, 0.    , 0.    , 0.0222],
       [0.7584, 0.    , 0.8626, 0.    ]])
 

Fancy Indexing

In [96]:
 
 
 
 
 
arr = np.empty((8, 4))
 
 
In [97]:
 
 
 
 
 
for i in range(8):
    arr[i] = i
arr
 
 
Out[97]:
array([[0., 0., 0., 0.],
       [1., 1., 1., 1.],
       [2., 2., 2., 2.],
       [3., 3., 3., 3.],
       [4., 4., 4., 4.],
       [5., 5., 5., 5.],
       [6., 6., 6., 6.],
       [7., 7., 7., 7.]])
In [ ]:
 
 
 
 
 
 
 
In [98]:
 
 
 
 
 
arr[[4, 3, 0, 6]]#二维数组赋值
 
 
Out[98]:
array([[4., 4., 4., 4.],
       [3., 3., 3., 3.],
       [0., 0., 0., 0.],
       [6., 6., 6., 6.]])
In [99]:
 
 
 
 
 
arr[[-3, -5, -7]]
 
 
Out[99]:
array([[5., 5., 5., 5.],
       [3., 3., 3., 3.],
       [1., 1., 1., 1.]])
In [101]:
 
 
 
 
 
arr = np.arange(32).reshape((8, 4))
arr
 
 
Out[101]:
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])
In [102]:
 
 
 
 
 
arr[[1, 5, 7, 2], [0, 3, 1, 2]]#数组中提取特点位置的数字形成新的数组
 
 
Out[102]:
array([ 4, 23, 29, 10])
In [105]:
 
 
 
 
 
arr[[1, 5, 7, 2]][:, [2, 0, 1]]##数组中提取特点位置的数字并按特点顺序形成新的数组
 
 
Out[105]:
array([[ 6,  4,  5],
       [22, 20, 21],
       [30, 28, 29],
       [10,  8,  9]])
 

Transposing Arrays and Swapping Axes

In [107]:
 
 
 
 
 
arr = np.arange(15).reshape((3, 5))
arr
 
 
Out[107]:
array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])
In [108]:
 
 
 
 
 
arr.T
 
 
Out[108]:
array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])
In [109]:
 
 
 
 
 
arr = np.random.randn(6, 3)
arr
 
 
Out[109]:
array([[ 0.05  ,  0.6702,  0.853 ],
       [-0.9559, -0.0235, -2.3042],
       [-0.6525, -1.2183, -1.3326],
       [ 1.0746,  0.7236,  0.69  ],
       [ 1.0015, -0.5031, -0.6223],
       [-0.9212, -0.7262,  0.2229]])
In [111]:
 
 
 
 
 
np.dot(arr.T, arr)
 
 
Out[111]:
array([[4.3484, 1.7936, 3.0276],
       [1.7936, 3.2381, 2.8998],
       [3.0276, 2.8998, 8.7259]])
 
 
 
 
 
 
Docstring:
dot(a, b, out=None)
Dot product of two arrays. Specifically,
- If both `a` and `b` are 1-D arrays, it is inner product of vectors
  (without complex conjugation).
- If both `a` and `b` are 2-D arrays, it is matrix multiplication,
  but using :func:`matmul` or ``a @ b`` is preferred.
- If either `a` or `b` is 0-D (scalar), it is equivalent to :func:`multiply`
  and using ``numpy.multiply(a, b)`` or ``a * b`` is preferred.
- If `a` is an N-D array and `b` is a 1-D array, it is a sum product over
  the last axis of `a` and `b`.
- If `a` is an N-D array and `b` is an M-D array (where ``M>=2``), it is a
  sum product over the last axis of `a` and the second-to-last axis of `b`::
    dot(a, b)[i,j,k,m] = sum(a[i,j,:] * b[k,:,m])
Parameters
----------
a : array_like
    First argument.
b : array_like
    Second argument.
out : ndarray, optional
    Output argument. This must have the exact kind that would be returned
    if it was not used. In particular, it must have the right type, must be
    C-contiguous, and its dtype must be the dtype that would be returned
    for `dot(a,b)`. This is a performance feature. Therefore, if these
    conditions are not met, an exception is raised, instead of attempting
    to be flexible.
Returns
-------
output : ndarray
    Returns the dot product of `a` and `b`.  If `a` and `b` are both
    scalars or both 1-D arrays then a scalar is returned; otherwise
    an array is returned.
    If `out` is given, then it is returned.
Raises
------
ValueError
    If the last dimension of `a` is not the same size as
    the second-to-last dimension of `b`.
See Also
--------
vdot : Complex-conjugating dot product.
tensordot : Sum products over arbitrary axes.
einsum : Einstein summation convention.
matmul : '@' operator as method with out parameter.
Examples
--------
>>> np.dot(3, 4)
12
Neither argument is complex-conjugated:
>>> np.dot([2j, 3j], [2j, 3j])
(-13+0j)
For 2-D arrays it is the matrix product:
>>> a = [[1, 0], [0, 1]]
>>> b = [[4, 1], [2, 2]]
>>> np.dot(a, b)
array([[4, 1],
       [2, 2]])
>>> a = np.arange(3*4*5*6).reshape((3,4,5,6))
>>> b = np.arange(3*4*5*6)[::-1].reshape((5,4,6,3))
>>> np.dot(a, b)[2,3,2,1,2,2]
499128
>>> sum(a[2,3,2,:] * b[1,2,:,2])
499128
 
In [123]:
 
 
 
 
 
arr = np.arange(16).reshape((2, 2, 4))
arr
 
 
Out[123]:
array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7]],

       [[ 8,  9, 10, 11],
        [12, 13, 14, 15]]])
In [120]:
 
 
 
 
 
arr.transpose((1, 2, 0))
 
 
Out[120]:
array([[[ 0,  8],
        [ 1,  9],
        [ 2, 10],
        [ 3, 11]],

       [[ 4, 12],
        [ 5, 13],
        [ 6, 14],
        [ 7, 15]]])
In [121]:
 
 
 
 
 
arr.transpose((1, 2, 0)).shape
 
 
Out[121]:
(2, 4, 2)
In [124]:
 
 
 
 
 
arr.transpose((2, 1, 0))
 
 
Out[124]:
array([[[ 0,  8],
        [ 4, 12]],

       [[ 1,  9],
        [ 5, 13]],

       [[ 2, 10],
        [ 6, 14]],

       [[ 3, 11],
        [ 7, 15]]])
In [125]:
 
 
 
 
 
arr.transpose((2, 1, 0)).shape
 
 
Out[125]:
(4, 2, 2)
In [127]:
 
 
 
 
 
arr.transpose((1, 0, 2))
 
 
Out[127]:
array([[[ 0,  1,  2,  3],
        [ 8,  9, 10, 11]],

       [[ 4,  5,  6,  7],
        [12, 13, 14, 15]]])
In [128]:
 
 
 
 
 
arr.transpose((1, 0, 2)).shape
 
 
Out[128]:
(2, 2, 4)
 
 
 
 
 
 
Docstring:
a.transpose(*axes)
Returns a view of the array with axes transposed.
For a 1-D array, this has no effect. (To change between column and
row vectors, first cast the 1-D array into a matrix object.)
For a 2-D array, this is the usual matrix transpose.
For an n-D array, if axes are given, their order indicates how the
axes are permuted (see Examples). If axes are not provided and
``a.shape = (i[0], i[1], ... i[n-2], i[n-1])``, then
``a.transpose().shape = (i[n-1], i[n-2], ... i[1], i[0])``.
Parameters
----------
axes : None, tuple of ints, or `n` ints
 * None or no argument: reverses the order of the axes.
 * tuple of ints: `i` in the `j`-th place in the tuple means `a`'s
   `i`-th axis becomes `a.transpose()`'s `j`-th axis.
 * `n` ints: same as an n-tuple of the same ints (this form is
   intended simply as a "convenience" alternative to the tuple form)
Returns
-------
out : ndarray
    View of `a`, with axes suitably permuted.
See Also
--------
ndarray.T : Array property returning the array transposed.
Examples
--------
>>> a = np.array([[1, 2], [3, 4]])
>>> a
array([[1, 2],
       [3, 4]])
>>> a.transpose()
array([[1, 3],
       [2, 4]])
>>> a.transpose((1, 0))
array([[1, 3],
       [2, 4]])
>>> a.transpose(1, 0)
array([[1, 3],
       [2, 4]])
 
In [116]:
 
 
 
 
 
arr
arr.swapaxes(1, 2)
 
 
Out[116]:
array([[[ 0,  4],
        [ 1,  5],
        [ 2,  6],
        [ 3,  7]],

       [[ 8, 12],
        [ 9, 13],
        [10, 14],
        [11, 15]]])
 
 
 
 
 
 
Docstring:
a.swapaxes(axis1, axis2)
Return a view of the array with `axis1` and `axis2` interchanged.
Refer to `numpy.swapaxes` for full documentation.
See Also
--------
numpy.swapaxes : equivalent function
Type:      builtin_function_or_method
 
 

Universal Functions: Fast Element-Wise Array Functions

In [129]:
 
 
 
 
 
arr = np.arange(10)
arr
 
 
Out[129]:
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
In [130]:
 
 
 
 
 
np.sqrt(arr)
 
 
Out[130]:
array([0.    , 1.    , 1.4142, 1.7321, 2.    , 2.2361, 2.4495, 2.6458,
       2.8284, 3.    ])
In [131]:
 
 
 
 
 
np.exp(arr)
 
 
Out[131]:
array([   1.    ,    2.7183,    7.3891,   20.0855,   54.5982,  148.4132,
        403.4288, 1096.6332, 2980.958 , 8103.0839])
In [132]:
 
 
 
 
 
x = np.random.randn(8)
y = np.random.randn(8)
x
 
 
Out[132]:
array([ 0.0513, -1.1577,  0.8167,  0.4336,  1.0107,  1.8249, -0.9975,
        0.8506])
In [133]:
 
 
 
 
 
y
 
 
Out[133]:
array([-0.1316,  0.9124,  0.1882,  2.1695, -0.1149,  2.0037,  0.0296,
        0.7953])
In [134]:
 
 
 
 
 
np.maximum(x, y)
 
 
Out[134]:
array([0.0513, 0.9124, 0.8167, 2.1695, 1.0107, 2.0037, 0.0296, 0.8506])
In [143]:
 
 
 
 
 
arr = np.random.randn(7) * 5
remainder, whole_part = np.modf(arr)
arr
 
 
Out[143]:
array([ -0.967 ,   3.3458,  -8.2449, -11.264 ,  -5.8342,   1.768 ,
         3.5106])
 
 
 
 
 
 
Call signature:  np.modf(*args, **kwargs)
Type:            ufunc
String form:     <ufunc 'modf'>
File:            c:\users\qq123\anaconda3\lib\site-packages\numpy\__init__.py
Docstring:      
modf(x[, out1, out2], / [, out=(None, None)], *, where=True, casting='same_kind', order='K', dtype=None, subok=True[, signature, extobj])
Return the fractional and integral parts of an array, element-wise.
The fractional and integral parts are negative if the given number is
negative.
Parameters
----------
x : array_like
    Input array.
out : ndarray, None, or tuple of ndarray and None, optional
    A location into which the result is stored. If provided, it must have
    a shape that the inputs broadcast to. If not provided or `None`,
    a freshly-allocated array is returned. A tuple (possible only as a
    keyword argument) must have length equal to the number of outputs.
where : array_like, optional
    Values of True indicate to calculate the ufunc at that position, values
    of False indicate to leave the value in the output alone.
**kwargs
    For other keyword-only arguments, see the
    :ref:`ufunc docs <ufuncs.kwargs>`.
Returns
-------
y1 : ndarray
    Fractional part of `x`.
y2 : ndarray
    Integral part of `x`.
Notes
-----
For integer input the return values are floats.
See Also
--------
divmod : ``divmod(x, 1)`` is equivalent to ``modf`` with the return values
         switched, except it always has a positive remainder.
Examples
--------
>>> np.modf([0, 3.5])
(array([ 0. ,  0.5]), array([ 0.,  3.]))
>>> np.modf(-0.5)
(-0.5, -0)
Class docstring:
Functions that operate element by element on whole arrays.
To see the documentation for a specific ufunc, use `info`.  For
example, ``np.info(np.sin)``.  Because ufuncs are written in C
(for speed) and linked into Python with NumPy's ufunc facility,
Python's help() function finds this page whenever help() is called
on a ufunc.
A detailed explanation of ufuncs can be found in the docs for :ref:`ufuncs`.
Calling ufuncs:
===============
op(*x[, out], where=True, **kwargs)
Apply `op` to the arguments `*x` elementwise, broadcasting the arguments.
The broadcasting rules are:
* Dimensions of length 1 may be prepended to either array.
* Arrays may be repeated along dimensions of length 1.
Parameters
----------
*x : array_like
    Input arrays.
out : ndarray, None, or tuple of ndarray and None, optional
    Alternate array object(s) in which to put the result; if provided, it
    must have a shape that the inputs broadcast to. A tuple of arrays
    (possible only as a keyword argument) must have length equal to the
    number of outputs; use `None` for outputs to be allocated by the ufunc.
where : array_like, optional
    Values of True indicate to calculate the ufunc at that position, values
    of False indicate to leave the value in the output alone.
**kwargs
    For other keyword-only arguments, see the :ref:`ufunc docs <ufuncs.kwargs>`.
Returns
-------
r : ndarray or tuple of ndarray
    `r` will have the shape that the arrays in `x` broadcast to; if `out` is
    provided, `r` will be equal to `out`. If the function has more than one
    output, then the result will be a tuple of arrays.
 
In [141]:
 
 
 
 
 
remainder
 
 
Out[141]:
array([-0.6437,  0.285 ,  0.6498, -0.8464, -0.1124, -0.0141,  0.1024])
In [142]:
 
 
 
 
 
whole_part
 
 
Out[142]:
array([-2.,  2.,  4., -7., -5., -2.,  1.])
In [144]:
 
 
 
 
 
arr
np.sqrt(arr)
np.sqrt(arr, arr)
arr
 
 
 
c:\users\qq123\anaconda3\lib\site-packages\ipykernel_launcher.py:2: RuntimeWarning: invalid value encountered in sqrt
  
c:\users\qq123\anaconda3\lib\site-packages\ipykernel_launcher.py:3: RuntimeWarning: invalid value encountered in sqrt
  This is separate from the ipykernel package so we can avoid doing imports until
Out[144]:
array([   nan, 1.8292,    nan,    nan,    nan, 1.3297, 1.8736])
In [ ]:
 
 
 
 
 
 
 
 

Array-Oriented Programming with Arrays

In [145]:
 
 
 
 
 
points = np.arange(-5, 5, 0.01) # 1000 equally spaced points
 
 
In [158]:
 
 
 
 
 
xs, ys = np.meshgrid(points, points)
ys
 
 
Out[158]:
array([[-5.  , -5.  , -5.  , ..., -5.  , -5.  , -5.  ],
       [-4.99, -4.99, -4.99, ..., -4.99, -4.99, -4.99],
       [-4.98, -4.98, -4.98, ..., -4.98, -4.98, -4.98],
       ...,
       [ 4.97,  4.97,  4.97, ...,  4.97,  4.97,  4.97],
       [ 4.98,  4.98,  4.98, ...,  4.98,  4.98,  4.98],
       [ 4.99,  4.99,  4.99, ...,  4.99,  4.99,  4.99]])
In [159]:
 
 
 
 
 
xs
 
 
Out[159]:
array([[-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
       [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
       [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
       ...,
       [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
       [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
       [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99]])
In [155]:
 
 
 
 
 
ys.shape
 
 
Out[155]:
(10, 1000)
In [164]:
 
 
 
 
 
z = np.sqrt(xs ** 2 + ys ** 2)
z
 
 
Out[164]:
array([[7.0711, 7.064 , 7.0569, ..., 7.0499, 7.0569, 7.064 ],
       [7.064 , 7.0569, 7.0499, ..., 7.0428, 7.0499, 7.0569],
       [7.0569, 7.0499, 7.0428, ..., 7.0357, 7.0428, 7.0499],
       ...,
       [7.0499, 7.0428, 7.0357, ..., 7.0286, 7.0357, 7.0428],
       [7.0569, 7.0499, 7.0428, ..., 7.0357, 7.0428, 7.0499],
       [7.064 , 7.0569, 7.0499, ..., 7.0428, 7.0499, 7.0569]])
In [166]:
 
 
 
 
 
import matplotlib.pyplot as plt
plt.imshow(z, cmap=plt.cm.gray); plt.colorbar()
plt.title("Image plot of $\sqrt{x^2 + y^2}$ for a grid of values")
 
 
Out[166]:
Text(0.5,1,'Image plot of $\\sqrt{x^2 + y^2}$ for a grid of values')
 
In [167]:
 
 
 
 
 
%matplotlib inline
 
 
In [168]:
 
 
 
 
 
plt.draw()
 
 
 
<matplotlib.figure.Figure at 0x2a860d00d68>
In [ ]:
 
 
 
 
 
 
 
In [153]:
 
 
 
 
 
plt.close('all')
 
 
 

Expressing Conditional Logic as Array Operations

In [170]:
 
 
 
 
 
xarr = np.array([1.1, 1.2, 1.3, 1.4, 1.5])
yarr = np.array([2.1, 2.2, 2.3, 2.4, 2.5])
cond = np.array([True, False, True, True, False])
 
 
In [171]:
 
 
 
 
 
result = [(x if c else y)
          for x, y, c in zip(xarr, yarr, cond)]
result
 
 
Out[171]:
[1.1, 2.2, 1.3, 1.4, 2.5]
In [172]:
 
 
 
 
 
result = np.where(cond, xarr, yarr)
result
 
 
Out[172]:
array([1.1, 2.2, 1.3, 1.4, 2.5])
In [173]:
 
 
 
 
 
arr = np.random.randn(4, 4)
arr
arr > 0
np.where(arr > 0, 2, -2)
 
 
Out[173]:
array([[-2, -2,  2, -2],
       [-2, -2, -2, -2],
       [ 2,  2,  2,  2],
       [ 2, -2, -2,  2]])
In [174]:
 
 
 
 
 
np.where(arr > 0, 2, arr) # set only positive values to 2
 
 
Out[174]:
array([[-0.2746, -0.1391,  2.    , -0.6065],
       [-0.4171, -0.017 , -1.2241, -1.8008],
       [ 2.    ,  2.    ,  2.    ,  2.    ],
       [ 2.    , -0.4406, -0.3014,  2.    ]])
 

Mathematical and Statistical Methods

In [177]:
 
 
 
 
 
arr = np.random.randn(5, 4)
print(arr)
print(arr.mean())
print(np.mean(arr))
print(arr.sum())
 
 
 
[[-0.4162 -0.1167 -1.8448  2.0687]
 [-0.777   1.4402 -0.1106  1.2274]
 [ 1.9208  0.7464  2.2247 -0.6794]
 [ 0.7274 -0.8687 -1.2139 -0.4706]
 [-0.9192 -0.8388  0.4352 -0.5578]]
0.09884425107663111
0.09884425107663111
1.9768850215326221
In [178]:
 
 
 
 
 
print(arr.mean(axis=1))
print(arr.sum(axis=0))
 
 
 
[-0.0773  0.445   1.0531 -0.4565 -0.4702]
[ 0.5357  0.3623 -0.5094  1.5883]
In [180]:
 
 
 
 
 
arr = np.array([0, 1, 2, 3, 4, 5, 6, 7])
arr.cumsum()#累计和
 
 
Out[180]:
array([ 0,  1,  3,  6, 10, 15, 21, 28], dtype=int32)
In [ ]:
 
 
 
 
 
Docstring:
a.cumsum(axis=None, dtype=None, out=None)
Return the cumulative sum of the elements along the given axis.
Refer to `numpy.cumsum` for full documentation.
See Also
--------
numpy.cumsum : equivalent function
Type:      builtin_function_or_method
 
 
In [184]:
 
 
 
 
 
arr = np.array([[0, 1, 2], [3, 10, 5], [6, 7, 8]])
print(arr)
print(arr.cumsum(axis=0))
print(arr.cumprod(axis=0))
 
 
 
[[ 0  1  2]
 [ 3 10  5]
 [ 6  7  8]]
[[ 0  1  2]
 [ 3 11  7]
 [ 9 18 15]]
[[ 0  1  2]
 [ 0 10 10]
 [ 0 70 80]]
 
 
 
 
 
 
Docstring:
a.cumprod(axis=None, dtype=None, out=None)
Return the cumulative product of the elements along the given axis.
Refer to `numpy.cumprod` for full documentation.
See Also
--------
numpy.cumprod : equivalent function
Type:      builtin_function_or_method
 
 

Methods for Boolean Arrays

In [185]:
 
 
 
 
 
arr = np.random.randn(100)
(arr > 0).sum() # Number of positive values
 
 
Out[185]:
41
In [187]:
 
 
 
 
 
bools = np.array([False, False, True, False])
bools.any()
 
 
Out[187]:
True
In [188]:
 
 
 
 
 
bools.all()
 
 
Out[188]:
False
In [ ]:
 
 
 
 
 
 
 
 

Sorting

In [189]:
 
 
 
 
 
arr = np.random.randn(6)
arr
 
 
Out[189]:
array([-0.1154, -0.3507,  0.0447, -0.8978,  0.8909, -1.1512])
In [190]:
 
 
 
 
 
arr.sort()
arr
 
 
Out[190]:
array([-1.1512, -0.8978, -0.3507, -0.1154,  0.0447,  0.8909])
In [191]:
 
 
 
 
 
arr = np.random.randn(5, 3)
arr
 
 
Out[191]:
array([[-2.6123,  1.1413, -0.8671],
       [ 0.3836, -0.437 ,  0.3475],
       [-1.2302,  0.5711,  0.0601],
       [-0.2255,  1.3497,  1.3503],
       [-0.3867,  0.866 ,  1.7472]])
In [197]:
 
 
 
 
 
arr.sort(-1)
arr
 
 
Out[197]:
array([[-2.6123, -0.8671,  0.3836],
       [-1.2302,  0.0601,  0.5711],
       [-0.437 ,  0.3475,  1.1413],
       [-0.3867,  0.866 ,  1.3503],
       [-0.2255,  1.3497,  1.7472]])
In [ ]:
 
 
 
 
 
large_arr = np.random.randn(1000)
large_arr.sort()
large_arr[int(0.05 * len(large_arr))] # 5% quantile
 
 
 

Unique and Other Set Logic

In [198]:
 
 
 
 
 
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
np.unique(names)
 
 
Out[198]:
array(['Bob', 'Joe', 'Will'], dtype='<U4')
In [199]:
 
 
 
 
 
ints = np.array([3, 3, 3, 2, 2, 1, 1, 4, 4])
np.unique(ints)
 
 
Out[199]:
array([1, 2, 3, 4])
In [200]:
 
 
 
 
 
sorted(set(names))
 
 
Out[200]:
['Bob', 'Joe', 'Will']
In [202]:
 
 
 
 
 
values = np.array([6, 0, 0, 3, 2, 5, 6])
np.in1d(values, [2, 3, 6])
 
 
Out[202]:
array([ True, False, False,  True,  True, False,  True])
 
 
 
 
 
 
Signature: np.in1d(ar1, ar2, assume_unique=False, invert=False)
Docstring:
Test whether each element of a 1-D array is also present in a second array.
Returns a boolean array the same length as `ar1` that is True
where an element of `ar1` is in `ar2` and False otherwise.
We recommend using :func:`isin` instead of `in1d` for new code.
Parameters
----------
ar1 : (M,) array_like
    Input array.
ar2 : array_like
    The values against which to test each value of `ar1`.
assume_unique : bool, optional
    If True, the input arrays are both assumed to be unique, which
    can speed up the calculation.  Default is False.
invert : bool, optional
    If True, the values in the returned array are inverted (that is,
    False where an element of `ar1` is in `ar2` and True otherwise).
    Default is False. ``np.in1d(a, b, invert=True)`` is equivalent
    to (but is faster than) ``np.invert(in1d(a, b))``.
    .. versionadded:: 1.8.0
Returns
-------
in1d : (M,) ndarray, bool
    The values `ar1[in1d]` are in `ar2`.
See Also
--------
isin                  : Version of this function that preserves the
                        shape of ar1.
numpy.lib.arraysetops : Module with a number of other functions for
                        performing set operations on arrays.
Notes
-----
`in1d` can be considered as an element-wise function version of the
python keyword `in`, for 1-D sequences. ``in1d(a, b)`` is roughly
equivalent to ``np.array([item in b for item in a])``.
However, this idea fails if `ar2` is a set, or similar (non-sequence)
container:  As ``ar2`` is converted to an array, in those cases
``asarray(ar2)`` is an object array rather than the expected array of
contained values.
.. versionadded:: 1.4.0
Examples
--------
>>> test = np.array([0, 1, 2, 5, 0])
>>> states = [0, 2]
>>> mask = np.in1d(test, states)
>>> mask
array([ True, False,  True, False,  True])
>>> test[mask]
array([0, 2, 0])
>>> mask = np.in1d(test, states, invert=True)
>>> mask
array([False,  True, False,  True, False])
>>> test[mask]
array([1, 5])
 
 

File Input and Output with Arrays

In [206]:
 
 
 
 
 
arr = np.arange(10)
np.save('some_array', arr)
 
 
 
 
 
 
 
 
Signature: np.savez(file, *args, **kwds)
Docstring:
Save several arrays into a single file in uncompressed ``.npz`` format.
If arguments are passed in with no keywords, the corresponding variable
names, in the ``.npz`` file, are 'arr_0', 'arr_1', etc. If keyword
arguments are given, the corresponding variable names, in the ``.npz``
file will match the keyword names.
Parameters
----------
file : str or file
    Either the file name (string) or an open file (file-like object)
    where the data will be saved. If file is a string or a Path, the
    ``.npz`` extension will be appended to the file name if it is not
    already there.
args : Arguments, optional
    Arrays to save to the file. Since it is not possible for Python to
    know the names of the arrays outside `savez`, the arrays will be saved
    with names "arr_0", "arr_1", and so on. These arguments can be any
    expression.
kwds : Keyword arguments, optional
    Arrays to save to the file. Arrays will be saved in the file with the
    keyword names.
Returns
-------
None
See Also
--------
save : Save a single array to a binary file in NumPy format.
savetxt : Save an array to a file as plain text.
savez_compressed : Save several arrays into a compressed ``.npz`` archive
Notes
-----
The ``.npz`` file format is a zipped archive of files named after the
variables they contain.  The archive is not compressed and each file
in the archive contains one variable in ``.npy`` format. For a
description of the ``.npy`` format, see `numpy.lib.format` or the
NumPy Enhancement Proposal
http://docs.scipy.org/doc/numpy/neps/npy-format.html
When opening the saved ``.npz`` file with `load` a `NpzFile` object is
returned. This is a dictionary-like object which can be queried for
its list of arrays (with the ``.files`` attribute), and for the arrays
themselves.
Examples
--------
>>> from tempfile import TemporaryFile
>>> outfile = TemporaryFile()
>>> x = np.arange(10)
>>> y = np.sin(x)
Using `savez` with \*args, the arrays are saved with default names.
>>> np.savez(outfile, x, y)
>>> outfile.seek(0) # Only needed here to simulate closing & reopening file
>>> npzfile = np.load(outfile)
>>> npzfile.files
['arr_1', 'arr_0']
>>> npzfile['arr_0']
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
Using `savez` with \**kwds, the arrays are saved with the keyword names.
>>> outfile = TemporaryFile()
>>> np.savez(outfile, x=x, y=y)
>>> outfile.seek(0)
>>> npzfile = np.load(outfile)
>>> npzfile.files
['y', 'x']
>>> npzfile['x']
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
 
In [204]:
 
 
 
 
 
np.load('some_array.npy')
 
 
Out[204]:
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
In [207]:
 
 
 
 
 
np.savez('array_archive.npz', a=arr, b=arr)
 
 
In [209]:
 
 
 
 
 
arch = np.load('array_archive.npz')
arch['b']
 
 
Out[209]:
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
In [210]:
 
 
 
 
 
np.savez_compressed('arrays_compressed.npz', a=arr, b=arr)
 
 
In [211]:
 
 
 
 
 
!rm some_array.npy
!rm array_archive.npz
!rm arrays_compressed.npz
 
 
 
'rm' 不是内部或外部命令,也不是可运行的程序
或批处理文件。
'rm' 不是内部或外部命令,也不是可运行的程序
或批处理文件。
'rm' 不是内部或外部命令,也不是可运行的程序
或批处理文件。
 

Linear Algebra

In [213]:
 
 
 
 
 
x = np.array([[1., 2., 3.], [4., 5., 6.]])
y = np.array([[6., 23.], [-1, 7], [8, 9]])
print(x)
print(y)
x.dot(y)
 
 
 
[[1. 2. 3.]
 [4. 5. 6.]]
[[ 6. 23.]
 [-1.  7.]
 [ 8.  9.]]
Out[213]:
array([[ 28.,  64.],
       [ 67., 181.]])
In [ ]:
 
 
 
 
 
np.dot(x, y)
 
 
In [ ]:
 
 
 
 
 
np.dot(x, np.ones(3))
 
 
In [ ]:
 
 
 
 
 
x @ np.ones(3)
 
 
In [215]:
 
 
 
 
 
from numpy.linalg import inv, qr
X = np.random.randn(5, 5)
mat = X.T.dot(X)
mat
 
 
Out[215]:
array([[ 5.401 , -0.4241, -4.9658, -0.0532,  0.6168],
       [-0.4241,  5.8224, -4.9233,  1.1634, -1.0204],
       [-4.9658, -4.9233,  9.5217, -0.8643,  0.232 ],
       [-0.0532,  1.1634, -0.8643,  3.331 ,  1.4328],
       [ 0.6168, -1.0204,  0.232 ,  1.4328,  2.4548]])
In [217]:
 
 
 
 
 
inv(mat)
 
 
Out[217]:
array([[19.2452, 18.7119, 19.4533, -2.2097,  2.3938],
       [18.7119, 18.63  , 19.112 , -2.377 ,  2.6237],
       [19.4533, 19.112 , 19.863 , -2.2931,  2.5178],
       [-2.2097, -2.377 , -2.2931,  0.7919, -0.6783],
       [ 2.3938,  2.6237,  2.5178, -0.6783,  1.0545]])
 
 
 
 
 
 
Signature: inv(a)
Docstring:
Compute the (multiplicative) inverse of a matrix.
Given a square matrix `a`, return the matrix `ainv` satisfying
``dot(a, ainv) = dot(ainv, a) = eye(a.shape[0])``.
Parameters
----------
a : (..., M, M) array_like
    Matrix to be inverted.
Returns
-------
ainv : (..., M, M) ndarray or matrix
    (Multiplicative) inverse of the matrix `a`.
Raises
------
LinAlgError
    If `a` is not square or inversion fails.
Notes
-----
.. versionadded:: 1.8.0
Broadcasting rules apply, see the `numpy.linalg` documentation for
details.
Examples
--------
>>> from numpy.linalg import inv
>>> a = np.array([[1., 2.], [3., 4.]])
>>> ainv = inv(a)
>>> np.allclose(np.dot(a, ainv), np.eye(2))
True
>>> np.allclose(np.dot(ainv, a), np.eye(2))
True
If a is a matrix object, then the return value is a matrix as well:
>>> ainv = inv(np.matrix(a))
>>> ainv
matrix([[-2. ,  1. ],
        [ 1.5, -0.5]])
Inverses of several matrices can be computed at once:
>>> a = np.array([[[1., 2.], [3., 4.]], [[1, 3], [3, 5]]])
>>> inv(a)
array([[[-2. ,  1. ],
        [ 1.5, -0.5]],
       [[-5. ,  2. ],
        [ 3. , -1. ]]])
 
In [219]:
 
 
 
 
 
mat.dot(inv(mat))
 
 
Out[219]:
array([[ 1., -0., -0.,  0.,  0.],
       [ 0.,  1., -0., -0., -0.],
       [ 0.,  0.,  1., -0., -0.],
       [ 0.,  0.,  0.,  1.,  0.],
       [-0.,  0., -0.,  0.,  1.]])
In [221]:
 
 
 
 
 
q, r = qr(mat)
r
 
 
Out[221]:
array([[-7.3752, -2.5757,  9.7389, -0.5719, -0.5492],
       [ 0.    , -7.3539,  6.7439, -1.6307,  1.3051],
       [ 0.    ,  0.    , -0.1837, -1.7792, -0.2576],
       [ 0.    ,  0.    ,  0.    , -3.0167, -2.7267],
       [ 0.    ,  0.    ,  0.    ,  0.    ,  0.2207]])
In [222]:
 
 
 
 
 
q
 
 
Out[222]:
array([[-0.7323,  0.3142, -0.258 ,  0.1388,  0.5284],
       [ 0.0575, -0.8119,  0.0432,  0.0168,  0.5791],
       [ 0.6733,  0.4336, -0.2168,  0.0523,  0.5557],
       [ 0.0072, -0.1607, -0.813 , -0.5392, -0.1497],
       [-0.0836,  0.1681,  0.4729, -0.8288,  0.2327]])
 

Pseudorandom Number Generation

In [223]:
 
 
 
 
 
samples = np.random.normal(size=(4, 4))
samples
 
 
Out[223]:
array([[-1.1581,  1.1046,  0.6342,  1.2597],
       [ 0.9649, -0.4344, -0.8796, -0.6948],
       [ 1.2264,  0.4573,  0.1157,  1.014 ],
       [-1.135 , -0.2634,  1.3064, -1.6108]])
In [224]:
 
 
 
 
 
from random import normalvariate
N = 1000000
%timeit samples = [normalvariate(0, 1) for _ in range(N)]
%timeit np.random.normal(size=N)
 
 
 
1.13 s ± 10.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
40.5 ms ± 361 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
In [225]:
 
 
 
 
 
np.random.seed(1234)
 
 
In [226]:
 
 
 
 
 
rng = np.random.RandomState(1234)
rng.randn(10)
 
 
Out[226]:
array([ 0.4714, -1.191 ,  1.4327, -0.3127, -0.7206,  0.8872,  0.8596,
       -0.6365,  0.0157, -2.2427])
 

Example: Random Walks

In [227]:
 
 
 
 
 
import random
position = 0
walk = [position]
steps = 1000
for i in range(steps):
    step = 1 if random.randint(0, 1) else -1
    position += step
    walk.append(position)
 
 
In [228]:
 
 
 
 
 
plt.figure()
 
 
Out[228]:
<matplotlib.figure.Figure at 0x2a860d5fbe0>
 
<matplotlib.figure.Figure at 0x2a860d5fbe0>
In [229]:
 
 
 
 
 
plt.plot(walk[:100])
 
 
Out[229]:
[<matplotlib.lines.Line2D at 0x2a869c06ac8>]
 
In [230]:
 
 
 
 
 
np.random.seed(12345)
 
 
In [231]:
 
 
 
 
 
nsteps = 1000
draws = np.random.randint(0, 2, size=nsteps)
steps = np.where(draws > 0, 1, -1)
walk = steps.cumsum()
 
 
In [232]:
 
 
 
 
 
walk.min()
walk.max()
 
 
Out[232]:
31
In [233]:
 
 
 
 
 
(np.abs(walk) >= 10).argmax()
 
 
Out[233]:
37
 

Simulating Many Random Walks at Once

In [234]:
 
 
 
 
 
nwalks = 5000
nsteps = 1000
draws = np.random.randint(0, 2, size=(nwalks, nsteps)) # 0 or 1
steps = np.where(draws > 0, 1, -1)
walks = steps.cumsum(1)
walks
 
 
Out[234]:
array([[  1,   0,   1, ...,   8,   7,   8],
       [  1,   0,  -1, ...,  34,  33,  32],
       [  1,   0,  -1, ...,   4,   5,   4],
       ...,
       [  1,   2,   1, ...,  24,  25,  26],
       [  1,   2,   3, ...,  14,  13,  14],
       [ -1,  -2,  -3, ..., -24, -23, -22]], dtype=int32)
In [235]:
 
 
 
 
 
walks.max()
walks.min()
 
 
Out[235]:
-133
In [236]:
 
 
 
 
 
hits30 = (np.abs(walks) >= 30).any(1)
hits30
hits30.sum() # Number that hit 30 or -30
 
 
Out[236]:
3410
In [237]:
 
 
 
 
 
crossing_times = (np.abs(walks[hits30]) >= 30).argmax(1)
crossing_times.mean()
 
 
Out[237]:
498.8897360703812
In [238]:
 
 
 
 
 
steps = np.random.normal(loc=0, scale=0.25,
                         size=(nwalks, nsteps))
posted @ 2019-04-10 21:20  对抗拖延症的二傻子  阅读(189)  评论(0编辑  收藏  举报