numpy

数据类型

import numpy as np
import random

# Three ways to build a 1-D ndarray: from a list, from a range object,
# and with np.arange.
t1 = np.array([1, 2, 3])
print(t1, type(t1))     # [1 2 3] <class 'numpy.ndarray'>

t2 = np.array(range(5))
print(t2, type(t2))     # [0 1 2 3 4] <class 'numpy.ndarray'>

t3 = np.arange(5)       # np.arange takes the same arguments as range
print(t3, type(t3))     # [0 1 2 3 4] <class 'numpy.ndarray'>
print(t3.dtype)         # int64 in the recorded run (default integer dtype is platform-dependent)

# Choosing the element type at construction time with dtype=...
t4 = np.array(range(1, 4), dtype=float)
print(t4, t4.dtype)     # [1. 2. 3.] float64

# Boolean arrays: non-zero becomes True, zero becomes False.
t5 = np.array([1, 0, 0, 1, 0], dtype=bool)
print(t5, t5.dtype)     # [ True False False  True False] bool

# astype returns a converted copy with the requested dtype.
t6 = t5.astype('int8')
print(t6, t6.dtype)     # [1 0 0 1 0] int8

# Floating-point data: ten random values drawn from [0, 1).
_samples = [random.random() for _ in range(10)]
t7 = np.array(_samples)
print(t7, t7.dtype)     # e.g. [0.84702583 0.916558 0.44216734 ...] float64

# Round every element to two decimal places; the dtype stays float64.
t8 = t7.round(2)
print(t8, t8.dtype)     # e.g. [0.85 0.92 0.44 ...] float64

# Plain-Python counterpart: %-formatting yields a str, not a float.
# '%' starts the placeholder, '.2' keeps two decimals, 'f' means float.
t9 = '%.2f' % random.random()
print(t9, type(t9))     # e.g. 0.65 <class 'str'>

 

In [1]: import numpy as np

In [2]: t1 = np.arange(12)

In [3]: t1
Out[3]: array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [4]: t1.shape    # 查看数组形状
Out[4]: (12,)       # array() 参数只有一个列表时, 一维数组,t1.shape元组只有一个值,12为列表元素数量

In [5]: t2 = np.array([[1,2,3],[4,5,6]])

In [6]: t2
Out[6]:
array([[1, 2, 3],
       [4, 5, 6]])

In [7]: t2.shape
Out[7]: (2, 3)      # array() 参数为二层嵌套列表时, 二维数组,t2.shape元组有二个值,(2, 3), 2为行数,3为列数

In [8]: t3 = np.array([[[1,2,3],[4,5,6]],[[7,8,9],[10,11,12]]])

In [9]: t3
Out[9]:
array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [10]: t3.shape   # t3为三维数组,t3.shape元组有三个值
Out[10]: (2, 2, 3)

In [11]: t4 = np.arange(12)

In [12]: t4
Out[12]: array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [13]: t4.reshape((3,4))      # 修改数组形状,变成3行4列
Out[13]:
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [14]: t4.reshape((3,5))
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-14-7634752f2bda> in <module>
----> 1 t4.reshape((3,5))

ValueError: cannot reshape array of size 12 into shape (3,5)


In [15]: t5 = np.arange(24).reshape((2,3,4))    # (2,3,4):2块,3行,4列

In [16]: t5
Out[16]:
array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]],

       [[12, 13, 14, 15],
        [16, 17, 18, 19],
        [20, 21, 22, 23]]])


In [17]: t5.reshape((4,6))
Out[17]:
array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])

In [18]: t5
Out[18]:
array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]],

       [[12, 13, 14, 15],
        [16, 17, 18, 19],
        [20, 21, 22, 23]]])

In [19]: t5 = t5.reshape((4,6))

In [20]: t5
Out[20]:
array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])

In [21]: t5.reshape((24,))
Out[21]:
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23])

In [22]: t5.reshape((24,1))
Out[22]:
array([[ 0],
       [ 1],
       [ 2],
       [ 3],
       [ 4],
       [ 5],
       [ 6],
       [ 7],
       [ 8],
       [ 9],
       [10],
       [11],
       [12],
       [13],
       [14],
       [15],
       [16],
       [17],
       [18],
       [19],
       [20],
       [21],
       [22],
       [23]])

In [23]: t5.reshape((1,24))
Out[23]:
array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
        16, 17, 18, 19, 20, 21, 22, 23]])

In [24]: t6 = t5.reshape((t5.shape[0]*t5.shape[1],))    # t5.shape[0]:t5行数,t5.shape[1]:t5列数

In [25]: t6
Out[25]:
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23])

In [26]: t5
Out[26]:
array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])

In [27]: t5.flatten()       # t5.flatten()数据转为一维数组
Out[27]:
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23])

In [28]: t5
Out[28]:
array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])

In [29]: t5+2
Out[29]:
array([[ 2,  3,  4,  5,  6,  7],
       [ 8,  9, 10, 11, 12, 13],
       [14, 15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24, 25]])

In [30]: t5*2
Out[30]:
array([[ 0,  2,  4,  6,  8, 10],
       [12, 14, 16, 18, 20, 22],
       [24, 26, 28, 30, 32, 34],
       [36, 38, 40, 42, 44, 46]])

In [31]: t5/2
Out[31]:
array([[ 0. ,  0.5,  1. ,  1.5,  2. ,  2.5],
       [ 3. ,  3.5,  4. ,  4.5,  5. ,  5.5],
       [ 6. ,  6.5,  7. ,  7.5,  8. ,  8.5],
       [ 9. ,  9.5, 10. , 10.5, 11. , 11.5]])

In [32]: t5/0
/Users/xyp/opt/anaconda3/envs/DataAnalysis/bin/ipython:1: RuntimeWarning: divide by zero encountered in true_divide
  #!/Users/xyp/opt/anaconda3/envs/DataAnalysis/bin/python
/Users/xyp/opt/anaconda3/envs/DataAnalysis/bin/ipython:1: RuntimeWarning: invalid value encountered in true_divide
  #!/Users/xyp/opt/anaconda3/envs/DataAnalysis/bin/python
Out[32]:            # 0/0为nan(非数字), 非零数/0为inf(+∞)
array([[nan, inf, inf, inf, inf, inf],
       [inf, inf, inf, inf, inf, inf],
       [inf, inf, inf, inf, inf, inf],
       [inf, inf, inf, inf, inf, inf]])

In [33]: t5
Out[33]:
array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])

In [34]: t6 = np.arange(100,124).reshape((4,6))

In [35]: t6
Out[35]:
array([[100, 101, 102, 103, 104, 105],
       [106, 107, 108, 109, 110, 111],
       [112, 113, 114, 115, 116, 117],
       [118, 119, 120, 121, 122, 123]])

In [36]: t5+t6
Out[36]:
array([[100, 102, 104, 106, 108, 110],
       [112, 114, 116, 118, 120, 122],
       [124, 126, 128, 130, 132, 134],
       [136, 138, 140, 142, 144, 146]])

In [37]: t5*t6
Out[37]:
array([[   0,  101,  204,  309,  416,  525],
       [ 636,  749,  864,  981, 1100, 1221],
       [1344, 1469, 1596, 1725, 1856, 1989],
       [2124, 2261, 2400, 2541, 2684, 2829]])

In [38]: t6/t5
/Users/xyp/opt/anaconda3/envs/DataAnalysis/bin/ipython:1: RuntimeWarning: divide by zero encountered in true_divide
  #!/Users/xyp/opt/anaconda3/envs/DataAnalysis/bin/python
Out[38]:
array([[         inf, 101.        ,  51.        ,  34.33333333,
         26.        ,  21.        ],
       [ 17.66666667,  15.28571429,  13.5       ,  12.11111111,
         11.        ,  10.09090909],
       [  9.33333333,   8.69230769,   8.14285714,   7.66666667,
          7.25      ,   6.88235294],
       [  6.55555556,   6.26315789,   6.        ,   5.76190476,
          5.54545455,   5.34782609]])

In [39]: t7 = np.arange(0,6)

In [40]: t7
Out[40]: array([0, 1, 2, 3, 4, 5])

In [41]: t5
Out[41]:
array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])

In [42]: t5-t7
Out[42]:
array([[ 0,  0,  0,  0,  0,  0],
       [ 6,  6,  6,  6,  6,  6],
       [12, 12, 12, 12, 12, 12],
       [18, 18, 18, 18, 18, 18]])

In [43]: t8 = np.arange(4).reshape((4,1))

In [44]: t8
Out[44]:
array([[0],
       [1],
       [2],
       [3]])

In [45]: t5-t8
Out[45]:
array([[ 0,  1,  2,  3,  4,  5],
       [ 5,  6,  7,  8,  9, 10],
       [10, 11, 12, 13, 14, 15],
       [15, 16, 17, 18, 19, 20]])

In [46]: t9 = np.arange(10)

In [47]: t9
Out[47]: array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [48]: t5
Out[48]:
array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])

In [49]: t5-t9
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-49-dcf9c8dd3788> in <module>
----> 1 t5-t9

ValueError: operands could not be broadcast together with shapes (4,6) (10,)

  

文件读取

import numpy as np

us_file_path = './youtube_video_data/US_video_data_numbers.csv'

# np.loadtxt options used in these notes:
#   delimiter - field separator; ',' for CSV files
#   dtype     - element type of the result; float when omitted, which is why
#               large values are then shown in scientific notation
#   unpack    - False (default) keeps the file layout, one array row per
#               file line; True returns the transpose, one row per column
#   skiprows  - number of leading lines to skip
#   usecols   - index, or tuple of indices, of the columns to read
# np.int was only an alias of the builtin int; it was deprecated in
# NumPy 1.20 and removed in 1.24, so the builtin is passed directly.
t1 = np.loadtxt(us_file_path, delimiter=',', dtype=int)
print(t1, '\n', '*'*100)
t2 = np.loadtxt(us_file_path, delimiter=',', dtype=int, unpack=True)
print(t2)

[[4394029  320053    5931   46245]
 [7860119  185853   26679       0]
 [5845909  576597   39774  170708]
 ...
 [ 142463    4231     148     279]
 [2162240   41032    1384    4737]
 [ 515000   34727     195    4722]] 
 ****************************************************************************************************
[[4394029 7860119 5845909 ...  142463 2162240  515000]
 [ 320053  185853  576597 ...    4231   41032   34727]
 [   5931   26679   39774 ...     148    1384     195]
 [  46245       0  170708 ...     279    4737    4722]]

 

转置三种方法和简单运算

# 转置三种方法

In [2]: import numpy as np

In [3]: t2 = np.arange(24).reshape((4,6))

In [4]: t2
Out[4]:
array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])

In [6]: t2.transpose()          # 转置
Out[6]:
array([[ 0,  6, 12, 18],
       [ 1,  7, 13, 19],
       [ 2,  8, 14, 20],
       [ 3,  9, 15, 21],
       [ 4, 10, 16, 22],
       [ 5, 11, 17, 23]])

In [7]: t2.T                    # 转置
Out[7]:
array([[ 0,  6, 12, 18],
       [ 1,  7, 13, 19],
       [ 2,  8, 14, 20],
       [ 3,  9, 15, 21],
       [ 4, 10, 16, 22],
       [ 5, 11, 17, 23]])

In [8]: t2.swapaxes(1,0)        # 交换轴
Out[8]:
array([[ 0,  6, 12, 18],
       [ 1,  7, 13, 19],
       [ 2,  8, 14, 20],
       [ 3,  9, 15, 21],
       [ 4, 10, 16, 22],
       [ 5, 11, 17, 23]])

In [9]: t2
Out[9]:
array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])

In [10]: t2<10
Out[10]:
array([[ True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True, False, False],
       [False, False, False, False, False, False],
       [False, False, False, False, False, False]])

In [11]: t2[t2<10]=3

In [12]: t2
Out[12]:
array([[ 3,  3,  3,  3,  3,  3],
       [ 3,  3,  3,  3, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])

In [13]: t2[t2>20]
Out[13]: array([21, 22, 23])

In [16]: t2
Out[16]:
array([[ 3,  3,  3,  3,  3,  3],
       [ 3,  3,  3,  3, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])

In [17]: np.where(t2<=3,100,300)            # numpy三元运算符,t2 = 100 if t2<=3 else 300
Out[17]:
array([[100, 100, 100, 100, 100, 100],
       [100, 100, 100, 100, 300, 300],
       [300, 300, 300, 300, 300, 300],
       [300, 300, 300, 300, 300, 300]])

In [19]: t = np.arange(20)

In [20]: t
Out[20]:
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19])

In [21]: t.clip(10,18)                      # clip(10,18),小于10的替换成10,大于18的替换成18
Out[21]:
array([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 18])

In [22]: t[2]=np.nan
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-22-1aa5d7dd59fe> in <module>
----> 1 t[2]=np.nan

ValueError: cannot convert float NaN to integer

In [23]: t=t.astype(float)

In [24]: t[2]=np.nan

In [25]: t
Out[25]:
array([ 0.,  1., nan,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12.,
       13., 14., 15., 16., 17., 18., 19.]) 

  

索引和切片

import numpy as np

us_file_path = './youtube_video_data/US_video_data_numbers.csv'
uk_file_path = './youtube_video_data/GB_video_data_numbers.csv'

# np.loadtxt options used in these notes:
#   delimiter - field separator; ',' for CSV files
#   dtype     - element type of the result; float when omitted, which is why
#               large values are then shown in scientific notation
#   unpack    - False (default) keeps the file layout, one array row per
#               file line; True returns the transpose, one row per column
#   skiprows  - number of leading lines to skip
#   usecols   - index, or tuple of indices, of the columns to read
# np.int was only an alias of the builtin int; it was deprecated in
# NumPy 1.20 and removed in 1.24, so the builtin is passed directly.
t1 = np.loadtxt(us_file_path, delimiter=',', dtype=int)
print(t1, '\n', '*'*100)
t2 = np.loadtxt(us_file_path, delimiter=',', dtype=int, unpack=True)
# print(t2)
# A single row
# print(t1[2])
# Several consecutive rows
# print(t1[2:])
# Specific rows: 2, 8 and 10 are row indices
print(t1[[2, 8, 10]], '\n', '*'*100)
# Rows and columns together: the index before the comma selects rows, the
# one after it selects columns, e.g. [1, :], [2, :], [[2, 3, 10], :],
# [2, [0, 2]]
# print(t1[1, :])
# print(t1[2, :])
# print(t1[[2, 3, 10], :])
# print(t1[2, [0, 2]])
print(t1[2:5, 1:4], '\n', '*'*100)
# Two index lists are paired element by element: [[0, 2, 3], [0, 1, 3]] does
# NOT select rows 0,2,3 crossed with columns 0,1,3; it picks the three
# elements at coordinates (0,0), (2,1) and (3,3).
print(t1[[0, 2, 3], [0, 1, 3]])


[[4394029  320053    5931   46245]
 [7860119  185853   26679       0]
 [5845909  576597   39774  170708]
 ...
 [ 142463    4231     148     279]
 [2162240   41032    1384    4737]
 [ 515000   34727     195    4722]] 
 ****************************************************************************************************
[[5845909  576597   39774  170708]
 [1338533   69687     678    5643]
 [ 859289   34485     726    1914]] 
 ****************************************************************************************************
[[576597  39774 170708]
 [ 24975   4542  12829]
 [ 96666    568   6666]] 
 ****************************************************************************************************
[4394029  576597   12829] 

  

数组的拼接

In [31]: t1
Out[31]:
array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [32]: t2
Out[32]:
array([[ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [33]: np.vstack((t1,t2))     # 竖直拼接
Out[33]:
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [34]: np.hstack((t1,t2))     # 水平拼接
Out[34]:
array([[ 0,  1,  2,  3,  8,  9, 10, 11],
       [ 4,  5,  6,  7, 12, 13, 14, 15]])
 

数组的行列交换
In [39]: t = np.arange(12,24).reshape(3,4)

In [40]: t
Out[40]:
array([[12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23]])

In [41]: t[[1,2],:] = t[[2,1],:]    # 行交换

In [42]: t
Out[42]:
array([[12, 13, 14, 15],
       [20, 21, 22, 23],
       [16, 17, 18, 19]])

In [43]: t[:,[0,2]] = t[:,[2,0]]    # 列交换

In [44]: t
Out[44]:
array([[14, 13, 12, 15],
       [22, 21, 20, 23],
       [18, 17, 16, 19]])

  

numpy中nan和常用方法

In [39]: np.nan == np.nan
Out[39]: False

In [40]: np.nan != np.nan
Out[40]: True

In [49]: t
Out[49]:
array([[0., 1., 2., 3., 4.],
       [5., 6., 7., 8., 9.]])

In [50]: t[:,0] = 0

In [51]: t
Out[51]:
array([[0., 1., 2., 3., 4.],
       [0., 6., 7., 8., 9.]])

In [52]: np.count_nonzero(t)        # 统计t数组中非0元素的个数
Out[52]: 8

In [56]: t[:,0] = np.nan

In [57]: t
Out[57]:
array([[nan,  1.,  2.,  3.,  4.],
       [nan,  6.,  7.,  8.,  9.]])

In [58]: t != t             # 当t!=t时,数组中为nan时为True
Out[58]:
array([[ True, False, False, False, False],
       [ True, False, False, False, False]])

In [59]: np.count_nonzero(t!=t)     # 数组t中为nan的个数
Out[59]: 2

In [60]: np.isnan(t)                # 数组中为nan时为True
Out[60]:
array([[ True, False, False, False, False],
       [ True, False, False, False, False]])

In [61]: np.count_nonzero(np.isnan(t))      # 数组t中为nan的个数
Out[61]: 2

In [62]: np.sum(t)      # nan和任何值的计算都为nan
Out[62]: nan

In [63]: np.sum(t,axis=0)       # sum(t,axis=0) 列相加结果
Out[63]: array([nan,  7.,  9., 11., 13.])

In [64]: np.sum(t,axis=1)       # sum(t,axis=1) 行相加结果
Out[64]: array([nan, nan])

  

numpy中常用统计方法

In [75]: t
Out[75]:
array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [76]: t.sum(axis=0)
Out[76]: array([ 5,  7,  9, 11, 13])

In [77]: t.sum(axis=1)
Out[77]: array([10, 35])

In [78]: t.mean(axis=0)
Out[78]: array([2.5, 3.5, 4.5, 5.5, 6.5])

In [79]: t.mean(axis=1)     # 均值;当数组中有nan时结果为nan,常见做法是用剩余非nan元素的均值替换nan
Out[79]: array([2., 7.])

In [80]: np.median(t)       # 中值
Out[80]: 4.5

In [81]: np.median(t,axis=0)
Out[81]: array([2.5, 3.5, 4.5, 5.5, 6.5])

In [82]: np.median(t,axis=1)
Out[82]: array([2., 7.])

In [83]: t.max()
Out[83]: 9

In [84]: t.max(axis=0)
Out[84]: array([5, 6, 7, 8, 9])

In [85]: t.max(axis=1)
Out[85]: array([4, 9])

In [86]: t.min(axis=1)
Out[86]: array([0, 5])

In [87]: np.ptp(t,axis=0)       # 极差,最大值和最小值之差
Out[87]: array([5, 5, 5, 5, 5])

In [88]: np.ptp(t,axis=1)
Out[88]: array([4, 4])

In [92]: t
Out[92]:
array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [93]: t.std()            # 标准差
Out[93]: 2.8722813232690143

In [94]: t.std(axis=0)
Out[94]: array([2.5, 2.5, 2.5, 2.5, 2.5])

In [95]: t.std(axis=1)
Out[95]: array([1.41421356, 1.41421356])

   

import numpy as np

us_file_path = './youtube_video_data/US_video_data_numbers.csv'
uk_file_path = './youtube_video_data/GB_video_data_numbers.csv'

# Load each country's data as integers.
us_data = np.loadtxt(us_file_path, delimiter=',', dtype='int')
uk_data = np.loadtxt(uk_file_path, delimiter=',', dtype='int')

# Country label columns, one entry per data row: all 0 for US, all 1 for UK.
# The integer dtype is requested at construction, so no temporary float
# array is created and then converted.
zeros_data = np.zeros((us_data.shape[0], 1), dtype=int)
ones_data = np.ones((uk_data.shape[0], 1), dtype=int)

# Append the label column to the right of each country's data.
us_data = np.hstack((us_data, zeros_data))
uk_data = np.hstack((uk_data, ones_data))

# Stack both countries vertically; the last column tells them apart
# (0 = US, 1 = UK).
final_data = np.vstack((us_data, uk_data))
print(final_data)

# 把二个国家数据拼接一起研究分析数据
import numpy as np

us_file_path = './youtube_video_data/US_video_data_numbers.csv'
uk_file_path = './youtube_video_data/GB_video_data_numbers.csv'

# Load each country's data as integers.
us_data = np.loadtxt(us_file_path, delimiter=',', dtype='int')
uk_data = np.loadtxt(uk_file_path, delimiter=',', dtype='int')

# Country label columns, one entry per data row: all 0 for US, all 1 for UK.
# The integer dtype is requested at construction, so no temporary float
# array is created and then converted.
zeros_data = np.zeros((us_data.shape[0], 1), dtype=int)
ones_data = np.ones((uk_data.shape[0], 1), dtype=int)

# Append the label column to the right of each country's data.
us_data = np.hstack((us_data, zeros_data))
uk_data = np.hstack((uk_data, ones_data))

# Stack both countries vertically; the last column tells them apart
# (0 = US, 1 = UK).
final_data = np.vstack((us_data, uk_data))
print(final_data)
把二个国家数据拼接

 

# numpy更多方法
In [1]: import numpy as np

In [2]: np.ones((3,4))      # 创建全为1的数组
Out[2]:
array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])

In [3]: np.zeros((3,4))     # 创建全为0的数组
Out[3]:
array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [4]: np.eye(5)           # 创建对角线全为1的正方形数组
Out[4]:
array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [5]: t = np.eye(5)

In [6]: np.argmax(t,axis=0)         # 获取最大值位置
Out[6]: array([0, 1, 2, 3, 4])

In [7]: t[t==1] = -1

In [8]: t
Out[8]:
array([[-1.,  0.,  0.,  0.,  0.],
       [ 0., -1.,  0.,  0.,  0.],
       [ 0.,  0., -1.,  0.,  0.],
       [ 0.,  0.,  0., -1.,  0.],
       [ 0.,  0.,  0.,  0., -1.]])

In [9]: np.argmin(t,axis=1)         # 获取最小值位置
Out[9]: array([0, 1, 2, 3, 4])

  

numpy随机方法

In [12]: np.random.rand(2,3)        # 创建2行3列、在[0,1)上均匀分布的浮点型数组
Out[12]:
array([[0.06364283, 0.91082238, 0.78795567],
       [0.0627046 , 0.33476692, 0.5778516 ]])

In [13]: np.random.randn(2,3)       # 创建2行3列的标准正态分布随机数(平均数为0,标准差为1)浮点型数组
Out[13]:
array([[ 0.06391798,  0.19011529, -0.17431257],
       [-0.45543116, -0.02290774,  0.11979098]])

In [14]: np.random.randint(0,100,(2,3))     # 创建2行3列、最低为0最高为99的随机整数数组
Out[14]:
array([[13, 98, 78],
       [36, 59, 97]])

In [20]: np.random.uniform(0,5,(2,3))       # 创建2行3列、在[0,5)上均匀分布的随机浮点型小数
Out[20]:
array([[4.95142868, 1.39926247, 4.21451073],
       [2.25316875, 2.67873448, 2.84466319]])


# Fix the global random seed so every run draws the same numbers; the
# argument is the chosen seed value.
np.random.seed(0)
# 2x3 array of random integers with 0 <= value < 10.
t = np.random.randint(low=0, high=10, size=(2, 3))
print(t)

  

 

posted @ 2019-08-27 16:28  许二哈哈哈  阅读(175)  评论(0编辑  收藏  举报