numpy数组

1. numpy数组

1.1 创建数组

1.1.1 创建一维数组

import numpy as np

a = np.array( [1,2,3,4,5] )
print(a)
print(a.dtype)
print(a.shape)

[1 2 3 4 5]
int32
(5,)

a = np.arange(5)
print(a)

[0 1 2 3 4]

1.1.2 创建多维数组

m = np.array( [[1,2], [3,4]] )
print( m )
print(m.shape)
print(m.dtype)

[[1 2]
 [3 4]]
(2, 2)
int32

m = np.array([np.arange(2), np.arange(2)])
print(m)

[[0 1]
 [0 1]]

#创建特殊的数组: zeros, empty, ones, full, zeros_like, empty_like, ones_like, full_like

print( np.zeros(10) )          # 创建全0数组

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

print( np.zeros((3, 6)) )      # 创建多维全0数组

[[0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]]

print( np.empty((2, 3, 2)) )   # 创建空数组, 不初始化数据

[[[1.50671562e-312 0.00000000e+000]
  [2.26839301e+161 2.89847616e+131]
  [6.02182527e+151 9.30537465e+199]]

 [[2.20835466e-094 9.78750380e+199]
  [6.97843734e+252 8.76739361e+252]
  [2.15895723e+227 4.05173898e-317]]]

print( np.ones((3, 3)) )       # 创建全1数组

[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]

print( np.full((3,3), 250))    # 创建数组，全部填充为250

[[250 250 250]
 [250 250 250]
 [250 250 250]]

1.1.3 数组的属性

b = np.arange(24).reshape(2,12)
print( b.dtype )        # 元素的数据类型
print( b.ndim )         # 维数（轴的个数）
print( b.size )         # 元素总个数
print( b.itemsize )     # 每个元素占用的字节数
print( b.nbytes )       # 整个数组占用的字节数（size * itemsize）

b = np.array([ 1.+1.j,  3.+2.j])
print( b.real )         # 实部
print( b.imag )         # 虚部

[1. 3.]
[1. 2.]

1.1.4 数组的转换

b = np.array([1, 3])
print( b )

[1 3]

print( b.tolist() )         # 转成python列表

[1, 3]

print( b.tostring() )       # 转成字节串(bytes)，即数组的原始内存数据；tostring 已弃用，建议改用 tobytes()
print( np.frombuffer(b'\x01\x00\x00\x00\x03\x00\x00\x00', dtype=int) )   # 转成数组
print( np.frombuffer(b'\x01\x00\x00\x00\x03\x00\x00\x00', dtype=np.int8) )   # 转成数组, 按每个字节

b'\x01\x00\x00\x00\x03\x00\x00\x00'
[1 3]
[1 0 0 0 3 0 0 0]

c = b.astype(str)           # 转成字符串数组
print( c )                  
print( c.astype(int) )      # 转回int数组

['1' '3']
[1 3]

print( np.fromstring('1:2:3:4', sep=':', dtype=int) )       # 按分隔符将字符串解析成数组

[1 2 3 4]

1.2 选择数组元素

a = np.array([[1,2],[3,4]])
print(a)

[[1 2]
 [3 4]]

print(a[0,0])
print(a[0,1])
print(a[1,0])
print(a[1,1])

1.3 数据类型

1.3.1 基本数据类型

print(np.float64(42))

42.0

print(np.int8(42.0))

print(np.bool(42))

True

print(np.bool(0))

False

print( np.bool(42.0))

True

print(np.float(True))

1.0

print(np.float(False))

0.0

print( np.arange(7, dtype=np.uint16) )

[0 1 2 3 4 5 6]

# 产生TypeError异常 can't convert complex to float
import traceback

try:
    print(np.float(42.0 + 1.j))  
except TypeError:    
    print('Type Error')

Type Error

1.3.2 数据类型转换

arr = np.array([1, 2, 3, 4, 5])
print(arr.dtype, arr)
float_arr = arr.astype(np.float64)
print(float_arr.dtype, float_arr)

int32 [1 2 3 4 5]
float64 [1. 2. 3. 4. 5.]

arr = np.array([3.7, -1.2, -2.6, 0.5, 12.9, 10.1])
print(arr.dtype, arr)
int_arr = arr.astype(np.int32)
print(int_arr.dtype, int_arr )

float64 [ 3.7 -1.2 -2.6  0.5 12.9 10.1]
int32 [ 3 -1 -2  0 12 10]

1.3.3 字符串数组

str_arr = np.array(['好天','好天','学向','习上'])     # unicode字符串
print( str_arr )

['好天' '好天' '学向' '习上']

num_strings = np.array(['1.25', '-9.6', '42'], dtype=np.string_ )  # byte字符串
print( num_strings )

[b'1.25' b'-9.6' b'42']

print( num_strings.astype( float ) )  # 可以转换为float数组

[ 1.25 -9.6  42.  ]

1.3.4 数据类型对象

a = np.array([[1,2],[3,4]])
print( a.dtype.byteorder )         # 字节顺序 '=': 本地字节序；'|': 不适用；'<': 小端；'>': 大端
print( a.dtype.itemsize )          # 数据类型对象的元素大小

=
4

print(np.arange(7, dtype='f'))     # 也可以使用缩写数据类型
print(np.arange(7, dtype='D'))

[0. 1. 2. 3. 4. 5. 6.]
[0.+0.j 1.+0.j 2.+0.j 3.+0.j 4.+0.j 5.+0.j 6.+0.j]

print(np.dtype(float))
print(np.dtype('f8'))
print(np.dtype('d'))

float64
float64
float64

print(np.dtype('f'))

float32

t = np.dtype('float64')
print(t.char)
print(t.type)
print(t.str)

d
<class 'numpy.float64'>
<f8

1.4 创建自定义数据类型

custom_type = np.dtype([('title', np.str_, 40), ('num_in_stock', np.int32), ('price', np.float32)])
print(custom_type)
print(custom_type['title'])
products = np.array([('铅笔', 42, 2.5), ('小刀', 13, 5.6)], dtype=custom_type)
print(products[1])
print('库存中有', products[1]['num_in_stock'], '个', products[1]['title'] )

[('title', '<U40'), ('num_in_stock', '<i4'), ('price', '<f4')]
<U40
('小刀', 13, 5.6)
库存中有 13 个 小刀

1.5. 数组与标量的运算

arr = np.array([[1., 2., 3.], [4., 5., 6.]])
print(arr)

[[1. 2. 3.]
 [4. 5. 6.]]

print(arr * arr)

[[ 1.  4.  9.]
 [16. 25. 36.]]

print(arr - arr)

[[0. 0. 0.]
 [0. 0. 0.]]

print(1 / arr)

[[1.         0.5        0.33333333]
 [0.25       0.2        0.16666667]]

print(arr ** 0.5)

[[1.         1.41421356 1.73205081]
 [2.         2.23606798 2.44948974]]

1.6 数组的索引与切片

1.6.1 一维数组

a = np.arange(9)
print( a )

[0 1 2 3 4 5 6 7 8]

print( a[3:7] )    # 索引3到6（不含7），共4个

[3 4 5 6]

print( a[:7:2] )   # 索引0到6（不含7），步长为2

[0 2 4 6]

print( a[::-1] )   # 反转数组

[8 7 6 5 4 3 2 1 0]

s = slice(3,7,2)   
print( a[s] )

[3 5]

s = slice(None, None, -1)
print( a[s] )

[8 7 6 5 4 3 2 1 0]

1.6.2 多维数组

# 比方说我家的家属楼，这个楼有3个单元（或叫楼门），每个单元有4层，每层有3户（左中右门）
# 第1位是单元，第2位是楼层，第3位是房号
b = np.empty((3,4,3), dtype='i')
for u in range(3):
    for l in range(4):
        for d in range(3):
            b[u,l,d] = ( u + 1 ) * 100 + ( l + 1 ) * 10 + d + 1
print( b )

[[[111 112 113]
  [121 122 123]
  [131 132 133]
  [141 142 143]]

 [[211 212 213]
  [221 222 223]
  [231 232 233]
  [241 242 243]]

 [[311 312 313]
  [321 322 323]
  [331 332 333]
  [341 342 343]]]

print( b[0,0,0] )     # 取第1单元第1层第1户

print( b[:,0,0] )     # 取每个单元的第1层第1户

[111 211 311]

print( b[0, :, :] )   # 取第1个单元所有户
print( b[0] )         # 冒号可以省略
print( b[0, ...] )    # 也可以以三个点代替

[[111 112 113]
 [121 122 123]
 [131 132 133]
 [141 142 143]]
[[111 112 113]
 [121 122 123]
 [131 132 133]
 [141 142 143]]
[[111 112 113]
 [121 122 123]
 [131 132 133]
 [141 142 143]]

print( b[0,1] )      # 取第1个单元第2层所有户

[121 122 123]

print( b[0,1,::2] )   # 取第1个单元第2层，步长为2（每隔1户取1个），就是不取中门，取左右门

[121 123]

print( b[...,1] )     # 取每个单元每个楼层第2户人家，就是所有的中门人家

[[112 122 132 142]
 [212 222 232 242]
 [312 322 332 342]]

print( b[:,1] )      # 取所有单元的第2层的所有人家

[[121 122 123]
 [221 222 223]
 [321 322 323]]

print( b[0,:,1] )    # 取第1单元的所有楼层的第2户人家，就是第1单元的所有中门了

[112 122 132 142]

print( b[0,:,-1] )      # 取第1单元的所有楼层的最后那户人家，就是第1单元的所有右门了
print( b[0,::-1, -1] )  # 与上一样，只是逆序取

[113 123 133 143]
[143 133 123 113]

print( b[0,::2,-1] )    # 取第1个单元，每隔1层取1层（步长为2，就是1，3层），最后1户（就是右门）

[113 133]

print( b[::-1] )        # 所有，单元逆序取，就是第3，第2，第1单元所有户人家

[[[311 312 313]
  [321 322 323]
  [331 332 333]
  [341 342 343]]

 [[211 212 213]
  [221 222 223]
  [231 232 233]
  [241 242 243]]

 [[111 112 113]
  [121 122 123]
  [131 132 133]
  [141 142 143]]]

s = slice(None, None, -1)  # 所有数据逆序取
print( b[(s, s, s)] )

[[[343 342 341]
  [333 332 331]
  [323 322 321]
  [313 312 311]]

 [[243 242 241]
  [233 232 231]
  [223 222 221]
  [213 212 211]]

 [[143 142 141]
  [133 132 131]
  [123 122 121]
  [113 112 111]]]

1.6.3 布尔型索引

# 比方说有3台位于不同地点的服务器，每个服务器不定时采集一定的数据
# 数据如下，有7行，每一行都是下面相应的服务器采集的，如第1行，第4行是北京站服务器采集的数据
data = np.random.rand(7, 4) * 100
print( data )
servers = np.array(['北京站', '上海站', '广州站', '北京站', '广州站', '上海站', '上海站'])
print( servers  )

[[25.88951926 81.02127917 81.65911148 10.54582293]
 [19.88202433 71.59153418 17.59577045 17.9124011 ]
 [28.26739131 42.55828296  5.35820796 49.74906725]
 [70.79558748 38.60725419 84.20858241 80.57925252]
 [97.09357428 32.93812722 81.46985854 98.7732454 ]
 [56.46702002 59.03617618 99.32236983 40.70151618]
 [77.99103033 53.07227406 67.61787932 37.19828265]]
['北京站' '上海站' '广州站' '北京站' '广州站' '上海站' '上海站']

print( servers == '北京站' )
print( data[servers == '北京站'] )    # 取北京站采集的相关数据

[ True False False  True False False False]
[[25.88951926 81.02127917 81.65911148 10.54582293]
 [70.79558748 38.60725419 84.20858241 80.57925252]]

print( data[servers == '北京站', 2:] )  # 取北京站采集的数据中第3列及之后的各列

[[81.65911148 10.54582293]
 [84.20858241 80.57925252]]

print( data[servers == '北京站', 3] )   # 取北京站所有采集数据的第4列

[10.54582293 80.57925252]

print( data[(servers != '北京站')] )    # 取所有非北京站的数据
print( data[~(servers == '北京站')] )   # 同上，用波浪号~代表非

[[19.88202433 71.59153418 17.59577045 17.9124011 ]
 [28.26739131 42.55828296  5.35820796 49.74906725]
 [97.09357428 32.93812722 81.46985854 98.7732454 ]
 [56.46702002 59.03617618 99.32236983 40.70151618]
 [77.99103033 53.07227406 67.61787932 37.19828265]]
[[19.88202433 71.59153418 17.59577045 17.9124011 ]
 [28.26739131 42.55828296  5.35820796 49.74906725]
 [97.09357428 32.93812722 81.46985854 98.7732454 ]
 [56.46702002 59.03617618 99.32236983 40.70151618]
 [77.99103033 53.07227406 67.61787932 37.19828265]]

data[(servers == '上海站') | (servers == '广州站')]  #取所有上海站和广州站的数据

array([[19.88202433, 71.59153418, 17.59577045, 17.9124011 ],
       [28.26739131, 42.55828296,  5.35820796, 49.74906725],
       [97.09357428, 32.93812722, 81.46985854, 98.7732454 ],
       [56.46702002, 59.03617618, 99.32236983, 40.70151618],
       [77.99103033, 53.07227406, 67.61787932, 37.19828265]])

data[data < 50] = 0      # 将所有小于50的数据改为0
print( data )

[[ 0.         81.02127917 81.65911148  0.        ]
 [ 0.         71.59153418  0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [70.79558748  0.         84.20858241 80.57925252]
 [97.09357428  0.         81.46985854 98.7732454 ]
 [56.46702002 59.03617618 99.32236983  0.        ]
 [77.99103033 53.07227406 67.61787932  0.        ]]

data[servers != '北京站'] = 0   # 将所有非北京站的数据清零
print( data )

[[ 0.         81.02127917 81.65911148  0.        ]
 [ 0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [70.79558748  0.         84.20858241 80.57925252]
 [ 0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]]

1.6.4 其它索引

arr = np.arange(32).reshape((8, 4))
print( arr )

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]
 [16 17 18 19]
 [20 21 22 23]
 [24 25 26 27]
 [28 29 30 31]]

arr[[2, 5, 6]]   # 取2，5，6行

array([[ 8,  9, 10, 11],
       [20, 21, 22, 23],
       [24, 25, 26, 27]])

arr[[-3, -5, -7]]  # 取倒数第3，5，7 行

array([[20, 21, 22, 23],
       [12, 13, 14, 15],
       [ 4,  5,  6,  7]])

arr[[1, 5, 7, 2], [0, 3, 1, 2]]  # 取[1,0],[5,3],[7,1],[2,2]

array([ 4, 23, 29, 10])

arr[[1, 5, 7, 2]][:, [0, 3, 1, 2]]  # 取1，5，7，2行所有数据，按0，3，1，2顺序

array([[ 4,  7,  5,  6],
       [20, 23, 21, 22],
       [28, 31, 29, 30],
       [ 8, 11,  9, 10]])

1.7 数组操作

1.7.1 数组转置

arr = np.arange(15).reshape((3, 5))
print(arr)

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]]

print( arr.transpose() )   # 转置数组
print( arr.T )             # 同上

[[ 0  5 10]
 [ 1  6 11]
 [ 2  7 12]
 [ 3  8 13]
 [ 4  9 14]]
[[ 0  5 10]
 [ 1  6 11]
 [ 2  7 12]
 [ 3  8 13]
 [ 4  9 14]]

1.7.2 改变数组的维度

b = np.arange(24).reshape(2,3,4)
print(b)

[[[ 0  1  2  3]
  [ 4  5  6  7]
  [ 8  9 10 11]]

 [[12 13 14 15]
  [16 17 18 19]
  [20 21 22 23]]]

print( b.flat )            # 扁平迭代器(flatiter)，可把多维数组当作1维数组来遍历和索引
print( b.flat[10] )        
print( b.flat[2:7] )

<numpy.flatiter object at 0x05247488>
10
[2 3 4 5 6]

b_1 = b.flatten()    # 多维降为1维，返回的数组是一份copy, 不影响原数组
print( b_1 )      
b_1[0] = 100
print( b )

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]
[[[ 0  1  2  3]
  [ 4  5  6  7]
  [ 8  9 10 11]]

 [[12 13 14 15]
  [16 17 18 19]
  [20 21 22 23]]]

b_1 = b.ravel()    # 多维降为1维，尽可能返回原数组的视图（而非copy），此时修改会影响原数组
print( b_1 )
b_1[0] = 100
print( b )

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]
[[[100   1   2   3]
  [  4   5   6   7]
  [  8   9  10  11]]

 [[ 12  13  14  15]
  [ 16  17  18  19]
  [ 20  21  22  23]]]

b.shape = (6,4)
print( b )
b.resize((2,12))
print( b )

[[100   1   2   3]
 [  4   5   6   7]
 [  8   9  10  11]
 [ 12  13  14  15]
 [ 16  17  18  19]
 [ 20  21  22  23]]
[[100   1   2   3   4   5   6   7   8   9  10  11]
 [ 12  13  14  15  16  17  18  19  20  21  22  23]]

1.7.3 数组的组合

a = np.arange(9).reshape(3,3)
print( a )
b = 100 + a
print( b )

[[0 1 2]
 [3 4 5]
 [6 7 8]]
[[100 101 102]
 [103 104 105]
 [106 107 108]]

print( np.column_stack((a, b)) )         # 列组合 
print(np.hstack((a, b)))                 # 水平组合( horizontal stack ),效果同上
print(np.concatenate((a, b), axis=1) )   # 同上

[[  0   1   2 100 101 102]
 [  3   4   5 103 104 105]
 [  6   7   8 106 107 108]]
[[  0   1   2 100 101 102]
 [  3   4   5 103 104 105]
 [  6   7   8 106 107 108]]
[[  0   1   2 100 101 102]
 [  3   4   5 103 104 105]
 [  6   7   8 106 107 108]]

print( np.row_stack((a, b)) )            # 行组合
print( np.vstack((a, b)) )               # 垂直组合 ( vertical stack )，效果同上
print( np.concatenate((a, b), axis=0) )  # 同上

[[  0   1   2]
 [  3   4   5]
 [  6   7   8]
 [100 101 102]
 [103 104 105]
 [106 107 108]]
[[  0   1   2]
 [  3   4   5]
 [  6   7   8]
 [100 101 102]
 [103 104 105]
 [106 107 108]]
[[  0   1   2]
 [  3   4   5]
 [  6   7   8]
 [100 101 102]
 [103 104 105]
 [106 107 108]]

print( np.dstack((a, b)) )               # 深度组合( depth stack )

[[[  0 100]
  [  1 101]
  [  2 102]]

 [[  3 103]
  [  4 104]
  [  5 105]]

 [[  6 106]
  [  7 107]
  [  8 108]]]

1.7.4 数组的分割

a = np.arange(9).reshape(3, 3)
print( a )

[[0 1 2]
 [3 4 5]
 [6 7 8]]

print( np.vsplit(a, 3) )                # 垂直分割( vertical split )    
print( np.split(a, 3, axis=0) )         # 同上

[array([[0, 1, 2]]), array([[3, 4, 5]]), array([[6, 7, 8]])]
[array([[0, 1, 2]]), array([[3, 4, 5]]), array([[6, 7, 8]])]

print( np.hsplit(a, 3) )                # 水平分割( horizontal split )
print( np.split(a, 3, axis=1) )         # 同上

[array([[0],
       [3],
       [6]]), array([[1],
       [4],
       [7]]), array([[2],
       [5],
       [8]])]
[array([[0],
       [3],
       [6]]), array([[1],
       [4],
       [7]]), array([[2],
       [5],
       [8]])]

c = np.arange(27).reshape(3, 3, 3)
print( c )

[[[ 0  1  2]
  [ 3  4  5]
  [ 6  7  8]]

 [[ 9 10 11]
  [12 13 14]
  [15 16 17]]

 [[18 19 20]
  [21 22 23]
  [24 25 26]]]

print( np.dsplit(c, 3) )              # 深度分割 ( depth split )

[array([[[ 0],
        [ 3],
        [ 6]],

       [[ 9],
        [12],
        [15]],

       [[18],
        [21],
        [24]]]), array([[[ 1],
        [ 4],
        [ 7]],

       [[10],
        [13],
        [16]],

       [[19],
        [22],
        [25]]]), array([[[ 2],
        [ 5],
        [ 8]],

       [[11],
        [14],
        [17]],

       [[20],
        [23],
        [26]]])]

posted @ 2018-11-02 11:14 hanjackcyw 阅读(197) 评论(0) 编辑收藏举报

刷新页面返回顶部

hanjackcyw

numpy数组

1. numpy数组

1.1 创建数组

1.1.1 创建一维数组

1.1.2 创建多维数组

1.1.3 数组的属性

1.1.4 数组的转换

1.2 选择数组元素

1.3 数据类型

1.3.1 基本数据类型

1.3.2 数据类型转换

1.3.3 字符串数组

1.3.4 数据类型对象

1.4 创建自定义数据类型

1.5. 数组与标量的运算

1.6 数组的索引与切片

1.6.1 一维数组

1.6.2 多维数组

1.6.3 布尔型索引

1.6.4 其它索引

1.7 数组操作

1.7.1 数组转置

1.7.2 改变数组的维度

1.7.3 数组的组合

1.7.4 数组的分割

公告