4-1张量的结构操作——eat_tensorflow2_in_30_days

4-1张量的结构操作

张量的操作主要包括张量的结构操作和张量的数学运算

张量的结构操作诸如：张量创建，索引切片，维度变换，合并分割

张量数学运算主要有：标量运算，向量运算，矩阵运算。另外，这里会介绍张量运算的广播机制

创建张量

张量创建的许多方法和numpy中创建array的方法很像

import tensorflow as tf
import numpy as np

# Build a constant float32 tensor from a Python list and print its values.
a = tf.constant([1,2,3], dtype = tf.float32)
tf.print(a)

"""
[1 2 3]
"""

# tf.range(start, limit=None, delta=1, name='range') returns an arithmetic
# sequence tensor whose values run from start up to, but not including, limit;
# delta is the step between consecutive values.
b = tf.range(1, 10, delta=2)
tf.print(b)  # tf.print shows only the values
print(b)  # Python's print additionally shows shape and dtype

"""
[1 3 5 7 9]
tf.Tensor([1 3 5 7 9], shape=(5,), dtype=int32)
"""

# tf.linspace(start, stop, num, name=None) returns a tensor of num evenly
# spaced values over the interval from start to stop (both endpoints included).
# NOTE(review): 2*3.14 presumably approximates 2*pi; np.pi would be exact.
c = tf.linspace(0.0, 2*3.14, 100)
tf.print(c, len(c))

"""
[0 0.0634343475 0.126868695 ... 6.15313148 6.21656609 6.28] 100
"""

# Create a 3x3 tensor filled with zeros.
d = tf.zeros([3, 3])
tf.print(d)

"""
[[0 0 0]
 [0 0 0]
 [0 0 0]]
"""

a = tf.ones([3, 3])  # create a 3x3 all-ones matrix
b = tf.ones_like(a)  # all-ones tensor with the same shape and dtype as the given tensor
c = tf.zeros_like(a, dtype=tf.float32)  # all-zeros tensor with the given tensor's shape and an explicitly chosen dtype
tf.print(a)
tf.print(b)
tf.print(c)

"""
[[1 1 1]
 [1 1 1]
 [1 1 1]]
[[1 1 1]
 [1 1 1]
 [1 1 1]]
[[0 0 0]
 [0 0 0]
 [0 0 0]]
"""

# tf.fill(dims, value, name=None) creates a tensor of shape dims in which
# every element is initialised to value.
b =  tf.fill([3, 2], 5)
tf.print(b)

"""
[[5 5]
 [5 5]
 [5 5]]
"""

# Uniform distribution: elements are drawn uniformly between minval and maxval.
# The global random seed is set first; the sample outputs shown in this
# document depend on it and on the order of the random calls that follow.
tf.random.set_seed(42)
a = tf.random.uniform([5], minval=0, maxval=10)
tf.print(a)

"""
[6.6456213 4.41006756 3.52882504 4.64482546 0.336604118]
"""

# Normal distribution with the given mean and standard deviation.
b = tf.random.normal([3, 3], mean=0.0, stddev=1.0)
tf.print(b)

"""
[[0.0842245817 -0.86090374 0.378123045]
 [-0.00519627379 -0.494531959 0.61781919]
 [-0.330820471 -0.00138408062 -0.423734099]]
"""

# Truncated normal distribution: values more than 2 standard deviations
# from the mean are discarded and re-drawn.
c = tf.random.truncated_normal((5, 5), mean=0.0, stddev=1.0, dtype=tf.float32)
tf.print(c)

"""
[[-0.559097409 -0.534721375 -1.57259309 0.805505633 -0.00413081655]
 [0.172509521 0.292330414 0.447463274 -0.205126196 1.06962538]
 [-1.62758112 0.45803377 -0.150247112 -1.12316787 1.42139065]
 [-0.589732349 0.156277 0.379462808 -1.34369051 0.0508602373]
 [0.481147289 -1.28629398 0.173078209 -0.173267394 1.01196456]]
"""

# Special matrices
I = tf.eye(3, 3)  # identity matrix
tf.print(I)
tf.print("")  # blank line between the two printed matrices

t = tf.linalg.diag([1, 2, 3])  # diagonal matrix with the given diagonal entries
tf.print(t)

"""
[[1 0 0]
 [0 1 0]
 [0 0 1]]

[[1 0 0]
 [0 2 0]
 [0 0 3]]
"""

索引切片

张量的索引切片方式和numpy几乎是一样的。切片时支持缺省参数和省略号
对于tf.Variable,可以通过索引和切片对部分元素进行修改
对于提取张量的连续子区域，也可以使用tf.slice.
此外，对于不规则的切片提取,可以使用tf.gather, tf.gather_nd, tf.boolean_mask
tf.boolean_mask功能最为强大，它可以实现tf.gather, tf.gather_nd的功能，并且tf.boolean_mask还可以实现布尔索引
如果要通过修改张量的某些元素得到新的张量，可以使用tf.where, tf.scatter_nd

# Reset the global seed, then draw a 5x5 int32 tensor that the indexing and
# slicing examples below operate on.
tf.random.set_seed(42)
t = tf.random.uniform([5, 5], minval=0, maxval=10, dtype=tf.int32)
tf.print(t)

"""
[[7 9 1 6 2]
 [4 3 3 1 1]
 [2 0 1 1 0]
 [8 9 2 9 9]
 [1 2 7 4 9]]
"""

# Row 0 and the last row (negative indices count from the end).
tf.print("第0行 t[0]:", t[0])
tf.print("倒数第一行 t[-1]:", t[-1])

# Row 1, column 3 (zero-based); t[1, 3] and the chained t[1][3] pick the same element.
tf.print("第一行第三列 t[1, 3]:", t[1, 3])
tf.print("第一行第三列 t[1][3]:", t[1][3])

"""
第0行 t[0]: [7 9 1 6 2]
倒数第一行 t[-1]: [1 2 7 4 9]
第一行第三列 t[1, 3]: 1
第一行第三列 t[1][3]: 1
"""

# Rows 1 through 3 (the slice end index 4 is exclusive)
tf.print("第一行至第三行 \n")
tf.print("t[1:4, :]", "\n", t[1:4, :])

# tf.slice(input, begin_vector, size_vector)
tf.print("\n tf.slice切片法")
tf.print(tf.slice(t, [1, 0], [3, 5]))  # start at row 1, column 0; take 3 rows and 5 columns

"""
第一行至第三行 

t[1:4, :] 
 [[4 3 3 1 1]
 [2 0 1 1 0]
 [8 9 2 9 9]]

 tf.slice切片法
[[4 3 3 1 1]
 [2 0 1 1 0]
 [8 9 2 9 9]]
"""

# Row 1 through the last row; every second column starting at column 0 (step 2)
tf.print(t[1:, ::2])

"""
[[4 3 1]
 [2 1 0]
 [8 2 9]
 [1 7 9]]
"""

# Row 1 through the second-to-last row; every second column from column 0,
# stopping before the last column (step 2)
tf.print(t[1:-1, :-1:2])

"""
[[4 3]
 [2 1]
 [8 2]]
"""

# For a tf.Variable, indexing and slicing can also be used to modify part of its elements.
x = tf.Variable([[1, 2], [3, 4]], dtype=tf.float32)
x[1, :].assign(tf.constant([0.0, 0.0]))  # overwrite row 1 in place
tf.print(x)

"""
[[1 2]
 [0 0]]
"""

# Random 3x3x3 int32 tensor used for the ellipsis-indexing example below.
a = tf.random.uniform([3, 3, 3], minval=0, maxval=10, dtype=tf.int32)
tf.print(a)

"""
[[[8 3 9]
  [4 2 3]
  [4 2 6]]

 [[4 1 3]
  [6 0 9]
  [9 0 1]]

 [[4 7 0]
  [8 1 6]
  [2 4 9]]]
"""

# An ellipsis stands for as many full slices (":") as needed
tf.print(a[..., 1])  # keep all of axis 0 and axis 1, take index 1 on axis 2 (axis 0: the three matrices, axis 1: rows, axis 2: columns)

"""
[[3 2 2]
 [1 0 0]
 [7 1 4]]
"""

以上切片方式相对规则，对于不规则的切片提取,可以使用tf.gather, tf.gather_nd, tf.boolean_mask。

tf.gather: 类似于数组的索引，可以把向量中某些索引值提取出来，得到新的向量，适用于要提取的索引为不连续的情况。根据indices从params的指定轴axis索引元素(类似于仅能在指定轴进行一维索引).

tf.gather_nd: 根据多维索引indices从params中收集元素或切片。indices的最后一维是指向params前N个维度的坐标，其中N = indices.shape[-1]；输出的形状为indices.shape[:-1] + params.shape[N:]。

tf.greater 判断函数。它逐元素比较张量x和张量y，二者的尺寸需要相同或可以广播（例如下文的c<0就是张量与标量比较）；输出的tf.greater(x, y)是一个与广播后形状相同的布尔张量。如果x的某个元素比y中对应位置的元素大，则tf.greater(x, y)对应位置返回True，否则返回False。与此类似的函数还有tf.greater_equal。

考虑班级成绩册的例子，有4个班级，每个班级10个学生，每个学生7门科目成绩。可以用一个4×10×7的张量来表示。

# Score book tensor laid out as (class, student, subject) = (4, 10, 7).
scores = tf.random.uniform((4, 10, 7), minval=0, maxval=100, dtype=tf.int32)
tf.print(scores)

"""
[[[4 92 77 ... 23 7 84]
  [19 12 27 ... 74 62 94]
  [27 9 87 ... 33 14 17]
  ...
  [67 92 3 ... 36 62 6]
  [69 86 88 ... 78 60 89]
  [80 38 72 ... 16 84 99]]

 [[65 54 78 ... 0 68 9]
  [2 51 28 ... 63 78 87]
  [19 75 20 ... 39 72 21]
  ...
  [23 37 23 ... 92 31 25]
  [98 25 92 ... 86 27 57]
  [95 44 33 ... 56 5 62]]

 [[2 38 23 ... 74 90 94]
  [7 80 46 ... 53 14 96]
  [97 49 2 ... 36 32 33]
  ...
  [88 73 99 ... 51 36 71]
  [45 47 91 ... 64 16 31]
  [97 50 40 ... 62 91 90]]

 [[4 24 12 ... 45 65 60]
  [53 89 56 ... 14 92 11]
  [10 11 64 ... 58 72 38]
  ...
  [89 12 25 ... 58 21 23]
  [93 99 32 ... 97 62 61]
  [92 43 98 ... 43 50 75]]]
"""

# Select all scores of students 0, 5 and 9 in every class (gather along the student axis)
p = tf.gather(scores, [0, 5, 9], axis=1)
tf.print(p)

"""
[[[4 92 77 ... 23 7 84]
  [30 78 10 ... 61 18 38]
  [80 38 72 ... 16 84 99]]

 [[65 54 78 ... 0 68 9]
  [82 23 21 ... 90 6 54]
  [95 44 33 ... 56 5 62]]

 [[2 38 23 ... 74 90 94]
  [2 95 58 ... 28 86 55]
  [97 50 40 ... 62 91 90]]

 [[4 24 12 ... 45 65 60]
  [82 74 67 ... 84 72 90]
  [92 43 98 ... 43 50 75]]]
"""

# For students 0, 5 and 9 in every class, select the scores of subjects 1, 3 and 6
# (zero-based): gather along the student axis first, then along the subject axis.
q = tf.gather(tf.gather(scores, (0, 5, 9), axis=1), [1, 3, 6], axis=2)
tf.print(q)

"""
[[[92 24 84]
  [78 67 38]
  [38 99 99]]

 [[54 61 9]
  [23 3 54]
  [44 64 62]]

 [[38 45 94]
  [95 1 55]
  [50 10 90]]

 [[24 60 60]
  [74 13 90]
  [43 70 75]]]
"""

# Select all scores of student 0 in class 0, student 4 in class 2 and student 6 in class 3
# indices has one entry per sample; each entry is the (class, student) coordinate of that sample
s = tf.gather_nd(scores, indices=[(0, 0), (2, 4), (3, 6)])
tf.print(s)

"""
[[4 92 77 ... 23 7 84]
 [80 66 37 ... 27 35 99]
 [35 69 41 ... 91 3 97]]
"""

# Select all scores of students 0, 5 and 9 in every class, this time with a
# boolean mask over the student axis (True at positions 0, 5 and 9)
p = tf.boolean_mask(scores, [True, False, False, False, False, True, False, False, False, True], axis=1)
tf.print(p)

"""
[[[4 92 77 ... 23 7 84]
  [30 78 10 ... 61 18 38]
  [80 38 72 ... 16 84 99]]

 [[65 54 78 ... 0 68 9]
  [82 23 21 ... 90 6 54]
  [95 44 33 ... 56 5 62]]

 [[2 38 23 ... 74 90 94]
  [2 95 58 ... 28 86 55]
  [97 50 40 ... 62 91 90]]

 [[4 24 12 ... 45 65 60]
  [82 74 67 ... 84 72 90]
  [92 43 98 ... 43 50 75]]]
"""

# Select all scores of student 0 in class 0, student 4 in class 2 and student 6 in class 3
# The mask has one row per class and one column per student; True marks a selected student.
s = tf.boolean_mask(
    scores,          
    [[True,False,False,False,False,False,False,False,False,False],
     [False,False,False,False,False,False,False,False,False,False],
     [False,False,False,False,True,False,False,False,False,False],
     [False,False,False,False,False,False,True,False,False,False]])
tf.print(s)

"""
[[4 92 77 ... 23 7 84]
 [80 66 37 ... 27 35 99]
 [35 69 41 ... 91 3 97]]
"""

# tf.boolean_mask can be used to implement boolean indexing
# Find the elements of the matrix that are less than 0
c = tf.constant([[-1, 1, -1], [2, 2, -2], [3, -3, 3]], dtype=tf.float32)
tf.print(c, "\n")
tf.print(tf.boolean_mask(c, c<0), "\n")
tf.print(c[c<0])  # boolean indexing, syntactic sugar for boolean_mask

"""
[[-1 1 -1]
 [2 2 -2]
 [3 -3 3]] 

[-1 -1 -2 -3] 

[-1 -1 -2 -3]
"""

以上这些方法仅能提取张量的部分元素值，但不能更改张量的部分元素值得到新的张量。

如果要通过修改张量的部分元素值得到新的张量，可以使用tf.where和tf.scatter_nd。

tf.where可以理解为if的张量版本，此外它还可以用于找到满足条件的所有元素的位置坐标。

tf.scatter_nd的作用和tf.gather_nd有些相反，tf.gather_nd用于收集张量的给定位置的元素，tf.scatter_nd可以将某些值插入到一个给定shape的全0的张量的指定位置处。

# When tf.where is given only the condition, it returns the coordinates of
# every position that satisfies it
indices = tf.where(c<0)
tf.print(indices)

"""
[[0 0]
 [0 2]
 [1 2]
 [2 1]]
"""

# Replace the elements at positions [0, 0] and [2, 1] with 0 to get a new tensor.
# scatter_nd builds a tensor of c's shape that is zero everywhere except for
# those two values of c; subtracting it from c cancels exactly those two entries.
d = c - tf.scatter_nd([[0, 0], [2, 1]], [c[0, 0], c[2, 1]], c.shape)
tf.print(d)

"""
[[0 1 -1]
 [2 2 -2]
 [3 0 3]]
"""

# scatter_nd is roughly the opposite of gather_nd:
# it places the given values at the given positions of an all-zero tensor of the given shape
indices = tf.where(c<0)
tf.scatter_nd(indices, tf.gather_nd(c, indices), c.shape)  # keeps the negative entries of c, zeros elsewhere

"""
<tf.Tensor: shape=(3, 3), dtype=float32, numpy=
array([[-1.,  0., -1.],
       [ 0.,  0., -2.],
       [ 0., -3.,  0.]], dtype=float32)>
"""

维度变换

维度变换相关函数主要有 tf.reshape, tf.squeeze, tf.expand_dims, tf.transpose.
- tf.reshape 可以改变张量的形状。
- tf.squeeze 可以减少维度。
- tf.expand_dims 可以增加维度。
- tf.transpose 可以交换维度

# Random int32 tensor of shape (1, 3, 3, 2) used by the reshape/squeeze examples below.
a = tf.random.uniform(shape=[1, 3, 3, 2], minval=0, maxval=255, dtype=tf.int32)
tf.print(a.shape)
tf.print(a)

"""
TensorShape([1, 3, 3, 2])
[[[[29 120]
   [14 54]
   [132 110]]

  [[27 202]
   [12 3]
   [90 161]]

  [[57 129]
   [191 227]
   [71 128]]]]
"""

# Reshape to shape (3, 6); the 18 elements keep their order
b = tf.reshape(a, [3, 6])
tf.print(b.shape)
tf.print(b)

"""
TensorShape([3, 6])
[[29 120 14 54 132 110]
 [27 202 12 3 90 161]
 [57 129 191 227 71 128]]
"""

# Reshape back to a tensor of shape [1, 3, 3, 2]
c = tf.reshape(b, [1, 3, 3, 2])
tf.print(c)

"""
[[[[29 120]
   [14 54]
   [132 110]]

  [[27 202]
   [12 3]
   [90 161]]

  [[57 129]
   [191 227]
   [71 128]]]]
"""

如果张量在某个维度上只有一个元素，利用tf.squeeze可以消除这个维度。和tf.reshape相似，它本质上不会改变张量元素的存储顺序。

张量的各个元素在内存中是线性存储的，其一般规律是，同一层级中的相邻元素的物理地址也相邻。

# Remove the length-1 axis: shape (1, 3, 3, 2) becomes (3, 3, 2).
s = tf.squeeze(a)
tf.print(s.shape)
tf.print(s)

"""
TensorShape([3, 3, 2])
[[[29 120]
  [14 54]
  [132 110]]

 [[27 202]
  [12 3]
  [90 161]]

 [[57 129]
  [191 227]
  [71 128]]]
"""

d = tf.expand_dims(s, axis=0)  # insert a length-1 axis at position 0
tf.print(d)
tf.print(d.shape)

"""
[[[[29 120]
   [14 54]
   [132 110]]

  [[27 202]
   [12 3]
   [90 161]]

  [[57 129]
   [191 227]
   [71 128]]]]
TensorShape([1, 3, 3, 2])
"""

d = tf.expand_dims(s, axis=2)  # insert a length-1 axis at position 2
tf.print(d)
tf.print(d.shape)

"""
[[[[29 120]]

  [[14 54]]

  [[132 110]]]


 [[[27 202]]

  [[12 3]]

  [[90 161]]]


 [[[57 129]]

  [[191 227]]

  [[71 128]]]]
TensorShape([3, 3, 1, 2])
"""

tf.transpose可以交换张量的维度，与tf.reshape不同，它会改变张量元素的存储顺序。 tf.transpose常用于图片存储格式的变换上。

# Batch, Height, Width, Channel
# NOTE(review): 100*600*600*4 int32 elements is about 576 MB, and the
# transpose below produces a second tensor of the same size; a smaller shape
# would demonstrate the same axis permutation.
a = tf.random.uniform(shape=[100,600,600,4], minval=0, maxval=255, dtype=tf.int32)
tf.print(a.shape)

# Convert to Channel, Height, Width, Batch (swap axes 0 and 3)
s = tf.transpose(a, perm=[3, 1, 2, 0])
tf.print(s.shape)

"""
TensorShape([100, 600, 600, 4])
TensorShape([4, 600, 600, 100])
"""

合并分割

和numpy类似，可以用：
- tf.concat和tf.stack方法对多个张量进行合并；
- tf.split方法把一个张量分割成多个张量。
- tf.concat和tf.stack有略微的区别：
  - tf.concat是连接，不会增加维度；
  - tf.stack是堆叠，会增加维度。

# Three 2x2 tensors used by the concat/stack examples.
a = tf.constant([[1.0, 2.0], [3.0, 4.0]])
b = tf.constant([[5.0, 6.0], [7.0, 8.0]])
c = tf.constant([[9.0, 10.0], [11.0, 12.0]])

# Concatenate along axis 0: rows are appended, no new axis is added.
tf.print(tf.concat([a, b, c], axis=0))

"""
[[1 2]
 [3 4]
 [5 6]
 [7 8]
 [9 10]
 [11 12]]
"""

# Concatenate along axis 1: columns are appended side by side.
tf.concat([a, b, c], axis=1)

"""
tf.concat([a, b, c], axis=1)

<tf.Tensor: shape=(2, 6), dtype=float32, numpy=
array([[ 1.,  2.,  5.,  6.,  9., 10.],
       [ 3.,  4.,  7.,  8., 11., 12.]], dtype=float32)>
"""

# Compare the result shapes of concatenating along axis 0 versus axis 1.
tf.print((tf.concat([a,b,c], axis=0)).shape)
tf.print((tf.concat([a,b,c], axis=1)).shape)

"""
TensorShape([6, 2])
TensorShape([2, 6])
"""

# Stack the three 2x2 tensors; with no axis argument the new axis comes first.
t = tf.stack([a, b, c])
tf.print(t)
tf.print(t.shape)

"""
[[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]

 [[9 10]
  [11 12]]]
TensorShape([3, 2, 2])
"""

# Stack with the new axis inserted at position 1.
t = tf.stack([a, b, c], axis=1)
tf.print(t)
tf.print(t.shape)

"""
[[[1 2]
  [5 6]
  [9 10]]

 [[3 4]
  [7 8]
  [11 12]]]
TensorShape([2, 3, 2])
"""

# Rebuild the three 2x2 tensors, then rebind c to their concatenation along
# axis 0; the tf.split examples below operate on this 6x2 tensor.
a = tf.constant([[1.0,2.0],[3.0,4.0]])
b = tf.constant([[5.0,6.0],[7.0,8.0]])
c = tf.constant([[9.0,10.0],[11.0,12.0]])

c = tf.concat([a,b,c],axis = 0)

tf.split是tf.concat的逆运算，可以指定分割份数平均分割，也可以通过指定每份的记录数量进行分割。

# tf.split(value, num_or_size_splits, axis)
tf.split(c, 3, axis=0)  # give the number of parts: split evenly into 3

"""
[<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
 array([[1., 2.],
        [3., 4.]], dtype=float32)>,
 <tf.Tensor: shape=(2, 2), dtype=float32, numpy=
 array([[5., 6.],
        [7., 8.]], dtype=float32)>,
 <tf.Tensor: shape=(2, 2), dtype=float32, numpy=
 array([[ 9., 10.],
        [11., 12.]], dtype=float32)>]
"""

tf.split(c, [2, 1, 3], axis=0)  # give the number of rows in each part

"""
[<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
 array([[1., 2.],
        [3., 4.]], dtype=float32)>,
 <tf.Tensor: shape=(1, 2), dtype=float32, numpy=array([[5., 6.]], dtype=float32)>,
 <tf.Tensor: shape=(3, 2), dtype=float32, numpy=
 array([[ 7.,  8.],
        [ 9., 10.],
        [11., 12.]], dtype=float32)>]
"""

posted @ 2022-06-19 18:34 lotuslaw 阅读(37) 评论(0) 编辑收藏举报

刷新页面返回顶部

Loading

lotuslaw

4-1张量的结构操作——eat_tensorflow2_in_30_days

4-1张量的结构操作

创建张量

索引切片

维度变换

合并分割

公告