易于将模型导出为 GraphDef + checkpoint 或者 SavedModel;eager execution 默认开启。
# Build a 2x3 float32 constant and show indexing / slicing on it.
t = tf.constant([[1., 2., 3.], [4., 5., 6.]])
print(t)
print(t[:, 1:])   # every row, columns 1 onward
print(t[..., 1])  # column 1 of every row
# Output:
#   tf.Tensor(
#   [[1. 2. 3.]
#    [4. 5. 6.]], shape=(2, 3), dtype=float32)
#   tf.Tensor(
#   [[2. 3.]
#    [5. 6.]], shape=(2, 2), dtype=float32)
#   tf.Tensor([2. 5.], shape=(2,), dtype=float32)

# Addition with a scalar.
print(t + 10)
# Element-wise square.
print(tf.square(t))
# Matrix product of t with its own transpose.
print(t @ tf.transpose(t))
tf 转numpy
t.numpy() 或 np.square(t)
# NumPy array -> tf.Tensor.
np_t = np.array([[1., 2., 3.], [4., 5., 6.]])
print(tf.constant(np_t))

# A scalar tensor.
t = tf.constant(2.766)
print(t.numpy())
print(t.shape)  # () means zero dimensions
# Output:
#   2.766
#   ()
3.tf.strings 与ragged_tensor
# A scalar string tensor.
t = tf.constant("tom")
print(t)
# Length with the default unit.
print(tf.strings.length(t))
# Length counted in UTF-8 characters.
print(tf.strings.length(t, unit="UTF8_CHAR"))
# Decode the string into its Unicode code points.
print(tf.strings.unicode_decode(t, "UTF8"))

# A vector of strings whose elements have different lengths.
t = tf.constant(["cafe", "coffee", "咖啡"])
print(tf.strings.length(t, unit="UTF8_CHAR"))
r = tf.strings.unicode_decode(t, "UTF8")
print(r)  # code points of every string; the result is a RaggedTensor
# Output:
#   tf.Tensor([4 6 2], shape=(3,), dtype=int32)
#   <tf.RaggedTensor [[99, 97, 102, 101], [99, 111, 102, 102, 101, 101], [21654, 21857]]>
ragged tensor
- 用于表示各行长度不固定(有长有短)的 tensor。
# Rows of a ragged tensor may have different lengths (including zero).
r = tf.ragged.constant([[11, 12], [21, 22, 23], [], [41]])
print(r)
print(r[1])    # a single row comes back as a regular tensor
print(r[1:3])  # a slice of rows is still ragged
# Output:
#   <tf.RaggedTensor [[11, 12], [21, 22, 23], [], [41]]>
#   tf.Tensor([21 22 23], shape=(3,), dtype=int32)
#   <tf.RaggedTensor [[21, 22, 23], []]>
- 合并
r2 = tf.ragged.constant([[51, 52], [], [7]])
# Concatenating along axis 0 appends the rows of r2 after the rows of r.
print(tf.concat([r, r2], axis=0))
# Output:
#   <tf.RaggedTensor [[11, 12], [21, 22, 23], [], [41], [51, 52], [], [7]]>
- 将ragged tensor 转换成tensor
# Missing entries are filled with 0; the padding goes after the real values.
print(r.to_tensor())
# Output:
#   tf.Tensor(
#   [[11 12  0]
#    [21 22 23]
#    [ 0  0  0]
#    [41  0  0]], shape=(4, 3), dtype=int32)
# indices:     positions of all the stored (non-zero) values
# values:      the value stored at each of those positions
# dense_shape: full size of the matrix
# Note: the indices of a SparseTensor must be sorted, otherwise to_dense fails.
s = tf.SparseTensor(
    indices=[[0, 1], [1, 0], [2, 3]],
    values=[1., 2., 3.],
    dense_shape=[3, 4],
)
print(s)
# Dense version of the same matrix.
print(tf.sparse.to_dense(s))
# Output:
#   SparseTensor(indices=tf.Tensor(
#   [[0 1]
#    [1 0]
#    [2 3]], shape=(3, 2), dtype=int64), values=tf.Tensor([1. 2. 3.], shape=(3,), dtype=float32), dense_shape=tf.Tensor([3 4], shape=(2,), dtype=int64))
#   tf.Tensor(
#   [[0. 1. 0. 0.]
#    [2. 0. 0. 0.]
#    [0. 0. 0. 3.]], shape=(3, 4), dtype=float32)

# If the indices are not sorted, call reorder first and then to_dense.
s4 = tf.SparseTensor(
    indices=[[0, 2], [0, 1], [2, 3]],
    values=[1., 2., 3.],
    dense_shape=[3, 4],
)
s5 = tf.sparse.reorder(s4)
print(tf.sparse.to_dense(s5))

# Multiplying a sparse tensor by a scalar.
s2 = s * 2.0

# Sparse matrix times dense matrix (s4 is re-bound to a dense 4x2 matrix here).
s4 = tf.constant([
    [10., 20.],
    [30., 40.],
    [50., 60.],
    [70., 80.],
])
# 3x4 matrix times 4x2 matrix gives a 3x2 matrix.
print(tf.sparse.sparse_dense_matmul(s, s4))
# Output:
#   tf.Tensor(
#   [[ 30.  40.]
#    [ 20.  40.]
#    [210. 240.]], shape=(3, 2), dtype=float32)
Variables 变量
- 变量创建
# Create a 2x3 variable.
v = tf.Variable([[1., 2., 3.], [4., 5., 6.]])
# Representation of v:
#   <tf.Variable 'Variable:0' shape=(2, 3) dtype=float32, numpy=
#   array([[1., 2., 3.],
#          [4., 5., 6.]], dtype=float32)>

# Current value as a NumPy array.
v.numpy()
#   array([[1., 2., 3.],
#          [4., 5., 6.]], dtype=float32)

# Current value as a tensor.
v.value()
#   <tf.Tensor: shape=(2, 3), dtype=float32, numpy=
#   array([[1., 2., 3.],
#          [4., 5., 6.]], dtype=float32)>
- 变量可以被重新赋值,使用assign函数
# Re-assign the whole variable.
v.assign(2 * v)
# Re-assign a single position (row 0, column 1). The source repeated the
# whole-variable assignment here, which did not demonstrate indexed assignment.
v[0, 1].assign(42.)
# Re-assign an entire row.
v[1].assign([7., 8., 9.])
5.自定义损失函数 + 自定义层次
- custonmized_mse为自定义损失函数。
def custonmized_mse(y_true, y_pred):
    """Custom loss: mean of the squared differences between prediction and target.

    Args:
        y_true: target values.
        y_pred: predicted values.

    Returns:
        A scalar tensor, the mean over all elements of (y_pred - y_true)^2.
    """
    # Fixed typo: the original called the non-existent tf.redece_mean.
    return tf.reduce_mean(tf.square(y_pred - y_true))
# NOTE(review): in the source the layer list was cut off right after
# `Sequential([` and no compile step was present. The layers below mirror the
# 30-unit ReLU + 1-unit regression model used elsewhere in these notes, and the
# compile call plugs in the custom loss this section is about — confirm.
model = keras.models.Sequential([
    keras.layers.Dense(30, activation="relu", input_shape=x_train.shape[1:]),
    keras.layers.Dense(1),
])
model.compile(loss=custonmized_mse, optimizer="sgd")

# Stop when the monitored metric has not improved by at least 1e-2 for 5 epochs.
callbacks = [keras.callbacks.EarlyStopping(patience=5, min_delta=1e-2)]
history = model.fit(
    x_train_scaled,
    y_train,
    validation_data=(x_valid_scaled, y_valid),
    epochs=100,
    callbacks=callbacks,
)
- 自定义层次
- DenseLayer做的事情是: x*w+b
# The layer has to exist (and be called once, so that its weights are created)
# before its variables can be inspected; the source read layer.variables first.
layer = tf.keras.layers.Dense(100, input_shape=(None, 5))
layer(tf.zeros([10, 5]))  # result is a 10x100 matrix
layer.variables  # all parameters of the layer
- 使用子类自定义层次
# 常规导包
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import sklearn
import pandas as pd
import os
import sys
import time
import tensorflow as tf
from tensorflow import keras
# Print the name and version of every library in use.
for module in mpl, np, pd, sklearn, tf, keras:
    # The loop body must be indented; it was flush-left in the source.
    print(module.__name__, module.__version__)
# Import the California housing dataset loader.
from sklearn.datasets import fetch_california_housing

# Load the dataset.
housing = fetch_california_housing()
# Print basic information. The source announced this print and its output but
# the statement itself was missing.
print(housing.data.shape)
# Output: (20640, 8)
from sklearn.model_selection import train_test_split

# Hold out 20% of the data as the test set.
x_train_all, x_test, y_train_all, y_test = train_test_split(
    housing.data,
    housing.target,
    test_size=0.2,
    random_state=7,
)
# Split the remainder into training and validation sets.
x_train, x_valid, y_train, y_valid = train_test_split(
    x_train_all,
    y_train_all,
    random_state=11,
)
# Show the feature / target shapes of each split.
for features, targets in ((x_train, y_train), (x_valid, y_valid), (x_test, y_test)):
    print(features.shape, targets.shape)
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training features only, then apply the same
# transformation to the validation and test features.
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_valid_scaled, x_test_scaled = (
    scaler.transform(features) for features in (x_valid, x_test)
)
子类自定义层 CustomizedDenseLayer
class CustomizedDenseLayer(keras.layers.Layer):
    """Fully connected layer computing ``activation(x @ kernel + bias)``.

    Args:
        units: number of output units.
        activation: anything accepted by ``keras.layers.Activation``;
            ``None`` is passed through unchanged.
        **kwargs: forwarded to ``keras.layers.Layer`` (e.g. ``input_shape``).
    """

    def __init__(self, units, activation=None, **kwargs):
        # Initialise the base class before assigning attributes: the source
        # assigned a sub-layer (self.activation) before calling the parent
        # constructor.
        super(CustomizedDenseLayer, self).__init__(**kwargs)
        self.units = units
        self.activation = keras.layers.Activation(activation)

    def build(self, input_shape):
        """Create the weights needed for x * w + b.

        With input_shape [None, a] and units b, the kernel is [a, b] and the
        output shape is [None, b].
        """
        # In the source this assignment was swallowed by a `#`, which would
        # leave self.kernel undefined when call() runs.
        self.kernel = self.add_weight(
            name='kernel',
            shape=(input_shape[-1], self.units),
            initializer='uniform',
            trainable=True,
        )
        self.bias = self.add_weight(
            name='bias',
            shape=(self.units,),
            initializer='zeros',
            trainable=True,
        )
        super(CustomizedDenseLayer, self).build(input_shape)

    def call(self, x):
        """Forward computation."""
        return self.activation(x @ self.kernel + self.bias)


model = keras.models.Sequential([
    CustomizedDenseLayer(30, activation='relu', input_shape=x_train.shape[1:]),
    CustomizedDenseLayer(1),
])
model.summary()
model.compile(loss="mean_squared_error", optimizer="sgd")
callbacks = [keras.callbacks.EarlyStopping(patience=5, min_delta=1e-2)]
history = model.fit(
    x_train_scaled,
    y_train,
    validation_data=(x_valid_scaled, y_valid),
    epochs=100,
    callbacks=callbacks,
)
# A parameter-free layer that applies softplus to its input.
customized_softplus = keras.layers.Lambda(lambda x: tf.nn.softplus(x))
# Placed after a Dense(1) layer, this gives a Dense layer with softplus
# activation, i.e. it is equivalent to either of:
#   1. keras.layers.Dense(1, activation="softplus")
#   2. keras.layers.Dense(1), keras.layers.Activation('softplus')

# Example:
model = keras.models.Sequential([
    CustomizedDenseLayer(30, activation='relu', input_shape=x_train.shape[1:]),
    CustomizedDenseLayer(1),
    customized_softplus,
])
tf.function可以将python式的函数转换成tensorflow 能运行的函数, 转换后运行的速度会得到提高,特别GPU加速的tensorflow运行的比一般python函数更快。
# tf.function and auto-graph
def scaled_elu(z, scale=1.0, alpha=1.0):
    """Scaled ELU: z >= 0 ? scale * z : scale * alpha * tf.nn.elu(z)."""
    negative_branch = alpha * tf.nn.elu(z)
    chosen = tf.where(tf.greater_equal(z, 0.0), z, negative_branch)
    return scale * chosen
# 输入标量 print(scaled_elu(tf.constant(-3.))) # 输入向量 print(scaled_elu(tf.constant([-3.,-2.5]))) """ tf.Tensor(-0.95021296, shape=(), dtype=float32) tf.Tensor([-0.95021296 -0.917915 ], shape=(2,), dtype=float32) """
scaled_elu_tf = tf.function(scaled_elu) # 输入标量 print(scaled_elu_tf(tf.constant(-3.))) # 输入向量 print(scaled_elu_tf(tf.constant([-3.,-2.5]))) """ tf.Tensor(-0.95021296, shape=(), dtype=float32) tf.Tensor([-0.95021296 -0.917915 ], shape=(2,), dtype=float32) """
# 获取python的函数 print(scaled_elu_tf.python_function is scaled_elu)# True
%timeit scaled_elu(tf.random.normal((1000,1000))) %timeit scaled_elu_tf(tf.random.normal((1000,1000))) """ 44.9 ms ± 7.17 ms per loop (mean ± std. dev. of 7 runs, 10 loops each) 44.6 ms ± 8.2 ms per loop (mean ± std. dev. of 7 runs, 10 loops each) """
# 1 + 1/2 + 1/2^2 + ... 1/2^n
@tf.function
def converge_to_2(n_iters):
    """Add up n_iters terms of the series, starting at 1 and halving each time."""
    running_sum = tf.constant(0.)
    term = tf.constant(1.)
    for _ in range(n_iters):
        running_sum += term
        term /= 2.0
    return running_sum


print(converge_to_2(20))
def display_tf_code(func):
    """Display the AutoGraph-generated source of *func* as a Markdown code cell.

    Args:
        func: a plain Python function, or an object produced by tf.function.
    """
    from IPython.display import display, Markdown

    # An object produced by tf.function wraps the original function and exposes
    # it as .python_function; AutoGraph is given that plain function. Plain
    # functions have no such attribute and are used as they are.
    python_func = getattr(func, "python_function", func)
    code = tf.autograph.to_code(python_func)
    display(Markdown("```python\n{}\n```".format(code)))


display_tf_code(scaled_elu)
display_tf_code(converge_to_2)
tf.Variable() 不能定义在 tf.function 里面,否则报错:ValueError: tf.function-decorated function tried to create variables on non-first call.,应该定义在函数外面做初始化。
@tf.function输入int类型和float类型 得到值:
@tf.function
def cube(z):
    """Raise z to the third power."""
    return tf.pow(z, 3)


# The same function accepts float and int tensors; the result keeps the dtype.
print(cube(tf.constant([1., 2., 3.])))
print(cube(tf.constant([1, 2, 3])))
# Output:
#   tf.Tensor([ 1.  8. 27.], shape=(3,), dtype=float32)
#   tf.Tensor([ 1  8 27], shape=(3,), dtype=int32)
tf.function 函数签名
# The input signature declares a single int32 argument of shape [None], named 'x'.
@tf.function(input_signature=[tf.TensorSpec([None], tf.int32, name='x')])
def cube(z):
    """Raise z to the third power."""
    return tf.pow(z, 3)
使用 get_concrete_function 加上 input signature,得到可以保存为 SavedModel 的函数
# The argument type has to be passed in.
cube_func_int32 = cube.get_concrete_function(tf.TensorSpec([None,], tf.int32))
# <tensorflow.python.eager.function.ConcreteFunction object at 0x7f8d65a86d10>

# A more specific shape, or an actual tensor, gives back the same object.
print(cube_func_int32 is cube.get_concrete_function(tf.TensorSpec([5], tf.int32)))  # True
print(cube_func_int32 is cube.get_concrete_function(tf.constant([1, 2, 3])))  # True
通过 .graph 获取图定义
# The graph behind the concrete function.
cube_func_int32.graph
# <tensorflow.python.framework.func_graph.FuncGraph at 0x7f8d692fbd10>

# All operations in the graph.
cube_func_int32.graph.get_operations()
# [<tf.Operation 'x' type=Placeholder>,
#  <tf.Operation 'Pow/y' type=Const>,
#  <tf.Operation 'Pow' type=Pow>,
#  <tf.Operation 'Identity' type=Identity>]

# Look at one operation in detail.
pow_op = cube_func_int32.graph.get_operations()[2]
print(pow_op)
# name: "Pow"
# op: "Pow"
# input: "x"
# input: "Pow/y"
# attr { key: "T" value { type: DT_INT32 } }

# Inputs of the operation.
print(list(pow_op.inputs))
# [<tf.Tensor 'x:0' shape=(None,) dtype=int32>, <tf.Tensor 'Pow/y:0' shape=() dtype=int32>]
# Outputs of the operation.
print(list(pow_op.outputs))
# [<tf.Tensor 'Pow:0' shape=(None,) dtype=int32>]

# The graph as a GraphDef.
cube_func_int32.graph.as_graph_def()
# node {
#   name: "x"
#   op: "Placeholder"
#   attr { key: "_user_specified_name" value { s: "x" } }
#   attr { key: "dtype" value { type: DT_INT32 } }
#   attr { key: "shape" value { shape { dim { size: -1 } } } }
# }
# node {
#   name: "Pow/y"
#   op: "Const"
#   attr { key: "dtype" value { type: DT_INT32 } }
#   attr { key: "value" value { tensor { dtype: DT_INT32 tensor_shape { } int_val: 3 } } }
# }
# node {
#   name: "Pow"
#   op: "Pow"
#   input: "x"
#   input: "Pow/y"
#   attr { key: "T" value { type: DT_INT32 } }
# }
# node {
#   name: "Identity"
#   op: "Identity"
#   input: "Pow"
#   attr { key: "T" value { type: DT_INT32 } }
# }
# versions { producer: 175 }

# Fetch a tf.Operation by name.
cube_func_int32.graph.get_operation_by_name("x")
# Fetch a tf.Tensor by name.
cube_func_int32.graph.get_tensor_by_name("x:0")
- 什么是求导???
- 一元求导
def f(x):
    """Return 3x^2 + 2x - 1."""
    return 3. * x ** 2 + 2. * x - 1


def approximae_derivative(f, x, eps=1e-3):
    """Approximate the derivative of f at x with a central difference.

    Args:
        f: function of one variable.
        x: point at which the derivative is estimated.
        eps: half-width of the interval used for the difference quotient.

    Returns:
        (f(x + eps) - f(x - eps)) / (2 * eps)
    """
    return (f(x + eps) - f(x - eps)) / (2. * eps)


print(approximae_derivative(f, 1.))  # 7.999999999999119
- 二元求导
def g(x1, x2):
    """Return (x1 + 5) * x2^2."""
    return (x1 + 5) * (x2 ** 2)


def approximae_gradiend(g, x1, x2, eps=1e-3):
    """Approximate both partial derivatives of g at (x1, x2).

    Returns:
        A tuple (dg/dx1, dg/dx2).
    """
    def along_x1(value):
        # Partial derivative w.r.t. x1: x2 is held fixed.
        return g(value, x2)

    def along_x2(value):
        # Partial derivative w.r.t. x2: x1 is held fixed.
        return g(x1, value)

    dg_x1 = approximae_derivative(along_x1, x1, eps)
    dg_x2 = approximae_derivative(along_x2, x2, eps)
    return dg_x1, dg_x2


print(approximae_gradiend(g, 2., 3.))  # (8.999999999993236, 41.999999999994486)
x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
# persistent=True keeps the tape so that gradient() can be called more than once.
with tf.GradientTape(persistent=True) as tape:
    z = g(x1, x2)
# Partial derivative with respect to x1.
dz_x1 = tape.gradient(z, x1)
print(dz_x1)
# Partial derivative with respect to x2.
dz_x2 = tape.gradient(z, x2)
print(dz_x2)
# Delete the tape to release its resources.
del tape
# Output:
#   tf.Tensor(9.0, shape=(), dtype=float32)
#   tf.Tensor(42.0, shape=(), dtype=float32)

x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
# A non-persistent tape: both partial derivatives are requested in one call.
with tf.GradientTape() as tape:
    z = g(x1, x2)
dz_x = tape.gradient(z, [x1, x2])
print(dz_x)
# [<tf.Tensor: shape=(), dtype=float32, numpy=9.0>, <tf.Tensor: shape=(), dtype=float32, numpy=42.0>]

# Two targets and one source: a single value comes back (13.0 here).
x = tf.Variable(5.0)
with tf.GradientTape() as tape:
    z1 = 3 * x
    z2 = x ** 2
tape.gradient([z1, z2], x)
# <tf.Tensor: shape=(), dtype=float32, numpy=13.0>
x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
# Nested tapes: the inner tape yields the first-order gradients, the outer
# tape differentiates each of them again.
with tf.GradientTape(persistent=True) as outer_tape:
    with tf.GradientTape(persistent=True) as inner_tape:
        z = g(x1, x2)
    inner_grads = inner_tape.gradient(z, [x1, x2])
outer_grads = [
    outer_tape.gradient(inner_grad, [x1, x2])
    for inner_grad in inner_grads
]
print(outer_grads)
del inner_tape
del outer_tape
# The result is a 2 x 2 matrix:
#   [[None, <tf.Tensor: shape=(), dtype=float32, numpy=6.0>],
#    [<tf.Tensor: shape=(), dtype=float32, numpy=6.0>, <tf.Tensor: shape=(), dtype=float32, numpy=14.0>]]
# Gradient descent on f by hand: 100 steps starting from x = 0.
learning_rate = 0.1
x = tf.Variable(0.0)
for _ in range(100):
    with tf.GradientTape() as tape:
        z = f(x)
    dz_dx = tape.gradient(z, x)
    # x <- x - learning_rate * df/dx
    x.assign_sub(learning_rate * dz_dx)
print(x)
与keras optimizer结合使用
# Same minimisation of f, with a Keras optimizer applying the update step.
learning_rate = 0.1
x = tf.Variable(0.0)
# Use the `learning_rate` keyword; `lr` is a deprecated alias.
optimizer = keras.optimizers.SGD(learning_rate=learning_rate)
for _ in range(100):
    with tf.GradientTape() as tape:
        z = f(x)
    dz_dx = tape.gradient(z, x)
    # apply_gradients takes (gradient, variable) pairs.
    optimizer.apply_gradients([(dz_dx, x)])
print(x)
# <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=-0.3333333>
10.tf.GradientTape 与 tf.keras 结合使用
# Mean-squared-error metric object.
metric = keras.metrics.MeanSquaredError()
# (5 - 2)^2
print(metric([5.], [2.]))  # 9
# The metric accumulates over calls: the mean of 9 and 1.
print(metric([0.], [1.]))  # 5
print(metric.result())     # 5, the accumulated value
# Reset the metric when accumulation is not wanted.
metric.reset_states()
print(metric([2.], [6.]))  # 16
# Manual training loop:
#   1. iterate over the training set in random mini-batches
#   2. for each batch, compute the gradients and apply them
#   3. evaluate on the validation set at the end of every epoch

# Settings.
epochs = 100
batch_size = 32
# Number of mini-batches drawn per epoch.
steps_per_epoch = len(x_train_scaled) // batch_size
optimizer = keras.optimizers.SGD()
# Accumulating mean-squared-error metric.
metric = keras.metrics.MeanSquaredError()


def random_batch(x, y, batch_size=32):
    """Return batch_size rows of x and y drawn at random, with replacement."""
    # Fixed: the lower bound was an undefined name `n`; indices start at 0.
    idx = np.random.randint(0, len(x), size=batch_size)
    return x[idx], y[idx]


model = keras.models.Sequential([
    keras.layers.Dense(30, activation="relu"),
    keras.layers.Dense(1),
])
model.compile(loss="mean_squared_error", optimizer="sgd")

# Fixed: `epochgs` was a typo for `epochs`.
for epoch in range(epochs):
    metric.reset_states()
    for step in range(steps_per_epoch):
        x_batch, y_batch = random_batch(x_train_scaled, y_train, batch_size)
        # The model outputs shape (batch, 1). Give the targets the same shape so
        # the element-wise difference inside the loss does not broadcast to
        # (batch, batch).
        y_batch = y_batch.reshape(-1, 1)
        with tf.GradientTape() as tape:
            # Predictions for this batch.
            y_pred = model(x_batch)
            # Loss value for this batch.
            loss = tf.reduce_mean(
                keras.losses.mean_squared_error(y_batch, y_pred))
        # Accumulate the training metric.
        metric(y_batch, y_pred)
        # Compute the gradients by hand.
        grads = tape.gradient(loss, model.trainable_variables)
        # Pair each gradient with its variable (fixed: `grads_and vars`).
        grads_and_vars = zip(grads, model.trainable_variables)
        # Apply the update.
        optimizer.apply_gradients(grads_and_vars)
        # Print the running training MSE.
        print("\r Epoch", epoch, " train mse", metric.result().numpy(), end=" ")
    # Validation (fixed: `y_valid_preed` was a typo for `y_valid_pred`).
    y_valid_pred = model(x_valid_scaled)
    valid_loss = tf.reduce_mean(
        keras.losses.mean_squared_error(y_valid.reshape(-1, 1), y_valid_pred))
    print("\t", "valid mse:", valid_loss.numpy())
