A Concise Handbook of TensorFlow: tf.data
```python
# Building a Dataset object with tf.data
import tensorflow as tf
import numpy as np

X = tf.constant([2013, 2014, 2015, 2016, 2017])
Y = tf.constant([12000, 14000, 15000, 16500, 17500])

# NumPy arrays work the same way:
# X = np.array([2013, 2014, 2015, 2016, 2017])
# Y = np.array([12000, 14000, 15000, 16500, 17500])

dataset = tf.data.Dataset.from_tensor_slices((X, Y))

for x, y in dataset:
    print(x.numpy(), y.numpy())
```

```python
# Building a Dataset object from MNIST
import matplotlib.pyplot as plt

(train_data, train_label), (_, _) = tf.keras.datasets.mnist.load_data()
train_data = np.expand_dims(train_data.astype(np.float32) / 255.0, axis=-1)  # [60000, 28, 28, 1]
mnist_dataset = tf.data.Dataset.from_tensor_slices((train_data, train_label))

for image, label in mnist_dataset:
    plt.title(label.numpy())
    plt.imshow(image.numpy()[:, :, 0])
    plt.show()
```

```python
# Dataset preprocessing with tf.data.Dataset.map()
def rot90(image, label):
    image = tf.image.rot90(image)
    return image, label

mnist_dataset = mnist_dataset.map(rot90)  # map() applies rot90 to every element

for image, label in mnist_dataset:
    plt.title(label.numpy())
    plt.imshow(image.numpy()[:, :, 0])
    plt.show()
```

```python
# Dataset preprocessing with tf.data.Dataset.batch()
mnist_dataset = mnist_dataset.batch(4)  # split into batches of 4

for images, labels in mnist_dataset:  # images: [4, 28, 28, 1], labels: [4]
    fig, axs = plt.subplots(1, 4)
    for i in range(4):
        axs[i].set_title(labels.numpy()[i])
        axs[i].imshow(images.numpy()[i, :, :, 0])
    plt.show()
```

```python
# Dataset preprocessing with tf.data.Dataset.shuffle()
mnist_dataset = mnist_dataset.shuffle(buffer_size=10000).batch(4)  # shuffle first, then batch

for images, labels in mnist_dataset:
    fig, axs = plt.subplots(1, 4)
    for i in range(4):
        axs[i].set_title(labels.numpy()[i])
        axs[i].imshow(images.numpy()[i, :, :, 0])
    plt.show()
```

```python
# Parallelizing the input pipeline with tf.data.Dataset.prefetch()
mnist_dataset = mnist_dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
```

```python
# Accessing dataset elements with a for loop (schematic: A, B, C, ... stand for the component tensors)
dataset = tf.data.Dataset.from_tensor_slices((A, B, C, ...))
for a, b, c, ... in dataset:
    # operate on the tensors a, b, c, ..., e.g. feed them to a model for training
```

```python
# Accessing dataset elements with an iterator
dataset = tf.data.Dataset.from_tensor_slices((A, B, C, ...))
it = iter(dataset)
a_0, b_0, c_0, ... = next(it)
a_1, b_1, c_1, ... = next(it)
```

```python
# Using a Dataset directly with tf.keras.Model's fit() and evaluate()
model.fit(x=train_data, y=train_label, epochs=num_epochs, batch_size=batch_size)  # original x, y input
model.fit(mnist_dataset, epochs=num_epochs)  # passing the Dataset instead
```

Note that when a Dataset is passed to `fit()`, the `batch_size` argument is omitted: the Dataset already yields batches.
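Putting the pieces above together, here is a minimal end-to-end sketch: it rebuilds the MNIST pipeline with `shuffle()`, `batch()`, and `prefetch()` and passes the resulting Dataset straight to `fit()`. The small Sequential model, the batch size of 32, and the epoch count are hypothetical stand-ins for illustration, not from the original text.

```python
# Minimal end-to-end sketch; the model and hyperparameters are assumed, not from the source
import tensorflow as tf
import numpy as np

num_epochs = 2  # assumed small value for a quick run

(train_data, train_label), (_, _) = tf.keras.datasets.mnist.load_data()
train_data = np.expand_dims(train_data.astype(np.float32) / 255.0, axis=-1)  # [60000, 28, 28, 1]

mnist_dataset = (
    tf.data.Dataset.from_tensor_slices((train_data, train_label))
    .shuffle(buffer_size=10000)               # shuffle before batching
    .batch(32)                                # batches of 32
    .prefetch(tf.data.experimental.AUTOTUNE)  # overlap preprocessing with training
)

model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['sparse_categorical_accuracy'])

model.fit(mnist_dataset, epochs=num_epochs)  # fit() consumes the batched Dataset directly
```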
A complete example: cats vs. dogs classification
```python
import tensorflow as tf
import os

num_epochs = 10
batch_size = 32
learning_rate = 0.001
data_dir = 'C:/datasets/cats_vs_dogs'
train_cats_dir = data_dir + '/train/cats/'
train_dogs_dir = data_dir + '/train/dogs/'
test_cats_dir = data_dir + '/valid/cats/'
test_dogs_dir = data_dir + '/valid/dogs/'

def _decode_and_resize(filename, label):
    image_string = tf.io.read_file(filename)            # read the raw file
    image_decoded = tf.image.decode_jpeg(image_string)  # decode the JPEG image
    image_resized = tf.image.resize(image_decoded, [256, 256]) / 255.0
    return image_resized, label

if __name__ == '__main__':
    # Build the training dataset
    train_cat_filenames = tf.constant([train_cats_dir + filename for filename in os.listdir(train_cats_dir)])
    train_dog_filenames = tf.constant([train_dogs_dir + filename for filename in os.listdir(train_dogs_dir)])
    train_filenames = tf.concat([train_cat_filenames, train_dog_filenames], axis=-1)
    train_labels = tf.concat([
        tf.zeros(train_cat_filenames.shape, dtype=tf.int32),
        tf.ones(train_dog_filenames.shape, dtype=tf.int32)],
        axis=-1)

    train_dataset = tf.data.Dataset.from_tensor_slices((train_filenames, train_labels))  # build the Dataset
    train_dataset = train_dataset.map(
        map_func=_decode_and_resize,  # apply decoding and resizing
        num_parallel_calls=tf.data.experimental.AUTOTUNE)
    # shuffle() fills a buffer with the first buffer_size elements, samples from it at random,
    # and replaces each sampled element with the next element from the stream
    train_dataset = train_dataset.shuffle(buffer_size=23000)
    train_dataset = train_dataset.batch(batch_size)
    train_dataset = train_dataset.prefetch(tf.data.experimental.AUTOTUNE)  # prefetch batches in the background

    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(32, 3, activation='relu', input_shape=(256, 256, 3)),
        tf.keras.layers.MaxPooling2D(),
        tf.keras.layers.Conv2D(32, 5, activation='relu'),
        tf.keras.layers.MaxPooling2D(),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(2, activation='softmax')
    ])

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss=tf.keras.losses.sparse_categorical_crossentropy,
        metrics=[tf.keras.metrics.sparse_categorical_accuracy]
    )

    model.fit(train_dataset, epochs=num_epochs)  # train directly on the Dataset

    # Build the test dataset
    test_cat_filenames = tf.constant([test_cats_dir + filename for filename in os.listdir(test_cats_dir)])
    test_dog_filenames = tf.constant([test_dogs_dir + filename for filename in os.listdir(test_dogs_dir)])
    test_filenames = tf.concat([test_cat_filenames, test_dog_filenames], axis=-1)
    test_labels = tf.concat([
        tf.zeros(test_cat_filenames.shape, dtype=tf.int32),
        tf.ones(test_dog_filenames.shape, dtype=tf.int32)],
        axis=-1)

    test_dataset = tf.data.Dataset.from_tensor_slices((test_filenames, test_labels))
    test_dataset = test_dataset.map(_decode_and_resize)
    test_dataset = test_dataset.batch(batch_size)

    print(model.metrics_names)
    print(model.evaluate(test_dataset))
```
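As a follow-up to `evaluate()`, here is a hedged inference sketch reusing the `model` and `test_dataset` from the script above; `predict()` also accepts a batch of tensors, and `take(1)` just grabs a single batch. This is an illustrative addition, not part of the original example.

```python
# Minimal inference sketch, assuming model and test_dataset from the script above
for images, labels in test_dataset.take(1):  # grab a single batch
    probs = model.predict(images)            # [batch_size, 2] class probabilities
    predictions = tf.argmax(probs, axis=-1)  # 0 = cat, 1 = dog (matches the label encoding above)
    print(predictions.numpy())
    print(labels.numpy())
```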