A Concise Handbook of TensorFlow (简单粗暴 TensorFlow): tf.data
# Building a Dataset object with tf.data
import tensorflow as tf
import numpy as np

X = tf.constant([2013, 2014, 2015, 2016, 2017])
Y = tf.constant([12000, 14000, 15000, 16500, 17500])

# NumPy arrays work the same way
# X = np.array([2013, 2014, 2015, 2016, 2017])
# Y = np.array([12000, 14000, 15000, 16500, 17500])

dataset = tf.data.Dataset.from_tensor_slices((X, Y))

for x, y in dataset:
    print(x.numpy(), y.numpy())

# Building a Dataset object from the MNIST data
import matplotlib.pyplot as plt

(train_data, train_label), (_, _) = tf.keras.datasets.mnist.load_data()
train_data = np.expand_dims(train_data.astype(np.float32) / 255.0, axis=-1)   # [60000, 28, 28, 1]
mnist_dataset = tf.data.Dataset.from_tensor_slices((train_data, train_label))

for image, label in mnist_dataset:
    plt.title(label.numpy())
    plt.imshow(image.numpy()[:, :, 0])
    plt.show()

# Dataset preprocessing: tf.data.Dataset.map
def rot90(image, label):
    image = tf.image.rot90(image)
    return image, label

mnist_dataset = mnist_dataset.map(rot90)   # map applies rot90 to every element

for image, label in mnist_dataset:
    plt.title(label.numpy())
    plt.imshow(image.numpy()[:, :, 0])
    plt.show()

# Dataset preprocessing: tf.data.Dataset.batch()
mnist_dataset = mnist_dataset.batch(4)     # split into batches of 4

for images, labels in mnist_dataset:       # images: [4, 28, 28, 1], labels: [4]
    fig, axs = plt.subplots(1, 4)
    for i in range(4):
        axs[i].set_title(labels.numpy()[i])
        axs[i].imshow(images.numpy()[i, :, :, 0])
    plt.show()

# Dataset preprocessing: tf.data.Dataset.shuffle()
mnist_dataset = mnist_dataset.shuffle(buffer_size=10000).batch(4)   # shuffle first, then batch

for images, labels in mnist_dataset:
    fig, axs = plt.subplots(1, 4)
    for i in range(4):
        axs[i].set_title(labels.numpy()[i])
        axs[i].imshow(images.numpy()[i, :, :, 0])
    plt.show()

# Overlapping data loading with computation: tf.data.Dataset.prefetch
mnist_dataset = mnist_dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

# Accessing dataset elements with a for loop
dataset = tf.data.Dataset.from_tensor_slices((A, B, C, ...))
for a, b, c, ... in dataset:
    # operate on the tensors a, b, c, ..., e.g. feed them into a model for training

# Accessing dataset elements with an iterator
dataset = tf.data.Dataset.from_tensor_slices((A, B, C, ...))
it = iter(dataset)
a_0, b_0, c_0, ... = next(it)
a_1, b_1, c_1, ... = next(it)

# tf.keras.Model's fit() and evaluate() accept a Dataset directly
model.fit(x=train_data, y=train_label, epochs=num_epochs, batch_size=batch_size)   # original x/y input
model.fit(mnist_dataset, epochs=num_epochs)                                        # pass the Dataset instead
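The snippets above apply each transformation separately; in practice they are chained into one pipeline. Below is a minimal sketch, assuming the same MNIST arrays loaded above; the name pipeline and the use of take(1) to peek at a single batch are illustrative additions, not part of the original code.

import tensorflow as tf
import numpy as np

(train_data, train_label), (_, _) = tf.keras.datasets.mnist.load_data()
train_data = np.expand_dims(train_data.astype(np.float32) / 255.0, axis=-1)   # [60000, 28, 28, 1]

def rot90(image, label):
    return tf.image.rot90(image), label

# map -> shuffle -> batch -> prefetch chained on one Dataset (illustrative sketch)
pipeline = (tf.data.Dataset.from_tensor_slices((train_data, train_label))
            .map(rot90, num_parallel_calls=tf.data.experimental.AUTOTUNE)
            .shuffle(buffer_size=10000)
            .batch(4)
            .prefetch(tf.data.experimental.AUTOTUNE))

for images, labels in pipeline.take(1):   # take(1) yields just the first batch
    print(images.shape, labels.shape)     # (4, 28, 28, 1) (4,)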
Example
import tensorflow as tf
import os

num_epochs = 10
batch_size = 32
learning_rate = 0.001
data_dir = 'C:/datasets/cats_vs_dogs'
train_cats_dir = data_dir + '/train/cats/'
train_dogs_dir = data_dir + '/train/dogs/'
test_cats_dir = data_dir + '/valid/cats/'
test_dogs_dir = data_dir + '/valid/dogs/'

def _decode_and_resize(filename, label):
    image_string = tf.io.read_file(filename)             # read the raw file
    image_decoded = tf.image.decode_jpeg(image_string)   # decode the JPEG image
    image_resized = tf.image.resize(image_decoded, [256, 256]) / 255.0
    return image_resized, label

if __name__ == '__main__':
    # Build the training dataset
    train_cat_filenames = tf.constant([train_cats_dir + filename for filename in os.listdir(train_cats_dir)])
    train_dog_filenames = tf.constant([train_dogs_dir + filename for filename in os.listdir(train_dogs_dir)])
    train_filenames = tf.concat([train_cat_filenames, train_dog_filenames], axis=-1)
    train_labels = tf.concat([
        tf.zeros(train_cat_filenames.shape, dtype=tf.int32),
        tf.ones(train_dog_filenames.shape, dtype=tf.int32)],
        axis=-1)

    train_dataset = tf.data.Dataset.from_tensor_slices((train_filenames, train_labels))   # build the Dataset
    train_dataset = train_dataset.map(                   # decode and resize every element
        map_func=_decode_and_resize,
        num_parallel_calls=tf.data.experimental.AUTOTUNE)
    # Fill a buffer with the first buffer_size elements, sample randomly from it,
    # and replace each sampled element with the next one from the stream
    train_dataset = train_dataset.shuffle(buffer_size=23000)                # shuffle
    train_dataset = train_dataset.batch(batch_size)
    train_dataset = train_dataset.prefetch(tf.data.experimental.AUTOTUNE)   # prefetch

    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(32, 3, activation='relu', input_shape=(256, 256, 3)),
        tf.keras.layers.MaxPooling2D(),
        tf.keras.layers.Conv2D(32, 5, activation='relu'),
        tf.keras.layers.MaxPooling2D(),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(2, activation='softmax')
    ])

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss=tf.keras.losses.sparse_categorical_crossentropy,
        metrics=[tf.keras.metrics.sparse_categorical_accuracy]
    )

    model.fit(train_dataset, epochs=num_epochs)   # train on the Dataset directly

    # Build the test dataset
    test_cat_filenames = tf.constant([test_cats_dir + filename for filename in os.listdir(test_cats_dir)])
    test_dog_filenames = tf.constant([test_dogs_dir + filename for filename in os.listdir(test_dogs_dir)])
    test_filenames = tf.concat([test_cat_filenames, test_dog_filenames], axis=-1)
    test_labels = tf.concat([
        tf.zeros(test_cat_filenames.shape, dtype=tf.int32),
        tf.ones(test_dog_filenames.shape, dtype=tf.int32)],
        axis=-1)

    test_dataset = tf.data.Dataset.from_tensor_slices((test_filenames, test_labels))
    test_dataset = test_dataset.map(_decode_and_resize)
    test_dataset = test_dataset.batch(batch_size)

    print(model.metrics_names)
    print(model.evaluate(test_dataset))
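Beyond evaluate(), the trained model can also produce per-image predictions. The following is a hedged sketch (not part of the original example) that reuses the model and test_dataset defined above; taking argmax over the softmax output recovers the class index, where 0 = cat and 1 = dog as labelled when the dataset was built.

for images, labels in test_dataset.take(1):      # peek at one test batch
    probs = model.predict(images)                # softmax output, shape [batch_size, 2]
    preds = tf.argmax(probs, axis=-1)            # predicted class index per image
    print(preds.numpy(), labels.numpy())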