[Paddle学习笔记][02][MNIST转换到PNG]
说明:
MNIST手写数字数据集非常经典,它由60000个训练样本和10000个测试样本组成,每个样本都是一张28×28的灰度图片。直接下载下来的MNIST数据是无法通过解压或应用程序打开的,因为它里面的文件不是标准的图像格式,而是以字节的形式存储的,所以需要借助编程的手段来打开。
实验代码:
import os
import struct
import png
from array import array

# Paths of the raw MNIST IDX files and of the output root folders.
train_images = './train-images-idx3-ubyte'
train_labels = './train-labels-idx1-ubyte'
test_images = './t10k-images-idx3-ubyte'
test_labels = './t10k-labels-idx1-ubyte'
train_folder = './train'
test_folder = './test'


def make_class_folders(root):
    """Create `root` plus one sub-folder per digit 0-9 and return the sub-folder paths."""
    if not os.path.exists(root):
        os.makedirs(root)
    folders = [os.path.join(root, str(digit)) for digit in range(10)]
    for folder in folders:
        if not os.path.exists(folder):
            os.makedirs(folder)
    return folders


def read_idx_images(path):
    """Read an IDX3 image file.

    Returns a tuple `(pixels, rows, cols)` where `pixels` is a flat
    array of unsigned bytes holding every image back to back.
    """
    # `with` guarantees the file is closed even if reading fails.
    with open(path, 'rb') as f:
        # 16-byte big-endian header: magic, image count, rows, cols.
        _magic, _count, rows, cols = struct.unpack('>IIII', f.read(16))
        pixels = array('B', f.read())
    return pixels, rows, cols


def read_idx_labels(path):
    """Read an IDX1 label file and return the labels as an array of unsigned bytes."""
    with open(path, 'rb') as f:
        # 8-byte big-endian header: magic, label count (values not needed here).
        struct.unpack('>II', f.read(8))
        # Labels are unsigned bytes; reading them as signed could produce
        # negative values that would silently index folders from the end.
        return array('B', f.read())


def save_pngs(pixels, rows, cols, labels, folders):
    """Write image `i` as `<folders[label]>/<i>.png` for every label in `labels`."""
    image_size = rows * cols
    for i, label in enumerate(labels):
        filename = os.path.join(folders[label], str(i) + '.png')
        print('writing ' + filename)
        base = i * image_size
        # pypng expects an iterable of rows, each row a sequence of pixel values.
        data = [pixels[base + r * cols: base + (r + 1) * cols] for r in range(rows)]
        with open(filename, 'wb') as img:
            image = png.Writer(cols, rows, greyscale=True)
            image.write(img, data)


# Create the output directories.
train_folders = make_class_folders(train_folder)
test_folders = make_class_folders(test_folder)

# Load the raw data.
train_img, train_rows, train_cols = read_idx_images(train_images)
train_lab = read_idx_labels(train_labels)
test_img, test_rows, test_cols = read_idx_images(test_images)
test_lab = read_idx_labels(test_labels)

# Save the training images, then the test images.
save_pngs(train_img, train_rows, train_cols, train_lab, train_folders)
save_pngs(test_img, test_rows, test_cols, test_lab, test_folders)
实验结果:
参考资料:
https://blog.csdn.net/SongGu1996/article/details/98849274