tensorflow input_data.py(mnist_softmax.py)学习
最新的tensorflow1.2中,使用新的导入脚本 mnist_softmax.py,用之前老的会有问题。脚本内容大致相同。
最好还是参考www.tensorflow.org里面的教程,中文版的那个太老了。
"""Functions for downloading and reading MNIST data."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import gzip
import os
import numpy
from six.moves import urllib
from six.moves import xrange # pylint: disable=redefined-builtin
# Base URL of the MNIST archives; maybe_download appends the file name to it.
SOURCE_URL = 'http://yann.lecun.com/exdb/mnist/'
__future__ :这个是把下个版本py3.x中的特性导入到当前版本py2.x。 2.x,3.x的功能是同时开发的,不是“预知”。
gzip: gzip.open('file', 'r'/'a'/'w'/'x') 打开压缩文档。
os:operating system,操作系统。
os.getcwd() #显示当前python路径;
os.path.exists(name) #判断是否存在文件或目录name
os.path.join(path,name) #连接目录和文件名或另一个目录
os.stat(path) #获取文档或目录信息,并返回。st_size文件大小
numpy: 数值,数组,矩阵等科学计算相关的安装包*很重要
numpy.dtype #数据类型对象,描述数组元素的类型(也可用来指定字节顺序) uint32: unsigned int 32
byteorder 字节顺序。big-endian:内存中低位地址放高位数值,‘>’; little-endian: 内存中低位地址放低位数值'<'.
numpy.frombuffer #Interpret a buffer as a 1-dimensional array
numpy.reshape 改变数组的形状(行列数);某一维写成 -1,表示该维的大小由元素总数和其他各维的大小自动推算
numpy.arange(start, stop, step, dtype=None) 返回一个等间隔取值的数组,区间前闭后开
numpy.zeros(shape, dtype=float, order='C') 返回一个给定shape和type,以0填充的数组,shape可以为ex(2,6)etc矩阵形式
numpy.ndarray.flat 一个n维数组的一维迭代器。对数组进行行扫描,从0开始
numpy.ravel( a, order='C') 将数组返回为一个一维连续数组
numpy.random.shuffle(array) 将数组随机洗牌
six:封装python2,python3之间差别的安装包
urllib: 扒网页信息等
urllib.request.urlretrieve(url,filename=None, reporthook=None, data=None)
#urlretrieve将url的html文件下载到本地,如果没有输入filename,那么存为临时文件
#返回为二元组(filename, headers),所以代码中用 filepath, _ 来接收
常量(如 SOURCE_URL)的变量名用大写
Part2: 函数定义
def maybe_download(filename, work_directory):
    """Return the local path of an MNIST archive, downloading it if missing.

    Args:
        filename: Name of the archive; appended to SOURCE_URL to build the
            download URL and joined to work_directory to build the local path.
        work_directory: Directory that holds the downloaded files. It is
            created (including missing parents) when it does not exist.

    Returns:
        The path of the file inside work_directory.
    """
    if not os.path.exists(work_directory):
        # makedirs instead of mkdir so a nested cache directory also works.
        os.makedirs(work_directory)
    filepath = os.path.join(work_directory, filename)
    if not os.path.exists(filepath):
        # Only hit the network when the file is not already cached locally.
        filepath, _ = urllib.request.urlretrieve(SOURCE_URL + filename, filepath)
        statinfo = os.stat(filepath)
        print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
    return filepath


def _read32(bytestream):
    """Read one big-endian unsigned 32-bit integer from a binary stream.

    Args:
        bytestream: Object with a read(n) method returning bytes.

    Returns:
        The value as a plain Python int, so that it can be used directly as a
        read size, a reshape dimension or a '%d' format argument.

    Raises:
        ValueError: If fewer than 4 bytes could be read.
    """
    raw = bytestream.read(4)
    if len(raw) != 4:
        raise ValueError(
            'Expected 4 bytes for a 32-bit header field, got %d' % len(raw))
    dt = numpy.dtype(numpy.uint32).newbyteorder('>')
    # frombuffer yields a 1-element array; take the element and convert it to
    # int, because a 1-element array is not accepted as a size or an index.
    return int(numpy.frombuffer(raw, dtype=dt)[0])


def extract_images(filename):
    """Extract the images into a 4D uint8 numpy array [index, y, x, depth].

    Args:
        filename: Path of a gzip-compressed image file whose header is four
            big-endian 32-bit fields: magic number 2051, image count, rows,
            columns, followed by one byte per pixel.

    Returns:
        A uint8 array of shape (num_images, rows, cols, 1).

    Raises:
        ValueError: If the magic number is not 2051.
    """
    print('Extracting', filename)
    with gzip.open(filename) as bytestream:
        magic = _read32(bytestream)
        if magic != 2051:
            raise ValueError(
                'Invalid magic number %d in MNIST image file: %s' %
                (magic, filename))
        num_images = _read32(bytestream)
        rows = _read32(bytestream)
        cols = _read32(bytestream)
        buf = bytestream.read(rows * cols * num_images)
        data = numpy.frombuffer(buf, dtype=numpy.uint8)
        # The trailing 1 is the depth (single channel) dimension.
        data = data.reshape(num_images, rows, cols, 1)
        return data


def dense_to_one_hot(labels_dense, num_classes=10):
    """Convert class labels from scalars to one-hot vectors.

    Args:
        labels_dense: Integer numpy array of class indices; its first
            dimension is the number of labels.
        num_classes: Width of each one-hot row.

    Returns:
        A float array of shape (num_labels, num_classes) that is 1 at
        [i, labels_dense[i]] and 0 elsewhere.
    """
    num_labels = labels_dense.shape[0]
    # Offset of the first element of each row in the flattened result.
    index_offset = numpy.arange(num_labels) * num_classes
    labels_one_hot = numpy.zeros((num_labels, num_classes))
    labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
    return labels_one_hot


def extract_labels(filename, one_hot=False):
    """Extract the labels into a 1D uint8 numpy array [index].

    Args:
        filename: Path of a gzip-compressed label file whose header is two
            big-endian 32-bit fields: magic number 2049 and item count,
            followed by one byte per label.
        one_hot: When true, return the labels converted by dense_to_one_hot
            instead of the raw class indices.

    Returns:
        A 1D uint8 array of labels, or a 2D one-hot array when one_hot is set.

    Raises:
        ValueError: If the magic number is not 2049.
    """
    print('Extracting', filename)
    with gzip.open(filename) as bytestream:
        magic = _read32(bytestream)
        if magic != 2049:
            raise ValueError(
                'Invalid magic number %d in MNIST label file: %s' %
                (magic, filename))
        num_items = _read32(bytestream)
        buf = bytestream.read(num_items)
        labels = numpy.frombuffer(buf, dtype=numpy.uint8)
        if one_hot:
            return dense_to_one_hot(labels)
        return labels
""" """文档字符串,描述函数功能,做什么。
_read32(bytestream)这个内部函数没有看太懂,后面再看。(大致作用:从字节流中读取4个字节,按big-endian解释成一个无符号32位整数;代码里用它依次读取文件头中的magic number、数量、行数、列数。)
astype 变量类型变换
@property 装饰器 负责将一个方法变成属性