常见代码

1.检测文件/目录 是否存在

from os.path import isfile, isdir
# Fail fast when the VGG directory is missing.
# vgg_dir has to be assigned before it is checked.
vgg_dir = 'tensorflow_vgg/'
if not isdir(vgg_dir):
    raise Exception("VGG directory doesn't exist!")

列出指定目录下的所有内容,并筛选出其中的子目录名

import os
# List everything under data_dir, then keep only the entries that are
# directories (plain files such as LICENSE.txt are filtered out).
data_dir = 'flower_photos/'
contents = os.listdir(data_dir)
print(contents)
# os.path.join keeps the path valid whether or not data_dir ends with a slash.
classes = [each for each in contents if os.path.isdir(os.path.join(data_dir, each))]
print(classes)
输出:
['daisy', 'dandelion', 'LICENSE.txt', 'roses', 'sunflowers', 'tulips']
['daisy', 'dandelion', 'roses', 'sunflowers', 'tulips']

 

 

2.进度条:

https://blog.csdn.net/qq_40666028/article/details/79335961

from tqdm import trange
import time
# Drive a progress bar through 200 steps, sleeping 0.1 s on each step.
for _ in trange(200):
    time.sleep(0.1)

 

from tqdm import tqdm
from urllib.request import urlretrieve
class DLProgress(tqdm):
    """Progress bar (subclass of tqdm) whose ``hook`` is passed to ``urlretrieve``."""

    # Block number seen on the previous hook call; used to compute the delta.
    last_block = 0

    def hook(self, block_num=1, block_size=1, total_size=None):
        """Advance the bar by the amount transferred since the previous call.

        Args:
            block_num: Block number reported by the current call.
            block_size: Size of one block.
            total_size: Total size to download; stored as the bar's total.
        """
        self.total = total_size
        self.update((block_num - self.last_block) * block_size)
        self.last_block = block_num


with DLProgress(unit='B', unit_scale=True, miniters=1, desc='VGG16 Parameters') as pbar:
    # urlretrieve(url, filename=None, reporthook=None, data=None) downloads
    # remote data straight to local disk.
    #   filename   - local path to save to (if omitted, urllib creates a
    #                temporary file to hold the data).
    #   reporthook - callback triggered once the connection to the server is
    #                established and again whenever a data block has been
    #                transferred; it is used here to show download progress.
    #   data       - data to POST to the server.
    # It returns a two-element tuple (filename, headers): filename is the local
    # path the data was saved to, headers is the server's response headers.
    urlretrieve(
        'https://s3.amazonaws.com/content.udacity-data.com/nd101/vgg16.npy',
        vgg_dir + 'vgg16.npy',
        pbar.hook)

 

#!/usr/bin/env python
# coding=utf-8
import os
import urllib

def cbk(a, b, c):
    """Print download progress; intended as a ``urlretrieve`` callback.

    Args:
        a: Number of data blocks downloaded so far.
        b: Size of one data block.
        c: Size of the remote file; zero or negative when it is not known.
    """
    if c <= 0:
        # Without a usable total a percentage is meaningless (and c == 0 would
        # divide by zero), so report the amount downloaded instead.
        print('%d bytes' % (a * b))
        return
    # The last block may overshoot the file size, so cap at 100%.
    per = min(100.0 * a * b / c, 100)
    # print(...) with a single argument works on both Python 2 and Python 3.
    print('%.2f%%' % per)

# Download the Python 2.7.5 source tarball into the current working directory,
# reporting progress through cbk.
try:
    from urllib.request import urlretrieve  # Python 3
except ImportError:
    from urllib import urlretrieve  # Python 2

url = 'http://www.python.org/ftp/python/2.7.5/Python-2.7.5.tar.bz2'
# Named download_dir rather than dir so the builtin dir() is not shadowed.
download_dir = os.path.abspath('.')
work_path = os.path.join(download_dir, 'Python-2.7.5.tar.bz2')
urlretrieve(url, work_path, cbk)

 

3.压缩和解压缩

import tarfile

dataset_folder_path = 'flower_photos'


# Download the archive into the current directory first.
class DLProgress(tqdm):
    """Progress bar (subclass of tqdm) whose ``hook`` is passed to ``urlretrieve``."""

    # Block number seen on the previous hook call; used to compute the delta.
    last_block = 0

    def hook(self, block_num=1, block_size=1, total_size=None):
        """Advance the bar by the amount transferred since the previous call."""
        self.total = total_size
        self.update((block_num - self.last_block) * block_size)
        self.last_block = block_num


# Skip the download when the archive is already present.
if not isfile('flower_photos.tar.gz'):
    with DLProgress(unit='B', unit_scale=True, miniters=1, desc='Flowers Dataset') as pbar:
        urlretrieve(
            'http://download.tensorflow.org/example_images/flower_photos.tgz',
            'flower_photos.tar.gz',
            pbar.hook)

# Once downloaded, extract the archive into the current directory.
if not isdir(dataset_folder_path):
    # The with-block closes the archive, so no explicit tar.close() is needed.
    # NOTE(review): extractall() trusts the member paths stored in the archive;
    # only use it on archives from a trusted source.
    with tarfile.open('flower_photos.tar.gz') as tar:
        tar.extractall()

 

# Extract the first 1000 entries of imgs.zip into dir_path, showing progress.
# Nothing is done when dir_path already exists.
if not isdir('dir_path'):
    # The with-block closes the archive, so no explicit zipf.close() is needed.
    with ZipFile('imgs.zip', 'r') as zipf:
        for name in tqdm(zipf.namelist()[:1000], desc='Extract files', unit='files'):
            zipf.extract(name, path='dir_path')

4. numpy 数组的保存和加载

一般 TensorFlow 的 session.run 返回的结果是 numpy 数组,可以先保存到文件,以后需要时再加载。

# Write codes to file. tofile() dumps the array's raw bytes, so the file is
# opened in binary mode to keep them free of newline translation.
with open('codes', 'wb') as f:
    codes.tofile(f)

# Write labels to file: a single csv row with '\n' as delimiter, which puts
# each label on its own line.
import csv
# newline='' leaves line endings to the csv module, as its documentation asks.
with open('labels', 'w', newline='') as f:
    writer = csv.writer(f, delimiter='\n')
    writer.writerow(labels)
# Read codes and labels back from file.
import csv

with open('labels') as f:
    reader = csv.reader(f, delimiter='\n')
    # The reader is lazy, so it has to be consumed while the file is still open.
    # squeeze() removes dimensions of size 1:
    # https://blog.csdn.net/lqfarmer/article/details/73323449
    labels = np.array([each for each in reader if len(each) > 0]).squeeze()

# The data is read as raw float32 values, so open the file in binary mode.
with open('codes', 'rb') as f:
    codes = np.fromfile(f, dtype=np.float32)

# See https://blog.csdn.net/weixin_39449570/article/details/78619196
# -1 lets NumPy work out the number of columns automatically.
codes = codes.reshape((len(labels), -1))
                                             

 5. One hot encoding

# scikit-learn approach: done in three lines.
from sklearn.preprocessing import LabelBinarizer

lb = LabelBinarizer()
lb.fit(labels)
labels_vecs = lb.transform(labels)
# Keras approach: done in one line (per label array).
from keras.utils import np_utils

# One-hot encode the labels.
num_classes = len(np.unique(y_train))
# Call to_categorical through np_utils, the name that is actually imported;
# the bare name `keras` is never bound by the import above.
y_train = np_utils.to_categorical(y_train, num_classes)
y_test = np_utils.to_categorical(y_test, num_classes)
# TensorFlow approach:
import tensorflow as tf

CLASS = len(np.unique([0, 1, 2, 3, 4, 5, 6, 7]))
label1 = tf.constant([0, 1, 2, 3, 4, 5, 6, 7])
# Arguments after the labels: depth CLASS, then 1 and 0 as the on/off values
# (see the printed result: 1 at the label position, 0 elsewhere).
b = tf.one_hot(label1, CLASS, 1, 0)

# A single session evaluates both tensors; the with-block closes it.
with tf.Session() as sess:
    print('label1:', sess.run(label1))
    print('after one_hot', sess.run(b))


# The core takes just four lines:
label1 = tf.constant([0, 1, 2, 3, 4, 5, 6, 7])
b = tf.one_hot(label1, 8, 1, 0)
with tf.Session() as sess:
    sess.run(b)
     
结果:
label1: [0 1 2 3 4 5 6 7]
after one_hot [[1 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0]
 [0 0 1 0 0 0 0 0]
 [0 0 0 1 0 0 0 0]
 [0 0 0 0 1 0 0 0]
 [0 0 0 0 0 1 0 0]
 [0 0 0 0 0 0 1 0]
 [0 0 0 0 0 0 0 1]]

 

 

6. split 训练集  验证集 测试集

from sklearn.model_selection import StratifiedShuffleSplit
# https://blog.csdn.net/m0_38061927/article/details/76180541
# One stratified shuffle split: 80% for training, 20% held out.
ss = StratifiedShuffleSplit(n_splits=1, test_size=0.2)
train_idx, val_idx = next(ss.split(codes, labels))

# Cut the held-out indices in half: first half validation, second half test.
half_val_len = len(val_idx) // 2
test_idx = val_idx[half_val_len:]
val_idx = val_idx[:half_val_len]

# Select the matching rows of features (codes) and one-hot labels (labels_vecs).
train_x = codes[train_idx]
train_y = labels_vecs[train_idx]
val_x = codes[val_idx]
val_y = labels_vecs[val_idx]
test_x = codes[test_idx]
test_y = labels_vecs[test_idx]

 

将 tf 的 session 结果保存到磁盘上

# Save the session to a checkpoint file on disk.
saver = tf.train.Saver()

with tf.Session() as sess:
    # ... (placeholder: the code that runs in the session goes here)
    # ...
    saver.save(sess, "checkpoints/flowers.ckpt")

 

检测是否能用 GPU,可用则返回 True

import tensorflow as tf
# Check whether TensorFlow can use a GPU; the call returns True if it can.
tf.test.is_gpu_available(cuda_only=False, min_cuda_compute_capability=None)

 

keras常用代码

 

from keras.layers import Conv2D

# Convolutional layer with 16 filters; kernel_size is the filter size and
# strides is the sliding step (it defaults to 1; here it is set to 2).
Conv2D(
    filters=16,
    kernel_size=2,
    strides=2,
    activation='relu',
    input_shape=(200, 200, 1),
)

 

字符处理

from string import punctuation # 标点符号!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~

# Walk over every character, drop the punctuation marks, and join what is left.
doc = ''.join(c for c in reviews if c not in punctuation)

# Remove line breaks: split on '\n', then join the pieces with spaces.
reviews = doc.split('\n')
all_text = ' '.join(reviews)

# Tokenize; split() with no argument separates on whitespace.
words = all_text.split()

 

posted @ 2018-06-23 17:55  Daniel_Lu  阅读(705)  评论(0)  编辑  收藏  举报