TensorFlow简单实例:titanic实战

1. 数据读取与预处理

将数据集中缺失的字段全部填充为0

选取6个特征字段'Sex', 'Age', 'Pclass', 'SibSp', 'Parch', 'Fare'用于分类(注意:下面的示例代码只把'Sex'字段转换为0/1数值,并未对其余字段做正规化处理,而是直接使用原始数值)

用Survived和Deceased表示乘客存活或死亡的分类

# 数据读入及预处理
import os
import numpy as np
import pandas as pd
import tensorflow as tf
# Load the Titanic training data from the current working directory.
data = pd.read_csv("train.csv")
# DataFrame.info() prints its summary itself and returns None, so it is not
# wrapped in print() (which would emit an extra "None" line).
data.info()
# Encode sex numerically: 1 for 'male', 0 for anything else (including NaN).
data['Sex'] = (data['Sex'] == 'male').astype(int)
# Replace every missing value with 0.
data = data.fillna(0)
# Feature columns used for classification.
dataset_x = data[['Sex', 'Age', 'Pclass', 'SibSp', 'Parch', 'Fare']]
# DataFrame.as_matrix() is deprecated and absent from current pandas;
# .values returns the same NumPy array.
dataset_x = dataset_x.values
# Two label columns: 'Survived' and its complement 'Deceased'
# (1 where 'Survived' is 0, otherwise 0).
data['Deceased'] = (data['Survived'] == 0).astype(int)
dataset_Y = data[['Deceased', 'Survived']]
dataset_Y = dataset_Y.values
print(dataset_x, dataset_Y)

# The training/evaluation code further down uses x_train, y_train, x_test and
# y_test, so hold out 20% of the rows for validation. A fixed seed keeps the
# split reproducible between runs.
_shuffled_rows = np.random.RandomState(42).permutation(len(dataset_x))
_n_test = int(len(dataset_x) * 0.2)
_test_rows = _shuffled_rows[:_n_test]
_train_rows = _shuffled_rows[_n_test:]
x_train, x_test = dataset_x[_train_rows], dataset_x[_test_rows]
y_train, y_test = dataset_Y[_train_rows], dataset_Y[_test_rows]

 2. 构建计算图

# Build the computation graph (TensorFlow 1.x graph-mode API).
# Input placeholders: 6 feature columns and 2 label columns per sample;
# the batch dimension is left open (None).
x = tf.placeholder(tf.float32, shape=[None, 6], name='input')
y = tf.placeholder(tf.float32, shape=[None, 2], name='label')
# Trainable parameters: normally distributed initial weights, zero bias.
W = tf.Variable(tf.random_normal([6, 2]), name='weights')
b = tf.Variable(tf.zeros([2]), name='bias')
# Forward pass: linear layer followed by softmax.
y_pred = tf.nn.softmax(tf.matmul(x, W) + b)
# Cost function: per-sample cross-entropy. The 1e-10 offset keeps log() away
# from log(0). `axis` is used instead of the deprecated `reduction_indices`
# alias; both select the same dimension.
cross_entropy = -tf.reduce_sum(y * tf.log(y_pred + 1e-10), axis=1)
# The cost of a batch is the mean cross-entropy over its samples.
cost = tf.reduce_mean(cross_entropy)
# Optimizer: gradient descent with learning rate 0.001; minimize() adds the
# backward-pass ops to the graph.
train_op = tf.train.GradientDescentOptimizer(0.001).minimize(cost)
# Accuracy: tf.equal compares the label argmax with the prediction argmax
# element-wise (True where they match, False otherwise); casting to float and
# averaging gives the fraction of correct predictions.
correct_pred = tf.equal(tf.argmax(y, 1), tf.argmax(y_pred, 1))
acc_op = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
# Graph declaration is complete at this point.

3. 构建训练迭代过程

# Training loop followed by evaluation, all inside one session.
with tf.Session() as sess:
    # Variables must be initialised before any other op is run.
    sess.run(tf.global_variables_initializer())

    # Train for 10 epochs, feeding one sample per optimisation step.
    for epoch in range(10):
        total_loss = 0.
        for features, label in zip(x_train, y_train):
            # session.run triggers execution of the requested ops; the
            # train_op result is discarded and only the cost is kept.
            step_out = sess.run(
                [train_op, cost],
                feed_dict={x: [features], y: [label]},
            )
            total_loss += step_out[1]
        print('Epoch: %04d, total loss=%.9f' % (epoch+1, total_loss))
    print('Training complete!')

    # Evaluate on the validation split.
    eval_feed = {x: x_test, y: y_test}

    # Accuracy computed by the TensorFlow graph.
    accuracy = sess.run(acc_op, feed_dict=eval_feed)
    print("Accuracy on validation set:%.9f" % accuracy)

    # Accuracy recomputed with NumPy from the raw predictions.
    pred = sess.run(y_pred, feed_dict=eval_feed)
    predicted_class = np.argmax(pred, 1)
    true_class = np.argmax(y_test, 1)
    correct = np.equal(predicted_class, true_class)
    numpy_accuracy = np.mean(correct.astype(np.float32))
    print('Accuracy on validation set (numpy): %.9f' % numpy_accuracy)

 

posted @ 2018-07-03 21:17  松花酿酒春水煎茶  阅读(498)  评论(0)  编辑  收藏  举报