# Tensorflow暑期实践——波士顿房价预测(全部代码)
# (TensorFlow summer practicum: Boston housing price prediction — complete code)
# coding: utf-8
# Boston housing price prediction with a linear regression model (TensorFlow 1.x).
#
# Dataset columns (data/boston.csv), in order:
#   CRIM    per-capita crime rate by town
#   ZN      proportion of residential land zoned for lots over 25,000 sq.ft.
#   INDUS   proportion of non-retail business acres per town
#   CHAS    Charles River dummy variable (1 if tract bounds river, else 0)
#   NOX     nitric-oxide concentration
#   RM      average number of rooms per dwelling
#   AGE     proportion of owner-occupied units built before 1940
#   DIS     weighted distance to five Boston employment centres
#   RAD     index of accessibility to radial highways
#   TAX     full-value property-tax rate per $10,000
#   PTRATIO pupil-teacher ratio by town
#   LSTAT   percentage of lower-status population
#   MEDV    median home value in $1000s (the regression target)

get_ipython().run_line_magic('matplotlib', 'notebook')

import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.contrib.learn as skflow  # NOTE(review): unused, kept so file-level imports are unchanged
from sklearn.utils import shuffle

os.environ["CUDA_VISIBLE_DEVICES"] = "-1"  # force CPU-only execution
print(tf.__version__)
print(tf.test.is_gpu_available())

# Feature column names, in CSV order; the 13th column (index 12) is MEDV.
FEATURE_NAMES = ["CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE",
                 "DIS", "RAD", "TAX", "PTRATIO", "LSTAT"]
NUM_FEATURES = len(FEATURE_NAMES)  # 12 — replaces the magic constant scattered below

# ---- Load and normalize the data -------------------------------------------
df = pd.read_csv("data/boston.csv", header=0)
print(df.describe())

df = np.array(df)
# Min-max scale each feature column to [0, 1] so no single feature dominates
# gradient updates. The target column (index 12) is left unscaled.
for i in range(NUM_FEATURES):
    col_min = df[:, i].min()
    col_max = df[:, i].max()
    df[:, i] = (df[:, i] - col_min) / (col_max - col_min)

x_data = df[:, :NUM_FEATURES]
y_data = df[:, NUM_FEATURES]

# ---- Build the linear-model graph ------------------------------------------
# BUG FIX: the original comment here claimed "3 influencing factors"; the
# placeholder and weight matrix actually use all 12 features.
x = tf.placeholder(tf.float32, [None, NUM_FEATURES], name="x")
y = tf.placeholder(tf.float32, [None, 1], name="y")

with tf.name_scope("Model"):
    w = tf.Variable(tf.random_normal([NUM_FEATURES, 1], stddev=0.01), name="w0")
    b = tf.Variable(1., name="b0")

    def model(x, w, b):
        """Linear prediction: x @ w + b."""
        return tf.matmul(x, w) + b

    pred = model(x, w, b)

train_epochs = 500   # number of passes over the full dataset
learning_rate = 0.01

with tf.name_scope("LossFunction"):
    loss_function = tf.reduce_mean(tf.pow(y - pred, 2))  # mean squared error (MSE)

optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss_function)

sess = tf.Session()
init = tf.global_variables_initializer()
tf.train.write_graph(sess.graph, 'log2/boston', 'graph.pbtxt')

loss_op = tf.summary.scalar("loss", loss_function)
merged = tf.summary.merge_all()

sess.run(init)
writer = tf.summary.FileWriter('log/boston', sess.graph)

# ---- Train one sample at a time (SGD, batch size 1) ------------------------
loss_list = []
for epoch in range(train_epochs):
    loss_sum = 0.0
    for xs, ys in zip(x_data, y_data):
        z1 = xs.reshape(1, NUM_FEATURES)
        z2 = ys.reshape(1, 1)
        # BUG FIX: fetch the loss summary in the same sess.run as the
        # optimizer, instead of a second run that recomputed the whole
        # forward pass for every sample.
        _, loss, summary_str = sess.run(
            [optimizer, loss_function, loss_op],
            feed_dict={x: z1, y: z2})
        loss_sum += loss

    # BUG FIX: write one summary per epoch rather than one per sample — the
    # original emitted ~506 points all tagged with the same epoch step.
    writer.add_summary(summary_str, epoch)

    # Reshuffle between epochs so SGD does not see a fixed sample order.
    x_data, y_data = shuffle(x_data, y_data)

    b0temp = b.eval(session=sess)
    w0temp = w.eval(session=sess)
    loss_average = loss_sum / len(y_data)
    loss_list.append(loss_average)
    print("epoch=", epoch + 1, "loss=", loss_average, "b=", b0temp, "w=", w0temp)

# BUG FIX: the original printed only 3 of the 12 learned weights and labeled
# them CRIM/DIS/LSTAT even though indices 1 and 2 are ZN and INDUS. Print the
# complete fitted equation with correctly-matched feature names instead.
terms = " + ".join("%g*%s" % (w0temp[i][0], FEATURE_NAMES[i])
                   for i in range(NUM_FEATURES))
print("y =", terms, "+", b0temp)

plt.plot(loss_list)
plt.show()

# Release resources explicitly (the original leaked both).
writer.close()
sess.close()