神经网络分类
https://www.lanqiao.cn/courses/1029
对汽车燃料效率建模
数据集
http://labfile.oss.aliyuncs.com/courses/1029/mpg.csv
代码
# coding: utf-8
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import model_selection, metrics
from sklearn import preprocessing
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Equivalent of the notebook magic `%matplotlib inline`, written so the
# script also parses and runs outside IPython/Jupyter.
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    pass

# Load the dataset; the first CSV row holds the column names.
df = pd.read_csv("mpg.csv", header=0)

# Convert the displacement column to floating point numbers.
df['displacement'] = df['displacement'].astype(float)

# Features are columns 1..7. Column 0 (miles per gallon) is the target and
# the last column (car name) is ignored.
X = df[df.columns[1:8]]
y = df['mpg']

# Show the raw data: one scatter plot of each feature against mpg,
# laid out on a 4x2 grid inside a single figure.
plt.figure()
for i in range(1, 8):
    ax = plt.subplot(4, 2, i)
    # Limit the tick count on the axes that was just created (the original
    # applied this to the previously created axes instead).
    ax.locator_params(nbins=3)
    ax.set_title(df.columns[i])
    ax.scatter(df[df.columns[i]], y)
plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0)
# Split the dataset: 25% of the rows are held out for testing.
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.25)

# Standardize the features to help the optimizer converge. The scaler is
# fitted on the training split only and then reused for the test split.
scaler = preprocessing.StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fully connected network: two hidden layers with 10 and 5 units, followed
# by a single linear output unit for the regression target.
model = Sequential()
model.add(Dense(10, input_dim=X_train.shape[1],
                kernel_initializer='normal', activation='relu'))
model.add(Dense(5, kernel_initializer='normal', activation='relu'))
model.add(Dense(1, kernel_initializer='normal'))

# Compile the model with mean squared error as the loss function.
model.compile(loss='mean_squared_error', optimizer='adam')

# Fit the model for 1000 epochs; 33% of the training rows are used for
# validation.
model.fit(X_train, y_train, epochs=1000, validation_split=0.33,
          shuffle=True, verbose=2)

# Evaluate on the held-out split. mean_squared_error expects
# (y_true, y_pred); the (n, 1) prediction array is flattened to match y_test.
y_pred = model.predict(X_test_scaled).ravel()
score = metrics.mean_squared_error(y_test, y_pred)
print(" Total Mean Squared Error: " + str(score))
葡萄酒分类
数据集
http://labfile.oss.aliyuncs.com/courses/1029/wine.csv
代码
# coding: utf-8
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
from sklearn.utils import shuffle
from sklearn import preprocessing

# Equivalent of the notebook magic `%matplotlib inline`, written so the
# script also parses and runs outside IPython/Jupyter.
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    pass

# Load the dataset; the first CSV row holds the column names.
df = pd.read_csv("wine.csv", header=0)
print(df.describe())

# Scatter plot of columns 1..7 against the 'Wine' class label on a 4x2 grid.
for i in range(1, 8):
    ax = plt.subplot(4, 2, i)
    ax.locator_params(nbins=3)
    # The original line was a bare `plt.title` (never called), so no title
    # was drawn; label each subplot with its column name.
    ax.set_title(df.columns[i])
    ax.scatter(df[df.columns[i]], df['Wine'])
plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0)
# Features are columns 1..12; the target is the 'Wine' label shifted by one
# so that the class ids start at 0.
# NOTE(review): the slice 1:13 stops before column 13 - confirm whether the
# last feature column is meant to be excluded.
X = df[df.columns[1:13]].values
Y = df['Wine'].values - 1

# Shuffle once, then fix the train/test split for the whole run. Re-splitting
# inside the training loop (as the original did) lets rows that were trained
# on end up in the evaluation set.
N_TRAIN = 140
X, Y = shuffle(X, Y)
Xtr, Ytr = X[:N_TRAIN], Y[:N_TRAIN]
Xt, Yt = X[N_TRAIN:], Y[N_TRAIN:]

# Standardize using statistics of the training rows only.
scaler = preprocessing.StandardScaler()
Xtr = scaler.fit_transform(Xtr)
Xt = scaler.transform(Xt)


class Linear(tf.keras.Model):
    """Softmax classifier: one dense layer with three output units."""

    def __init__(self):
        super().__init__()
        # Fully connected layer (tf.keras.layers.Dense); kernel and bias
        # both start at zero.
        self.dense = tf.keras.layers.Dense(
            units=3,
            activation="softmax",
            kernel_initializer=tf.zeros_initializer(),
            bias_initializer=tf.zeros_initializer()
        )

    def call(self, input):
        """Return the softmax output of the dense layer for `input`."""
        output = self.dense(input)
        return output


# The feature arrays are float64 after scaling, so run Keras in float64.
tf.keras.backend.set_floatx('float64')

model = Linear()
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)  # SGD optimizer
sparse_categorical_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()

# 100 full-batch gradient steps on the training rows. The batch is the whole
# training set, so reshuffling it between steps would not change the gradient.
for _ in range(100):
    with tf.GradientTape() as tape:
        y_pred = model(Xtr)
        # Mean sparse categorical cross-entropy over the batch.
        loss = tf.reduce_mean(
            tf.keras.losses.sparse_categorical_crossentropy(
                y_true=Ytr, y_pred=y_pred))
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(
        grads_and_vars=zip(grads, model.trainable_variables))

# Evaluate once, after training, on the held-out rows. The metric reports the
# fraction of samples whose predicted class matches the label.
y_pred_test = model.predict(Xt)
sparse_categorical_accuracy.update_state(y_true=Yt, y_pred=y_pred_test)
print("test accuracy: %f" % float(sparse_categorical_accuracy.result()))