TensorFlow kNN house-price prediction; note the Min-Max Scaling step
Sample data:
0.00632 18.00 2.310 0 0.5380 6.5750 65.20 4.0900 1 296.0 15.30 396.90 4.98 24.00
0.02731 0.00 7.070 0 0.4690 6.4210 78.90 4.9671 2 242.0 17.80 396.90 9.14 21.60
0.02729 0.00 7.070 0 0.4690 7.1850 61.10 4.9671 2 242.0 17.80 392.83 4.03 34.70
0.03237 0.00 2.180 0 0.4580 6.9980 45.80 6.0622 3 222.0 18.70 394.63 2.94 33.40
0.06905 0.00 2.180 0 0.4580 7.1470 54.20 6.0622 3 222.0 18.70 396.90 5.33 36.20
0.02985 0.00 2.180 0 0.4580 6.4300 58.70 6.0622 3 222.0 18.70 394.12 5.21 28.70
0.08829 12.50 7.870 0 0.5240 6.0120 66.60 5.5605 5 311.0 15.20 395.60 12.43 22.90
0.14455 12.50 7.870 0 0.5240 6.1720 96.10 5.9505 5 311.0 15.20 396.90 19.15 27.10
0.21124 12.50 7.870 0 0.5240 5.6310 100.00 6.0821 5 311.0 15.20 386.63 29.93 16.50
0.17004 12.50 7.870 0 0.5240 6.0040 85.90 6.5921 5 311.0 15.20 386.71 17.10 18.90
0.22489 12.50 7.870 0 0.5240 6.3770 94.30 6.3467 5 311.0 15.20 392.52 20.45 15.00
0.11747 12.50 7.870 0 0.5240 6.0090 82.90 6.2267 5 311.0 15.20 396.90 13.27 18.90
0.09378 12.50 7.870 0 0.5240 5.8890 39.00 5.4509 5 311.0 15.20 390.50 15.71 21.70
0.62976 0.00 8.140 0 0.5380 5.9490 61.80 4.7075 4 307.0 21.00 396.90 8.26 20.40
0.63796 0.00 8.140 0 0.5380 6.0960 84.50 4.4619 4 307.0 21.00 380.02 10.26 18.20
0.62739 0.00 8.140 0 0.5380 5.8340 56.50 4.4986 4 307.0 21.00 395.62 8.47 19.90
1.05393 0.00 8.140 0 0.5380 5.9350 29.30 4.4986 4 307.0 21.00 386.85 6.58 23.10
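Each row above is a plain whitespace-separated record with 14 values in the order CRIM, ZN, INDUS, CHAS, NOX, RM, AGE, DIS, RAD, TAX, PTRATIO, B, LSTAT, MEDV; the last value, MEDV, is the prediction target. As a quick sanity check (a minimal sketch, not part of the original listing), one record can be split and paired with the header names used in the code:

# Minimal sketch: map one whitespace-separated record onto the 14 Boston housing columns.
header = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS',
          'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
row = '0.00632 18.00 2.310 0 0.5380 6.5750 65.20 4.0900 1 296.0 15.30 396.90 4.98 24.00'
record = dict(zip(header, [float(v) for v in row.split()]))
print(record['RM'], record['MEDV'])   # 6.575 24.0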
Code. Note ptp(): it returns the peak-to-peak range (maximum minus minimum) of each column, which the Min-Max scaling step divides by. A short illustration of the scaling follows, then the full listing.
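A minimal NumPy-only sketch of the scaling step (the toy values are made up for illustration; np.ptp(x, axis=0) is the same max-minus-min range that the listing computes with x_vals.ptp(0)):

import numpy as np

# Toy feature matrix: 3 samples, 2 columns (illustrative values only).
x = np.array([[1.0, 10.0],
              [2.0, 30.0],
              [4.0, 20.0]])

# Peak-to-peak range per column: max - min.
print(np.ptp(x, axis=0))                            # [ 3. 20.]

# Min-Max scaling maps every column onto [0, 1].
x_scaled = (x - x.min(axis=0)) / np.ptp(x, axis=0)
print(x_scaled.min(axis=0), x_scaled.max(axis=0))   # [0. 0.] [1. 1.]

Without this step the L1 distance would be dominated by the columns with the largest raw ranges, such as TAX and B.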
# k-Nearest Neighbor
#----------------------------------
#
# This function illustrates how to use
# k-nearest neighbors in tensorflow
#
# We will use the 1970s Boston housing dataset
# which is available through the UCI
# ML data repository.
#
# Data:
#----------x-values-----------
# CRIM   : per capita crime rate by town
# ZN     : prop. of res. land zones
# INDUS  : prop. of non-retail business acres
# CHAS   : Charles river dummy variable
# NOX    : nitric oxides concentration / 10 M
# RM     : Avg. # of rooms per building
# AGE    : prop. of buildings built prior to 1940
# DIS    : Weighted distances to employment centers
# RAD    : Index of radial highway access
# TAX    : Full tax rate value per $10k
# PTRATIO: Pupil/Teacher ratio by town
# B      : 1000*(Bk-0.63)^2, Bk=prop. of blacks
# LSTAT  : % lower status of pop
#------------y-value-----------
# MEDV   : Median Value of homes in $1,000's

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import requests
from tensorflow.python.framework import ops
ops.reset_default_graph()

# Create graph
sess = tf.Session()

# Load the data
housing_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data'
housing_header = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS',
                  'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
cols_used = ['CRIM', 'INDUS', 'NOX', 'RM', 'AGE', 'DIS', 'TAX', 'PTRATIO', 'B', 'LSTAT']
num_features = len(cols_used)
housing_file = requests.get(housing_url)
housing_data = [[float(x) for x in y.split(' ') if len(x) >= 1]
                for y in housing_file.text.split('\n') if len(y) >= 1]

y_vals = np.transpose([np.array([y[13] for y in housing_data])])
x_vals = np.array([[x for i, x in enumerate(y) if housing_header[i] in cols_used]
                   for y in housing_data])

## Min-Max Scaling
x_vals = (x_vals - x_vals.min(0)) / x_vals.ptp(0)

# Split the data into train and test sets
np.random.seed(13)  # make results reproducible
train_indices = np.random.choice(len(x_vals), round(len(x_vals) * 0.8), replace=False)
test_indices = np.array(list(set(range(len(x_vals))) - set(train_indices)))
x_vals_train = x_vals[train_indices]
x_vals_test = x_vals[test_indices]
y_vals_train = y_vals[train_indices]
y_vals_test = y_vals[test_indices]

# Declare k-value and batch size
k = 4
batch_size = len(x_vals_test)

# Placeholders
x_data_train = tf.placeholder(shape=[None, num_features], dtype=tf.float32)
x_data_test = tf.placeholder(shape=[None, num_features], dtype=tf.float32)
y_target_train = tf.placeholder(shape=[None, 1], dtype=tf.float32)
y_target_test = tf.placeholder(shape=[None, 1], dtype=tf.float32)

# Declare distance metric
# L1
distance = tf.reduce_sum(tf.abs(tf.subtract(x_data_train, tf.expand_dims(x_data_test, 1))), axis=2)

# L2
#distance = tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(x_data_train, tf.expand_dims(x_data_test, 1))), reduction_indices=1))

# Predict: Get min distance index (Nearest neighbor)
#prediction = tf.arg_min(distance, 0)
# top_k on the negated distances selects the k nearest neighbors
top_k_xvals, top_k_indices = tf.nn.top_k(tf.negative(distance), k=k)
x_sums = tf.expand_dims(tf.reduce_sum(top_k_xvals, 1), 1)
x_sums_repeated = tf.matmul(x_sums, tf.ones([1, k], tf.float32))
# Weight each neighbor by its share of the summed distances (the negations cancel)
x_val_weights = tf.expand_dims(tf.div(top_k_xvals, x_sums_repeated), 1)

top_k_yvals = tf.gather(y_target_train, top_k_indices)
prediction = tf.squeeze(tf.matmul(x_val_weights, top_k_yvals), axis=[1])

# Calculate MSE
mse = tf.div(tf.reduce_sum(tf.square(tf.subtract(prediction, y_target_test))), batch_size)

# Calculate how many loops over the test data
num_loops = int(np.ceil(len(x_vals_test) / batch_size))

for i in range(num_loops):
    min_index = i * batch_size
    max_index = min((i + 1) * batch_size, len(x_vals_test))
    x_batch = x_vals_test[min_index:max_index]
    y_batch = y_vals_test[min_index:max_index]
    predictions = sess.run(prediction, feed_dict={x_data_train: x_vals_train, x_data_test: x_batch,
                                                  y_target_train: y_vals_train, y_target_test: y_batch})
    batch_mse = sess.run(mse, feed_dict={x_data_train: x_vals_train, x_data_test: x_batch,
                                         y_target_train: y_vals_train, y_target_test: y_batch})

    print('Batch #' + str(i + 1) + ' MSE: ' + str(np.round(batch_mse, 3)))

# Plot prediction and actual distribution
bins = np.linspace(5, 50, 45)

plt.hist(predictions, bins, alpha=0.5, label='Prediction')
plt.hist(y_batch, bins, alpha=0.5, label='Actual')
plt.title('Histogram of Predicted and Actual Values')
plt.xlabel('Med Home Value in $1,000s')
plt.ylabel('Frequency')
plt.legend(loc='upper right')
plt.show()
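As a cross-check of what the graph computes, the same distance-weighted k-NN prediction can be written in plain NumPy. This is a sketch only, assuming x_vals_train, y_vals_train, x_vals_test, y_vals_test and k from the script are already in scope; it mirrors the graph's weighting, where each of the k nearest neighbours contributes in proportion to its share of the summed L1 distances:

import numpy as np

# L1 distance from every test point to every training point: shape (n_test, n_train).
dist = np.abs(x_vals_test[:, None, :] - x_vals_train[None, :, :]).sum(axis=2)

# Indices and distances of the k nearest training points per test point.
nn_idx = np.argsort(dist, axis=1)[:, :k]
nn_dist = np.take_along_axis(dist, nn_idx, axis=1)

# Same weighting as the graph: each neighbour's distance divided by the sum of
# the k distances (ties may be ordered differently than tf.nn.top_k, which is harmless).
weights = nn_dist / nn_dist.sum(axis=1, keepdims=True)

# Weighted average of the neighbours' MEDV values, then the test-set MSE.
np_pred = (weights * y_vals_train[nn_idx, 0]).sum(axis=1)
np_mse = np.mean((np_pred - y_vals_test[:, 0]) ** 2)
print('NumPy check MSE:', np.round(np_mse, 3))

The MSE printed here should roughly match the batch MSE from the session run; the graph works in float32, so expect small rounding differences.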