[ML L2 - N19] Naive Bayes GaussianNB
ClassifyNB.py:
def classify(features_train, labels_train):
    """Train a Gaussian Naive Bayes classifier and return it.

    Args:
        features_train: training feature rows, passed straight to ``fit``.
        labels_train: training labels, passed straight to ``fit``.

    Returns:
        The ``GaussianNB`` instance after it has been fitted.
    """
    ### import the sklearn module for GaussianNB
    from sklearn.naive_bayes import GaussianNB

    ### create the classifier, fit it on the training features and labels,
    ### and hand back the fitted instance (``fit`` returns the estimator itself)
    return GaussianNB().fit(features_train, labels_train)
prep_terrain_data.py
#!/usr/bin/python
import random


def makeTerrainData(n_points=1000):
    """Build a reproducible toy terrain dataset and split it 75/25.

    Each point has two features drawn uniformly from [0, 1): ``grade`` and
    ``bumpy``. The label is ``round(grade * bumpy + 0.3 + 0.1 * error)`` with
    ``error`` a third uniform draw, except that any point whose grade or
    bumpiness exceeds 0.8 is forced to label 1.0.

    The module-level random generator is seeded with 42 on every call, so
    repeated calls with the same ``n_points`` return identical data.

    Args:
        n_points: total number of points to generate.

    Returns:
        A tuple ``(X_train, y_train, X_test, y_test)``. ``X_*`` are lists of
        ``[grade, bumpy]`` pairs and ``y_*`` are lists of float labels
        (0.0 or 1.0). The first ``int(0.75 * n_points)`` points form the
        training set; the rest form the test set.
    """
    ### make the toy dataset
    random.seed(42)
    # The three feature lists are drawn one after another (not interleaved)
    # so the generated values depend only on the seed and n_points.
    grade = [random.random() for _ in range(n_points)]
    bumpy = [random.random() for _ in range(n_points)]
    error = [random.random() for _ in range(n_points)]

    # float(...) keeps every label a float: Python 3's round() returns an int,
    # which would otherwise mix int and float labels in the same list.
    y = [
        1.0 if g > 0.8 or b > 0.8 else float(round(g * b + 0.3 + 0.1 * e))
        for g, b, e in zip(grade, bumpy, error)
    ]

    ### split into train/test sets
    X = [[g, b] for g, b in zip(grade, bumpy)]
    split = int(0.75 * n_points)

    return X[:split], y[:split], X[split:], y[split:]
class_vis.py
#!/usr/bin/python

import warnings
warnings.filterwarnings("ignore")

import matplotlib
matplotlib.use('agg')

import matplotlib.pyplot as plt
import pylab as pl
import numpy as np

import base64
import json
import subprocess


def prettyPicture(clf, X_test, y_test):
    """Draw a classifier's decision regions with the test points on top.

    The classifier is evaluated on a regular mesh over the unit square and the
    predictions are shown as a colour map. Test points labelled 0 are drawn in
    blue ("fast") and those labelled 1 in red ("slow"). The figure is written
    to ``test.png`` in the current working directory.

    Args:
        clf: fitted classifier exposing ``predict`` on two-column input.
        X_test: sequence of two-element feature rows; element 0 is plotted on
            the x axis and element 1 on the y axis.
        y_test: labels aligned with ``X_test``.
    """
    x_min, x_max = 0.0, 1.0
    y_min, y_max = 0.0, 1.0

    # Plot the decision boundary. For that, we will assign a color to each
    # point in the mesh [x_min, x_max]x[y_min, y_max].
    h = .01  # step size in the mesh
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])

    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())

    plt.pcolormesh(xx, yy, Z, cmap=pl.cm.seismic)

    # Plot also the test points
    grade_sig = [row[0] for row, label in zip(X_test, y_test) if label == 0]
    bumpy_sig = [row[1] for row, label in zip(X_test, y_test) if label == 0]
    grade_bkg = [row[0] for row, label in zip(X_test, y_test) if label == 1]
    bumpy_bkg = [row[1] for row, label in zip(X_test, y_test) if label == 1]

    plt.scatter(grade_sig, bumpy_sig, color="b", label="fast")
    plt.scatter(grade_bkg, bumpy_bkg, color="r", label="slow")
    plt.legend()
    # Feature 0 (grade) is on the x axis and feature 1 (bumpiness) on the y
    # axis, both in the mesh and in the scatter calls above.
    plt.xlabel("grade")
    plt.ylabel("bumpiness")

    plt.savefig("test.png")


def output_image(name, format, bytes):
    """Print an image to stdout as a JSON record between two marker strings.

    Args:
        name: value stored under the ``name`` key.
        format: value stored under the ``format`` key.
        bytes: raw image content; stored base64-encoded under ``bytes``.
    """
    image_start = "BEGIN_IMAGE_f9825uweof8jw9fj4r8"
    image_end = "END_IMAGE_0238jfw08fjsiufhw8frs"

    # base64.encodestring was removed in Python 3.9; encodebytes is its
    # drop-in replacement. Fall back to the old name on Python 2.
    encode = getattr(base64, "encodebytes", None) or base64.encodestring

    data = {}
    data['name'] = name
    data['format'] = format
    # Decode to text so json.dumps accepts it on Python 3 as well.
    data['bytes'] = encode(bytes).decode("ascii")
    print(image_start + json.dumps(data) + image_end)
studentMain.py
#!/usr/bin/python

"""
Complete the code in ClassifyNB.py with the sklearn Naive Bayes
classifier to classify the terrain data.

The objective of this exercise is to recreate the decision boundary
found in the lesson video, and make a plot that visually shows the
decision boundary.
"""

from prep_terrain_data import makeTerrainData
from class_vis import prettyPicture, output_image
from ClassifyNB import classify

import numpy as np
import pylab as pl


features_train, labels_train, features_test, labels_test = makeTerrainData()

### the training data (features_train, labels_train) have both "fast" and "slow" points mixed
### in together--separate them so we can give them different colors in the scatterplot,
### and visually identify them
grade_fast = [row[0] for row, label in zip(features_train, labels_train) if label == 0]
bumpy_fast = [row[1] for row, label in zip(features_train, labels_train) if label == 0]
grade_slow = [row[0] for row, label in zip(features_train, labels_train) if label == 1]
bumpy_slow = [row[1] for row, label in zip(features_train, labels_train) if label == 1]

# You will need to complete this function imported from the ClassifyNB script.
# Be sure to change to that code tab to complete this quiz.
clf = classify(features_train, labels_train)

### draw the decision boundary with the test points overlaid
prettyPicture(clf, features_test, labels_test)

# Read the saved plot inside a context manager so the file handle is closed
# as soon as the bytes have been read.
with open("test.png", "rb") as image_file:
    output_image("test.png", "png", image_file.read())
Calculating NB Accuracy
def NBAccuracy(features_train, labels_train, features_test, labels_test):
    """Fit a Gaussian Naive Bayes classifier and return its test accuracy.

    Args:
        features_train: training feature rows, passed to ``fit``.
        labels_train: training labels, passed to ``fit``.
        features_test: test feature rows, passed to ``score``.
        labels_test: test labels, passed to ``score``.

    Returns:
        The value of ``clf.score(features_test, labels_test)``, which for
        scikit-learn classifiers is the mean accuracy on the given test data.
    """
    from sklearn.naive_bayes import GaussianNB

    clf = GaussianNB()
    clf.fit(features_train, labels_train)

    # score() runs the prediction on features_test itself, so a separate
    # predict() call whose result is never used would only repeat that work.
    return clf.score(features_test, labels_test)
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· SQL Server 2025 AI相关能力初探
· Linux系列:如何用 C#调用 C方法造成内存泄露
· AI与.NET技术实操系列(二):开始使用ML.NET
· 记一次.NET内存居高不下排查解决与启示
· 探究高空视频全景AR技术的实现原理
· 阿里最新开源QwQ-32B,效果媲美deepseek-r1满血版,部署成本又又又降低了!
· Manus重磅发布:全球首款通用AI代理技术深度解析与实战指南
· 开源Multi-agent AI智能体框架aevatar.ai,欢迎大家贡献代码
· 被坑几百块钱后,我竟然真的恢复了删除的微信聊天记录!
· AI技术革命,工作效率10个最佳AI工具
2019-06-20 [Cypress] Reuse Data with Cypress Fixtures
2017-06-20 [Preact] Integrate react-router with Preact
2017-06-20 [Angular] Change component default template (ng-content, ng-template, ngTemplateOutlet, TemplateRef)
2017-06-20 [Angular] Angular Advanced Features - ng-template , ng-container, ngTemplateOutlet
2013-06-20 【大型网站架构 原理】1. 负载均衡和冗余技术