机器学习:PLA
国庆期间的作业:
* 了解掌握线性感知机算法(PLA)的基本原理和算法流程,并使用PLA来解决一个实际的分类问题。
数据集介绍:
data1.csv —— 维度为100x3,包含100个样本,前两列是数据特征,最后一列是输出标签label。该数据集线性可分。
线性可分:采用PLA(Perceptron Learning Algorithm)
线性不可分:采用Pocket Learning Algorithm
# Perceptron learning algorithm (PLA), primal form, implemented in Python.
# -*- coding: utf-8 -*-
import warnings

import numpy as np
import pandas as pd


# 1. Build the data set
def createdata(path='data1.csv'):
    """Load the training set from a header-less, three-column CSV file.

    Args:
        path: CSV file to read. The first two columns are read as the
            features and the third column as the label.

    Returns:
        A ``(samples, labels)`` tuple: ``samples`` is the NumPy array of the
        two feature columns and ``labels`` is a plain list of the label
        column.
    """
    df = pd.read_csv(path, names=['x', 'y', 'labels'])
    samples = df[['x', 'y']].values
    labels = df['labels'].tolist()
    return samples, labels


# 2. Train the perceptron model
class Perceptron:
    """Linear perceptron trained with the primal-form update rule."""

    def __init__(self, x, y, a=1):
        """Store the training data and zero-initialise the model.

        Args:
            x: 2-D array of samples, one row per sample.
            y: Sequence of labels, one per row of ``x``. The update rule in
                ``train`` assumes the labels are +1 / -1.
            a: Learning rate.
        """
        self.x = x
        self.y = y
        # One weight per feature, stored as a column vector, all zeros.
        self.w = np.zeros((x.shape[1], 1))
        self.b = 0
        # The original hard-coded 1 here and silently ignored the argument.
        self.a = a
        self.numsamples = self.x.shape[0]
        self.numfeatures = self.x.shape[1]

    def sign(self, w, b, x):
        """Return the sign (-1, 0 or +1) of the score ``x . w + b``.

        The score is not truncated with ``int()``: truncation maps every
        score in (-1, 1) to 0, which makes correctly classified points close
        to the boundary look misclassified.
        """
        score = np.dot(x, w) + b
        return int(np.sign(score).item())

    def update(self, label_i, data_i):
        """Move ``w`` and ``b`` one learning-rate step towards one sample."""
        step = (label_i * self.a * data_i).reshape(self.w.shape)
        self.w = step + self.w
        self.b = self.b + label_i * self.a

    def train(self, max_iter=None):
        """Sweep over the data until one full pass makes no mistakes.

        Args:
            max_iter: Optional upper bound on the number of passes. With the
                default ``None`` the loop only stops once every sample is
                classified correctly, which never happens if the data is not
                linearly separable.

        Returns:
            The ``(w, b)`` pair held by the model when training stops.
        """
        passes = 0
        while max_iter is None or passes < max_iter:
            passes += 1
            mistakes = 0
            for sample, target in zip(self.x, self.y):
                # A non-positive product means the sample is misclassified
                # (or lies exactly on the boundary).
                if self.sign(self.w, self.b, sample) * target <= 0:
                    print("误分类点为:", sample, "此时的w和b为:", self.w, self.b)
                    mistakes += 1
                    self.update(target, sample)
            if mistakes == 0:
                print('最终训练得到的w和b为:', self.w, self.b)
                return self.w, self.b
        warnings.warn(
            'PLA stopped after %d passes without converging' % passes)
        return self.w, self.b


# 3. Plot the result
class Picture:
    """Scatter plot of the samples together with the learned boundary."""

    def __init__(self, data, labels, w, b, x_range=(4, 7)):
        """Draw the figure and save it to ``2d.png``.

        Args:
            data: 2-D array of samples; columns 0 and 1 are plotted.
            labels: Labels, one per row of ``data``; samples labelled -1 are
                drawn with an ``x`` marker, all others with the default one.
            w: Weights; ``w[0]`` and ``w[1]`` are used for the boundary.
            b: Bias.
            x_range: ``(start, stop)`` of the x0 interval over which the
                boundary line is drawn.
        """
        # Imported here rather than at module level so that the training
        # code can be used on machines without a plotting backend.
        import matplotlib.pyplot as plt

        self.b = b
        self.w = w
        self.data = data
        self.labels = labels
        plt.figure(1)
        plt.title('Perceptron Learning Algorithm', size=14)
        plt.xlabel('x0-axis', size=14)
        plt.ylabel('x1-axis', size=14)
        xData = np.linspace(x_range[0], x_range[1], 100)
        yData = self.expression(xData)
        plt.plot(xData, yData, color='r', label='sample data')
        # One scatter call per class instead of one call per point.
        positive = np.asarray(labels) != -1
        plt.scatter(data[positive, 0], data[positive, 1], s=15)
        plt.scatter(data[~positive, 0], data[~positive, 1], s=15, marker='x')
        plt.savefig('2d.png', dpi=175)

    def expression(self, x):
        """Return x1 on the line ``w[0]*x0 + w[1]*x1 + b = 0`` for ``x0 = x``.

        x0 and x1 are the two plot axes: x0 is the independent variable and
        x1 the dependent one.
        """
        y = (-self.b - self.w[0] * x) / self.w[1]
        return y

    def Show(self):
        """Display the open matplotlib figures."""
        import matplotlib.pyplot as plt

        plt.show()


if __name__ == '__main__':
    samples, labels = createdata()
    myperceptron = Perceptron(x=samples, y=labels)
    weights, bias = myperceptron.train()
    # Bound to a lower-case name so the Picture class is not shadowed.
    picture = Picture(samples, labels, weights, bias)
    picture.Show()
2. 线性不可分
经过测试,针对该数据集,迭代次数=10000时,效果较好。
#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""
Description : pocket algorithm

Trains a linear classifier on a two-feature data set with perceptron updates
and keeps ("pockets") the weights that misclassify the fewest training
samples, so the result is usable even when the data is not linearly
separable.
"""
import time

import numpy as np
import pandas as pd


def load_data(path='data2.csv'):
    """Load samples and labels from a header-less, three-column CSV file.

    Args:
        path: CSV file to read. The first two columns are read as the
            features and the third column as the label.

    Returns:
        A ``(data, label)`` tuple of NumPy arrays.
    """
    df = pd.read_csv(path, names=['x', 'y', 'label'])
    return df[['x', 'y']].values, df['label'].values


def _misclassified(data, label, w, b):
    """Return the indices of the samples whose margin is not positive."""
    margins = (np.dot(data, w) + b) * label
    return np.flatnonzero(margins <= 0)


def _perceptron_loss(data, label, w, b, idx):
    """Return the summed distance of the samples in ``idx`` to the boundary."""
    if idx.size == 0:
        return 0.0
    norm = np.linalg.norm(w)
    if norm == 0:
        # The distance to the boundary is undefined for an all-zero w.
        return float('inf')
    margins = (np.dot(data[idx], w) + b) * label[idx]
    return float(-np.sum(margins) / norm)


def classification_accuracy(data, label, w, b):
    """Return the percentage of samples with a strictly positive margin.

    Raises:
        ValueError: If ``label`` is empty.
    """
    data = np.asarray(data, dtype=float)
    label = np.asarray(label, dtype=float)
    if label.size == 0:
        raise ValueError('label must not be empty')
    wrong = _misclassified(data, label, np.asarray(w, dtype=float), b).size
    return (1 - wrong / label.size) * 100


def pocket_train(data, label, w=None, b=0.0, alpha=0.4, max_iter=10000,
                 verbose=True):
    """Run batch perceptron updates and return the best weights seen.

    Args:
        data: 2-D array of samples, one row per sample.
        label: Labels, one per row of ``data``. The update rule assumes they
            are +1 / -1.
        w: Initial weights; ``None`` selects ``[0.5, 1]``, which requires
            two-feature data. The argument itself is never modified.
        b: Initial bias.
        alpha: Learning rate.
        max_iter: Maximum number of update rounds.
        verbose: Print the progress of every round.

    Returns:
        A ``(w, b, train_loss)`` tuple. ``w`` and ``b`` are the weights with
        the fewest misclassified samples among all rounds; ``train_loss``
        holds the loss of the initial weights followed by one value per
        completed round.
    """
    data = np.asarray(data, dtype=float)
    label = np.asarray(label, dtype=float)
    w = np.array([0.5, 1.0]) if w is None else np.array(w, dtype=float)
    b = float(b)

    idx = _misclassified(data, label, w, b)
    train_loss = [_perceptron_loss(data, label, w, b, idx)]
    # The "pocket": best weights so far and how many samples they get wrong.
    best_w, best_b, best_errors = w.copy(), b, idx.size

    for iteration in range(1, max_iter + 1):
        if verbose:
            print('iteration:', iteration)
        if idx.size == 0:
            break
        # Each misclassified sample contributes with its OWN label. The
        # original loop reset its counter on every step and therefore
        # always applied the label of the first misclassified sample.
        w += alpha * np.dot(label[idx], data[idx])
        b += alpha * float(np.sum(label[idx]))
        if verbose:
            print('Iteration:%d w:%s b:%s' % (iteration, w, b))
        idx = _misclassified(data, label, w, b)
        train_loss.append(_perceptron_loss(data, label, w, b, idx))
        if idx.size < best_errors:
            best_w, best_b, best_errors = w.copy(), b, idx.size

    return best_w, best_b, train_loss


def plot_results(data, label, w, b, train_loss, x_range=(5, 8)):
    """Save and show the decision boundary plot and the loss curve.

    Args:
        data: 2-D array of samples; columns 0 and 1 are plotted.
        label: Labels, one per row of ``data``; samples labelled -1 are
            drawn with an ``x`` marker, all others with the default one.
        w: Weights; ``w[0]`` and ``w[1]`` are used for the boundary.
        b: Bias.
        train_loss: Sequence of loss values to plot.
        x_range: ``(start, stop)`` of the x0 interval over which the
            boundary line is drawn.
    """
    # Imported here rather than at module level so that the training code
    # can be used on machines without a plotting backend.
    import matplotlib.pyplot as plt

    data = np.asarray(data)
    plt.figure(1)
    plt.title('Pocket learning algorithm', size=14)
    plt.xlabel('x0-axis', size=14)
    plt.ylabel('x1-axis', size=14)
    xData = np.linspace(x_range[0], x_range[1], 100)
    yData = (w[0] * xData + b) / (-w[1])
    plt.plot(xData, yData, color='r', label='sample data')
    # One scatter call per class instead of one call per point.
    positive = np.asarray(label) != -1
    plt.scatter(data[positive, 0], data[positive, 1], s=15)
    plt.scatter(data[~positive, 0], data[~positive, 1], s=15, marker='x')
    plt.savefig('PLA(PocketLearning_1.png', dpi=175)

    plt.figure()
    plt.plot(train_loss)
    plt.ylabel('trainLoss')
    plt.xlabel('Iteration')
    plt.savefig('PLA(PocketLearning_2.png', dpi=175)
    plt.show()


def main():
    """Train on ``data2.csv``, print a summary and draw the figures."""
    data, label = load_data()

    start = time.time()
    w, b, train_loss = pocket_train(data, label)
    # Share of correctly classified samples; the original reported the
    # share of misclassified ones under this name.
    accuracy = classification_accuracy(data, label, w, b)
    end = time.time()

    print('Pocket learning algorithm is over')
    print('train time is %f s.' % (end - start))
    print('-' * 50)
    print('min trainLoss: %f' % np.min(train_loss))
    print('Classification accuracy: %.2f%%' % accuracy)

    plot_results(data, label, w, b, train_loss)


if __name__ == '__main__':
    main()