#Support vector machines: algorithm principles and implementation
#(1) Solving classification problems with the SVM algorithms in sklearn
import numpy as np
import matplotlib.pyplot as plt
#1-1 Multi-algorithm comparison: KNN hyperparameter tuning (see the commented-out sketch after the data preparation below)
from sklearn.feature_selection import SelectKBest
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold #K-fold cross-validation
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score #overall model accuracy
from sklearn.metrics import confusion_matrix #confusion matrix
from sklearn.metrics import precision_score #precision
from sklearn.metrics import recall_score #recall
from sklearn.metrics import f1_score #F1 score
#Use a Pipeline for the three steps of the polynomial-feature SVM: add polynomial features - standardize the data - linear SVM
from sklearn.preprocessing import PolynomialFeatures #polynomial feature expansion
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from sklearn.pipeline import Pipeline
#Load the dataset to be used for training
finaldata=pd.read_excel("C:/Users/y50014900/Desktop/过程测试_033GRR10L4105623_20200601-20200708_IL_DM_异常检测分类结果.xlsx")
feature=["p1","p2","p3","p4","p5","p6","p7","p8","p9","p10","p11","p12","p13","p14","p15","p16","p17","p18","p19","p20","p21","p22","p23"]
DM_target1=["DM1"]
DM_target2=["DM2"]
x=finaldata.iloc[:,2:71]
print(x)
x=np.array(x) #convert the input to a 2-D numpy array so the data format is consistent
y=finaldata[DM_target1].values.ravel() #flatten the target column into a 1-D array to use as the prediction target
y=finaldata[DM_target2].values.ravel() #note: this overwrites the line above, so DM2 is the target actually used; keep only one of the two assignments
#The data is now in numpy form and ready to be fed to the algorithms
#Step 1: standardize the data (linear scaling)
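#(sketch) The KNN / KFold / metrics imports above are not used later in this script; the block below
#is a minimal, hedged sketch of what they suggest: tuning KNN's n_neighbors with K-fold cross-validation
#on the x/y prepared above. The fold count and the 1-10 range of k are illustrative assumptions, not
#values from the original. It is kept inside a string literal, like the experiments below, so it does
#not run by default.
'''
from sklearn.model_selection import KFold, cross_val_score
best_k,best_score=0,0.0
kf=KFold(n_splits=5,shuffle=True,random_state=666) #5-fold cross-validation with a fixed seed
for k in range(1,11): #candidate numbers of neighbors
    knn=KNeighborsClassifier(n_neighbors=k)
    scores=cross_val_score(knn,x,y,cv=kf) #accuracy on each of the 5 folds
    if scores.mean()>best_score:
        best_k,best_score=k,scores.mean()
print("best n_neighbors:",best_k,"mean CV accuracy:",best_score)
'''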
'''
from sklearn.preprocessing import StandardScaler
s1=StandardScaler()
s1.fit(x)
x_standard=s1.transform(x)
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=666)
#1-1 Import LinearSVC, sklearn's linear SVM classifier, for the linearly separable case
from sklearn.preprocessing import StandardScaler
s1=StandardScaler()
s1.fit(x)
x=s1.transform(x)
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=666)
from sklearn.svm import LinearSVC
s11=LinearSVC(C=1e10) #hyperparameter C controls the penalty on misclassification; the larger C is, the smaller the tolerance for errors
#For multi-class problems you can set penalty="l1"/"l2" (regularization type) and multi_class="ovr"/"crammer_singer" (multi-class strategy); see the sketch after s12 below
#LinearSVC defaults to L2 regularization and the ovr multi-class mode
s11.fit(x_train,y_train) #fit on the standardized training set
print(s11.score(x_test,y_test))
#Decrease the regularization parameter C: the smaller C is, the larger the tolerance for misclassified points
s12=LinearSVC(C=1) #with a smaller C the margin is softer, so some points may be classified incorrectly
s12.fit(x_train,y_train) #fit on the standardized training set
print(s12.score(x_test,y_test))
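#(sketch) A minimal, hedged example of the penalty / multi_class options mentioned above; the values
#chosen here are illustrative assumptions, not the original author's settings. Note that penalty="l1"
#in LinearSVC requires loss="squared_hinge" and dual=False.
s13=LinearSVC(C=1,penalty="l1",loss="squared_hinge",dual=False,multi_class="ovr")
s13.fit(x_train,y_train)
print(s13.score(x_test,y_test))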
#1-2 SVM for non-linear data in sklearn (polynomial-feature approach)
#Model for non-linear data: manually add polynomial features before a linear SVM
#Use a Pipeline for the three steps: add polynomial features - standardize the data - linear SVM
from sklearn.preprocessing import PolynomialFeatures #polynomial feature expansion
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=666)
def polynomialSVC(degree,C=10): #default regularization parameter C=10; every call below passes C explicitly
    return Pipeline([("poly",PolynomialFeatures(degree=degree)),
                     ("std_scaler",StandardScaler()),
                     ("LinearSVC",LinearSVC(C=C))
                     ])
for i in range(1,3):
    for C in range(1,10):
        p=polynomialSVC(degree=i,C=C) #train one model for each degree/C combination
        p.fit(x_train,y_train)
        print(p.score(x_test,y_test))
#1-3 Use SVM with the built-in polynomial kernel, which maps the data to higher-dimensional polynomial features implicitly, unlike explicitly adding polynomial features
#Using sklearn's SVC with kernel="poly" adds the polynomial features automatically; the key parameters are kernel="poly", degree and C
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=666)
from sklearn.svm import SVC
def polynomialkernelSVC(degree,C=1.0):
    return Pipeline(
        [
        ("std_scaler",StandardScaler()),
        ("kernelsvc",SVC(kernel="poly",degree=degree,C=C))
        ]
        )
for i in range(1,5):
    for j in range(1,10):
        p1=polynomialkernelSVC(degree=i,C=j)
        p1.fit(x_train,y_train)
        print(p1.score(x_test,y_test))
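#(sketch) Instead of the nested loops above, degree and C of the kernel pipeline can be tuned with
#GridSearchCV; this is a hedged alternative added for illustration, not the original author's method.
#The parameter names use the "kernelsvc" step name defined above, and the grids are illustrative assumptions.
from sklearn.model_selection import GridSearchCV
param_grid={"kernelsvc__degree":[1,2,3,4],"kernelsvc__C":[0.1,1,10]}
grid=GridSearchCV(polynomialkernelSVC(degree=2),param_grid,cv=5) #5-fold CV over the grid
grid.fit(x_train,y_train)
print(grid.best_params_,grid.best_score_) #best hyperparameters and their mean CV accuracy
print(grid.score(x_test,y_test)) #accuracy of the refit best model on the held-out test set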
'''
#1-4 SVM with the Gaussian (RBF) kernel: training a model on non-linear data
#sklearn's RBF kernel; the main hyperparameter gamma controls model complexity: the larger gamma is, the more the model tends to overfit
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
import numpy as np
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=666)
def RBFkernelSVC(gamma):
    return Pipeline([
        ("std",StandardScaler()),
        ("svc",SVC(kernel="rbf",gamma=gamma))
        ])
for i in np.arange(0.1,10,1):
    sv=RBFkernelSVC(gamma=i)
    sv.fit(x_train,y_train)
    print(sv.score(x_test,y_test))
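#(sketch) The matplotlib and metric imports at the top are not used above; the hedged follow-up below
#re-runs the gamma sweep to collect test-set accuracy, plots it, and reports the confusion matrix,
#precision, recall and F1 for the gamma with the highest test accuracy. The macro average is an
#assumption so the code works for either binary or multi-class targets.
gammas=np.arange(0.1,10,1)
scores=[]
for g in gammas:
    m=RBFkernelSVC(gamma=g)
    m.fit(x_train,y_train)
    scores.append(m.score(x_test,y_test))
plt.plot(gammas,scores,marker="o") #test accuracy as a function of gamma
plt.xlabel("gamma")
plt.ylabel("test accuracy")
plt.show()
best=RBFkernelSVC(gamma=gammas[int(np.argmax(scores))]) #refit the gamma with the best test accuracy
best.fit(x_train,y_train)
y_pred=best.predict(x_test)
print(confusion_matrix(y_test,y_pred))
print(precision_score(y_test,y_pred,average="macro"))
print(recall_score(y_test,y_pred,average="macro"))
print(f1_score(y_test,y_pred,average="macro"))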