数据归一化(均值方差归一化,即标准化 Standardization)
# Standardize the iris features, then score a 3-nearest-neighbour classifier
# on the held-out split.
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

iris = datasets.load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=666
)

# Fit the scaler on the training split only, so the test split is transformed
# with the training statistics rather than its own.
standardScaler = StandardScaler()
standardScaler.fit(X_train)
standardScaler.mean_   # per-feature mean learned from X_train
standardScaler.scale_  # per-feature scale (standard deviation, not variance)

X_train = standardScaler.transform(X_train)         # standardized training data
X_test_standerd = standardScaler.transform(X_test)  # standardized test data

KNN_classifier = KNeighborsClassifier(n_neighbors=3)
KNN_classifier.fit(X_train, y_train)
KNN_classifier.score(X_test_standerd, y_test)
自己实现数据归一化类
class StandardScaler(object):
    """Standardize features to zero mean and unit standard deviation.

    Each column of a 2-D sample matrix is shifted by the column mean and
    divided by the column (population) standard deviation learned in
    ``fit``.

    Attributes:
        mean_: 1-D array of per-column means, or None before ``fit``.
        scale_: 1-D array of per-column standard deviations, or None
            before ``fit``.
    """

    def __init__(self):
        # Both stay None until fit() has been called.
        self.mean_ = None
        self.scale_ = None

    def fit(self, X):
        """Learn the per-column mean and standard deviation of ``X``.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).

        Returns:
            This scaler, so calls can be chained.

        Raises:
            ValueError: If ``X`` is not two-dimensional.
        """
        X = self._as_2d_float(X)
        # axis=0 reduces over the samples, giving one statistic per column.
        self.mean_ = np.mean(X, axis=0)
        self.scale_ = np.std(X, axis=0)
        return self

    def transform(self, X):
        """Return a standardized copy of ``X`` using the fitted statistics.

        Columns whose fitted standard deviation is exactly zero are only
        centered (divided by 1), so constant features do not produce
        NaN or inf values.

        Args:
            X: 2-D array-like with the same number of columns as the data
                passed to ``fit``.

        Returns:
            A new float array with the same shape as ``X``.

        Raises:
            RuntimeError: If ``fit`` has not been called yet.
            ValueError: If ``X`` is not two-dimensional or its column
                count differs from the fitted data.
        """
        if self.mean_ is None or self.scale_ is None:
            raise RuntimeError("fit must be called before transform")
        X = self._as_2d_float(X)
        if X.shape[1] != len(self.mean_):
            raise ValueError(
                "X has {} features, but the scaler was fitted with {}".format(
                    X.shape[1], len(self.mean_)
                )
            )
        # Swap zero deviations for 1.0 to avoid dividing by zero; scale_
        # itself is left untouched so it still reports the true deviation.
        divisor = np.where(self.scale_ == 0, 1.0, self.scale_)
        return (X - self.mean_) / divisor

    @staticmethod
    def _as_2d_float(X):
        """Convert ``X`` to a float ndarray and require it to be 2-D."""
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError(
                "X must be a 2-D array, got {} dimension(s)".format(X.ndim)
            )
        return X