INT104-lab13[Parzen Window Method][此方法无数据集划分]
利用高斯函数,使得较近的数据点决策作用更大!
代码也比较简洁!Accuracy=0.9466666666666667
注意一下:
其实 h=1 的选取相当于你的先验知识;或者说可以多尝试几个取值,
让电脑自己试错。有时候方法之间的界定其实并没有那么严格啦
import numpy as np
from collections import Counter


def read(path: str) -> tuple:
    """Load a comma-separated data file whose last column is the class label.

    Blank lines are skipped. Every column except the last is parsed as a
    float feature; the last column is mapped to an integer class index in
    order of first appearance.

    Args:
        path: Path of the text file to read.

    Returns:
        A 7-tuple ``(X, y, n, m, class_map, class_size, class_anti_map)``:
        feature rows, integer labels, number of samples, number of features
        (0 when the file holds no samples), label -> index mapping, number
        of distinct classes, and index -> label mapping.

    Raises:
        ValueError: If a feature column cannot be parsed as a float.
    """
    X, y = [], []
    class_map, class_anti_map = {}, {}
    class_idx = 0
    # The context manager closes the file even if a row fails to parse.
    with open(path, "r") as f:
        for row in f:
            row = row.strip()
            if not row:
                continue
            items = row.split(",")
            label = items[-1]
            X.append([float(item) for item in items[:-1]])
            if label not in class_map:
                class_map[label] = class_idx
                class_anti_map[class_idx] = label
                class_idx += 1
            y.append(class_map[label])
    n_features = len(X[0]) if X else 0
    return X, y, len(y), n_features, class_map, class_idx, class_anti_map


def parzenWindowAlgorithm(X, y, class_map, class_anti_map, class_size, n, m, hyperparameter):
    """Classify every sample with a Gaussian Parzen-window estimate.

    Each of the first ``n`` samples is scored against all of the first ``n``
    samples (itself included; there is no train/test split). The predicted
    class is the one maximising ``prior * sum(kernel(|x - x_i| / h)) / h**m``
    over the samples ``x_i`` of that class.

    Args:
        X: Sequence of feature rows, each of length ``m``.
        y: Integer class index (``0 .. class_size - 1``) for each row.
        class_map: Label -> index mapping. Unused; kept so existing callers
            keep working.
        class_anti_map: Index -> label mapping. Unused; kept so existing
            callers keep working.
        class_size: Number of distinct classes.
        n: Number of samples.
        m: Number of features per sample.
        hyperparameter: Window width ``h``.

    Returns:
        A list with the predicted class index for each of the ``n`` samples.
        Ties go to the lowest class index.
    """
    counts = Counter(y)
    priors = [counts[k] / n for k in range(class_size)]
    volume = np.power(hyperparameter, m)
    # Convert once instead of rebuilding arrays inside the O(n^2) loop.
    points = np.asarray(X[:n], dtype=float)
    labels = y[:n]

    predict_y = []
    for x in points:
        # The *difference* is scaled by h. The original divided only X[i]
        # by h because of operator precedence, which is wrong for h != 1.
        dist = np.linalg.norm((points - x) / hyperparameter, axis=1)
        weights = gaussianKernel(dist) / volume

        # NOTE(review): the per-class sum is not divided by the class sample
        # count, so the prior is effectively applied to an unnormalised
        # density. Kept as in the original; with equally sized classes the
        # argmax is unaffected. Confirm the intent for imbalanced data.
        density = [0.0] * class_size
        for label, w in zip(labels, weights):
            density[label] += w

        # max() returns the first maximal index, matching the original
        # stable sort on the negated score.
        best = max(range(class_size), key=lambda k: priors[k] * density[k])
        predict_y.append(best)
    return predict_y


def gaussianKernel(u):
    """Return the standard normal density ``exp(-u**2 / 2) / sqrt(2 * pi)``.

    Works element-wise on NumPy arrays as well as on scalars. The constant
    is the one-dimensional one; it is the same for every class, so it does
    not influence which class scores highest.
    """
    return np.exp(-u * u / 2) / np.sqrt(2 * np.pi)


if __name__ == '__main__':
    X, y, n, m, class_map, class_size, class_anti_map = read("iris.data")

    predict_y = parzenWindowAlgorithm(X, y, class_map, class_anti_map, class_size, n, m, 1)

    for i in range(n):
        print("No.", (i + 1), X[i], "y =", y[i], "predict_y =", predict_y[i], (y[i] == predict_y[i]))
    correct = sum(1 for truth, guess in zip(y, predict_y) if truth == guess)
    # Guard against an empty data file so the summary line never divides by 0.
    print("Accuracy =", (correct / n if n else 0.0))
~~Jason_liu O(∩_∩)O