数据挖掘分类算法--KNN
实验使用的数据依然是UCI上的Iris数据集,分为样本(训练)数据和测试数据两部分,分别如下:
样本数据是从iris的三类数据中各选择30个(共90条):
5.1,3.5,1.4,0.2,Iris-setosa
4.9,3.0,1.4,0.2,Iris-setosa
4.7,3.2,1.3,0.2,Iris-setosa
4.6,3.1,1.5,0.2,Iris-setosa
5.0,3.6,1.4,0.2,Iris-setosa
5.4,3.9,1.7,0.4,Iris-setosa
4.6,3.4,1.4,0.3,Iris-setosa
5.0,3.4,1.5,0.2,Iris-setosa
4.4,2.9,1.4,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa
5.4,3.7,1.5,0.2,Iris-setosa
4.8,3.4,1.6,0.2,Iris-setosa
4.8,3.0,1.4,0.1,Iris-setosa
4.3,3.0,1.1,0.1,Iris-setosa
5.8,4.0,1.2,0.2,Iris-setosa
5.7,4.4,1.5,0.4,Iris-setosa
5.4,3.9,1.3,0.4,Iris-setosa
5.1,3.5,1.4,0.3,Iris-setosa
5.7,3.8,1.7,0.3,Iris-setosa
5.1,3.8,1.5,0.3,Iris-setosa
5.4,3.4,1.7,0.2,Iris-setosa
5.1,3.7,1.5,0.4,Iris-setosa
4.6,3.6,1.0,0.2,Iris-setosa
5.1,3.3,1.7,0.5,Iris-setosa
4.8,3.4,1.9,0.2,Iris-setosa
5.0,3.0,1.6,0.2,Iris-setosa
5.0,3.4,1.6,0.4,Iris-setosa
5.2,3.5,1.5,0.2,Iris-setosa
5.2,3.4,1.4,0.2,Iris-setosa
4.7,3.2,1.6,0.2,Iris-setosa
7.0,3.2,4.7,1.4,Iris-versicolor
6.4,3.2,4.5,1.5,Iris-versicolor
6.9,3.1,4.9,1.5,Iris-versicolor
5.5,2.3,4.0,1.3,Iris-versicolor
6.5,2.8,4.6,1.5,Iris-versicolor
5.7,2.8,4.5,1.3,Iris-versicolor
6.3,3.3,4.7,1.6,Iris-versicolor
4.9,2.4,3.3,1.0,Iris-versicolor
6.6,2.9,4.6,1.3,Iris-versicolor
5.2,2.7,3.9,1.4,Iris-versicolor
5.0,2.0,3.5,1.0,Iris-versicolor
5.9,3.0,4.2,1.5,Iris-versicolor
6.0,2.2,4.0,1.0,Iris-versicolor
6.1,2.9,4.7,1.4,Iris-versicolor
5.6,2.9,3.6,1.3,Iris-versicolor
6.7,3.1,4.4,1.4,Iris-versicolor
5.6,3.0,4.5,1.5,Iris-versicolor
5.8,2.7,4.1,1.0,Iris-versicolor
6.2,2.2,4.5,1.5,Iris-versicolor
5.6,2.5,3.9,1.1,Iris-versicolor
5.9,3.2,4.8,1.8,Iris-versicolor
6.1,2.8,4.0,1.3,Iris-versicolor
6.3,2.5,4.9,1.5,Iris-versicolor
6.1,2.8,4.7,1.2,Iris-versicolor
6.4,2.9,4.3,1.3,Iris-versicolor
6.6,3.0,4.4,1.4,Iris-versicolor
6.8,2.8,4.8,1.4,Iris-versicolor
6.7,3.0,5.0,1.7,Iris-versicolor
6.0,2.9,4.5,1.5,Iris-versicolor
5.7,2.6,3.5,1.0,Iris-versicolor
6.3,3.3,6.0,2.5,Iris-virginica
5.8,2.7,5.1,1.9,Iris-virginica
7.1,3.0,5.9,2.1,Iris-virginica
6.3,2.9,5.6,1.8,Iris-virginica
6.5,3.0,5.8,2.2,Iris-virginica
7.6,3.0,6.6,2.1,Iris-virginica
4.9,2.5,4.5,1.7,Iris-virginica
7.3,2.9,6.3,1.8,Iris-virginica
6.7,2.5,5.8,1.8,Iris-virginica
7.2,3.6,6.1,2.5,Iris-virginica
6.5,3.2,5.1,2.0,Iris-virginica
6.4,2.7,5.3,1.9,Iris-virginica
6.8,3.0,5.5,2.1,Iris-virginica
5.7,2.5,5.0,2.0,Iris-virginica
5.8,2.8,5.1,2.4,Iris-virginica
6.4,3.2,5.3,2.3,Iris-virginica
6.5,3.0,5.5,1.8,Iris-virginica
7.7,3.8,6.7,2.2,Iris-virginica
7.7,2.6,6.9,2.3,Iris-virginica
6.0,2.2,5.0,1.5,Iris-virginica
6.9,3.2,5.7,2.3,Iris-virginica
5.6,2.8,4.9,2.0,Iris-virginica
7.7,2.8,6.7,2.0,Iris-virginica
6.3,2.7,4.9,1.8,Iris-virginica
6.7,3.3,5.7,2.1,Iris-virginica
7.2,3.2,6.0,1.8,Iris-virginica
6.2,2.8,4.8,1.8,Iris-virginica
6.1,3.0,4.9,1.8,Iris-virginica
6.4,2.8,5.6,2.1,Iris-virginica
7.2,3.0,5.8,1.6,Iris-virginica
测试数据为剩余的60条数据:
4.8,3.1,1.6,0.2,Iris-setosa
5.4,3.4,1.5,0.4,Iris-setosa
5.2,4.1,1.5,0.1,Iris-setosa
5.5,4.2,1.4,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa
5.0,3.2,1.2,0.2,Iris-setosa
5.5,3.5,1.3,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa
4.4,3.0,1.3,0.2,Iris-setosa
5.1,3.4,1.5,0.2,Iris-setosa
5.0,3.5,1.3,0.3,Iris-setosa
4.5,2.3,1.3,0.3,Iris-setosa
4.4,3.2,1.3,0.2,Iris-setosa
5.0,3.5,1.6,0.6,Iris-setosa
5.1,3.8,1.9,0.4,Iris-setosa
4.8,3.0,1.4,0.3,Iris-setosa
5.1,3.8,1.6,0.2,Iris-setosa
4.6,3.2,1.4,0.2,Iris-setosa
5.3,3.7,1.5,0.2,Iris-setosa
5.0,3.3,1.4,0.2,Iris-setosa
5.5,2.4,3.8,1.1,Iris-versicolor
5.5,2.4,3.7,1.0,Iris-versicolor
5.8,2.7,3.9,1.2,Iris-versicolor
6.0,2.7,5.1,1.6,Iris-versicolor
5.4,3.0,4.5,1.5,Iris-versicolor
6.0,3.4,4.5,1.6,Iris-versicolor
6.7,3.1,4.7,1.5,Iris-versicolor
6.3,2.3,4.4,1.3,Iris-versicolor
5.6,3.0,4.1,1.3,Iris-versicolor
5.5,2.5,4.0,1.3,Iris-versicolor
5.5,2.6,4.4,1.2,Iris-versicolor
6.1,3.0,4.6,1.4,Iris-versicolor
5.8,2.6,4.0,1.2,Iris-versicolor
5.0,2.3,3.3,1.0,Iris-versicolor
5.6,2.7,4.2,1.3,Iris-versicolor
5.7,3.0,4.2,1.2,Iris-versicolor
5.7,2.9,4.2,1.3,Iris-versicolor
6.2,2.9,4.3,1.3,Iris-versicolor
5.1,2.5,3.0,1.1,Iris-versicolor
5.7,2.8,4.1,1.3,Iris-versicolor
7.4,2.8,6.1,1.9,Iris-virginica
7.9,3.8,6.4,2.0,Iris-virginica
6.4,2.8,5.6,2.2,Iris-virginica
6.3,2.8,5.1,1.5,Iris-virginica
6.1,2.6,5.6,1.4,Iris-virginica
7.7,3.0,6.1,2.3,Iris-virginica
6.3,3.4,5.6,2.4,Iris-virginica
6.4,3.1,5.5,1.8,Iris-virginica
6.0,3.0,4.8,1.8,Iris-virginica
6.9,3.1,5.4,2.1,Iris-virginica
6.7,3.1,5.6,2.4,Iris-virginica
6.9,3.1,5.1,2.3,Iris-virginica
5.8,2.7,5.1,1.9,Iris-virginica
6.8,3.2,5.9,2.3,Iris-virginica
6.7,3.3,5.7,2.5,Iris-virginica
6.7,3.0,5.2,2.3,Iris-virginica
6.3,2.5,5.0,1.9,Iris-virginica
6.5,3.0,5.2,2.0,Iris-virginica
6.2,3.4,5.4,2.3,Iris-virginica
5.9,3.0,5.1,1.8,Iris-virginica
算法代码如下:
1 package neugle.knn; 2 3 import java.io.BufferedReader; 4 import java.io.FileReader; 5 import java.util.ArrayList; 6 import java.util.HashMap; 7 import java.util.LinkedHashMap; 8 import java.util.List; 9 import java.util.Map.Entry; 10 import java.util.Set; 11 12 public class KNN { 13 private List<Iris> irisList = new ArrayList<KNN.Iris>(); 14 15 class Iris { 16 public double Sep_len; 17 public double Sep_wid; 18 public double Pet_len; 19 public double Pet_wid; 20 public String Iris_type; 21 } 22 23 // 读取数据 24 public List<Iris> ReadFile(String filePath) { 25 FileReader fr = null; 26 BufferedReader br = null; 27 List<Iris> irisList = new ArrayList<KNN.Iris>(); 28 try { 29 fr = new FileReader(filePath); 30 br = new BufferedReader(fr); 31 String line = null; 32 while ((line = br.readLine()) != null) { 33 Iris iris = new Iris(); 34 String[] agrs = line.split(","); 35 iris.Sep_len = Double.parseDouble(agrs[0]); 36 iris.Sep_wid = Double.parseDouble(agrs[1]); 37 iris.Pet_len = Double.parseDouble(agrs[2]); 38 iris.Pet_wid = Double.parseDouble(agrs[3]); 39 iris.Iris_type = agrs[4]; 40 irisList.add(iris); 41 } 42 } catch (Exception e) { 43 e.printStackTrace(); 44 } finally { 45 try { 46 br.close(); 47 } catch (Exception e) { 48 e.printStackTrace(); 49 } 50 } 51 return irisList; 52 } 53 54 // 计算测试数据和样本点中每个点的距离 55 public LinkedHashMap<Integer, Double> GetDistance(Iris iris) { 56 LinkedHashMap<Integer, Double> irisMap = new LinkedHashMap<Integer, Double>(); 57 for (int i = 0; i < this.irisList.size(); i++) { 58 double d = this.DistanceCalculate(iris, this.irisList.get(i)); 59 irisMap.put(i, d); 60 } 61 return irisMap; 62 } 63 64 private double DistanceCalculate(Iris iris1, Iris iris2) { 65 double sum = Math.sqrt(Math.pow((iris1.Sep_len - iris2.Sep_len), 2) 66 + Math.pow((iris1.Sep_wid - iris2.Sep_wid), 2) 67 + Math.pow((iris1.Pet_len - iris2.Pet_len), 2) 68 + Math.pow((iris1.Pet_wid - iris2.Pet_wid), 2)); 69 return sum; 70 } 71 72 // 找出前k个数据 73 public List<Iris> FindKData(int 
k, LinkedHashMap<Integer, Double> irisMap) { 74 List<Integer> iList = new ArrayList<Integer>(); 75 List<Iris> rList = new ArrayList<KNN.Iris>(); 76 Set<Entry<Integer, Double>> set = irisMap.entrySet(); 77 for (int i = 0; i < k; i++) { 78 int key = 0; 79 double value = 0; 80 boolean flag = true; 81 for (Entry<Integer, Double> e : set) { 82 if (flag == true) { 83 key = e.getKey(); 84 value = e.getValue(); 85 flag = false; 86 continue; 87 } 88 if (e.getValue() < value) { 89 key = e.getKey(); 90 value = e.getValue(); 91 } 92 } 93 iList.add(key); 94 irisMap.remove(key); 95 } 96 97 for (int i = 0; i < iList.size(); i++) { 98 rList.add(this.irisList.get(iList.get(i))); 99 } 100 return rList; 101 } 102 103 // 找出该测试数据应属于哪一类 104 public String FindClass(List<Iris> iList) { 105 HashMap<String, Integer> map = new HashMap<String, Integer>(); 106 for (int i = 0; i < iList.size(); i++) { 107 String s = iList.get(i).Iris_type; 108 if (map.containsKey(s)) { 109 map.put(s, map.get(s) + 1); 110 } else { 111 map.put(s, 1); 112 } 113 } 114 115 String key = null; 116 int value = 0; 117 for (Entry<String, Integer> e : map.entrySet()) { 118 if (e.getValue() > value) { 119 value = e.getValue(); 120 key = e.getKey(); 121 } 122 } 123 return key; 124 } 125 126 // 操控方法 127 public void Calc(String filePath1, String filePath2, int k) { 128 this.irisList = this.ReadFile(filePath1); 129 List<Iris> fList = this.ReadFile(filePath2); 130 System.out.println("测试数据展示:"); 131 System.out.println("-----------------------"); 132 for (int i = 0; i < fList.size(); i++) { 133 Iris iris = fList.get(i); 134 System.out.println(iris.Pet_len + " " + iris.Pet_wid + " " 135 + iris.Sep_len + " " + iris.Sep_wid + " " + iris.Iris_type); 136 } 137 System.out.println("-----------------------"); 138 System.out.println("测试结果为:"); 139 System.out.println("-----------------------"); 140 for (int i = 0; i < fList.size(); i++) { 141 Iris iris = fList.get(i); 142 LinkedHashMap<Integer, Double> dMap = this.GetDistance(iris); 143 
List<Iris> iList = this.FindKData(k, dMap); 144 String type = this.FindClass(iList); 145 System.out.println(iris.Pet_len + " " + iris.Pet_wid + " " 146 + iris.Sep_len + " " + iris.Sep_wid + " " + type); 147 } 148 System.out.println("-----------------------"); 149 } 150 151 public static void main(String[] args) { 152 KNN knn = new KNN(); 153 String filePath1 = "D:\\data\\KNN\\iris.data";// 样本数据位置 154 String filePath2 = "D:\\data\\KNN\\firis.data";// 测试数据位置 155 int k = 3; 156 knn.Calc(filePath1, filePath2, k); 157 } 158 }
实验结果如下(k=3;注意程序输出的列顺序与数据文件不同,各列依次为花瓣长度、花瓣宽度、花萼长度、花萼宽度、预测类别):
1.6 0.2 4.8 3.1 Iris-setosa
1.5 0.4 5.4 3.4 Iris-setosa
1.5 0.1 5.2 4.1 Iris-setosa
1.4 0.2 5.5 4.2 Iris-setosa
1.5 0.1 4.9 3.1 Iris-setosa
1.2 0.2 5.0 3.2 Iris-setosa
1.3 0.2 5.5 3.5 Iris-setosa
1.5 0.1 4.9 3.1 Iris-setosa
1.3 0.2 4.4 3.0 Iris-setosa
1.5 0.2 5.1 3.4 Iris-setosa
1.3 0.3 5.0 3.5 Iris-setosa
1.3 0.3 4.5 2.3 Iris-setosa
1.3 0.2 4.4 3.2 Iris-setosa
1.6 0.6 5.0 3.5 Iris-setosa
1.9 0.4 5.1 3.8 Iris-setosa
1.4 0.3 4.8 3.0 Iris-setosa
1.6 0.2 5.1 3.8 Iris-setosa
1.4 0.2 4.6 3.2 Iris-setosa
1.5 0.2 5.3 3.7 Iris-setosa
1.4 0.2 5.0 3.3 Iris-setosa
3.8 1.1 5.5 2.4 Iris-versicolor
3.7 1.0 5.5 2.4 Iris-versicolor
3.9 1.2 5.8 2.7 Iris-versicolor
5.1 1.6 6.0 2.7 Iris-virginica
4.5 1.5 5.4 3.0 Iris-versicolor
4.5 1.6 6.0 3.4 Iris-versicolor
4.7 1.5 6.7 3.1 Iris-versicolor
4.4 1.3 6.3 2.3 Iris-versicolor
4.1 1.3 5.6 3.0 Iris-versicolor
4.0 1.3 5.5 2.5 Iris-versicolor
4.4 1.2 5.5 2.6 Iris-versicolor
4.6 1.4 6.1 3.0 Iris-versicolor
4.0 1.2 5.8 2.6 Iris-versicolor
3.3 1.0 5.0 2.3 Iris-versicolor
4.2 1.3 5.6 2.7 Iris-versicolor
4.2 1.2 5.7 3.0 Iris-versicolor
4.2 1.3 5.7 2.9 Iris-versicolor
4.3 1.3 6.2 2.9 Iris-versicolor
3.0 1.1 5.1 2.5 Iris-versicolor
4.1 1.3 5.7 2.8 Iris-versicolor
6.1 1.9 7.4 2.8 Iris-virginica
6.4 2.0 7.9 3.8 Iris-virginica
5.6 2.2 6.4 2.8 Iris-virginica
5.1 1.5 6.3 2.8 Iris-virginica
5.6 1.4 6.1 2.6 Iris-virginica
6.1 2.3 7.7 3.0 Iris-virginica
5.6 2.4 6.3 3.4 Iris-virginica
5.5 1.8 6.4 3.1 Iris-virginica
4.8 1.8 6.0 3.0 Iris-virginica
5.4 2.1 6.9 3.1 Iris-virginica
5.6 2.4 6.7 3.1 Iris-virginica
5.1 2.3 6.9 3.1 Iris-virginica
5.1 1.9 5.8 2.7 Iris-virginica
5.9 2.3 6.8 3.2 Iris-virginica
5.7 2.5 6.7 3.3 Iris-virginica
5.2 2.3 6.7 3.0 Iris-virginica
5.0 1.9 6.3 2.5 Iris-virginica
5.2 2.0 6.5 3.0 Iris-virginica
5.4 2.3 6.2 3.4 Iris-virginica
5.1 1.8 5.9 3.0 Iris-virginica