数据挖掘分类算法--KNN

实验中使用的数据依然是UCI上的Iris数据集,分为样本(训练)数据和测试数据两部分,分别如下:

样本数据是从Iris的三类数据中各选取30条,共90条:

5.1,3.5,1.4,0.2,Iris-setosa
4.9,3.0,1.4,0.2,Iris-setosa
4.7,3.2,1.3,0.2,Iris-setosa
4.6,3.1,1.5,0.2,Iris-setosa
5.0,3.6,1.4,0.2,Iris-setosa
5.4,3.9,1.7,0.4,Iris-setosa
4.6,3.4,1.4,0.3,Iris-setosa
5.0,3.4,1.5,0.2,Iris-setosa
4.4,2.9,1.4,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa
5.4,3.7,1.5,0.2,Iris-setosa
4.8,3.4,1.6,0.2,Iris-setosa
4.8,3.0,1.4,0.1,Iris-setosa
4.3,3.0,1.1,0.1,Iris-setosa
5.8,4.0,1.2,0.2,Iris-setosa
5.7,4.4,1.5,0.4,Iris-setosa
5.4,3.9,1.3,0.4,Iris-setosa
5.1,3.5,1.4,0.3,Iris-setosa
5.7,3.8,1.7,0.3,Iris-setosa
5.1,3.8,1.5,0.3,Iris-setosa
5.4,3.4,1.7,0.2,Iris-setosa
5.1,3.7,1.5,0.4,Iris-setosa
4.6,3.6,1.0,0.2,Iris-setosa
5.1,3.3,1.7,0.5,Iris-setosa
4.8,3.4,1.9,0.2,Iris-setosa
5.0,3.0,1.6,0.2,Iris-setosa
5.0,3.4,1.6,0.4,Iris-setosa
5.2,3.5,1.5,0.2,Iris-setosa
5.2,3.4,1.4,0.2,Iris-setosa
4.7,3.2,1.6,0.2,Iris-setosa
7.0,3.2,4.7,1.4,Iris-versicolor
6.4,3.2,4.5,1.5,Iris-versicolor
6.9,3.1,4.9,1.5,Iris-versicolor
5.5,2.3,4.0,1.3,Iris-versicolor
6.5,2.8,4.6,1.5,Iris-versicolor
5.7,2.8,4.5,1.3,Iris-versicolor
6.3,3.3,4.7,1.6,Iris-versicolor
4.9,2.4,3.3,1.0,Iris-versicolor
6.6,2.9,4.6,1.3,Iris-versicolor
5.2,2.7,3.9,1.4,Iris-versicolor
5.0,2.0,3.5,1.0,Iris-versicolor
5.9,3.0,4.2,1.5,Iris-versicolor
6.0,2.2,4.0,1.0,Iris-versicolor
6.1,2.9,4.7,1.4,Iris-versicolor
5.6,2.9,3.6,1.3,Iris-versicolor
6.7,3.1,4.4,1.4,Iris-versicolor
5.6,3.0,4.5,1.5,Iris-versicolor
5.8,2.7,4.1,1.0,Iris-versicolor
6.2,2.2,4.5,1.5,Iris-versicolor
5.6,2.5,3.9,1.1,Iris-versicolor
5.9,3.2,4.8,1.8,Iris-versicolor
6.1,2.8,4.0,1.3,Iris-versicolor
6.3,2.5,4.9,1.5,Iris-versicolor
6.1,2.8,4.7,1.2,Iris-versicolor
6.4,2.9,4.3,1.3,Iris-versicolor
6.6,3.0,4.4,1.4,Iris-versicolor
6.8,2.8,4.8,1.4,Iris-versicolor
6.7,3.0,5.0,1.7,Iris-versicolor
6.0,2.9,4.5,1.5,Iris-versicolor
5.7,2.6,3.5,1.0,Iris-versicolor
6.3,3.3,6.0,2.5,Iris-virginica
5.8,2.7,5.1,1.9,Iris-virginica
7.1,3.0,5.9,2.1,Iris-virginica
6.3,2.9,5.6,1.8,Iris-virginica
6.5,3.0,5.8,2.2,Iris-virginica
7.6,3.0,6.6,2.1,Iris-virginica
4.9,2.5,4.5,1.7,Iris-virginica
7.3,2.9,6.3,1.8,Iris-virginica
6.7,2.5,5.8,1.8,Iris-virginica
7.2,3.6,6.1,2.5,Iris-virginica
6.5,3.2,5.1,2.0,Iris-virginica
6.4,2.7,5.3,1.9,Iris-virginica
6.8,3.0,5.5,2.1,Iris-virginica
5.7,2.5,5.0,2.0,Iris-virginica
5.8,2.8,5.1,2.4,Iris-virginica
6.4,3.2,5.3,2.3,Iris-virginica
6.5,3.0,5.5,1.8,Iris-virginica
7.7,3.8,6.7,2.2,Iris-virginica
7.7,2.6,6.9,2.3,Iris-virginica
6.0,2.2,5.0,1.5,Iris-virginica
6.9,3.2,5.7,2.3,Iris-virginica
5.6,2.8,4.9,2.0,Iris-virginica
7.7,2.8,6.7,2.0,Iris-virginica
6.3,2.7,4.9,1.8,Iris-virginica
6.7,3.3,5.7,2.1,Iris-virginica
7.2,3.2,6.0,1.8,Iris-virginica
6.2,2.8,4.8,1.8,Iris-virginica
6.1,3.0,4.9,1.8,Iris-virginica
6.4,2.8,5.6,2.1,Iris-virginica
7.2,3.0,5.8,1.6,Iris-virginica
View Code

测试数据为剩余的60条数据:

4.8,3.1,1.6,0.2,Iris-setosa
5.4,3.4,1.5,0.4,Iris-setosa
5.2,4.1,1.5,0.1,Iris-setosa
5.5,4.2,1.4,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa
5.0,3.2,1.2,0.2,Iris-setosa
5.5,3.5,1.3,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa
4.4,3.0,1.3,0.2,Iris-setosa
5.1,3.4,1.5,0.2,Iris-setosa
5.0,3.5,1.3,0.3,Iris-setosa
4.5,2.3,1.3,0.3,Iris-setosa
4.4,3.2,1.3,0.2,Iris-setosa
5.0,3.5,1.6,0.6,Iris-setosa
5.1,3.8,1.9,0.4,Iris-setosa
4.8,3.0,1.4,0.3,Iris-setosa
5.1,3.8,1.6,0.2,Iris-setosa
4.6,3.2,1.4,0.2,Iris-setosa
5.3,3.7,1.5,0.2,Iris-setosa
5.0,3.3,1.4,0.2,Iris-setosa
5.5,2.4,3.8,1.1,Iris-versicolor
5.5,2.4,3.7,1.0,Iris-versicolor
5.8,2.7,3.9,1.2,Iris-versicolor
6.0,2.7,5.1,1.6,Iris-versicolor
5.4,3.0,4.5,1.5,Iris-versicolor
6.0,3.4,4.5,1.6,Iris-versicolor
6.7,3.1,4.7,1.5,Iris-versicolor
6.3,2.3,4.4,1.3,Iris-versicolor
5.6,3.0,4.1,1.3,Iris-versicolor
5.5,2.5,4.0,1.3,Iris-versicolor
5.5,2.6,4.4,1.2,Iris-versicolor
6.1,3.0,4.6,1.4,Iris-versicolor
5.8,2.6,4.0,1.2,Iris-versicolor
5.0,2.3,3.3,1.0,Iris-versicolor
5.6,2.7,4.2,1.3,Iris-versicolor
5.7,3.0,4.2,1.2,Iris-versicolor
5.7,2.9,4.2,1.3,Iris-versicolor
6.2,2.9,4.3,1.3,Iris-versicolor
5.1,2.5,3.0,1.1,Iris-versicolor
5.7,2.8,4.1,1.3,Iris-versicolor
7.4,2.8,6.1,1.9,Iris-virginica
7.9,3.8,6.4,2.0,Iris-virginica
6.4,2.8,5.6,2.2,Iris-virginica
6.3,2.8,5.1,1.5,Iris-virginica
6.1,2.6,5.6,1.4,Iris-virginica
7.7,3.0,6.1,2.3,Iris-virginica
6.3,3.4,5.6,2.4,Iris-virginica
6.4,3.1,5.5,1.8,Iris-virginica
6.0,3.0,4.8,1.8,Iris-virginica
6.9,3.1,5.4,2.1,Iris-virginica
6.7,3.1,5.6,2.4,Iris-virginica
6.9,3.1,5.1,2.3,Iris-virginica
5.8,2.7,5.1,1.9,Iris-virginica
6.8,3.2,5.9,2.3,Iris-virginica
6.7,3.3,5.7,2.5,Iris-virginica
6.7,3.0,5.2,2.3,Iris-virginica
6.3,2.5,5.0,1.9,Iris-virginica
6.5,3.0,5.2,2.0,Iris-virginica
6.2,3.4,5.4,2.3,Iris-virginica
5.9,3.0,5.1,1.8,Iris-virginica
View Code

算法代码如下:

  1 package neugle.knn;
  2 
  3 import java.io.BufferedReader;
  4 import java.io.FileReader;
  5 import java.util.ArrayList;
  6 import java.util.HashMap;
  7 import java.util.LinkedHashMap;
  8 import java.util.List;
  9 import java.util.Map.Entry;
 10 import java.util.Set;
 11 
 12 public class KNN {
 13     private List<Iris> irisList = new ArrayList<KNN.Iris>();
 14 
 15     class Iris {
 16         public double Sep_len;
 17         public double Sep_wid;
 18         public double Pet_len;
 19         public double Pet_wid;
 20         public String Iris_type;
 21     }
 22 
 23     // 读取数据
 24     public List<Iris> ReadFile(String filePath) {
 25         FileReader fr = null;
 26         BufferedReader br = null;
 27         List<Iris> irisList = new ArrayList<KNN.Iris>();
 28         try {
 29             fr = new FileReader(filePath);
 30             br = new BufferedReader(fr);
 31             String line = null;
 32             while ((line = br.readLine()) != null) {
 33                 Iris iris = new Iris();
 34                 String[] agrs = line.split(",");
 35                 iris.Sep_len = Double.parseDouble(agrs[0]);
 36                 iris.Sep_wid = Double.parseDouble(agrs[1]);
 37                 iris.Pet_len = Double.parseDouble(agrs[2]);
 38                 iris.Pet_wid = Double.parseDouble(agrs[3]);
 39                 iris.Iris_type = agrs[4];
 40                 irisList.add(iris);
 41             }
 42         } catch (Exception e) {
 43             e.printStackTrace();
 44         } finally {
 45             try {
 46                 br.close();
 47             } catch (Exception e) {
 48                 e.printStackTrace();
 49             }
 50         }
 51         return irisList;
 52     }
 53 
 54     // 计算测试数据和样本点中每个点的距离
 55     public LinkedHashMap<Integer, Double> GetDistance(Iris iris) {
 56         LinkedHashMap<Integer, Double> irisMap = new LinkedHashMap<Integer, Double>();
 57         for (int i = 0; i < this.irisList.size(); i++) {
 58             double d = this.DistanceCalculate(iris, this.irisList.get(i));
 59             irisMap.put(i, d);
 60         }
 61         return irisMap;
 62     }
 63 
 64     private double DistanceCalculate(Iris iris1, Iris iris2) {
 65         double sum = Math.sqrt(Math.pow((iris1.Sep_len - iris2.Sep_len), 2)
 66                 + Math.pow((iris1.Sep_wid - iris2.Sep_wid), 2)
 67                 + Math.pow((iris1.Pet_len - iris2.Pet_len), 2)
 68                 + Math.pow((iris1.Pet_wid - iris2.Pet_wid), 2));
 69         return sum;
 70     }
 71 
 72     // 找出前k个数据
 73     public List<Iris> FindKData(int k, LinkedHashMap<Integer, Double> irisMap) {
 74         List<Integer> iList = new ArrayList<Integer>();
 75         List<Iris> rList = new ArrayList<KNN.Iris>();
 76         Set<Entry<Integer, Double>> set = irisMap.entrySet();
 77         for (int i = 0; i < k; i++) {
 78             int key = 0;
 79             double value = 0;
 80             boolean flag = true;
 81             for (Entry<Integer, Double> e : set) {
 82                 if (flag == true) {
 83                     key = e.getKey();
 84                     value = e.getValue();
 85                     flag = false;
 86                     continue;
 87                 }
 88                 if (e.getValue() < value) {
 89                     key = e.getKey();
 90                     value = e.getValue();
 91                 }
 92             }
 93             iList.add(key);
 94             irisMap.remove(key);
 95         }
 96 
 97         for (int i = 0; i < iList.size(); i++) {
 98             rList.add(this.irisList.get(iList.get(i)));
 99         }
100         return rList;
101     }
102 
103     // 找出该测试数据应属于哪一类
104     public String FindClass(List<Iris> iList) {
105         HashMap<String, Integer> map = new HashMap<String, Integer>();
106         for (int i = 0; i < iList.size(); i++) {
107             String s = iList.get(i).Iris_type;
108             if (map.containsKey(s)) {
109                 map.put(s, map.get(s) + 1);
110             } else {
111                 map.put(s, 1);
112             }
113         }
114 
115         String key = null;
116         int value = 0;
117         for (Entry<String, Integer> e : map.entrySet()) {
118             if (e.getValue() > value) {
119                 value = e.getValue();
120                 key = e.getKey();
121             }
122         }
123         return key;
124     }
125 
126     // 操控方法
127     public void Calc(String filePath1, String filePath2, int k) {
128         this.irisList = this.ReadFile(filePath1);
129         List<Iris> fList = this.ReadFile(filePath2);
130         System.out.println("测试数据展示:");
131         System.out.println("-----------------------");
132         for (int i = 0; i < fList.size(); i++) {
133             Iris iris = fList.get(i);
134             System.out.println(iris.Pet_len + " " + iris.Pet_wid + " "
135                     + iris.Sep_len + " " + iris.Sep_wid + " " + iris.Iris_type);
136         }
137         System.out.println("-----------------------");
138         System.out.println("测试结果为:");
139         System.out.println("-----------------------");
140         for (int i = 0; i < fList.size(); i++) {
141             Iris iris = fList.get(i);
142             LinkedHashMap<Integer, Double> dMap = this.GetDistance(iris);
143             List<Iris> iList = this.FindKData(k, dMap);
144             String type = this.FindClass(iList);
145             System.out.println(iris.Pet_len + " " + iris.Pet_wid + " "
146                     + iris.Sep_len + " " + iris.Sep_wid + " " + type);
147         }
148         System.out.println("-----------------------");
149     }
150 
151     public static void main(String[] args) {
152         KNN knn = new KNN();
153         String filePath1 = "D:\\data\\KNN\\iris.data";// 样本数据位置
154         String filePath2 = "D:\\data\\KNN\\firis.data";// 测试数据位置
155         int k = 3;
156         knn.Calc(filePath1, filePath2, k);
157     }
158 }

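实验结果如下(k=3;每行依次为花瓣长度、花瓣宽度、萼片长度、萼片宽度和预测类别,列顺序与输入文件不同。对照测试数据可知,60条中有59条分类正确,仅 5.1 1.6 6.0 2.7 这一条versicolor被误判为virginica):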

1.6 0.2 4.8 3.1 Iris-setosa
1.5 0.4 5.4 3.4 Iris-setosa
1.5 0.1 5.2 4.1 Iris-setosa
1.4 0.2 5.5 4.2 Iris-setosa
1.5 0.1 4.9 3.1 Iris-setosa
1.2 0.2 5.0 3.2 Iris-setosa
1.3 0.2 5.5 3.5 Iris-setosa
1.5 0.1 4.9 3.1 Iris-setosa
1.3 0.2 4.4 3.0 Iris-setosa
1.5 0.2 5.1 3.4 Iris-setosa
1.3 0.3 5.0 3.5 Iris-setosa
1.3 0.3 4.5 2.3 Iris-setosa
1.3 0.2 4.4 3.2 Iris-setosa
1.6 0.6 5.0 3.5 Iris-setosa
1.9 0.4 5.1 3.8 Iris-setosa
1.4 0.3 4.8 3.0 Iris-setosa
1.6 0.2 5.1 3.8 Iris-setosa
1.4 0.2 4.6 3.2 Iris-setosa
1.5 0.2 5.3 3.7 Iris-setosa
1.4 0.2 5.0 3.3 Iris-setosa
3.8 1.1 5.5 2.4 Iris-versicolor
3.7 1.0 5.5 2.4 Iris-versicolor
3.9 1.2 5.8 2.7 Iris-versicolor
5.1 1.6 6.0 2.7 Iris-virginica
4.5 1.5 5.4 3.0 Iris-versicolor
4.5 1.6 6.0 3.4 Iris-versicolor
4.7 1.5 6.7 3.1 Iris-versicolor
4.4 1.3 6.3 2.3 Iris-versicolor
4.1 1.3 5.6 3.0 Iris-versicolor
4.0 1.3 5.5 2.5 Iris-versicolor
4.4 1.2 5.5 2.6 Iris-versicolor
4.6 1.4 6.1 3.0 Iris-versicolor
4.0 1.2 5.8 2.6 Iris-versicolor
3.3 1.0 5.0 2.3 Iris-versicolor
4.2 1.3 5.6 2.7 Iris-versicolor
4.2 1.2 5.7 3.0 Iris-versicolor
4.2 1.3 5.7 2.9 Iris-versicolor
4.3 1.3 6.2 2.9 Iris-versicolor
3.0 1.1 5.1 2.5 Iris-versicolor
4.1 1.3 5.7 2.8 Iris-versicolor
6.1 1.9 7.4 2.8 Iris-virginica
6.4 2.0 7.9 3.8 Iris-virginica
5.6 2.2 6.4 2.8 Iris-virginica
5.1 1.5 6.3 2.8 Iris-virginica
5.6 1.4 6.1 2.6 Iris-virginica
6.1 2.3 7.7 3.0 Iris-virginica
5.6 2.4 6.3 3.4 Iris-virginica
5.5 1.8 6.4 3.1 Iris-virginica
4.8 1.8 6.0 3.0 Iris-virginica
5.4 2.1 6.9 3.1 Iris-virginica
5.6 2.4 6.7 3.1 Iris-virginica
5.1 2.3 6.9 3.1 Iris-virginica
5.1 1.9 5.8 2.7 Iris-virginica
5.9 2.3 6.8 3.2 Iris-virginica
5.7 2.5 6.7 3.3 Iris-virginica
5.2 2.3 6.7 3.0 Iris-virginica
5.0 1.9 6.3 2.5 Iris-virginica
5.2 2.0 6.5 3.0 Iris-virginica
5.4 2.3 6.2 3.4 Iris-virginica
5.1 1.8 5.9 3.0 Iris-virginica
View Code
posted @ 2015-07-16 13:21  iYou  阅读(494)  评论(0)  编辑  收藏  举报