数据挖掘聚类算法--Kmeans
算法采用的数据集为 iris(鸢尾花)，可以在 UCI 上下载：http://archive.ics.uci.edu/ml/datasets/Iris
数据集介绍:
1. sepal length in cm
2. sepal width in cm
3. petal length in cm
4. petal width in cm
5. class:
-- Iris Setosa
-- Iris Versicolour
-- Iris Virginica
直接上代码:
1 package neugle.kmeans; 2 3 public class IrisModel { 4 public double Sep_len = 0; 5 public double Sep_wid = 0; 6 public double Pet_len = 0; 7 public double Pet_wid = 0; 8 public String Iris_type = ""; 9 10 public boolean equals(Object obj) { 11 IrisModel iris = (IrisModel) obj; 12 return this.Sep_len == iris.Sep_len && this.Sep_wid == iris.Sep_wid 13 && this.Pet_len == iris.Pet_len && this.Pet_wid == iris.Pet_wid; 14 } 15 }
1 package neugle.kmeans; 2 3 import java.io.BufferedReader; 4 import java.io.FileNotFoundException; 5 import java.io.FileReader; 6 import java.io.IOException; 7 import java.util.ArrayList; 8 import java.util.Iterator; 9 10 public class Kmeans { 11 private static int k = 3;// 划分簇数目 12 private static int dataCount = 150;// 文本数量 13 private static int n = 0;// 迭代次数 14 15 public static void main(String[] args) { 16 ArrayList<IrisModel> irisList = ReadFile();// 取得文本中数据 17 ArrayList<IrisModel> beforeRandomPot = new ArrayList<IrisModel>();// 记录上一次质心位置 18 ArrayList<IrisModel> randomPot = RandomPot(irisList);// 获得随机数据 19 ArrayList<ArrayList<IrisModel>> kMeansList = null; 20 while (!CompareRandomPot(beforeRandomPot, randomPot)) { 21 kMeansList = KMeans(irisList, randomPot);// 进行n次聚类 22 n++; 23 } 24 Print(kMeansList); 25 System.out.println("迭代了" + n + "次"); 26 } 27 28 // 读取文件中数据 29 private static ArrayList<IrisModel> ReadFile() { 30 FileReader read = null; 31 BufferedReader br = null; 32 ArrayList<IrisModel> irisList = new ArrayList<IrisModel>(); 33 try { 34 read = new FileReader("D:\\iris.data"); 35 br = new BufferedReader(read); 36 String readLine = null; 37 while ((readLine = br.readLine()) != null) { 38 IrisModel iris = new IrisModel(); 39 String[] agrs = readLine.split(","); 40 iris.Sep_len = Double.parseDouble(agrs[0]); 41 iris.Sep_wid = Double.parseDouble(agrs[1]); 42 iris.Pet_len = Double.parseDouble(agrs[2]); 43 iris.Pet_wid = Double.parseDouble(agrs[3]); 44 iris.Iris_type = agrs[4]; 45 irisList.add(iris); 46 } 47 } catch (FileNotFoundException e) { 48 System.out.println("读取文件异常"); 49 irisList = null; 50 } catch (IOException e) { 51 System.out.println("读取文件异常"); 52 irisList = null; 53 } finally { 54 try { 55 br.close(); 56 } catch (IOException e) { 57 System.out.println("关闭文件异常"); 58 } 59 } 60 return irisList; 61 } 62 63 // 随机生成初始k个点 64 private static ArrayList<IrisModel> RandomPot(ArrayList<IrisModel> irisList) { 65 ArrayList<Integer> initCenter = new 
ArrayList<Integer>(); 66 ArrayList<IrisModel> randomPot = new ArrayList<IrisModel>(); 67 for (int i = 0; i < k; i++) { 68 int num = (int) (Math.random() * dataCount); 69 if (!initCenter.contains(num)) 70 initCenter.add(num); 71 else 72 i--; 73 } 74 Iterator<Integer> i = initCenter.iterator(); 75 while (i.hasNext()) { 76 randomPot.add(irisList.get(i.next())); 77 } 78 return randomPot; 79 } 80 81 // KMeans主程序 82 private static ArrayList<ArrayList<IrisModel>> KMeans( 83 ArrayList<IrisModel> irisList, ArrayList<IrisModel> randomPot) { 84 ArrayList<ArrayList<IrisModel>> groupNum = new ArrayList<ArrayList<IrisModel>>(); 85 for (int i = 0; i < randomPot.size(); i++) { 86 ArrayList<IrisModel> list = new ArrayList<IrisModel>(); 87 list.add(randomPot.get(i)); 88 groupNum.add(list); 89 } 90 for (int i = 0; i < irisList.size(); i++) { 91 double temp = Double.MAX_VALUE; 92 int flag = -1; 93 for (int j = 0; j < randomPot.size(); j++) { 94 double distance = DistanceOfTwoPoint(irisList.get(i), 95 randomPot.get(j)); 96 if (distance < temp) { 97 temp = distance; 98 flag = j; 99 } 100 } 101 groupNum.get(flag).add(irisList.get(i)); 102 } 103 // 重新计算质心 104 ArrayList<IrisModel> tempList = CalcCenter(groupNum); 105 randomPot.clear(); 106 for (int i = 0; i < tempList.size(); i++) { 107 randomPot.add(tempList.get(i)); 108 } 109 return groupNum; 110 } 111 112 // 计算两点欧氏距离 113 private static double DistanceOfTwoPoint(IrisModel d1, IrisModel d2) { 114 double sum = Math.sqrt(Math.pow((d1.Sep_len - d2.Sep_len), 2) 115 + Math.pow((d1.Sep_wid - d2.Sep_wid), 2) 116 + Math.pow((d1.Pet_len - d2.Pet_len), 2) 117 + Math.pow((d1.Pet_wid - d2.Pet_wid), 2)); 118 return sum; 119 } 120 121 // 重新计算k个簇的质心 122 private static ArrayList<IrisModel> CalcCenter( 123 ArrayList<ArrayList<IrisModel>> c) { 124 ArrayList<IrisModel> cIris = new ArrayList<IrisModel>(); 125 Iterator<ArrayList<IrisModel>> i = c.iterator(); 126 while (i.hasNext()) { 127 ArrayList<IrisModel> irisList = i.next(); 128 IrisModel eIris = new 
IrisModel(); 129 for (int k = 0; k < irisList.size(); k++) { 130 eIris.Sep_len += irisList.get(k).Sep_len; 131 eIris.Sep_wid += irisList.get(k).Sep_wid; 132 eIris.Pet_len += irisList.get(k).Pet_len; 133 eIris.Pet_wid += irisList.get(k).Pet_wid; 134 } 135 eIris.Sep_len = eIris.Sep_len / irisList.size(); 136 eIris.Sep_wid = eIris.Sep_wid / irisList.size(); 137 eIris.Pet_len = eIris.Pet_len / irisList.size(); 138 eIris.Pet_wid = eIris.Pet_wid / irisList.size(); 139 cIris.add(eIris); 140 } 141 142 return cIris; 143 } 144 145 // 比较前后两次的质心,以确定是否结束 146 private static Boolean CompareRandomPot( 147 ArrayList<IrisModel> beforeRandomPot, ArrayList<IrisModel> randomPot) { 148 boolean flag = true; 149 for (int i = 0; i < randomPot.size(); i++) { 150 if (beforeRandomPot.size() <= 0 151 || !beforeRandomPot.contains(randomPot.get(i))) { 152 flag = false; 153 break; 154 } 155 } 156 if (flag == false) { 157 if (beforeRandomPot.size() > 0) { 158 beforeRandomPot.clear(); 159 } 160 for (int i = 0; i < randomPot.size(); i++) { 161 beforeRandomPot.add(randomPot.get(i)); 162 } 163 } 164 return flag; 165 } 166 167 // 打印 168 private static void Print(ArrayList<ArrayList<IrisModel>> kmeansList) { 169 System.out.println("------------------------------------"); 170 Iterator<ArrayList<IrisModel>> i = kmeansList.iterator(); 171 while (i.hasNext()) { 172 Iterator<IrisModel> ii = i.next().iterator(); 173 int n = 0; 174 while (ii.hasNext()) { 175 n++; 176 IrisModel irisModel = ii.next(); 177 if (n == 1) 178 continue; 179 System.out.println(irisModel.Sep_len + " " + irisModel.Sep_wid 180 + " " + irisModel.Pet_len + " " + irisModel.Pet_wid 181 + " " + irisModel.Iris_type); 182 } 183 System.out.println(n - 1); 184 System.out.println("------------------------------------"); 185 } 186 } 187 }
实验结果:
------------------------------------
7.0 3.2 4.7 1.4 Iris-versicolor
6.4 3.2 4.5 1.5 Iris-versicolor
5.5 2.3 4.0 1.3 Iris-versicolor
6.5 2.8 4.6 1.5 Iris-versicolor
5.7 2.8 4.5 1.3 Iris-versicolor
6.3 3.3 4.7 1.6 Iris-versicolor
4.9 2.4 3.3 1.0 Iris-versicolor
6.6 2.9 4.6 1.3 Iris-versicolor
5.2 2.7 3.9 1.4 Iris-versicolor
5.0 2.0 3.5 1.0 Iris-versicolor
5.9 3.0 4.2 1.5 Iris-versicolor
6.0 2.2 4.0 1.0 Iris-versicolor
6.1 2.9 4.7 1.4 Iris-versicolor
5.6 2.9 3.6 1.3 Iris-versicolor
6.7 3.1 4.4 1.4 Iris-versicolor
5.6 3.0 4.5 1.5 Iris-versicolor
5.8 2.7 4.1 1.0 Iris-versicolor
6.2 2.2 4.5 1.5 Iris-versicolor
5.6 2.5 3.9 1.1 Iris-versicolor
5.9 3.2 4.8 1.8 Iris-versicolor
6.1 2.8 4.0 1.3 Iris-versicolor
6.3 2.5 4.9 1.5 Iris-versicolor
6.1 2.8 4.7 1.2 Iris-versicolor
6.4 2.9 4.3 1.3 Iris-versicolor
6.6 3.0 4.4 1.4 Iris-versicolor
6.8 2.8 4.8 1.4 Iris-versicolor
6.0 2.9 4.5 1.5 Iris-versicolor
5.7 2.6 3.5 1.0 Iris-versicolor
5.5 2.4 3.8 1.1 Iris-versicolor
5.5 2.4 3.7 1.0 Iris-versicolor
5.8 2.7 3.9 1.2 Iris-versicolor
6.0 2.7 5.1 1.6 Iris-versicolor
5.4 3.0 4.5 1.5 Iris-versicolor
6.0 3.4 4.5 1.6 Iris-versicolor
6.7 3.1 4.7 1.5 Iris-versicolor
6.3 2.3 4.4 1.3 Iris-versicolor
5.6 3.0 4.1 1.3 Iris-versicolor
5.5 2.5 4.0 1.3 Iris-versicolor
5.5 2.6 4.4 1.2 Iris-versicolor
6.1 3.0 4.6 1.4 Iris-versicolor
5.8 2.6 4.0 1.2 Iris-versicolor
5.0 2.3 3.3 1.0 Iris-versicolor
5.6 2.7 4.2 1.3 Iris-versicolor
5.7 3.0 4.2 1.2 Iris-versicolor
5.7 2.9 4.2 1.3 Iris-versicolor
6.2 2.9 4.3 1.3 Iris-versicolor
5.1 2.5 3.0 1.1 Iris-versicolor
5.7 2.8 4.1 1.3 Iris-versicolor
5.8 2.7 5.1 1.9 Iris-virginica
4.9 2.5 4.5 1.7 Iris-virginica
5.7 2.5 5.0 2.0 Iris-virginica
5.8 2.8 5.1 2.4 Iris-virginica
6.0 2.2 5.0 1.5 Iris-virginica
5.6 2.8 4.9 2.0 Iris-virginica
6.3 2.7 4.9 1.8 Iris-virginica
6.2 2.8 4.8 1.8 Iris-virginica
6.1 3.0 4.9 1.8 Iris-virginica
6.3 2.8 5.1 1.5 Iris-virginica
6.0 3.0 4.8 1.8 Iris-virginica
5.8 2.7 5.1 1.9 Iris-virginica
6.3 2.5 5.0 1.9 Iris-virginica
5.9 3.0 5.1 1.8 Iris-virginica
62
------------------------------------
5.1 3.5 1.4 0.2 Iris-setosa
4.9 3.0 1.4 0.2 Iris-setosa
4.7 3.2 1.3 0.2 Iris-setosa
4.6 3.1 1.5 0.2 Iris-setosa
5.0 3.6 1.4 0.2 Iris-setosa
5.4 3.9 1.7 0.4 Iris-setosa
4.6 3.4 1.4 0.3 Iris-setosa
5.0 3.4 1.5 0.2 Iris-setosa
4.4 2.9 1.4 0.2 Iris-setosa
4.9 3.1 1.5 0.1 Iris-setosa
5.4 3.7 1.5 0.2 Iris-setosa
4.8 3.4 1.6 0.2 Iris-setosa
4.8 3.0 1.4 0.1 Iris-setosa
4.3 3.0 1.1 0.1 Iris-setosa
5.8 4.0 1.2 0.2 Iris-setosa
5.7 4.4 1.5 0.4 Iris-setosa
5.4 3.9 1.3 0.4 Iris-setosa
5.1 3.5 1.4 0.3 Iris-setosa
5.7 3.8 1.7 0.3 Iris-setosa
5.1 3.8 1.5 0.3 Iris-setosa
5.4 3.4 1.7 0.2 Iris-setosa
5.1 3.7 1.5 0.4 Iris-setosa
4.6 3.6 1.0 0.2 Iris-setosa
5.1 3.3 1.7 0.5 Iris-setosa
4.8 3.4 1.9 0.2 Iris-setosa
5.0 3.0 1.6 0.2 Iris-setosa
5.0 3.4 1.6 0.4 Iris-setosa
5.2 3.5 1.5 0.2 Iris-setosa
5.2 3.4 1.4 0.2 Iris-setosa
4.7 3.2 1.6 0.2 Iris-setosa
4.8 3.1 1.6 0.2 Iris-setosa
5.4 3.4 1.5 0.4 Iris-setosa
5.2 4.1 1.5 0.1 Iris-setosa
5.5 4.2 1.4 0.2 Iris-setosa
4.9 3.1 1.5 0.1 Iris-setosa
5.0 3.2 1.2 0.2 Iris-setosa
5.5 3.5 1.3 0.2 Iris-setosa
4.9 3.1 1.5 0.1 Iris-setosa
4.4 3.0 1.3 0.2 Iris-setosa
5.1 3.4 1.5 0.2 Iris-setosa
5.0 3.5 1.3 0.3 Iris-setosa
4.5 2.3 1.3 0.3 Iris-setosa
4.4 3.2 1.3 0.2 Iris-setosa
5.0 3.5 1.6 0.6 Iris-setosa
5.1 3.8 1.9 0.4 Iris-setosa
4.8 3.0 1.4 0.3 Iris-setosa
5.1 3.8 1.6 0.2 Iris-setosa
4.6 3.2 1.4 0.2 Iris-setosa
5.3 3.7 1.5 0.2 Iris-setosa
5.0 3.3 1.4 0.2 Iris-setosa
50
------------------------------------
6.9 3.1 4.9 1.5 Iris-versicolor
6.7 3.0 5.0 1.7 Iris-versicolor
6.3 3.3 6.0 2.5 Iris-virginica
7.1 3.0 5.9 2.1 Iris-virginica
6.3 2.9 5.6 1.8 Iris-virginica
6.5 3.0 5.8 2.2 Iris-virginica
7.6 3.0 6.6 2.1 Iris-virginica
7.3 2.9 6.3 1.8 Iris-virginica
6.7 2.5 5.8 1.8 Iris-virginica
7.2 3.6 6.1 2.5 Iris-virginica
6.5 3.2 5.1 2.0 Iris-virginica
6.4 2.7 5.3 1.9 Iris-virginica
6.8 3.0 5.5 2.1 Iris-virginica
6.4 3.2 5.3 2.3 Iris-virginica
6.5 3.0 5.5 1.8 Iris-virginica
7.7 3.8 6.7 2.2 Iris-virginica
7.7 2.6 6.9 2.3 Iris-virginica
6.9 3.2 5.7 2.3 Iris-virginica
7.7 2.8 6.7 2.0 Iris-virginica
6.7 3.3 5.7 2.1 Iris-virginica
7.2 3.2 6.0 1.8 Iris-virginica
6.4 2.8 5.6 2.1 Iris-virginica
7.2 3.0 5.8 1.6 Iris-virginica
7.4 2.8 6.1 1.9 Iris-virginica
7.9 3.8 6.4 2.0 Iris-virginica
6.4 2.8 5.6 2.2 Iris-virginica
6.1 2.6 5.6 1.4 Iris-virginica
7.7 3.0 6.1 2.3 Iris-virginica
6.3 3.4 5.6 2.4 Iris-virginica
6.4 3.1 5.5 1.8 Iris-virginica
6.9 3.1 5.4 2.1 Iris-virginica
6.7 3.1 5.6 2.4 Iris-virginica
6.9 3.1 5.1 2.3 Iris-virginica
6.8 3.2 5.9 2.3 Iris-virginica
6.7 3.3 5.7 2.5 Iris-virginica
6.7 3.0 5.2 2.3 Iris-virginica
6.5 3.0 5.2 2.0 Iris-virginica
6.2 3.4 5.4 2.3 Iris-virginica
38
------------------------------------
迭代了16次