C#实现K-MEDOIDS聚类算法
1)任意选取K个对象作为初始聚类中心(O1,O2,…,Oi,…,Ok);
2)将余下的对象分到各个类中去(该对象与哪一个聚类中心最近就被分配到哪一个聚类簇中);
3)对于每个类(其中心点为Oi),依次选取该类中的一个非中心点Or,重复步骤2,计算用Or代替Oi后的误差E(E=各个点到其对应中心点的欧氏距离之和),选择使E最小的那个Or来代替Oi;
4)重复步骤3,直到K个medoids固定下来。
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;

namespace K_medoids
{
    class Program
    {
        /// <summary>
        /// Console entry point: reads points from a text file, clusters them with
        /// <see cref="Pam"/> and prints every cluster (medoid first, then its members).
        /// </summary>
        /// <param name="args">
        /// args[0] is the data file name, combined with the application's base directory;
        /// args[1] is the number of clusters k.
        /// </param>
        static void Main(string[] args)
        {
            string path;
            int k;
            try
            {
                path = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, args[0]); // data file path
                k = Convert.ToInt32(args[1]);
            }
            catch (Exception)
            {
                // Any failure here (missing argument, non-numeric k, invalid path characters)
                // is reported as a bad command line.
                Console.Write("参数错误");
                return;
            }

            var db = ReadPoints(path); // every point read from the file
            if (db.Count == 0)
            {
                Console.Write("数据文件为空");
                return;
            }

            // k must be positive (it is used as a divisor) and cannot exceed the number of points.
            if (k <= 0 || k > db.Count)
            {
                Console.Write("参数错误");
                return;
            }

            var initialCenters = PickInitialCenters(db, k);
            var result = Pam(db, initialCenters);

            foreach (var crow in result)
            {
                WritePoint(crow.CenterPoint);
                foreach (var point in crow.CrowsPoint)
                {
                    WritePoint(point);
                }
                Console.WriteLine("========================");
            }
            Console.ReadKey();
        }

        /// <summary>
        /// Reads one point per non-blank line; coordinates are separated by spaces or tabs.
        /// </summary>
        /// <param name="path">Full path of the data file.</param>
        /// <returns>The parsed points, in file order (empty when the file has no data lines).</returns>
        private static List<Indivaduls> ReadPoints(string path)
        {
            var separators = new[] { ' ', '\t' };
            var points = new List<Indivaduls>();

            // The using statement guarantees the reader (and the file handle) is released.
            using (var reader = new StreamReader(path, Encoding.Default))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    // RemoveEmptyEntries makes repeated separators and leading/trailing
                    // whitespace harmless on every line, not only on the first one.
                    var fields = line.Split(separators, StringSplitOptions.RemoveEmptyEntries);
                    if (fields.Length == 0)
                    {
                        continue; // blank line
                    }

                    points.Add(new Indivaduls { Numbers = fields.Select(s => Convert.ToDouble(s)).ToList() });
                }
            }

            return points;
        }

        /// <summary>
        /// Picks k evenly spaced points of <paramref name="db"/> as the initial medoids.
        /// </summary>
        /// <param name="db">All points; must contain at least k entries.</param>
        /// <param name="k">Number of medoids to pick; must be positive.</param>
        /// <returns>The k selected points.</returns>
        private static List<Indivaduls> PickInitialCenters(List<Indivaduls> db, int k)
        {
            var gap = db.Count / k + 1;
            if ((k - 1) * gap >= db.Count)
            {
                // The preferred stride would index past the end of the list;
                // Count / k always keeps the last index inside it when k <= Count.
                gap = db.Count / k;
            }

            var centers = new List<Indivaduls>(k);
            for (var i = 0; i < k; i++)
            {
                centers.Add(db[i * gap]);
            }
            return centers;
        }

        /// <summary>
        /// Writes the coordinates of one point followed by a line break.
        /// </summary>
        /// <param name="point">Point to print.</param>
        private static void WritePoint(Indivaduls point)
        {
            foreach (var number in point.Numbers)
            {
                // NOTE(review): the separator is a NUL character; presumably a space or tab
                // was intended - confirm before changing, since it alters the program output.
                Console.Write(number + "\0");
            }
            Console.Write("\r\n");
        }

        /// <summary>
        /// K-medoids clustering by repeated medoid swapping.
        /// For each cluster in turn, every non-medoid member is tried as a replacement for
        /// that cluster's medoid; the replacement with the lowest total cost is kept if it
        /// is strictly cheaper than the current configuration. Passes over the clusters are
        /// repeated until a whole pass makes no swap.
        /// </summary>
        /// <param name="indivadulses">All points to cluster, including the medoids.</param>
        /// <param name="centerPoints">Initial medoids; this list is not modified.</param>
        /// <returns>The final clusters, in the same order as <paramref name="centerPoints"/>.</returns>
        public static List<Crows> Pam(List<Indivaduls> indivadulses, List<Indivaduls> centerPoints)
        {
            var medoids = new List<Indivaduls>(centerPoints); // working copy of the current medoids
            bool swapped;
            do
            {
                swapped = false;
                for (var i = 0; i < medoids.Count; i++)
                {
                    // Re-cluster with the medoids as they are now, so each swap is judged
                    // against the swaps that were already accepted.
                    var clusters = K_medoids(indivadulses, medoids);
                    var bestCost = TotalCost(clusters);
                    Indivaduls bestCandidate = null;

                    foreach (var candidate in clusters[i].CrowsPoint)
                    {
                        var trial = new List<Indivaduls>(medoids);
                        trial[i] = candidate;

                        var trialCost = TotalCost(K_medoids(indivadulses, trial));
                        if (trialCost < bestCost)
                        {
                            bestCost = trialCost;
                            bestCandidate = candidate;
                        }
                    }

                    if (bestCandidate != null)
                    {
                        // Every accepted swap strictly lowers the total cost, so the loop terminates.
                        medoids[i] = bestCandidate;
                        swapped = true;
                    }
                }
            } while (swapped);

            return K_medoids(indivadulses, medoids);
        }

        /// <summary>
        /// Sum of <see cref="Distance"/> over all clusters.
        /// </summary>
        /// <param name="crows">Clusters to evaluate.</param>
        /// <returns>Total distance of all members to their medoids.</returns>
        private static double TotalCost(List<Crows> crows)
        {
            var total = 0.0;
            foreach (var crow in crows)
            {
                total += Distance(crow);
            }
            return total;
        }

        /// <summary>
        /// Single assignment step: attaches every point to its nearest medoid.
        /// Points whose coordinates equal those of any medoid are not added as members.
        /// On a distance tie the medoid with the lower index wins.
        /// </summary>
        /// <param name="indivadulses">Points to assign, including the medoids.</param>
        /// <param name="centerPoints">Medoids.</param>
        /// <returns>One cluster per medoid, in the order of <paramref name="centerPoints"/>.</returns>
        public static List<Crows> K_medoids(List<Indivaduls> indivadulses, List<Indivaduls> centerPoints)
        {
            var resultCrows = new List<Crows>(centerPoints.Count);
            foreach (var center in centerPoints)
            {
                resultCrows.Add(new Crows { CenterPoint = center });
            }

            foreach (var point in indivadulses)
            {
                if (centerPoints.MyContains(point))
                {
                    continue; // medoids (and value-equal duplicates) are not members
                }

                var nearest = 0;
                var nearestDistance = P2PDistance(point, resultCrows[0].CenterPoint);
                for (var j = 1; j < resultCrows.Count; j++)
                {
                    var distance = P2PDistance(point, resultCrows[j].CenterPoint);
                    if (distance < nearestDistance)
                    {
                        nearestDistance = distance;
                        nearest = j;
                    }
                }
                resultCrows[nearest].CrowsPoint.Add(point);
            }

            return resultCrows;
        }

        /// <summary>
        /// Absolute error of a clustering: the cost of <paramref name="centerCrow"/> plus the
        /// cost of every cluster in <paramref name="otherPoints"/>.
        /// </summary>
        /// <param name="centerCrow">The cluster under consideration.</param>
        /// <param name="otherPoints">All remaining clusters.</param>
        /// <returns>Sum of member-to-medoid distances over all given clusters.</returns>
        public static double AbsoluteDiff(Crows centerCrow, List<Crows> otherPoints)
        {
            var distance = Distance(centerCrow);
            foreach (var crow in otherPoints)
            {
                distance += Distance(crow);
            }
            return distance;
        }

        /// <summary>
        /// Sum of the Euclidean distances from every member of a cluster to its medoid.
        /// </summary>
        /// <param name="crow">Cluster to evaluate.</param>
        /// <returns>Total distance; 0 for a cluster without members.</returns>
        public static double Distance(Crows crow)
        {
            var distance = 0.0;
            foreach (var point in crow.CrowsPoint)
            {
                distance += P2PDistance(crow.CenterPoint, point);
            }
            return distance;
        }

        /// <summary>
        /// Euclidean distance between two points.
        /// </summary>
        /// <param name="p1">First point.</param>
        /// <param name="p2">Second point.</param>
        /// <returns>The distance between <paramref name="p1"/> and <paramref name="p2"/>.</returns>
        /// <exception cref="ArgumentException">
        /// The points have different dimensions, or have no coordinates at all.
        /// </exception>
        public static double P2PDistance(Indivaduls p1, Indivaduls p2)
        {
            if (p1.Numbers.Count != p2.Numbers.Count || p1.Numbers.Count == 0)
            {
                throw new ArgumentException("Points must have the same, non-zero number of coordinates.");
            }

            var sumOfSquares = 0.0;
            for (var i = 0; i < p1.Numbers.Count; i++)
            {
                var delta = p1.Numbers[i] - p2.Numbers[i];
                sumOfSquares += delta * delta;
            }
            return Math.Sqrt(sumOfSquares);
        }
    }

    /// <summary>
    /// A single data point.
    /// </summary>
    public class Indivaduls
    {
        /// <summary>Coordinates of the point.</summary>
        public List<double> Numbers;

        public Indivaduls()
        {
            this.Numbers = new List<double>();
        }

        /// <summary>
        /// Value comparison: same number of coordinates and exactly equal values.
        /// </summary>
        /// <param name="obj">Point to compare with.</param>
        /// <returns>true when both points have identical coordinates; false otherwise or when <paramref name="obj"/> is null.</returns>
        public bool MyEquals(Indivaduls obj)
        {
            if (obj == null || obj.Numbers.Count != this.Numbers.Count)
            {
                return false;
            }

            for (var i = 0; i < Numbers.Count; i++)
            {
                // Exact comparison is intended: this checks whether two entries are the same data point.
                if (this.Numbers[i] != obj.Numbers[i])
                {
                    return false;
                }
            }
            return true;
        }
    }

    /// <summary>
    /// A cluster: one medoid plus the points assigned to it.
    /// </summary>
    public class Crows
    {
        public Crows()
        {
            this.CrowsPoint = new List<Indivaduls>();
            this.CenterPoint = new Indivaduls();
        }

        /// <summary>Members of the cluster, excluding the medoid.</summary>
        public List<Indivaduls> CrowsPoint;

        /// <summary>Medoid (center point) of the cluster.</summary>
        public Indivaduls CenterPoint;
    }

    public static class ExpandList
    {
        /// <summary>
        /// Extension method: tells whether the list holds a point with the same coordinates as <paramref name="point"/>.
        /// </summary>
        /// <param name="indivadulses">List to search.</param>
        /// <param name="point">Point to look for.</param>
        /// <returns>true when a value-equal point is found.</returns>
        public static bool MyContains(this List<Indivaduls> indivadulses, Indivaduls point)
        {
            foreach (var indivadulse in indivadulses)
            {
                if (point.MyEquals(indivadulse))
                {
                    return true;
                }
            }
            return false;
        }
    }
}
Do something useful!
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· go语言实现终端里的倒计时
· 如何编写易于单元测试的代码
· 10年+ .NET Coder 心语,封装的思维:从隐藏、稳定开始理解其本质意义
· .NET Core 中如何实现缓存的预热?
· 从 HTTP 原因短语缺失研究 HTTP/2 和 HTTP/3 的设计差异
· 周边上新:园子的第一款马克杯温暖上架
· Open-Sora 2.0 重磅开源!
· 分享 3 个 .NET 开源的文件压缩处理库,助力快速实现文件压缩解压功能!
· Ollama——大语言模型本地部署的极速利器
· DeepSeek如何颠覆传统软件测试?测试工程师会被淘汰吗?