k_means算法

C风格 k_means代码（使用了 <iostream> 和 using namespace std，需用C++编译器编译）

#include <cmath>
#include <cstdio>
#include <iostream>
using namespace std;

// A two-dimensional sample plus the index of the centroid it is assigned to.
// Field order matters: the demo initialises points positionally as {x, y, centroid}.
struct point
{
    double x;      // horizontal coordinate
    double y;      // vertical coordinate
    int centroid;  // index of the owning centroid (the demo data starts at -1)
};

// Returns the Euclidean distance between points a and b.
// Only the x and y fields are read; the centroid field is ignored.
double dist(struct point a, struct point b) {
    const double dx = a.x - b.x;
    const double dy = a.y - b.y;
    // Squaring by multiplication avoids the general-purpose pow() call, and the
    // std:: qualification makes the dependency on <cmath> explicit.
    return std::sqrt(dx * dx + dy * dy);
}

/// <summary>
/// Clusters N two-dimensional points into K groups with the standard k-means
/// iteration: assign every point to its nearest centroid, move every centroid
/// to the mean of its points, and repeat until no assignment changes.
/// </summary>
/// <param name="dataset">Points to cluster; the centroid field of every point is overwritten with its cluster index.</param>
/// <param name="N">Number of points in dataset. Nothing is done when N is not positive.</param>
/// <param name="K">Number of centroids. Nothing is done when K is not positive.</param>
/// <param name="centroids">Output array with room for K points; receives the final centroid coordinates.</param>
/// <param name="iter">Optional counter, incremented once per pass in which at least one
/// assignment changed. It is not reset here, so the caller initialises it. May be null.</param>
void kmeans(point* dataset, int N, int K, point* centroids, int* iter) {
    // Without points or centroids there is nothing to compute, and the loops
    // below would read elements that do not exist.
    if (N <= 0 || K <= 0) {
        return;
    }

    // Seed the centroids from the leading points. The index wraps around so that
    // K > N reuses points instead of reading past the end of the dataset.
    for (int i = 0; i < K; i++) {
        centroids[i] = dataset[i % N];
    }

    // Clear any labels the caller left behind: the stop condition below relies on
    // "no label changed", which is only meaningful if the first pass starts from
    // unassigned points.
    for (int i = 0; i < N; i++) {
        dataset[i].centroid = -1;
    }

    // Repeat until a full pass leaves every assignment untouched.
    while (1) {
        int changed = 0;  // set when any point switches cluster during this pass

        // Assignment step: attach each point to its nearest centroid.
        for (int i = 0; i < N; i++) {
            int closest = 0;
            double closest_dist = dist(dataset[i], centroids[0]);
            for (int j = 1; j < K; j++) {
                double dist_j = dist(dataset[i], centroids[j]);
                // Strict comparison: on a tie the lower centroid index wins.
                if (dist_j < closest_dist) {
                    closest = j;
                    closest_dist = dist_j;
                }
            }
            if (dataset[i].centroid != closest) {
                dataset[i].centroid = closest;
                changed = 1;
            }
        }

        // Update step: move each centroid to the mean of its members.
        for (int i = 0; i < K; i++) {
            double sum_x = 0, sum_y = 0;
            int count = 0;
            for (int j = 0; j < N; j++) {
                if (dataset[j].centroid == i) {
                    sum_x += dataset[j].x;
                    sum_y += dataset[j].y;
                    count++;
                }
            }
            // An empty cluster keeps its previous position; dividing by a zero
            // count would turn its coordinates into NaN.
            if (count > 0) {
                centroids[i].x = sum_x / count;
                centroids[i].y = sum_y / count;
            }
        }

        if (!changed) {
            break;
        }
        if (iter) {
            (*iter)++;
        }
    }
}
// Demo driver: clusters four fixed points into two groups and prints the
// pass count, each centroid, and the points attached to it.
int main()
{
    const int N = 4;  // number of sample points
    const int K = 2;  // number of clusters

    // Every sample starts with centroid index -1.
    struct point dataset[N] = {
        {2.0, 3.0, -1},
        {7.0, 8.0, -1},
        {9.0, 10.0, -1},
        {4.0, 5.0, -1},
    };

    struct point centroids[K];  // filled in by kmeans
    int iter = 0;               // pass counter, updated by kmeans

    kmeans(dataset, N, K, centroids, &iter);

    // Report the result cluster by cluster.
    printf("迭代次数：%d\n", iter);
    for (int c = 0; c < K; ++c) {
        printf("质心 %d 坐标 (%0.2f,%0.2f)\n", c, centroids[c].x, centroids[c].y);
        printf("属于该质心的点：\n");
        for (int p = 0; p < N; ++p) {
            if (dataset[p].centroid != c) {
                continue;  // this point belongs to another cluster
            }
            printf("(%0.1f,%0.1f)", dataset[p].x, dataset[p].y);
        }
        printf("\n\n");
    }
    return 0;
}

View Code

运行结果如下

C++ k_means代码

#include <iostream>
#include <vector>
#include <cmath>
#include <limits> // 添加了对limits库的引用，以使用std::numeric_limits

// A two-dimensional point tagged with a cluster number.
// Field order matters: the demo initialises values positionally as {x, y, cluster}.
struct DataPoint
{
    double x;     // horizontal coordinate
    double y;     // vertical coordinate
    int cluster;  // number of the cluster this point belongs to
};

// Euclidean distance between two data points; the cluster field is not read.
double calculateDistance(DataPoint point1, DataPoint point2) {
    const double deltaX = point1.x - point2.x;
    const double deltaY = point1.y - point2.y;
    return std::sqrt(std::pow(deltaX, 2) + std::pow(deltaY, 2));
}

int main() {
    // 初始化数据点集合
    std::vector<DataPoint> dataPoints = {
        {2.0, 3.0, -1}, // 初始时，所有数据点的簇编号设置为-1，表示未分类
        {7.0, 8.0, -1},
        {9.0, 10.0, -1},
        {4.0, 5.0, -1},
    };

    // 初始化质心集合
    std::vector<DataPoint> centroids = {
        {2.0, 3.0, 0}, // 每个质心也是一个数据点，但是有一个特定的簇编号
        {7.0, 8.0, 1},
    };

    int iteration = 0; // 迭代次数
    bool converged = false; // 收敛标志

    while (true) {
        for (auto& dataPoint : dataPoints) { // 遍历所有数据点
            double minDistance = std::numeric_limits<double>::max(); // 初始化最小距离为最大值
            int closestCentroid = 0; // 最近的质心编号

            for (size_t i = 0; i < centroids.size(); i++) { // 遍历所有质心
                double distance = calculateDistance(dataPoint, centroids[i]); // 计算距离
                if (distance < minDistance) { // 如果找到更近的质心
                    minDistance = distance; // 更新最小距离
                    closestCentroid = i; // 更新最近的质心编号
                }
            }
            dataPoint.cluster = closestCentroid; // 更新数据点的簇编号
        }

        std::vector<DataPoint> oldCentroids = centroids; // 保存旧的质心集合

        for (auto& centroid : centroids) { // 遍历所有质心
            double sumX = 0.0; // x坐标之和
            double sumY = 0.0; // y坐标之和
            int count = 0; // 属于该质心簇的数据点数量

            for (auto& dataPoint : dataPoints) { // 遍历所有数据点
                if (dataPoint.cluster == centroid.cluster) { // 如果数据点属于当前质心的簇
                    sumX += dataPoint.x; // 累加x坐标
                    sumY += dataPoint.y; // 累加y坐标
                    count++; // 计数增加
                }
            }

            if (count > 0) { // 如果该簇中有数据点
                centroid.x = sumX / count; // 计算新的x坐标
                centroid.y = sumY / count; // 计算新的y坐标
            }
        }

        converged = true; // 假设已经收敛
        for (int i = 0; i < centroids.size(); i++) { // 遍历所有质心
            if (oldCentroids[i].x != centroids[i].x || oldCentroids[i].y != centroids[i].y) { // 检查质心的x和y坐标是否发生变化
                converged = false; // 如果有变化，则未收敛
                break; // 退出循环
            }
        }
        if (converged) //如果已经收敛，则结束迭代
            break;
        iteration++; // 迭代次数增加
    }
    std::cout << "迭代次数: " << iteration << std::endl; // 输出迭代次数

    for (const auto& centroid : centroids) { // 输出所有质心的坐标
        std::cout << "质心" << centroid.cluster << "坐标(" << centroid.x << ", " << centroid.y << ")" << std::endl;
    }

    for (const auto& dataPoint : dataPoints) { // 输出所有数据点及其所属簇
        std::cout << "(" << dataPoint.x << ", " << dataPoint.y << ") 属于质心 " << dataPoint.cluster << std::endl;
    }
    return 0;
}

View Code

运行结果如下

C# k_means代码

using System;
using System.Collections.Generic;

/// <summary>
/// A mutable two-dimensional point tagged with a cluster number.
/// This is a class, so variables of this type hold references to a shared object.
/// </summary>
public class DataPoint
{
    /// <summary>Horizontal coordinate.</summary>
    public double x;

    /// <summary>Vertical coordinate.</summary>
    public double y;

    /// <summary>Number of the cluster this point belongs to.</summary>
    public int cluster;

    /// <summary>Creates a point from its coordinates and cluster number.</summary>
    public DataPoint(double x, double y, int cluster)
    {
        this.cluster = cluster;
        this.y = y;
        this.x = x;
    }
}

/// <summary>
/// Console demo of k-means clustering on four fixed points with two preset centroids.
/// </summary>
public class KMeans
{
    /// <summary>Returns the Euclidean distance between two points; the cluster field is not read.</summary>
    /// <param name="point1">First point.</param>
    /// <param name="point2">Second point.</param>
    /// <returns>The straight-line distance between the two points.</returns>
    public static double CalculateDistance(DataPoint point1, DataPoint point2)
    {
        double dx = point1.x - point2.x;
        double dy = point1.y - point2.y;
        return Math.Sqrt(dx * dx + dy * dy);
    }

    /// <summary>
    /// Runs the clustering loop until no centroid moves, prints the pass count,
    /// the centroids and each point's cluster, then waits for a key press.
    /// </summary>
    public static void Main(string[] args)
    {
        // Sample points; a cluster of -1 marks a point that has not been assigned yet.
        List<DataPoint> dataPoints = new List<DataPoint> {
            new DataPoint(2.0, 3.0, -1),
            new DataPoint(7.0, 8.0, -1),
            new DataPoint(9.0, 10.0, -1),
            new DataPoint(4.0, 5.0, -1),
        };

        // Starting centroids. Each cluster number must equal the centroid's position
        // in this list, because points are tagged with the position of their nearest
        // centroid and later matched against the centroid's cluster number.
        List<DataPoint> centroids = new List<DataPoint> {
            new DataPoint(2.0, 3.0, 0),
            new DataPoint(7.0, 8.0, 1),
        };

        int iteration = 0; // number of passes executed, including the final one that detects convergence
        bool converged = false;

        while (!converged)
        {
            // Assignment step: tag every point with its nearest centroid.
            foreach (var dataPoint in dataPoints)
            {
                double minDistance = double.MaxValue;
                int closestCentroid = 0;

                for (int i = 0; i < centroids.Count; i++)
                {
                    double distance = CalculateDistance(dataPoint, centroids[i]);
                    // Strict comparison: on a tie the earlier centroid wins.
                    if (distance < minDistance)
                    {
                        minDistance = distance;
                        closestCentroid = i;
                    }
                }
                dataPoint.cluster = closestCentroid;
            }

            // Snapshot the centroid values before moving them. DataPoint is a class,
            // so copying the list alone would only copy references: the "old" entries
            // would be the very objects updated below, and the change check would
            // always report convergence after the first pass.
            List<DataPoint> oldCentroids = new List<DataPoint>(centroids.Count);
            foreach (var centroid in centroids)
            {
                oldCentroids.Add(new DataPoint(centroid.x, centroid.y, centroid.cluster));
            }

            // Update step: move each centroid to the mean of its members.
            foreach (var centroid in centroids)
            {
                double sumX = 0.0;
                double sumY = 0.0;
                int count = 0;

                foreach (var dataPoint in dataPoints)
                {
                    if (dataPoint.cluster == centroid.cluster)
                    {
                        sumX += dataPoint.x;
                        sumY += dataPoint.y;
                        count++;
                    }
                }

                // A centroid without members stays where it is.
                if (count > 0)
                {
                    centroid.x = sumX / count;
                    centroid.y = sumY / count;
                }
            }

            // Converged when no centroid moved during this pass.
            converged = true;
            for (int i = 0; i < centroids.Count; i++)
            {
                if (oldCentroids[i].x != centroids[i].x || oldCentroids[i].y != centroids[i].y)
                {
                    converged = false;
                    break;
                }
            }
            iteration++;
        }
        Console.WriteLine("迭代次数: " + iteration);

        foreach (var centroid in centroids)
        {
            Console.WriteLine("质心" + centroid.cluster + "坐标(" + centroid.x + ", " + centroid.y + ")");
        }

        foreach (var dataPoint in dataPoints)
        {
            Console.WriteLine("(" + dataPoint.x + ", " + dataPoint.y + ") 属于质心 " + dataPoint.cluster);
        }

        // Keep the console window open until a key is pressed.
        Console.ReadKey();
    }
}

View Code

Python k_means代码

import math

class DataPoint:
    """A mutable two-dimensional point tagged with a cluster number."""

    def __init__(self, x, y, cluster):
        """Store the coordinates and the cluster number unchanged."""
        # x: horizontal coordinate, y: vertical coordinate,
        # cluster: number of the cluster this point belongs to
        self.x, self.y, self.cluster = x, y, cluster

class KMeans:
    """Console demo of k-means clustering on four fixed points with two preset centroids."""

    @staticmethod
    def calculate_distance(point1, point2):
        """Return the Euclidean distance between two objects exposing ``x`` and ``y``."""
        delta_x = point1.x - point2.x
        delta_y = point1.y - point2.y
        return math.sqrt(math.pow(delta_x, 2) + math.pow(delta_y, 2))

    @staticmethod
    def main():
        """Cluster the sample points until no centroid moves, then print the result."""
        # Sample points; a cluster of -1 marks a point that has not been assigned yet.
        sample_coordinates = ((2.0, 3.0), (7.0, 8.0), (9.0, 10.0), (4.0, 5.0))
        data_points = [DataPoint(px, py, -1) for px, py in sample_coordinates]

        # Starting centroids; each cluster number equals the centroid's list position.
        centroids = [DataPoint(2.0, 3.0, 0), DataPoint(7.0, 8.0, 1)]

        iteration = 0  # passes executed, including the one that detects convergence

        while True:
            # Assignment step: tag every point with the position of its nearest centroid.
            for sample in data_points:
                best_index = 0
                best_distance = float('inf')
                for index, center in enumerate(centroids):
                    candidate = KMeans.calculate_distance(sample, center)
                    # Strict comparison: on a tie the earlier centroid wins.
                    if candidate < best_distance:
                        best_index, best_distance = index, candidate
                sample.cluster = best_index

            # Value snapshot of the centroids, taken before they are moved.
            previous = [DataPoint(c.x, c.y, c.cluster) for c in centroids]

            # Update step: move each centroid to the mean of its members.
            for center in centroids:
                total_x = 0.0
                total_y = 0.0
                members = 0
                for sample in data_points:
                    if sample.cluster != center.cluster:
                        continue
                    total_x += sample.x
                    total_y += sample.y
                    members += 1
                # A centroid without members stays where it is.
                if members > 0:
                    center.x = total_x / members
                    center.y = total_y / members

            # A pass that moves no centroid ends the loop; it is still counted.
            moved = any(
                before.x != after.x or before.y != after.y
                for before, after in zip(previous, centroids)
            )
            iteration += 1
            if not moved:
                break

        # Report the pass count, the centroids, and each point's cluster.
        print("迭代次数: ", iteration)

        for centroid in centroids:
            print(f"质心{centroid.cluster}坐标({centroid.x}, {centroid.y})")

        for data_point in data_points:
            print(f"({data_point.x}, {data_point.y}) 属于质心 {data_point.cluster}")

# Run the demo only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    KMeans.main()

View Code

posted @ 2024-02-01 09:45 阿坦阅读(24) 评论(0) 收藏举报

刷新页面返回顶部

k_means算法

公告