KNN in C++

Pseudo Code of KNN

We can implement a KNN model by following the below steps:

  1. Load the data
  2. Initialise the value of k
  3. To get the predicted class, iterate over every training data point (from 1 to the total number of training data points):
    1. Calculate the distance between the test data and each row of training data. Here we will use Euclidean distance as our distance metric since it’s the most popular method. Other metrics that can be used include Chebyshev, cosine, etc.
    2. Sort the calculated distances in ascending order based on distance values
    3. Get top k rows from the sorted array
    4. Get the most frequent class of these rows
    5. Return the predicted class

Iris Data Set

把数据作为string类型处理,进行string和double类型转换。

#include <iostream>
#include <string>
#include <fstream>
#include <sstream>
#include <numeric>
#include <functional>
#include <vector>
#include <algorithm>
#include <cmath>
#include <map>

/// Computes the Manhattan (L1) distance between two feature vectors.
///
/// @param inst1 First vector; its elements must be convertible to double.
/// @param inst2 Second vector; must have the same number of elements as inst1.
/// @return The sum of the absolute element-wise differences, or -1 (after
///         printing a message to std::cout) when the two sizes differ.
template <class T1, class T2>
double ManhattanDistance(const std::vector<T1> &inst1, const std::vector<T2> &inst2) {
    if(inst1.size() != inst2.size()) {
        std::cout<<"the size of the vectors is not the same\n";
        return -1;
    }
    double distance=0.0;
    for(size_t i=0;i<inst1.size();++i) {
        // Convert before subtracting so unsigned element types cannot wrap
        // around and std::abs always resolves to its double overload.
        distance+=std::abs(static_cast<double>(inst1[i])-static_cast<double>(inst2[i]));
    }

    return distance;
}

/// Computes the Euclidean (L2) distance between two feature vectors.
///
/// @param inst1 First vector; its elements must be convertible to double.
/// @param inst2 Second vector; must have the same number of elements as inst1.
/// @return The square root of the summed squared element-wise differences,
///         or -1 (after printing a message to std::cout) when the sizes differ.
template <class DataType1, class DataType2>
double EuclideanDistance(const std::vector<DataType1> &inst1, const std::vector<DataType2> &inst2) {
    if(inst1.size() != inst2.size()) {
        std::cout<<"the size of the vectors is not the same\n";
        return -1;
    }
    double sum_of_squares=0.0;
    for(size_t i=0; i<inst1.size(); ++i) {
        // Subtract in double so unsigned element types cannot wrap around.
        const double diff=static_cast<double>(inst1[i])-static_cast<double>(inst2[i]);
        sum_of_squares+=diff*diff;
    }

    return std::sqrt(sum_of_squares);
}

/// Converts the strings in [beg, end) to doubles and appends them to vdouble.
///
/// Each string is parsed with stream extraction, so a leading numeric prefix
/// is accepted and trailing characters are ignored. vdouble is appended to,
/// not cleared.
///
/// @param beg     Iterator to the first string to convert.
/// @param end     Iterator one past the last string to convert.
/// @param vdouble Output vector receiving one double per input string.
void vstr2vdouble(std::vector<std::string>::const_iterator beg, std::vector<std::string>::const_iterator end, std::vector<double> &vdouble) {
    for(std::vector<std::string>::const_iterator it=beg; it!=end; ++it) {
        // Start from a defined value: for an empty or whitespace-only string
        // the extraction below never writes to d, so without this an
        // indeterminate value would be pushed.
        double d=0.0;
        std::istringstream ss(*it);
        ss>>d;
        vdouble.push_back(d);
    }
}

void knn(std::vector<std::vector<std::string> > &trainset, std::vector<std::string> &testdata, int &k) {
    std::vector<double> testitem;
    vstr2vdouble(testdata.begin(), testdata.end(), testitem);
    std::multimap<std::string, std::string> mmap;

    for(size_t i=0;i<trainset.size();++i) {
        std::vector<double> trainitem;
        vstr2vdouble(trainset[i].begin(), trainset[i].end()-1, trainitem);
        double distance=EuclideanDistance(testitem, trainitem);
        std::string strdis;
        std::stringstream ss;
        ss<<distance;
        ss>>strdis;
        mmap.insert(std::pair<std::string, std::string>(strdis, trainset[i].back()));
    }
    size_t i=0;
    for(std::multimap<std::string, std::string>::const_iterator it=mmap.begin(); i<k; ++i,++it) {
        std::cout<<it->first<<" "<<it->second<<"\n";
    }
}

/// Reads a comma- and/or whitespace-separated text file into a 2D vector.
///
/// lines_feat is cleared first. Each line becomes one row; commas are treated
/// as whitespace and every field is extracted as a DataType with operator>>.
/// With DataType = std::string every field is kept verbatim. With a numeric
/// DataType, extraction of a row stops at the first field that does not parse
/// (for example a trailing class label). Reading stops at the first empty
/// line or at end of file.
///
/// If the file cannot be opened a message is written to std::cerr and
/// lines_feat is left empty.
///
/// @param filename   Path of the file to read.
/// @param lines_feat Output: one inner vector per line read.
template <class DataType>
void ReadDataFromFile(std::string &filename, std::vector<std::vector<DataType> > &lines_feat) {
    lines_feat.clear();

    std::ifstream vm_info(filename.c_str());
    if(!vm_info) {
        std::cerr<<"cannot open file: "<<filename<<"\n";
        return;
    }

    std::string line;
    // Loop on the result of getline itself: testing eof() beforehand would
    // spin forever if the stream failed without reaching end of file.
    while(std::getline(vm_info, line)) {
        if(line.empty())
            break;
        std::replace(line.begin(), line.end(), ',', ' ');
        std::istringstream fields(line);

        std::vector<DataType> row;
        DataType value;
        while(fields >> value) {
            row.push_back(value);
        }
        lines_feat.push_back(row);
    }
}

/// Writes a 2D vector to std::cout for inspection.
///
/// Output is a header line with the number of rows, then one line per row
/// with each element followed by a single space, then a closing marker line.
///
/// @param vv The rows to print; it is only read.
template <class DataType>
void Display2DVector(std::vector<std::vector<DataType> > &vv) {
    typedef typename std::vector<std::vector<DataType> >::const_iterator RowIter;
    typedef typename std::vector<DataType>::const_iterator CellIter;

    const std::vector<std::vector<DataType> > &rows=vv;

    std::cout<<"the total rows of 2d vector_data: "<<rows.size()<<std::endl;

    for(RowIter row=rows.begin(); row!=rows.end(); ++row) {
        for(CellIter cell=row->begin(); cell!=row->end(); ++cell) {
            std::cout<<*cell<<" ";
        }
        std::cout<<"\n";
    }
    std::cout<<"--------the end of the Display2DVector()--------\n";
}

int main() {
    std::string trainpath="Iris.data", testpath="knntest.data";
    std::vector<std::vector<std::string> > knn_data, test_data;

    ReadDataFromFile(trainpath, knn_data);
    ReadDataFromFile(testpath, test_data);

    Display2DVector(test_data);

    int k=3;
    for(size_t i=0;i<test_data.size();++i) {
        knn(knn_data, test_data[i], k);
    }

    return 0;
}

posted @   东宫得臣  阅读(201)  评论(0编辑  收藏  举报
编辑推荐:
· AI与.NET技术实操系列:基于图像分类模型对图像进行分类
· go语言实现终端里的倒计时
· 如何编写易于单元测试的代码
· 10年+ .NET Coder 心语,封装的思维:从隐藏、稳定开始理解其本质意义
· .NET Core 中如何实现缓存的预热?
阅读排行:
· 分享一个免费、快速、无限量使用的满血 DeepSeek R1 模型,支持深度思考和联网搜索!
· 基于 Docker 搭建 FRP 内网穿透开源项目(很简单哒)
· 25岁的心里话
· ollama系列01:轻松3步本地部署deepseek,普通电脑可用
· 按钮权限的设计及实现
点击右上角即可分享
微信分享提示