[Javascript] Classify JSON text data with machine learning in Natural

In this lesson, we will learn how to train a Naive Bayes classifier and a Logistic Regression classifier - basic machine learning algorithms - on JSON text data, and classify it into categories.

While this dataset is still considered a small dataset -- only a couple hundred points of data -- we'll start to get better results.

The general rule is that Logistic Regression will work better than Naive Bayes, but only if there is enough data. Since this is still a pretty small dataset, Naive Bayes works better here. Generally, Logistic Regression takes longer to train as well.

This uses data from Ana Cachopo: http://ana.cachopo.org/datasets-for-single-label-text-categorization.

// train data

[{text: 'xxxxxx', label: 'space'}]

 

复制代码
// Load training data from the files and train

var natural = require('natural');
var fs = require('fs');
// Single classifier instance shared by the training, testing and saving steps.
var classifier = new natural.BayesClassifier();

// Entry point: read the training set from disk and pass the parsed records
// to train(). If the read fails, the error is logged and nothing else runs.
fs.readFile('training_data.json', 'utf-8', function(readError, rawJson){
    if (readError){
        console.log(readError);
        return;
    }
    train(JSON.parse(rawJson));
});

/**
 * Adds every labelled sample to the shared classifier, trains it while
 * timing the call, logs the elapsed time and then starts loading the test
 * data.
 *
 * @param {Array<{text: string, label: string}>} trainingData - records whose
 *     `text` is added to the classifier under `label`.
 */
function train(trainingData){
    console.log("Training");
    trainingData.forEach(function addSample(sample){
        classifier.addDocument(sample.text, sample.label);
    });

    // Only the train() call itself is timed, not the addDocument() loop.
    var startedAtMs = Date.now();
    classifier.train();
    var elapsedSeconds = (Date.now() - startedAtMs) / 1000.0;

    console.log("Training time:", elapsedSeconds, "seconds");
    loadTestData();
}

/**
 * Reads 'test_data.json' as UTF-8, parses it as JSON and passes the result
 * to testClassifier(). A read error is logged and the test step is skipped.
 */
function loadTestData(){
    console.log("Loading test data");
    fs.readFile('test_data.json', 'utf-8', function(readError, rawJson){
        if (readError){
            console.log(readError);
            return;
        }
        testClassifier(JSON.parse(rawJson));
    });
}

/**
 * Classifies every test record with the shared classifier, logs the share
 * of records whose predicted label equals the expected one, then saves the
 * classifier.
 *
 * @param {Array<{text: string, label: string}>} testData - records whose
 *     `text` is classified and compared against `label`.
 */
function testClassifier(testData){
    console.log("Testing classifier");
    var numCorrect = 0;
    testData.forEach(function(item){
        var labelGuess = classifier.classify(item.text);
        if (labelGuess === item.label){
            numCorrect++;
        }
    });
    // The label says "%", so report a value on the 0-100 scale rather than a
    // 0-1 fraction. An empty test set reports 0 instead of NaN (0 / 0).
    var percentCorrect = testData.length === 0
        ? 0
        : (numCorrect / testData.length) * 100;
    console.log("Correct %:", percentCorrect);
    saveClassifier(classifier);
}
复制代码
复制代码
/**
 * Saves the given classifier to 'classifier.json' and logs either the error
 * passed to the save callback or a confirmation message.
 *
 * @param {{save: Function}} classifier - object whose `save(path, callback)`
 *     method is called.
 */
function saveClassifier(classifier){
    // The second callback argument is accepted but not used.
    var reportOutcome = function(saveError, savedClassifier){
        console.log(saveError ? saveError : "Classifier saved!");
    };
    classifier.save('classifier.json', reportOutcome);
}
复制代码

 

In a new project, we can load the saved classifier and test the training result:

复制代码
var natural = require('natural');

// Restore the classifier saved by the training script and classify one
// sample sentence. The training script builds a natural.BayesClassifier and
// saves it to 'classifier.json', so it is loaded back through the same class
// here rather than through LogisticRegressionClassifier.
natural.BayesClassifier.load('classifier.json', null, function(err, classifier){
    if (err){
        console.log(err);
    } else {
        var testComment = "is this about the sun and moon?";
        console.log(classifier.classify(testComment));
    }
});
复制代码

 

posted @   Zhentiw  阅读(608)  评论(0编辑  收藏  举报
编辑推荐:
· SQL Server 2025 AI相关能力初探
· Linux系列:如何用 C#调用 C方法造成内存泄露
· AI与.NET技术实操系列(二):开始使用ML.NET
· 记一次.NET内存居高不下排查解决与启示
· 探究高空视频全景AR技术的实现原理
阅读排行:
· 阿里最新开源QwQ-32B,效果媲美deepseek-r1满血版,部署成本又又又降低了!
· Manus重磅发布:全球首款通用AI代理技术深度解析与实战指南
· 开源Multi-agent AI智能体框架aevatar.ai,欢迎大家贡献代码
· 被坑几百块钱后,我竟然真的恢复了删除的微信聊天记录!
· AI技术革命,工作效率10个最佳AI工具
历史上的今天:
2016-10-03 [AngularFire 2] Object Observables - How to Read Objects from a Firebase Database?
2016-10-03 [AngularFire 2 ] Hello World - How To Write your First Query using AngularFire 2 List Observables ?
点击右上角即可分享
微信分享提示