说说 ML.NET and AutoML

我经常参加培训讲座,发现其中最受欢迎的讲座之一是"ML.NET 和 AutoML 简介"。ML.NET 是一个代码库,可用于创建经典(非神经网络)机器学习预测模型。AutoML 则是对一个命令行工具的非正式称呼,该工具可以自动为您生成 ML.NET 代码。

以下是我使用的两个数据文件、ML.NET 演示程序的源代码,以及 AutoML 的 shell 命令。目标是根据年龄、工作类型、年收入和工作满意度来预测一个人的性别。

文件: employees_norm_train.tsv

isMale	age	job	income	satisfac
False	0.66	mgmt	0.5210	low
True	0.35	tech	0.8610	medium
False	0.24	tech	0.4410	high
True	0.43	sale	0.5170	medium
True	0.37	mgmt	0.8860	medium
True	0.30	sale	0.8790	low
False	0.40	mgmt	0.2020	medium
False	0.58	tech	0.2650	low
True	0.27	mgmt	0.8480	low
False	0.33	sale	0.5600	medium
True	0.59	tech	0.2330	high
True	0.52	sale	0.8700	high
False	0.41	mgmt	0.5170	medium
True	0.22	sale	0.3500	high
False	0.61	sale	0.2980	low
True	0.46	mgmt	0.6780	medium
True	0.59	mgmt	0.8430	low
False	0.28	tech	0.7730	high
True	0.46	sale	0.8930	medium
False	0.48	tech	0.2920	medium
False	0.28	mgmt	0.6690	medium
False	0.23	sale	0.8970	high
True	0.60	mgmt	0.6270	high
True	0.29	sale	0.7760	low
True	0.24	tech	0.8750	high
False	0.51	mgmt	0.4090	medium
True	0.22	sale	0.8910	low
True	0.19	tech	0.5380	low
False	0.25	sale	0.9000	high
True	0.44	tech	0.8980	medium
True	0.35	mgmt	0.5380	medium
True	0.29	sale	0.7610	low
False	0.25	mgmt	0.3450	medium
False	0.66	mgmt	0.2210	low
False	0.43	tech	0.7450	medium
True	0.42	sale	0.8520	medium
True	0.44	mgmt	0.6580	medium
False	0.42	sale	0.6970	medium
True	0.56	tech	0.3680	high
True	0.38	mgmt	0.2600	low

文件: employees_norm_test.tsv

isMale	age	job	income	satisfac
True	0.50	mgmt	0.5470	medium
False	0.67	tech	0.3200	low
False	0.23	sale	0.7510	high
True	0.18	tech	0.7950	low
False	0.33	mgmt	0.6210	medium
True	0.47	sale	0.4650	medium
True	0.59	sale	0.7420	high
True	0.51	tech	0.4970	medium
False	0.33	tech	0.2630	medium
False	0.35	mgmt	0.8300	high

文件: GenderMLdotNETProgram.cs

using System;
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Trainers;
namespace GenderMLdotNET
{
  /// <summary>
  /// Console demo: trains an ML.NET L-BFGS logistic regression binary
  /// classifier that predicts the "isMale" column from age, job, income
  /// and satisfac, reports accuracy on the training data, and makes one
  /// prediction for a hand-built input row.
  /// </summary>
  class GenderMLdotNETProgram
  {
    /// <summary>
    /// Program entry point. Loads the tab-separated training file,
    /// builds the encoding + training pipeline, fits the model,
    /// evaluates it and prints a single prediction.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args)
    {
      Console.WriteLine("\nBegin ML.NET gender demo \n");
      MLContext mlc = new MLContext(seed: 1);

      // 1. load data and create data pipeline
      Console.WriteLine("\nLoading norm data into memory \n");
      string trainDataPath =
        "..\\..\\..\\Data\\employees_norm_train.tsv";

      // The generic argument tells the loader which class carries the
      // LoadColumn/ColumnName schema; without it no overload matches
      // the (path, separator, hasHeader) argument list.
      IDataView trainData =
        mlc.Data.LoadFromTextFile<ModelInput>
        (trainDataPath, '\t', hasHeader: true);

      // One-hot encode the two string columns in place, then gather all
      // four predictors into the single "Features" column.
      var a = mlc.Transforms.Categorical.OneHotEncoding(new[]
       { new InputOutputColumnPair("job", "job") });
      var b = mlc.Transforms.Categorical.OneHotEncoding(new[]
        { new InputOutputColumnPair("satisfac", "satisfac") });
      var c = mlc.Transforms.Concatenate("Features", new[]
        { "age", "job", "income", "satisfac" });
      var dataPipe = a.Append(b).Append(c);

      // 2. train model
      Console.WriteLine("Creating logistic regression model");
      var options =
        new LbfgsLogisticRegressionBinaryTrainer.Options()
      {
        LabelColumnName = "isMale",
        FeatureColumnName = "Features",
        MaximumNumberOfIterations = 100,
        OptimizationTolerance = 1e-8f
      };

      var trainer =
        mlc.BinaryClassification.Trainers.
        LbfgsLogisticRegression(options);
      var trainPipe = dataPipe.Append(trainer);
      Console.WriteLine("Starting training");
      ITransformer model = trainPipe.Fit(trainData);
      Console.WriteLine("Training complete");

      // 3. evaluate model
      // Accuracy is measured on the same rows used for training.
      IDataView predictions = model.Transform(trainData);
      var metrics = mlc.BinaryClassification.
        EvaluateNonCalibrated(predictions, "isMale", "Score");
      Console.Write("Model accuracy on training data = ");
      Console.WriteLine(metrics.Accuracy.ToString("F4") + "\n");

      // 4. use model
      // Values use the same scaling as the data file
      // (0.32 is printed below as age 32, 0.4900 as $49K).
      ModelInput X = new ModelInput();
      X.Age = 0.32f; X.Job = "mgmt"; X.Income = 0.4900f;
      X.Satisfac = "medium";

      // The input/output types cannot be inferred from the ITransformer
      // argument, so they must be given explicitly.
      var pe = mlc.Model.
        CreatePredictionEngine<ModelInput, ModelOutput>(model);
      var Y = pe.Predict(X);
      Console.Write("Set age = 32, job = mgmt, income = $49K, ");
      Console.WriteLine("satisfac = medium");
      Console.Write("Predicted isMale : ");
      Console.WriteLine(Y.PredictedLabel);

      Console.WriteLine("\nEnd ML.NET demo ");
      Console.ReadLine();
    } // Main
  } // Program

  /// <summary>
  /// Prediction result read back from the trained binary classifier.
  /// </summary>
  class ModelOutput
  {
    // The column names must match the trainer's output columns exactly:
    // "PredictedLabel" and "Score" (the same "Score" column that Main
    // passes to EvaluateNonCalibrated). Lower-case spellings would not
    // bind to those columns.

    /// <summary>Predicted class: true means isMale.</summary>
    [ColumnName("PredictedLabel")]
    public bool PredictedLabel { get; set; }

    /// <summary>Raw score produced by the model for this row.</summary>
    [ColumnName("Score")]
    public float Score { get; set; }
  }

  /// <summary>
  /// One row of the employee data file. LoadColumn gives the zero-based
  /// field position in the file; ColumnName gives the column name used
  /// inside the ML.NET pipeline.
  /// </summary>
  class ModelInput
  {
    /// <summary>Label column (field 0).</summary>
    [LoadColumn(0)]
    [ColumnName("isMale")]
    public bool IsMale { get; set; }

    /// <summary>Age value (field 1).</summary>
    [LoadColumn(1)]
    [ColumnName("age")]
    public float Age { get; set; }

    /// <summary>Job type as text (field 2).</summary>
    [LoadColumn(2)]
    [ColumnName("job")]
    public string Job { get; set; }

    /// <summary>Income value (field 3).</summary>
    [LoadColumn(3)]
    [ColumnName("income")]
    public float Income { get; set; }

    /// <summary>Job satisfaction as text (field 4).</summary>
    [LoadColumn(4)]
    [ColumnName("satisfac")]
    public string Satisfac { get; set; }
  }
} // ns

AutoML命令:

REM Ask the ML.NET CLI (AutoML) to search for a binary classifier that
REM predicts the isMale column, using the train/test TSV files under .\Data,
REM and to emit its output under the name PredictGenderAutoML.
REM NOTE(review): --max-exploration-time is presumably in seconds - confirm
REM against the mlnet CLI documentation for the installed version.
mlnet auto-train ^
--task binary-classification ^
--dataset ".\Data\employees_norm_train.tsv" ^
--test-dataset ".\Data\employees_norm_test.tsv" ^
--label-column-name isMale ^
--max-exploration-time 60 ^
--name PredictGenderAutoML

 

posted on 2020-03-22 15:16  Bean.Hsiang  阅读(885)  评论(0)  编辑  收藏  举报