TensorFlow.js - 根据 2D 数据进行预测

index.html

<!DOCTYPE html>
<html lang="en">
<head>
  <!-- Declare the character encoding explicitly so the browser does not have to guess it. -->
  <meta charset="utf-8">
  <title>TensorFlow.js Tutorial</title>

  <!-- Import TensorFlow.js (script.js uses it through the global `tf`) -->
  <script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs@2.0.0/dist/tf.min.js"></script>
  <!-- Import tfjs-vis (script.js uses it through the global `tfvis`) -->
  <script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs-vis@1.0.2/dist/tfjs-vis.umd.min.js"></script>

  <!-- Import the main script file; it must come after the two libraries above
       and starts its work once the DOMContentLoaded event fires. -->
  <script src="script.js"></script>

</head>

<body>
</body>
</html>

script.js

/**
 * Fetch the cars dataset and reduce it to the two variables used here.
 *
 * Each record is mapped to `{mpg, horsepower}` (read from the record's
 * `Miles_per_Gallon` and `Horsepower` fields), and records where either
 * value is null or undefined are dropped.
 *
 * @returns {Promise<Array<{mpg: *, horsepower: *}>>} The cleaned records.
 * @throws {Error} If the HTTP response reports a non-OK status.
 */
async function getData() {
  const carsDataResponse = await fetch('https://storage.googleapis.com/tfjs-tutorials/carsData.json');

  // fetch() only rejects on network failure; an HTTP error status still
  // resolves, so check it explicitly before trying to parse the body.
  if (!carsDataResponse.ok) {
    throw new Error(`Failed to load car data: HTTP ${carsDataResponse.status}`);
  }

  // Parse the response body as JSON.
  const carsData = await carsDataResponse.json();

  // `!= null` deliberately matches both null and undefined.
  return carsData
    .map((car) => ({
      mpg: car.Miles_per_Gallon,
      horsepower: car.Horsepower,
    }))
    .filter((car) => car.mpg != null && car.horsepower != null);
}

/*------------------------------------------------------------------------------------------------------------------*/

  /**
   * Entry point of the tutorial: load the data, plot it, build and train the
   * model, then plot the model's predictions against the original data.
   *
   * @returns {Promise<void>} Resolves once training and plotting are done.
   */
  async function run() {
    // Load the original input data that we are going to train on.
    const data = await getData();

    const values = data.map((d) => ({
      x: d.horsepower,
      y: d.mpg,
    }));

    // Plot the raw data.
    tfvis.render.scatterplot(
      {name: 'Horsepower v MPG'},
      {values},
      {
        xLabel: 'Horsepower',
        yLabel: 'MPG',
        height: 300
      }
    );

    // Create the model
    const model = createModel();
    tfvis.show.modelSummary({name: 'Model Summary'}, model);

    // Convert the data to a form we can use for training.
    const tensorData = convertToTensor(data);
    try {
      const {inputs, labels} = tensorData;

      // Train the model
      await trainModel(model, inputs, labels);
      console.log('Done Training');

      // Make some predictions using the model and compare them to the original data
      testModel(model, data, tensorData);
    } finally {
      // The training tensors and normalization bounds are not needed after
      // the predictions are plotted; release them even if a step above failed.
      tf.dispose(tensorData);
    }
  }

  // Do not leave the promise returned by run() floating: report failures explicitly.
  document.addEventListener('DOMContentLoaded', () => {
    run().catch((err) => {
      console.error('Tutorial run failed:', err);
    });
  });

/*------------------------------------------------------------------------------------------------------------------*/

  /**
   * Build the sequential model used by this tutorial: two dense layers with
   * one unit each, the first of which declares an input shape of [1].
   *
   * @returns The (not yet compiled) sequential model.
   */
  function createModel() {
    // Layer configurations in the order they are stacked.
    const layerConfigs = [
      {inputShape: [1], units: 1, useBias: true},
      {units: 1, useBias: true},
    ];

    const net = tf.sequential();
    for (const config of layerConfigs) {
      net.add(tf.layers.dense(config));
    }
    return net;
  }

/*------------------------------------------------------------------------------------------------------------------*/

  /**
   * Convert the input data to tensors that we can use for machine
   * learning. The records are shuffled, and both the inputs (horsepower)
   * and the labels (mpg) are min-max scaled.
   *
   * @param {Array<{horsepower: number, mpg: number}>} data Records to convert.
   *     The array itself is left untouched; a shuffled copy is used internally.
   * @returns {{inputs, labels, inputMax, inputMin, labelMax, labelMin}} The
   *     normalized input/label tensors (built from tensors of shape
   *     [data.length, 1]) plus the min/max bounds needed to undo the scaling.
   */
  function convertToTensor(data) {
    // Wrapping these calculations in a tidy will dispose any
    // intermediate tensors.
    return tf.tidy(() => {
      // Step 1. Shuffle the data. tf.util.shuffle works in place, so shuffle
      // a copy to avoid reordering the caller's array as a side effect.
      const shuffled = [...data];
      tf.util.shuffle(shuffled);

      // Step 2. Convert the data to tensors: one column of inputs, one of labels.
      const inputs = shuffled.map((d) => d.horsepower);
      const labels = shuffled.map((d) => d.mpg);
      const inputTensor = tf.tensor2d(inputs, [inputs.length, 1]);
      const labelTensor = tf.tensor2d(labels, [labels.length, 1]);

      // Step 3. Normalize the data to the range 0 - 1 using min-max scaling.
      const inputMax = inputTensor.max();
      const inputMin = inputTensor.min();
      const labelMax = labelTensor.max();
      const labelMin = labelTensor.min();
      const normalizedInputs = inputTensor.sub(inputMin).div(inputMax.sub(inputMin));
      const normalizedLabels = labelTensor.sub(labelMin).div(labelMax.sub(labelMin));

      return {
        inputs: normalizedInputs,
        labels: normalizedLabels,
        // Return the min/max bounds so we can use them later.
        inputMax,
        inputMin,
        labelMax,
        labelMin,
      };
    });
  }

/*------------------------------------------------------------------------------------------------------------------*/

  /**
   * Compile the model and fit it to the given inputs and labels, reporting
   * loss and mse to a tfjs-vis panel at the end of every epoch.
   *
   * @param model The model to compile and train.
   * @param inputs Training inputs passed to `model.fit`.
   * @param labels Training labels passed to `model.fit`.
   * @returns {Promise} Resolves with whatever `model.fit` resolves with.
   */
  async function trainModel(model, inputs, labels) {
    // Prepare the model for training.
    const compileOptions = {
      // The algorithm that governs the model updates; Adam is used with its
      // default configuration.
      optimizer: tf.train.adam(),
      // Tells the model how well it is doing on each batch by comparing its
      // predictions with the true values.
      loss: tf.losses.meanSquaredError,
      metrics: ['mse'],
    };
    model.compile(compileOptions);

    // Live chart of the training metrics, refreshed at the end of each epoch.
    const visCallbacks = tfvis.show.fitCallbacks(
      {name: 'Training Performance'},
      ['loss', 'mse'],
      {height: 200, callbacks: ['onEpochEnd']}
    );

    const fitOptions = {
      // Size of the data subset the model sees on each training iteration.
      batchSize: 32,
      // Number of passes over the whole dataset.
      epochs: 50,
      shuffle: true,
      callbacks: visCallbacks,
    };

    const history = await model.fit(inputs, labels, fitOptions);
    return history;
  }

/*------------------------------------------------------------------------------------------------------------------*/

  /**
   * Plot the model's predictions next to the original data.
   *
   * Feeds 100 evenly spaced inputs between 0 and 1 to the model, maps both the
   * inputs and the predictions back to their original ranges by inverting the
   * min-max scaling, and renders them with the original points in one
   * tfjs-vis scatterplot.
   *
   * @param model Model whose `predict` method is called.
   * @param {Array<{horsepower, mpg}>} inputData Original records to plot.
   * @param normalizationData Object providing the `inputMax`, `inputMin`,
   *     `labelMin` and `labelMax` tensors used to undo the scaling.
   */
  function testModel(model, inputData, normalizationData) {
    const {inputMax, inputMin, labelMin, labelMax} = normalizationData;
    const sampleCount = 100;

    // tf.tidy runs the function; only the plain typed arrays obtained through
    // dataSync() are handed back out of it.
    const [plotXs, plotYs] = tf.tidy(() => {
      // Generate the new "examples" to feed to the model.
      const normXs = tf.linspace(0, 1, sampleCount);
      const normPreds = model.predict(normXs.reshape([sampleCount, 1]));

      // Un-normalize the data: the inverse of the min-max scaling done earlier.
      const inputRange = inputMax.sub(inputMin);
      const realXs = normXs.mul(inputRange).add(inputMin);
      const labelRange = labelMax.sub(labelMin);
      const realPreds = normPreds.mul(labelRange).add(labelMin);

      return [realXs.dataSync(), realPreds.dataSync()];
    });

    const predictedPoints = Array.from(plotXs, (x, idx) => ({x, y: plotYs[idx]}));
    const originalPoints = inputData.map(({horsepower, mpg}) => ({x: horsepower, y: mpg}));

    // Use tfjs-vis to plot the original data and the model's predictions.
    const surface = {name: 'Model Predictions vs Original Data'};
    const plotData = {
      values: [originalPoints, predictedPoints],
      series: ['original', 'predicted'],
    };
    const plotOptions = {
      xLabel: 'Horsepower',
      yLabel: 'MPG',
      height: 300
    };
    tfvis.render.scatterplot(surface, plotData, plotOptions);
  }