node导出百万条数据成excel文件

用的是xlsx包,由于v8默认内存限制是1.4G,当导出的数据太大的时候,内存会炸,由于csv格式的excel文件,本质上就是文本文件,所以在不调大内存上限的情况下,实现思路是分批处理,用流的方式追加到文件,第一批处理有header,后面追加没有header。微软excel能打开最大的行数是1048576,所以生成文件超过了也没用。

const xlsx = require('xlsx')
const path = require('path')
const fs = require('fs')
let result = []
for (let i = 0; i < 1100000; i++) {
  result.push({
    hello: '哈哈哈' + i,
    world: '黑乎乎' + i,
    hehe2: '黑乎乎' + i,
    hehe3: '黑乎乎' + i,
    hehe4: '黑乎乎' + i,
    hehe5: '黑乎乎' + i,
    hehe6: '黑乎乎' + i,
    hehe7: '黑乎乎' + i,
    hehe8: '黑乎乎' + i,
    hehe9: '黑乎乎' + i,
    hehe10: '黑乎乎' + i,
    hehe11: '黑乎乎' + i,
    hehe12: '黑乎乎' + i,
    hehe13: '黑乎乎' + i,
    hehe14: '黑乎乎' + i,
    hehe15: '黑乎乎' + i,
  })
}

const add = async (data, filePath, hasHeader) => {
  return new Promise(resolve => {
    const jsonWorkSheet = xlsx.utils.json_to_sheet(data, {skipHeader: hasHeader});
    const stream = xlsx.stream.to_csv(jsonWorkSheet);
    const writeS = fs.createWriteStream(filePath, {flags: 'a'})
    stream.pipe(writeS)
    stream.on('end', function() {
      resolve()
    });
  })
} 

let filepath = path.resolve('world6.csv');

async function hello(result,filepath) {
  let chunk = []
  let num = 10000;
  let flag = false;
  while(result.length > 0) {
    chunk = result.splice(0,num)
    await add(chunk,filepath, flag)
    flag = true
  }
}
hello(result,filepath)

上面的实现是生成csv文件,如果要生成xlsx格式,可以用xlsx-writestream

var XLSXWriter = require('xlsx-writestream');
var fs = require('fs');

var writer = new XLSXWriter('mySpreadsheet.xlsx', {} /* options */);

writer.getReadStream().pipe(fs.createWriteStream('mySpreadsheet.xlsx'));

let result = []
for (let i = 0; i < 1500000; i++) {
  result.push({
    hello: '哈哈哈' + i,
    world: '黑乎乎' + i,
    hehe2: '黑乎乎' + i,
    hehe3: '黑乎乎' + i,
    hehe4: '黑乎乎' + i,
    hehe5: '黑乎乎' + i,
    hehe6: '黑乎乎' + i,
    hehe7: '黑乎乎' + i,
    hehe8: '黑乎乎' + i,
    hehe9: '黑乎乎' + i,
    hehe10: '黑乎乎' + i,
    hehe11: '黑乎乎' + i,
    hehe12: '黑乎乎' + i,
    hehe13: '黑乎乎' + i,
    hehe14: '黑乎乎' + i,
    hehe15: '黑乎乎' + i,
  })
}
result.forEach(item => {
    writer.addRow(item)
})

// Finalize the spreadsheet. If you don't do this, the readstream will not end.
writer.finalize();
posted @ 2020-07-31 11:57  樱风凛  阅读(1441)  评论(0编辑  收藏  举报