node导出百万条数据成excel文件
用的是xlsx包,由于v8默认内存限制是1.4G,当导出的数据太大的时候,内存会炸,由于csv格式的excel文件,本质上就是文本文件,所以在不调大内存上限的情况下,实现思路是分批处理,用流的方式追加到文件,第一批处理有header,后面追加没有header。微软excel能打开最大的行数是1048576,所以生成文件超过了也没用。
const xlsx = require('xlsx')
const path = require('path')
const fs = require('fs')
let result = []
for (let i = 0; i < 1100000; i++) {
result.push({
hello: '哈哈哈' + i,
world: '黑乎乎' + i,
hehe2: '黑乎乎' + i,
hehe3: '黑乎乎' + i,
hehe4: '黑乎乎' + i,
hehe5: '黑乎乎' + i,
hehe6: '黑乎乎' + i,
hehe7: '黑乎乎' + i,
hehe8: '黑乎乎' + i,
hehe9: '黑乎乎' + i,
hehe10: '黑乎乎' + i,
hehe11: '黑乎乎' + i,
hehe12: '黑乎乎' + i,
hehe13: '黑乎乎' + i,
hehe14: '黑乎乎' + i,
hehe15: '黑乎乎' + i,
})
}
const add = async (data, filePath, hasHeader) => {
return new Promise(resolve => {
const jsonWorkSheet = xlsx.utils.json_to_sheet(data, {skipHeader: hasHeader});
const stream = xlsx.stream.to_csv(jsonWorkSheet);
const writeS = fs.createWriteStream(filePath, {flags: 'a'})
stream.pipe(writeS)
stream.on('end', function() {
resolve()
});
})
}
let filepath = path.resolve('world6.csv');
async function hello(result,filepath) {
let chunk = []
let num = 10000;
let flag = false;
while(result.length > 0) {
chunk = result.splice(0,num)
await add(chunk,filepath, flag)
flag = true
}
}
hello(result,filepath)
上面的实现是生成csv文件,如果要生成xlsx格式,可以用xlsx-writestream
var XLSXWriter = require('xlsx-writestream');
var fs = require('fs');
var writer = new XLSXWriter('mySpreadsheet.xlsx', {} /* options */);
writer.getReadStream().pipe(fs.createWriteStream('mySpreadsheet.xlsx'));
let result = []
for (let i = 0; i < 1500000; i++) {
result.push({
hello: '哈哈哈' + i,
world: '黑乎乎' + i,
hehe2: '黑乎乎' + i,
hehe3: '黑乎乎' + i,
hehe4: '黑乎乎' + i,
hehe5: '黑乎乎' + i,
hehe6: '黑乎乎' + i,
hehe7: '黑乎乎' + i,
hehe8: '黑乎乎' + i,
hehe9: '黑乎乎' + i,
hehe10: '黑乎乎' + i,
hehe11: '黑乎乎' + i,
hehe12: '黑乎乎' + i,
hehe13: '黑乎乎' + i,
hehe14: '黑乎乎' + i,
hehe15: '黑乎乎' + i,
})
}
result.forEach(item => {
writer.addRow(item)
})
// Finalize the spreadsheet. If you don't do this, the readstream will not end.
writer.finalize();