大文件多线程切片并发上传

- 技术使用:
  - `File.prototype.slice` (文件切片)
  - `FileReader` (读取分片为 ArrayBuffer)
  - `spark-md5` (增量计算 MD5)
  - Web Worker (多线程并行哈希)
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>大文件分片上传</title>
<style>
/* Thumbnail size for images appended to the file list after merge. */
img {
width: 120px;
}
/* Decorative ball used as a visual "is the main thread blocked?" probe:
   if hashing ran on the main thread the animation would stutter. */
.ball {
width: 100px;
height: 100px;
border-radius: 50%;
background-color: pink;
animation: ball-move 3s linear infinite alternate;
}
@keyframes ball-move {
0% {
translate: 0 0;
}
100% {
/* Travel the full viewport width minus the ball and default body margins. */
translate: calc(100vw - 100px - 2 * 8px) 0;
}
}
</style>
</head>
<body>
<div class="ball"></div>
<hr />
<input type="file" id="fileInput" hidden />
<button type="button" id="uploadBtn">上传文件</button>
<button type="button" id="pauseBtn">暂停上传</button>
<button type="button" id="resumeBtn">继续上传</button>
<hr />
<ul class="fileList"></ul>
<script type="module">
import SparkMD5 from './spark-md5.js';
// DOM handles for the upload UI.
const uploadBtn = document.getElementById('uploadBtn');
const fileInput = document.getElementById('fileInput');
const fileList = document.querySelector('.fileList');
const pauseBtn = document.getElementById('pauseBtn');
const resumeBtn = document.getElementById('resumeBtn');
// NOTE(review): this hasher is shared by every getHash() call; spark-md5
// accumulates appended data across calls, so hashing more than one blob
// through the same instance looks unsafe — verify against spark-md5 docs.
const spark = new SparkMD5.ArrayBuffer();
// "暂停上传": for now this only asks the server for current upload progress.
pauseBtn.addEventListener('click', async () => {
  const response = await fetch('http://localhost:3001/upload-info');
  const res = await response.json();
  console.log('res => ', res);
});
// "继续上传": triggers a server-side merge for a file id.
// TODO(review): the file id below is a hard-coded debug value — it should come
// from the last interrupted upload's saved state instead.
resumeBtn.addEventListener('click', async () => {
await mergeChunks('4f81fe0634a6de12');
});
/**
 * Upload a whole file in a single multipart request (non-chunked path).
 * @param {File} file - file to send.
 * @returns {Promise<object>} parsed JSON response from the server.
 */
async function uploadFile(file) {
  const formData = new FormData();
  formData.append('file', file);
  const res = await fetch('http://localhost:3001/upload', {
    method: 'POST',
    body: formData,
  }).then((res) => res.json());
  // FIX: the parsed response was previously discarded; return it to callers.
  return res;
}
// Forward clicks on the visible button to the hidden file input.
uploadBtn.addEventListener('click', () => fileInput.click());
// Main pipeline: slice + hash in a worker, upload all chunks concurrently,
// then ask the server to merge them.
fileInput.addEventListener('change', async () => {
  /** @type {File} */
  const file = fileInput.files[0];
  if (!file) return;
  console.log('file.size => ', file.size);
  const chunkSize = 1024 * 64 * 1; // 64KB per chunk
  const chunks = Math.ceil(file.size / chunkSize);
  console.time('md5计算耗时');
  // Hash and slice off the main thread so the UI (see .ball) stays smooth.
  const { fileMd5, chunkList } = await createWorker(file, chunkSize, chunks);
  console.log('data => ', fileMd5, chunkList);
  // Fire all chunk uploads concurrently.
  const fetchList = chunkList.map((chunk) => {
    const formData = new FormData();
    formData.append('file', chunk.blob);
    formData.append('chunk', chunk.index);
    formData.append('chunks', chunks);
    formData.append('chunkId', chunk.hash);
    formData.append('fileId', fileMd5);
    formData.append('mime', file.type);
    return fetch('http://localhost:3001/upload2', {
      method: 'POST',
      body: formData,
    });
  });
  const responses = await Promise.all(fetchList);
  // FIX: verify every chunk upload succeeded before requesting the merge;
  // previously a failed chunk still triggered a (broken) merge.
  const failedCount = responses.filter((res) => !res.ok).length;
  if (failedCount > 0) {
    console.error(`${failedCount} 个分片上传失败, 取消合并`);
    console.timeEnd('md5计算耗时');
    return;
  }
  await mergeChunks(fileMd5, file.type);
  console.timeEnd('md5计算耗时');
  // FIX: reset the input so choosing the same file again re-fires 'change'.
  fileInput.value = '';
});
/**
 * Run the slicing/hashing pipeline in a dedicated worker.
 * @param {File} file - file to process.
 * @param {number} chunkSize - bytes per chunk.
 * @param {number} chunks - total chunk count.
 * @returns {Promise<{fileMd5: string, chunkList: Array}>}
 */
function createWorker(file, chunkSize, chunks) {
  return new Promise((resolve, reject) => {
    const worker = new Worker('./file-worker.js', {
      type: 'module',
    });
    worker.onmessage = (e) => {
      // FIX: terminate on the parent side too; the worker's own self.close()
      // is not guaranteed from here, and this frees resources promptly.
      worker.terminate();
      resolve(e.data);
    };
    // FIX: without an error handler a worker failure left this promise
    // pending forever, silently stalling the whole upload.
    worker.onerror = (err) => {
      worker.terminate();
      reject(err);
    };
    worker.postMessage({ file, chunkSize, chunks });
  });
}
/**
 * Sequentially upload file chunks one at a time (legacy single-threaded path),
 * recursing to the next chunk after each successful upload and merging when done.
 * @param {File} file 文件
 * @param {number} chunkSize 分片大小
 * @param {string} fileMd5 文件md5
 * @param {number} currentChunk 当前分片索引
 * @param {number} chunks 分片总数
 */
async function createChunks(file, chunkSize, fileMd5, currentChunk, chunks) {
  const start = currentChunk * chunkSize;
  // FIX: the upper bound was `Math.min(start + chunkSize, file)` — comparing
  // against the File object itself (NaN-ish coercion), not its byte length.
  const end = Math.min(start + chunkSize, file.size);
  const blob = file.slice(start, end);
  const formData = new FormData();
  formData.append('file', blob);
  formData.append('chunk', currentChunk);
  formData.append('chunks', chunks);
  formData.append('chunkId', await getHash(blob));
  formData.append('fileId', fileMd5);
  try {
    const res = await fetch('http://localhost:3001/upload2', {
      method: 'POST',
      body: formData,
    });
    if (res.ok) {
      console.log(`分片${currentChunk}上传成功`);
      currentChunk++;
      if (currentChunk < chunks) {
        // Upload the next chunk only after this one succeeded.
        await createChunks(file, chunkSize, fileMd5, currentChunk, chunks);
      } else {
        console.log('fileMd5 => ', fileMd5);
        await mergeChunks(fileMd5);
      }
    }
  } catch (err) {
    console.log(err);
  }
}
/**
 * Compute the MD5 hex digest of a single Blob.
 * FIX: use a fresh SparkMD5 instance per call — the shared module-level
 * `spark` accumulated appended data across calls, so every digest after the
 * first covered all blobs hashed so far rather than just the current one.
 * @param {Blob} blob - data to hash.
 * @returns {Promise<string>} MD5 hex digest.
 */
function getHash(blob) {
  return new Promise((resolve, reject) => {
    const hasher = new SparkMD5.ArrayBuffer();
    const reader = new FileReader();
    reader.onload = (e) => {
      hasher.append(e.target.result);
      resolve(hasher.end());
    };
    // FIX: reject on read failure instead of hanging the promise forever.
    reader.onerror = () => reject(reader.error);
    reader.readAsArrayBuffer(blob);
  });
}
/**
 * Ask the server which chunks of a file have already been uploaded
 * (resume support).
 * @param {string} fileMd5 - whole-file MD5 identifying the upload.
 * @returns {Promise<object>} server-side upload info.
 */
async function checkFile(fileMd5) {
  const res = await fetch(`http://localhost:3001/upload-info/${fileMd5}`).then((res) => res.json());
  console.log('res => ', res);
  // FIX: the result was only logged; return it so callers can act on it.
  return res;
}
/**
 * Ask the server to merge all uploaded chunks of a file into one file.
 * @param {string} fileId - whole-file MD5 identifying the upload.
 * @param {string} [fileType] - MIME type to record for the merged file.
 * @returns {Promise<object>} parsed server response.
 * @throws {Error} when fileId is missing.
 */
async function mergeChunks(fileId, fileType) {
  if (!fileId) throw new Error('fileId is required');
  const res = await fetch(`http://localhost:3001/merge/${fileId}`, {
    method: 'POST',
    // FIX: declare the JSON body explicitly — without this header most
    // server-side body parsers will not parse `mime` from the request.
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ mime: fileType }),
  }).then((res) => res.json());
  console.log('res => ', res);
  return res;
}
</script>
</body>
</html>
import SparkMD5 from './spark-md5.js';
// 监听来自主线程的消息,计算文件的 MD5 值并返回处理后的分片列表
// Entry point: hash a file's chunks across several sub-workers, derive the
// whole-file MD5 from the chunk hashes, and post both back to the page.
self.addEventListener('message', async (e) => {
  const { file, chunkSize, chunks } = e.data;
  // Leave two cores free for the page; fall back to 4 when the value is
  // unavailable. FIX: the old `navigator.hardwareConcurrency - 2 || 4` could
  // be negative (hardwareConcurrency === 1 gives -1), which spawned zero
  // workers and left createChunkWorkers' promise pending forever.
  const threadCount = Math.max(navigator.hardwareConcurrency - 2 || 4, 1);
  // Slice + hash the chunks in parallel sub-workers.
  const chunkList = await createChunkWorkers(file, chunkSize, chunks, threadCount);
  // The file id is the MD5 of all chunk buffers in order.
  const fileMd5 = await createFileHash(chunkList);
  postMessage({ fileMd5, chunkList });
  self.close();
});
// 创建多个线程处理文件分片
/**
 * Fan the file's chunks out to `threadCount` sub-workers and collect the
 * hashed chunks back into index order.
 * @param {File} file - file to slice and hash.
 * @param {number} chunkSize - bytes per chunk.
 * @param {number} chunks - total chunk count.
 * @param {number} threadCount - number of sub-workers to spawn.
 * @returns {Promise<Array<{index:number, hash:string, buf:ArrayBuffer, blob:Blob}>>}
 */
function createChunkWorkers(file, chunkSize, chunks, threadCount) {
  return new Promise((resolve, reject) => {
    // Chunks assigned to each worker (last worker may get fewer).
    const threadChunkCount = Math.ceil(chunks / threadCount);
    let finishedCount = 0;
    // Indexed by chunk position so out-of-order worker replies land correctly.
    const result = new Array(chunks).fill('');
    for (let i = 0; i < threadCount; i++) {
      const worker = new Worker('./chunk-worker.js', {
        type: 'module',
      });
      worker.onmessage = (e) => {
        const { chunkList } = e.data;
        chunkList.forEach((chunk) => {
          result[chunk.index] = chunk;
        });
        finishedCount++;
        if (finishedCount === threadCount) {
          resolve(result);
        }
      };
      // FIX: surface sub-worker failures — previously an error in any child
      // left this promise (and the whole upload) hanging forever.
      worker.onerror = (err) => {
        worker.terminate();
        reject(err);
      };
      worker.postMessage({
        index: i,
        file,
        start: i * threadChunkCount,
        end: Math.min((i + 1) * threadChunkCount, chunks),
        chunkSize,
      });
    }
  });
}
// 计算文件的 MD5 值
async function createFileHash(list) {
const spark = new SparkMD5.ArrayBuffer();
for await (const { buf } of list) {
spark.append(buf);
}
return spark.end();
}
import SparkMD5 from './spark-md5.js';
// 添加事件监听器,当接收到消息时运行该异步函数
// Sub-worker entry point: hash the chunk range [start, end) assigned by the
// parent worker, then reply with the hashed chunk list and shut down.
self.addEventListener('message', async (e) => {
  const { index, file, start, end, chunkSize } = e.data;
  const chunkList = [];
  for (let i = start; i < end; i++) {
    const offset = i * chunkSize;
    // Slice one chunk out of the file and hash it.
    const blob = file.slice(offset, offset + chunkSize);
    const { buf, hash } = await createHash(blob);
    chunkList.push({ index: i, hash, buf, blob });
  }
  postMessage({ index, chunkList });
  // One-shot worker: exit after reporting results.
  self.close();
});
// 创建哈希值的函数
function createHash(blob) {
return new Promise((resolve) => {
const fileReader = new FileReader();
const spark = new SparkMD5.ArrayBuffer();
// 当文件读取完成后生成MD5哈希值
fileReader.addEventListener('load', (e) => {
const buf = e.target.result;
spark.append(buf);
resolve({ buf, hash: spark.end() });
});
fileReader.readAsArrayBuffer(blob);
});
}