松鼠的博客

导航

java实现文件分片上传并且断点续传

一、简单的分片上传

针对第一个问题,如果文件过大,上传到一半断开了,若重新开始上传的话,会很消耗时间,并且你也并不知道距离上次断开时,已经上传到哪一部分了。因此我们应该先对大文件进行分片处理,防止上面提到的问题。

前端代码:

<!-- html代码 -->
<!DOCTYPE html>
<html>
<head>
    <title>文件上传示例</title>
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
</head>
<body>
<form>
    <input type="file" id="fileInput" multiple>
    <button type="button" onclick="upload()" >上传</button>
</form>
<script>
    /**
     * Uploads the first selected file to the backend in fixed-size chunks,
     * one request at a time, and asks the backend to merge them once every
     * chunk has been accepted.
     */
    function upload() {
        var fileInput = document.getElementById('fileInput');
        var files = fileInput.files;

        // Nothing selected: bail out instead of throwing on files[0].
        if (!files || files.length === 0) {
            console.error("No file selected");
            return;
        }

        var file = files[0];
        var fileName = file.name;
        var chunkSize = 1024 * 10; // each chunk is 10 KB (small on purpose, to simulate a large file)
        var totalChunks = Math.ceil(file.size / chunkSize); // total number of chunks
        var currentChunk = 0; // index of the chunk being uploaded

        // Upload the chunk at index currentChunk, then continue with the next one.
        function uploadChunk() {
            var xhr = new XMLHttpRequest();
            var formData = new FormData();

            // Tell the backend which chunk this is and how many there are.
            formData.append('currentChunk', currentChunk);
            formData.append('totalChunks', totalChunks);
            formData.append('fileName', fileName);

            // Byte range of this chunk inside the file.
            var start = currentChunk * chunkSize;
            var end = Math.min(file.size, start + chunkSize);
            formData.append('chunk', file.slice(start, end));

            xhr.open('POST', '/file/upload');

            xhr.onload = function() {
                // Only advance when the backend accepted the chunk; otherwise
                // a failed chunk would silently be missing from the merged file.
                if (xhr.status < 200 || xhr.status >= 300) {
                    console.error("Chunk upload failed:", currentChunk, xhr.status, xhr.responseText);
                    return;
                }

                currentChunk++;

                if (currentChunk < totalChunks) {
                    uploadChunk();
                } else {
                    // Every chunk is on the server: merge them.
                    mergeChunks();
                }
            };

            xhr.onerror = function() {
                console.error("Chunk upload failed (network error):", currentChunk);
            };

            xhr.send(formData);
        }

        // Ask the backend to merge all uploaded chunks into the final file.
        function mergeChunks() {
            var xhr = new XMLHttpRequest();
            xhr.open("POST", "/file/merge", true);
            xhr.setRequestHeader("Content-type", "application/x-www-form-urlencoded");
            xhr.onreadystatechange = function() {
                if (xhr.readyState === 4) {
                    if (xhr.status === 200) {
                        console.log("文件上传完成:", xhr.responseText);
                    } else {
                        console.error(xhr.responseText);
                    }
                }
            };
            // The name goes into a urlencoded body, so characters such as
            // '&', '+', '%' or non-ASCII text must be escaped.
            xhr.send("fileName=" + encodeURIComponent(fileName));
        }

        // Start with the first chunk.
        uploadChunk();
    }
</script>
</body>
</html>

ps:以上代码使用了html+js完成,请求是使用了xhr来发送请求。其中xhr.open的地址为自己本地的接口地址。由于平时测试并不需要真正上传大型文件,所以每个分片的大小定义为10KB,以此模拟大文件上传。

后端代码:

//java代码
/**
 * Minimal chunked-upload endpoint.
 *
 * <p>{@code /file/upload} stores each chunk as its own file under
 * {@code uploadPath}; {@code /file/merge} concatenates the chunks in index
 * order into the final file and deletes the chunk files.
 *
 * <p>Upload state is kept in memory only, so it is lost on restart.
 */
@RestController
@RequestMapping("/file")
public class FileController {
    @Autowired
    private ResourceLoader resourceLoader;

    @Value("${my.config.savePath}")
    private String uploadPath;

    /** Chunk files received so far: sanitized file name -> (chunk index -> chunk file). */
    private final Map<String, Map<Integer, File>> chunksMap = new ConcurrentHashMap<>();

    /**
     * Stores one chunk on disk and records it under its index.
     *
     * @param currentChunk zero-based index of this chunk
     * @param totalChunks  total number of chunks announced by the client (not used for validation)
     * @param chunk        chunk content
     * @param fileName     name of the file being uploaded; only its last path segment is used
     * @throws IOException if the chunk cannot be written
     */
    @PostMapping("/upload")
    public void upload(@RequestParam int currentChunk, @RequestParam int totalChunks,
                       @RequestParam MultipartFile chunk,@RequestParam String fileName) throws IOException {
        if (currentChunk < 0) {
            throw new IllegalArgumentException("Invalid chunk index: " + currentChunk);
        }
        String safeName = safeFileName(fileName);

        // Name the chunk after the target file, not after chunk.getOriginalFilename():
        // browsers send sliced Blobs with a generic multipart filename (typically
        // "blob"), so chunks of different files would overwrite each other.
        File chunkFile = new File(uploadPath, safeName + "." + currentChunk);
        chunk.transferTo(chunkFile);

        // Keyed by index so a retried chunk replaces its earlier copy and the
        // merge order does not depend on arrival order.
        chunksMap.computeIfAbsent(safeName, k -> new ConcurrentHashMap<>()).put(currentChunk, chunkFile);
    }

    /**
     * Concatenates all recorded chunks of {@code fileName} in index order.
     *
     * @param fileName name of the file whose chunks should be merged
     * @return URL of the merged file
     * @throws IOException if reading a chunk or writing the merged file fails
     */
    @PostMapping("/merge")
    public String merge(@RequestParam String fileName) throws IOException {
        String safeName = safeFileName(fileName);

        Map<Integer, File> chunks = chunksMap.get(safeName);
        if (chunks == null || chunks.isEmpty()) {
            throw new RuntimeException("分片不存在");
        }

        // With n recorded chunks the indexes must be exactly 0..n-1; a hole
        // means a chunk was never received.
        int chunkCount = chunks.size();
        for (int i = 0; i < chunkCount; i++) {
            if (!chunks.containsKey(i)) {
                throw new RuntimeException("Missing chunk: " + i);
            }
        }

        File outputFile = new File(uploadPath, safeName);
        try (FileChannel outChannel = new FileOutputStream(outputFile).getChannel()) {
            for (int i = 0; i < chunkCount; i++) {
                try (FileChannel inChannel = new FileInputStream(chunks.get(i)).getChannel()) {
                    // transferTo may copy fewer bytes than requested, so loop until done.
                    long size = inChannel.size();
                    long position = 0;
                    while (position < size) {
                        position += inChannel.transferTo(position, size - position, outChannel);
                    }
                }
            }
        }

        // Delete chunks only after the whole merge succeeded, so a failed merge can be retried.
        for (File chunkFile : chunks.values()) {
            chunkFile.delete();
        }
        chunksMap.remove(safeName);

        // Build the URL from the actual file location; concatenating uploadPath and
        // fileName breaks when uploadPath has no trailing separator.
        Resource resource = resourceLoader.getResource(outputFile.toURI().toString());
        return resource.getURI().toString();
    }

    /** Reduces a client-supplied name to its last path segment so it cannot escape uploadPath. */
    private static String safeFileName(String fileName) {
        String name = new File(fileName.replace('\\', '/')).getName();
        if (name.isEmpty() || name.equals(".") || name.equals("..")) {
            throw new IllegalArgumentException("Invalid file name: " + fileName);
        }
        return name;
    }
}

ps: 使用一个map记录上传了哪些分片,这里将分片存在了本地的文件夹,等到分片都上传完成后合并并删除分片。用ConcurrentHashMap代替HashMap是因为它在多线程下是安全的。
以上只是一个简单的文件上传代码,但是只要在这上面另做修改就可以解决上面提到的问题。

二、解决问题

1. 怎么避免大量的硬盘读写
上面代码有一个弊端,就是将分片的内容存在了本地的文件夹里。而且在合并的时候判断上传是否完全也是从文件夹读取文件的。对磁盘的大量读写操作不仅速度慢,还会导致服务器崩溃,因此下面代码使用了redis来存储分片信息,避免对磁盘过多读写。(你也可以使用mysql或者其他中间件来存储信息,由于读写尽量不要在mysql,所以我使用了redis)。

2.目标文件过大,如果在上传过程中断开了怎么办
使用redis来存储分片内容,当断开后,文件信息还是存储在redis中,用户再次上传时,检测redis是否有该分片的内容,如果有则跳过。

3. 前端页面上传的文件数据与原文件数据不一致该如何发现
前端在调用上传接口时,先计算文件的校验和,然后将文件和校验和一并传给后端,后端对文件再计算一次校验和,两个校验和进行对比,如果相等,则说明数据一致,如果不一致则报错,让前端重新上传该片段。 js计算校验和代码:

// 计算文件的 SHA-256 校验和
//javascript代码
    /**
     * Computes the SHA-256 checksum of a file chunk with the browser's Web Crypto API.
     *
     * @param {Blob} fileChunk chunk to hash
     * @returns {Promise<string>} lower-case hex digest; rejects if the chunk
     *     cannot be read or hashed
     */
    function calculateHash(fileChunk) {
        return new Promise((resolve, reject) => {
            const crypto = window.crypto || window.msCrypto;
            // crypto.subtle is missing outside secure contexts; fail loudly
            // instead of throwing inside the reader callback.
            if (!crypto || !crypto.subtle) {
                reject(new Error('Web Crypto API (crypto.subtle) is not available; it requires HTTPS or localhost'));
                return;
            }

            const reader = new FileReader();
            reader.onload = () => {
                try {
                    crypto.subtle.digest("SHA-256", reader.result)
                        .then(hash => {
                            const hashArray = Array.from(new Uint8Array(hash));
                            resolve(hashArray.map(b => b.toString(16).padStart(2, '0')).join(''));
                        })
                        // Without this a failed digest leaves the promise pending forever.
                        .catch(reject);
                } catch (error) {
                    reject(error);
                }
            };
            reader.onerror = () => {
                reject(new Error('Failed to calculate hash'));
            };
            reader.readAsArrayBuffer(new Blob([fileChunk]));
        });
    }
//java代码
/**
 * Computes the SHA-256 digest of a chunk.
 *
 * @param fileChunk bytes to hash
 * @return the digest as 64 lower-case hexadecimal characters
 * @throws Exception if the SHA-256 algorithm is not available
 */
public static String calculateHash(byte[] fileChunk) throws Exception {
    final String hexDigits = "0123456789abcdef";
    MessageDigest sha256 = MessageDigest.getInstance("SHA-256");
    sha256.update(fileChunk);
    byte[] digest = sha256.digest();

    // Two hex characters per byte: high nibble first, then low nibble.
    StringBuilder hex = new StringBuilder(digest.length * 2);
    for (byte b : digest) {
        hex.append(hexDigits.charAt((b >> 4) & 0x0f));
        hex.append(hexDigits.charAt(b & 0x0f));
    }
    return hex.toString();
}

注意点:

1,这里前端和后端计算校验和的算法一定要是一致的,不然得不到相同的结果。
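2,前端使用的 window.crypto.subtle 是浏览器内置的 Web Crypto API,只能在安全上下文(HTTPS 或 localhost)中使用,本身不需要额外引入 js。如果想改用 crypto-js 这类第三方库来计算校验和,才需要引入相关的 js,可以使用script引入也可以直接下载js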
<script src="https://cdn.bootcss.com/crypto-js/3.1.9-1/crypto-js.min.js"></script>

crypto的下载地址 如果github打不开,可能需要使用npm下载了

4. 上传过程中如果断开了应该如何判断哪些分片没有上传

对redis检测哪个分片的下标不存在,若不存在则存入list,最后将list返回给前端

//java代码
// Fragment: reports which chunk indexes are missing from the uploaded set.
// Assumes hashMap is a Map<String, byte[]> keyed by the decimal chunk index - TODO confirm.
//
// The upper bound is the highest index seen, not hashMap.size(): size() only
// counts the chunks that are present, so with chunks {0, 5} it would check
// indexes 0..1 and never notice that 2..4 are missing.
// NOTE(review): chunks missing after the highest uploaded index can only be
// detected if the client also sends the total chunk count.
int highestIndex = -1;
for (String field : hashMap.keySet()) {
    highestIndex = Math.max(highestIndex, Integer.parseInt(field));
}

List<Integer> missingChunkIndexes = new ArrayList<>();
for (int i = 0; i <= highestIndex; i++) {
    if (!hashMap.containsKey(String.valueOf(i))) {
        missingChunkIndexes.add(i);
    }
}

boolean allChunksUploaded = missingChunkIndexes.isEmpty();
if (!allChunksUploaded) {
    // The client re-uploads exactly these indexes and then retries the merge.
    return ResponseEntity.status(HttpStatus.BAD_REQUEST).body(missingChunkIndexes);
}

三、完整代码

1、引入依赖

<dependency>
  	<groupId>io.lettuce</groupId>
    <artifactId>lettuce-core</artifactId>
    <version>6.1.4.RELEASE</version>
</dependency>
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-data-redis</artifactId>
</dependency>

lettuce是一个Redis客户端,你也可以不引入,直接使用redisTemplate就行了

2、前端代码

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>File Upload Demo</title>
</head>
<body>
<input type="file" id="fileInput" multiple>
<button type="button" onclick="uploadFile()" >上传</button>
<div id="progressBar"></div>
<script src="https://cdn.bootcss.com/crypto-js/3.1.9-1/crypto-js.min.js"></script>
<script>
    // Page-level upload state shared by the functions below.
    // fileId: upload id; starts empty and is overwritten with the response text
    // of each successful chunk upload.
    var fileId = "";
    // fileName: name of the selected file, sent with the merge request.
    var fileName = null;
    // file: the File currently being uploaded.
    var file;
    const chunkSize = 1024 * 10; // each chunk is 10 KB

    /**
     * Uploads the first selected file chunk by chunk (each chunk is retried
     * once on failure) and then asks the backend to merge the chunks.
     */
    async function uploadFile() {
        var fileInput = document.getElementById('fileInput');
        const selected = fileInput.files[0];

        // Nothing selected: bail out instead of throwing on files[0].name.
        if (!selected) {
            console.error('No file selected');
            return;
        }

        // A different file than the previous attempt must not reuse the old
        // upload id, otherwise its chunks would be mixed with chunks the
        // backend still holds for the earlier file.
        if (!file || file.name !== selected.name || file.size !== selected.size
                || file.lastModified !== selected.lastModified) {
            fileId = "";
        }
        file = selected;
        fileName = selected.name;

        // Upload the file chunk by chunk.
        const chunks = Math.ceil(file.size / chunkSize);
        for (let i = 0; i < chunks; i++) {
            try {
                await uploadChunk(file, i);
            } catch (error) {
                console.error('Failed to upload chunk', i, error);
                // Retry the failed chunk once before giving up.
                try {
                    await uploadChunk(file, i);
                } catch (error) {
                    console.error('Failed to resume upload', i, error);
                    return;
                }
            }
        }

        // Merge the chunks on the backend.
        try {
            const fileUrl = await mergeFile();
            console.log('File URL:', fileUrl);
            // The upload is finished; the next upload starts with a fresh id.
            fileId = "";
        } catch (error) {
            console.error('Failed to merge file', error);
        }
    }
    /**
     * Uploads one chunk of the file together with its SHA-256 checksum.
     *
     * @param {File} file file being uploaded
     * @param {number} chunkIndex zero-based index of the chunk to send
     * @returns {Promise<string>} resolves with the backend response (the upload
     *     id); rejects when hashing fails, the request fails, or the status is not 200
     */
    function uploadChunk(file, chunkIndex) {
        return new Promise((resolve, reject) => {
            const fileTemp = file.slice(chunkIndex * chunkSize, (chunkIndex + 1) * chunkSize);

            calculateHash(fileTemp).then(checksum => {
                const formData = new FormData();
                formData.append('chunk', fileTemp);
                formData.append('chunkIndex', chunkIndex);
                formData.append('chunkChecksum', checksum);
                formData.append('chunkSize', chunkSize);
                formData.append('fileId', fileId);

                const xhr = new XMLHttpRequest();
                xhr.open('POST', '/hospital/file2/upload', true);

                xhr.onload = () => {
                    if (xhr.status === 200) {
                        // Remember the upload id so later chunks and the merge use it.
                        fileId = xhr.responseText;
                        resolve(xhr.response);
                    } else {
                        reject(xhr.statusText);
                    }
                };

                xhr.onerror = () => {
                    reject(xhr.statusText);
                };
                xhr.send(formData);
            }).catch(reject); // a hashing failure must reject, not leave the caller waiting forever
        });
    }

    /**
     * Asks the backend to merge the uploaded chunks into the final file.
     *
     * When the backend answers 400 with a list of missing chunk indexes, those
     * chunks are re-uploaded through resume(), which retries the merge itself.
     *
     * @returns {Promise<string>} resolves with the backend response on status
     *     200; rejects with the status text otherwise
     */
    function mergeFile() {
        return new Promise((resolve, reject) => {
            const xhr = new XMLHttpRequest();
            const formData = new FormData();
            formData.append('fileId', fileId);
            formData.append('fileName', fileName);
            xhr.open('POST', '/hospital/file2/merge', true);

            xhr.onload = () => {
                if (xhr.status === 200) {
                    resolve(xhr.response);
                    return;
                }

                reject(xhr.statusText);

                // Only a 400 carries a list of missing chunk indexes. Other
                // failures return a plain message, which must not be parsed
                // as indexes and re-uploaded.
                if (xhr.status === 400) {
                    let missing = null;
                    try {
                        missing = JSON.parse(xhr.responseText);
                    } catch (error) {
                        missing = null;
                    }
                    if (Array.isArray(missing) && missing.length > 0) {
                        resume(missing);
                    }
                }
            };

            xhr.onerror = () => {
                reject(xhr.statusText);
            };

            xhr.send(formData);
        });
    }
    /**
     * Re-uploads the chunks the backend reported as missing, then retries the merge.
     *
     * @param {Array<number|string>} list indexes of the missing chunks
     */
    async function resume(list){
        // Entries are chunk indexes, possibly as strings; drop anything that is not one.
        const missingIndexes = list
            .map(entry => Number(String(entry).trim()))
            .filter(index => Number.isInteger(index) && index >= 0);
        if (missingIndexes.length === 0) {
            console.error('No valid chunk indexes to resume', list);
            return;
        }

        for (const chunkIndex of missingIndexes) {
            try {
                // Upload the chunk named by the list entry, not the entry's
                // position in the list.
                await uploadChunk(file, chunkIndex);
            } catch (error) {
                console.error('Failed to upload chunk', chunkIndex, error);
                // Retry the failed chunk once before giving up.
                try {
                    await uploadChunk(file, chunkIndex);
                } catch (error) {
                    console.error('Failed to resume upload', chunkIndex, error);
                    return;
                }
            }
        }

        // Merge the chunks on the backend.
        try {
            const fileUrl = await mergeFile();
            console.log('File URL:', fileUrl);
        } catch (error) {
            console.error('Failed to merge file', error);
        }
    }

    /**
     * Computes the SHA-256 checksum of a file chunk with the browser's Web Crypto API.
     *
     * @param {Blob} fileChunk chunk to hash
     * @returns {Promise<string>} lower-case hex digest; rejects if the chunk
     *     cannot be read or hashed
     */
    function calculateHash(fileChunk) {
        return new Promise((resolve, reject) => {
            const crypto = window.crypto || window.msCrypto;
            // crypto.subtle is missing outside secure contexts; fail loudly
            // instead of throwing inside the reader callback.
            if (!crypto || !crypto.subtle) {
                reject(new Error('Web Crypto API (crypto.subtle) is not available; it requires HTTPS or localhost'));
                return;
            }

            const reader = new FileReader();
            reader.onload = () => {
                try {
                    crypto.subtle.digest("SHA-256", reader.result)
                        .then(hash => {
                            const hashArray = Array.from(new Uint8Array(hash));
                            resolve(hashArray.map(b => b.toString(16).padStart(2, '0')).join(''));
                        })
                        // Without this a failed digest leaves the promise pending forever.
                        .catch(reject);
                } catch (error) {
                    reject(error);
                }
            };
            reader.onerror = () => {
                reject(new Error('Failed to calculate hash'));
            };
            reader.readAsArrayBuffer(new Blob([fileChunk]));
        });
    }

</script>
</body>
</html>

3、后端接口代码

/**
 * Chunked, resumable upload endpoint that keeps uploaded chunks in a Redis hash.
 *
 * <p>Each upload is identified by a {@code fileId}; its chunks live in the hash
 * {@code file_upload:<fileId>} with the decimal chunk index as field name and
 * the chunk bytes as value. {@code /merge} writes the chunks in index order to
 * a file under {@code uploadPath} and deletes the hash.
 *
 * <p>{@code RedisTemplate#opsForHash()} can be used instead of the raw
 * {@link RedisConnection} calls.
 */
@RestController
@RequestMapping("/file2")
public class File2Controller {

    private static final String FILE_UPLOAD_PREFIX = "file_upload:";

    @Autowired
    private ResourceLoader resourceLoader;

    @Value("${my.config.savePath}")
    private String uploadPath;

    @Autowired
    private ThreadLocal<RedisConnection> redisConnectionThreadLocal;

    /**
     * Verifies one chunk against its checksum and stores it in Redis unless a
     * chunk with the same index is already there.
     *
     * @param chunk         chunk content
     * @param chunkIndex    zero-based index of the chunk
     * @param chunkSize     chunk size announced by the client (not used)
     * @param chunkChecksum hex SHA-256 digest of the chunk computed by the client
     * @param fileId        upload id; a new one is generated when blank
     * @return 200 with the upload id, or 400 when the index or checksum is invalid
     * @throws Exception if the chunk cannot be read or hashed
     */
    @PostMapping("/upload")
    public ResponseEntity<?> uploadFile(@RequestParam("chunk") MultipartFile chunk,
                                        @RequestParam("chunkIndex") Integer chunkIndex,
                                        @RequestParam("chunkSize") Integer chunkSize,
                                        @RequestParam("chunkChecksum") String chunkChecksum,
                                        @RequestParam("fileId") String fileId) throws Exception {
        if (chunkIndex == null || chunkIndex < 0) {
            return ResponseEntity.status(HttpStatus.BAD_REQUEST).body("Invalid chunk index");
        }
        // isBlank already covers null and the empty string.
        if (StringUtils.isBlank(fileId)) {
            fileId = UUID.randomUUID().toString();
        }

        byte[] chunkBytes = chunk.getBytes();
        String actualChecksum = calculateHash(chunkBytes);
        // Hex digests are case-insensitive.
        if (!actualChecksum.equalsIgnoreCase(chunkChecksum)) {
            return ResponseEntity.status(HttpStatus.BAD_REQUEST).body("Chunk checksum does not match");
        }

        RedisConnection connection = redisConnectionThreadLocal.get();
        // HSETNX stores the chunk only if the field does not exist yet, in one
        // atomic step, so an already uploaded chunk is skipped.
        connection.hSetNX(toBytes(FILE_UPLOAD_PREFIX + fileId), toBytes(String.valueOf(chunkIndex)), chunkBytes);

        return ResponseEntity.ok(fileId);
    }

    /**
     * Computes the SHA-256 digest of a chunk.
     *
     * @param fileChunk bytes to hash
     * @return the digest as 64 lower-case hexadecimal characters
     * @throws Exception if the SHA-256 algorithm is not available
     */
    public static String calculateHash(byte[] fileChunk) throws Exception {
        MessageDigest md = MessageDigest.getInstance("SHA-256");
        byte[] hash = md.digest(fileChunk);
        StringBuilder hexString = new StringBuilder(hash.length * 2);
        for (byte b : hash) {
            hexString.append(String.format("%02x", b));
        }
        return hexString.toString();
    }

    /**
     * Merges all chunks of an upload into {@code uploadPath/fileName}.
     *
     * @param fileId   upload id returned by {@code /upload}
     * @param fileName target file name; only its last path segment is used
     * @return 200 with the URL of the merged file; 400 with the list of missing
     *     chunk indexes or for an invalid file name; 404 when the upload has no
     *     chunks; 555 when writing the file fails; 500 on any other error
     * @throws IOException declared for compatibility; failures are reported as responses
     */
    @PostMapping("/merge")
    public ResponseEntity<?> mergeFile(@RequestParam("fileId") String fileId, @RequestParam("fileName") String fileName) throws IOException {
        byte[] key = toBytes(FILE_UPLOAD_PREFIX + fileId);
        RedisConnection connection = redisConnectionThreadLocal.get();
        try {
            Map<byte[], byte[]> rawChunks = connection.hGetAll(key);
            if (rawChunks == null || rawChunks.isEmpty()) {
                return ResponseEntity.status(HttpStatus.NOT_FOUND).body("File not found");
            }

            // Re-key the chunks by their numeric index.
            Map<Integer, byte[]> chunks = new HashMap<>();
            int highestIndex = -1;
            for (Map.Entry<byte[], byte[]> entry : rawChunks.entrySet()) {
                int index = Integer.parseInt(new String(entry.getKey(), java.nio.charset.StandardCharsets.UTF_8));
                chunks.put(index, entry.getValue());
                highestIndex = Math.max(highestIndex, index);
            }

            // Check every index up to the highest one seen. Using the number of
            // stored chunks as the bound would hide gaps above that count.
            // NOTE(review): chunks missing after the highest uploaded index can
            // only be detected if the client also sends the total chunk count.
            List<Integer> missingChunkIndexes = new ArrayList<>();
            for (int i = 0; i <= highestIndex; i++) {
                if (!chunks.containsKey(i)) {
                    missingChunkIndexes.add(i);
                }
            }
            if (!missingChunkIndexes.isEmpty()) {
                return ResponseEntity.status(HttpStatus.BAD_REQUEST).body(missingChunkIndexes);
            }

            // Keep only the last path segment so the name cannot escape uploadPath.
            String safeName = new File(fileName.replace('\\', '/')).getName();
            if (safeName.isEmpty() || safeName.equals(".") || safeName.equals("..")) {
                return ResponseEntity.status(HttpStatus.BAD_REQUEST).body("Invalid file name");
            }

            File outputFile = new File(uploadPath, safeName);
            if (!mergeChunks(chunks, outputFile)) {
                return ResponseEntity.status(555).build();
            }

            connection.del(key);
            // Build the URL from the actual file location; concatenating uploadPath
            // and fileName breaks when uploadPath has no trailing separator.
            Resource resource = resourceLoader.getResource(outputFile.toURI().toString());
            return ResponseEntity.ok().body(resource.getURI().toString());
        } catch (Exception e) {
            e.printStackTrace();
            return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(e.getMessage());
        }
    }

    /** Writes the chunks with indexes 0..size-1 to destFile in order; returns false on I/O failure. */
    private boolean mergeChunks(Map<Integer, byte[]> chunkMap, File destFile) {
        try (FileOutputStream outputStream = new FileOutputStream(destFile)) {
            for (int i = 0; i < chunkMap.size(); i++) {
                outputStream.write(chunkMap.get(i));
            }
            return true;
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
    }

    /** Encodes Redis keys and fields as UTF-8 instead of the platform default charset. */
    private static byte[] toBytes(String value) {
        return value.getBytes(java.nio.charset.StandardCharsets.UTF_8);
    }
}

4、redis配置

/**
 * Redis wiring for the upload controller: a Lettuce connection factory built
 * from the {@code spring.redis.*} properties, and a per-thread
 * {@link RedisConnection} holder.
 */
@Configuration
public class RedisConfig {
    @Value("${spring.redis.host}")
    private String host;

    @Value("${spring.redis.port}")
    private int port;

    // Defaults to empty so the application still starts against a Redis
    // instance that has no password configured.
    @Value("${spring.redis.password:}")
    private String password;

    /** Creates a standalone Lettuce connection factory for the configured host, port and password. */
    @Bean
    public RedisConnectionFactory redisConnectionFactory() {
        RedisStandaloneConfiguration config = new RedisStandaloneConfiguration();
        config.setHostName(host);
        config.setPort(port);
        config.setPassword(RedisPassword.of(password));
        return new LettuceConnectionFactory(config);
    }

    /**
     * Exposes a holder that lazily opens one connection per thread on first use.
     *
     * <p>NOTE(review): nothing in this class closes these connections.
     */
    @Bean
    public ThreadLocal<RedisConnection> redisConnectionThreadLocal(RedisConnectionFactory redisConnectionFactory) {
        return ThreadLocal.withInitial(() -> redisConnectionFactory.getConnection());
    }
}

使用 redisConnectionThreadLocal 是为了避免多次建立连接,很耗时间

总结

以上就是该功能的完整代码。使用代码记得修改uploadPath,避免代码找不到目录路径。在代码最后,可以对整个文件计算校验和,并将校验和结果和文件名、文件大小、文件类型存入mysql数据库中,在下次大文件上传前先根据校验和判断文件是否已经存在。若存在就不要重复上传,避免占用空间。

 

参考文章:http://blog.ncmem.com/wordpress/2023/10/12/java%e5%ae%9e%e7%8e%b0%e6%96%87%e4%bb%b6%e5%88%86%e7%89%87%e4%b8%8a%e4%bc%a0%e5%b9%b6%e4%b8%94%e6%96%ad%e7%82%b9%e7%bb%ad%e4%bc%a0/

欢迎入群一起讨论

 

 

posted on 2023-10-12 18:30  Xproer-松鼠  阅读(366)  评论(0)  编辑  收藏  举报