根据不同的编码格式读取txt文件内容

参考:https://blog.csdn.net/chiwang1984/article/details/8593240

import lombok.extern.slf4j.Slf4j;
import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;


@Slf4j
public class FileRead {
    public List<List<String>> readTxt(FileInputStream fis, String code) {
        List<List<String>> result = new ArrayList<>();

        InputStreamReader isr = null;
        String tmp;
        try {
            isr = new InputStreamReader(fis, code);
            BufferedReader bufferedReader = new BufferedReader(isr);
            while ((tmp = bufferedReader.readLine()) != null) {
                result.add(Collections.singletonList(tmp));
            }
        } catch (IOException e) {
            log.error("读取文件内容失败", e);
            throw new ApiException("上传文件按失败,请稍后再试!");
        } finally {
            if (isr != null) {
                try {
                    isr.close();
                } catch (IOException e) {
                    log.error("读取文件内容关闭流失败", e);
                }
            }
        }
        return result;
    }

    public String getCode(FileInputStream fis) {
        String code;
        boolean flag = true;
        DataInputStream dis = null;

        try {
            //取前100个左右字节进行判断
            byte[] c = new byte[104];
            dis = new DataInputStream(fis);
            dis.read(c);
            if (c[0] == (byte) 0xFF && c[1] == (byte) 0xFE) {
                code = "UTF-16LE";
            }else if (c[0] == (byte) 0xFE && c[1] == (byte) 0xFF) {
                code = "UTF-16BE";
            } else {
                int len = c.length - 4;//为了防止后面的数组取值越界
                for (int i = 0; i < len; i++) {
                    if ((c[i] >> 7 & 0xff) == 0x00) {
                        continue;
                    }
                    if ((c[i] >> 4 & 0xff) == 0xff && (c[++i] >> 6 & 0xff) == 0xfe && (c[++i] >> 6 & 0xff) == 0xfe && (c[++i] >> 6 & 0xff) == 0xfe) {
                        continue;
                    }
                    if ((c[i] >> 5 & 0xff) == 0xff && (c[++i] >> 6 & 0xff) == 0xfe && (c[++i] >> 6 & 0xff) == 0xfe) {
                        continue;
                    }
                    if ((c[i] >> 6 & 0xff) == 0xff && (c[++i] >> 6 & 0xff) == 0xfe) {
                        continue;
                    }
                    flag = false;
                    break;
                }
                if (flag){
                    code = "UTF-8";
                }else {
                    code = "gbk";
                }
            }

        } catch (IOException e) {
            log.error("读取文件编码类型失败", e);
            throw new ApiException("上传文件按失败,请稍后再试!");
        } finally {
            if (dis != null) {
                try {
                    dis.close();
                } catch (IOException e) {
                    log.error("读取文件编码类型关闭流失败", e);
                }
            }
        }
        return code;
    }
}
posted @   MaC-Matthew  阅读(72)  评论(0)  编辑  收藏  举报
相关博文:
阅读排行:
· winform 绘制太阳,地球,月球 运作规律
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· AI与.NET技术实操系列(五):向量存储与相似性搜索在 .NET 中的实现
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 超详细:普通电脑也行Windows部署deepseek R1训练数据并当服务器共享给他人
点击右上角即可分享
微信分享提示