使用 Tika 判断文件是否为纯文本文件

判断是否是纯文本文件

import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.jdl.jscaffold.exception.BusinessException;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.filefilter.IOFileFilter;
import org.apache.commons.io.filefilter.TrueFileFilter;
import org.apache.tika.Tika;

import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.List;

@Slf4j
public class FileUtils {
    public static boolean isTextFile(File file){
          Tika tika = new Tika();

          List<String> contentTypes = Lists.newArrayList(
                  "application/json",
                  "application/xml",
                  "application/xhtml+xml",
                  "application/sql",
                  "application/ld+json",
                  "application/x-yaml"
          );

          try {
              String mimeType = tika.detect(file);
              // 判断是否为纯文本类型
              if (mimeType.startsWith("text/")){
                  return true;
              }
              if (contentTypes.contains(mimeType)) {
                  return true;
              }

              return false;
          } catch (IOException e) {
              log.error("judge file text error",e);
              throw new BusinessException("判断文件" + file.getName() + "是否纯文本出现error",e);
          }
      }
}
posted @ 2024-03-22 15:20  SpecialSpeculator  阅读(28)  评论(0编辑  收藏  举报