使用 Tika 判断文件是否为纯文本文件
判断是否是纯文本文件
import com.google.common.collect.Lists;
import com.jdl.jscaffold.exception.BusinessException;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.filefilter.IOFileFilter;
import org.apache.commons.io.filefilter.TrueFileFilter;
import org.apache.tika.Tika;
import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
@Slf4j
public class FileUtils {
public static boolean isTextFile(File file){
Tika tika = new Tika();
List<String> contentTypes = Lists.newArrayList(
"application/json",
"application/xml",
"application/xhtml+xml",
"application/sql",
"application/ld+json",
"application/x-yaml"
);
try {
String mimeType = tika.detect(file);
// 判断是否为纯文本类型
if (mimeType.startsWith("text/")){
return true;
}
if (contentTypes.contains(mimeType)) {
return true;
}
return false;
} catch (IOException e) {
log.error("judge file text error",e);
throw new BusinessException("判断文件" + file.getName() + "是否纯文本出现error",e);
}
}
}
原创:做时间的朋友