guessContentTypeFromStream
先贴段 guessContentTypeFromStream 实现代码
这个方法只根据流的前几个字节来判断内容类型,所以并不完全可靠。
比如一个纯文本(txt)文件,
只要内容以 RIFF 开头,就会被判定成
audio/x-wav
总之不够严谨,不过一般情况下应该够用了。
/**
 * Tries to determine the content type of a stream by inspecting its
 * leading bytes ("magic numbers").
 *
 * <p>The first 11 bytes are read after calling {@code mark} and the stream
 * is then {@code reset}, so the caller sees the stream at its original
 * position. Only a short prefix is examined, so the result is a guess:
 * any data that happens to start with one of the recognised signatures
 * is reported as that type.
 *
 * @param is the stream to inspect; it must support {@code mark}/{@code reset}
 *           for any guess to be made
 * @return the guessed MIME type, or {@code null} if the stream does not
 *         support marking or no known signature matches
 * @throws IOException if reading from or resetting the stream fails
 */
public static String guessContentTypeFromStream(InputStream is)
        throws IOException {
    // If we can't read ahead safely, just give up on guessing.
    if (!is.markSupported()) {
        return null;
    }

    // Peek at the header, then rewind. A read past end-of-stream yields -1,
    // which can never equal any of the byte values tested below.
    is.mark(12);
    int c1 = is.read();
    int c2 = is.read();
    int c3 = is.read();
    int c4 = is.read();
    int c5 = is.read();
    int c6 = is.read();
    int c7 = is.read();
    int c8 = is.read();
    int c9 = is.read();
    int c10 = is.read();
    int c11 = is.read();
    is.reset();

    // Java class file: 0xCAFEBABE.
    if (c1 == 0xCA && c2 == 0xFE && c3 == 0xBA && c4 == 0xBE) {
        return "application/java-vm";
    }

    // Java serialization stream header.
    if (c1 == 0xAC && c2 == 0xED) {
        // next two bytes are version number, currently 0x00 0x05
        return "application/x-java-serialized-object";
    }

    if (c1 == '<') {
        // "<html", "<head" or "<body", all lower case ...
        boolean lowerCaseTag =
                (c2 == 'h' && (c3 == 't' && c4 == 'm' && c5 == 'l'
                        || c3 == 'e' && c4 == 'a' && c5 == 'd'))
                || (c2 == 'b' && c3 == 'o' && c4 == 'd' && c5 == 'y');
        // ... or the same tags all upper case (mixed case is not matched).
        boolean upperCaseTag =
                (c2 == 'H' && (c3 == 'T' && c4 == 'M' && c5 == 'L'
                        || c3 == 'E' && c4 == 'A' && c5 == 'D'))
                || (c2 == 'B' && c3 == 'O' && c4 == 'D' && c5 == 'Y');

        // "<!" covers both "<!DOCTYPE" and a leading "<!--" comment.
        if (c2 == '!' || lowerCaseTag || upperCaseTag) {
            return "text/html";
        }

        if (c2 == '?' && c3 == 'x' && c4 == 'm' && c5 == 'l' && c6 == ' ') {
            return "application/xml";
        }
    }

    // big and little endian UTF-16 encodings, with byte order mark
    if (c1 == 0xfe && c2 == 0xff) {
        if (c3 == 0 && c4 == '<' && c5 == 0 && c6 == '?'
                && c7 == 0 && c8 == 'x') {
            return "application/xml";
        }
    }

    if (c1 == 0xff && c2 == 0xfe) {
        if (c3 == '<' && c4 == 0 && c5 == '?' && c6 == 0
                && c7 == 'x' && c8 == 0) {
            return "application/xml";
        }
    }

    if (c1 == 'G' && c2 == 'I' && c3 == 'F' && c4 == '8') {
        return "image/gif";
    }

    if (c1 == '#' && c2 == 'd' && c3 == 'e' && c4 == 'f') {
        return "image/x-bitmap";
    }

    if (c1 == '!' && c2 == ' ' && c3 == 'X' && c4 == 'P'
            && c5 == 'M' && c6 == '2') {
        return "image/x-pixmap";
    }

    // PNG signature: 0x89 'P' 'N' 'G' CR LF 0x1A LF.
    if (c1 == 137 && c2 == 80 && c3 == 78
            && c4 == 71 && c5 == 13 && c6 == 10
            && c7 == 26 && c8 == 10) {
        return "image/png";
    }

    // JPEG: start-of-image marker (0xFFD8) followed by another marker.
    if (c1 == 0xFF && c2 == 0xD8 && c3 == 0xFF) {
        // 0xE0 is the APP0 segment, 0xEE the APP14 segment. Both are
        // ordinary JPEG data, so both report the registered type
        // "image/jpeg" (the 0xEE case previously returned the
        // unregistered "image/jpg").
        if (c4 == 0xE0 || c4 == 0xEE) {
            return "image/jpeg";
        }

        /*
         * File format used by digital cameras to store images.
         * Exif Format can be read by any application supporting
         * JPEG. Exif Spec can be found at:
         * http://www.pima.net/standards/it10/PIMA15740/Exif_2-1.PDF
         */
        if ((c4 == 0xE1)
                && (c7 == 'E' && c8 == 'x' && c9 == 'i' && c10 == 'f'
                && c11 == 0)) {
            return "image/jpeg";
        }
    }

    if (c1 == 0xD0 && c2 == 0xCF && c3 == 0x11 && c4 == 0xE0
            && c5 == 0xA1 && c6 == 0xB1 && c7 == 0x1A && c8 == 0xE1) {
        /* Above is signature of Microsoft Structured Storage.
         * Below this, could have tests for various SS entities.
         * For now, just test for FlashPix.
         */
        if (checkfpx(is)) {
            return "image/vnd.fpx";
        }
    }

    if (c1 == 0x2E && c2 == 0x73 && c3 == 0x6E && c4 == 0x64) {
        return "audio/basic"; // .au format, big endian
    }

    if (c1 == 0x64 && c2 == 0x6E && c3 == 0x73 && c4 == 0x2E) {
        return "audio/basic"; // .au format, little endian
    }

    if (c1 == 'R' && c2 == 'I' && c3 == 'F' && c4 == 'F') {
        /* I don't know if this is official but evidence
         * suggests that .wav files start with "RIFF" - brown
         *
         * NOTE: only the four-byte "RIFF" tag is checked, so any input
         * beginning with those bytes (including plain text) is
         * reported as WAV audio.
         */
        return "audio/x-wav";
    }

    return null;
}
posted on 2012-12-03 18:39 luckistmaomao 阅读(500) 评论(0) 编辑 收藏 举报