guessContentTypeFromStream

先贴段 guessContentTypeFromStream 实现代码
感觉只判断前几个字节的话,还是有点问题的
 
比如一个 txt 文本文件,只要内容以 RIFF 开头,就会被判别成 audio/x-wav。
总之有点不够严谨,但一般情况下应该是够用了
 
 
 
static public String guessContentTypeFromStream(InputStream is)
                   throws IOException {
       // If we can't read ahead safely, just give up on guessing
       if (!is.markSupported())
          return null;
 
      is.mark(12);
       int c1 = is.read();
       int c2 = is.read();
       int c3 = is.read();
       int c4 = is.read();
       int c5 = is.read();
       int c6 = is.read();
       int c7 = is.read();
       int c8 = is.read();
       int c9 = is.read();
       int c10 = is.read();
       int c11 = is.read();
      is.reset();
 
       if (c1 == 0xCA && c2 == 0xFE && c3 == 0xBA && c4 == 0xBE) {
          return "application/java-vm" ;
      }
 
       if (c1 == 0xAC && c2 == 0xED) {
          // next two bytes are version number, currently 0x00 0x05
          return "application/x-java-serialized-object" ;
      }
 
       if (c1 == '<' ) {
          if (c2 == '!'
            || ((c2 == 'h' && (c3 == 't' && c4 == 'm' && c5 == 'l' ||
                           c3 == 'e' && c4 == 'a' && c5 == 'd') ||
            (c2 == 'b' && c3 == 'o' && c4 == 'd' && c5 == 'y'))) ||
            ((c2 == 'H' && (c3 == 'T' && c4 == 'M' && c5 == 'L' ||
                        c3 == 'E' && c4 == 'A' && c5 == 'D') ||
            (c2 == 'B' && c3 == 'O' && c4 == 'D' && c5 == 'Y')))) {
             return "text/html";
          }
 
          if (c2 == '?' && c3 == 'x' && c4 == 'm' && c5 == 'l' && c6 == ' ') {
             return "application/xml" ;
          }
      }
 
       // big and little endian UTF-16 encodings, with byte order mark
       if (c1 == 0xfe && c2 == 0xff) {
          if (c3 == 0 && c4 == '<' && c5 == 0 && c6 == '?' &&
            c7 == 0 && c8 == 'x') {
             return "application/xml" ;
          }
      }
 
       if (c1 == 0xff && c2 == 0xfe) {
          if (c3 == '<' && c4 == 0 && c5 == '?' && c6 == 0 &&
            c7 == 'x' && c8 == 0) {
             return "application/xml" ;
          }
      }
 
       if (c1 == 'G' && c2 == 'I' && c3 == 'F' && c4 == '8' ) {
          return "image/gif";
      }
 
       if (c1 == '#' && c2 == 'd' && c3 == 'e' && c4 == 'f' ) {
          return "image/x-bitmap";
      }
 
       if (c1 == '!' && c2 == ' ' && c3 == 'X' && c4 == 'P' &&
                  c5 == 'M' && c6 == '2') {
          return "image/x-pixmap";
      }
 
       if (c1 == 137 && c2 == 80 && c3 == 78 &&
            c4 == 71 && c5 == 13 && c6 == 10 &&
            c7 == 26 && c8 == 10) {
          return "image/png";
      }
 
       if (c1 == 0xFF && c2 == 0xD8 && c3 == 0xFF) {
          if (c4 == 0xE0) {
              return "image/jpeg";
          }
 
          /**
             * File format used by digital cameras to store images.
             * Exif Format can be read by any application supporting
             * JPEG. Exif Spec can be found at:
             * http://www.pima.net/standards/it10/PIMA15740/Exif_2 -1.PDF
             */
            if ((c4 == 0xE1) &&
                (c7 == 'E' && c8 == 'x' && c9 == 'i' && c10 == 'f' &&
                 c11 == 0)) {
                return "image/jpeg";
            }
 
          if (c4 == 0xEE) {
             return "image/jpg";
          }
      }
 
       if (c1 == 0xD0 && c2 == 0xCF && c3 == 0x11 && c4 == 0xE0 &&
          c5 == 0xA1 && c6 == 0xB1 && c7 == 0x1A && c8 == 0xE1) {
 
          /* Above is signature of Microsoft Structured Storage.
           * Below this, could have tests for various SS entities.
           * For now, just test for FlashPix.
           */
          if ( checkfpx(is)) {
             return "image/vnd.fpx";
          }
      }
 
       if (c1 == 0x2E && c2 == 0x73 && c3 == 0x6E && c4 == 0x64) {
          return "audio/basic";  // .au format, big endian
      }
 
       if (c1 == 0x64 && c2 == 0x6E && c3 == 0x73 && c4 == 0x2E) {
          return "audio/basic";  // .au format, little endian
      }
 
       if (c1 == 'R' && c2 == 'I' && c3 == 'F' && c4 == 'F' ) {
          /* I don't know if this is official but evidence
           * suggests that .wav files start with "RIFF" - brown
           */
          return "audio/x-wav"
      }
       return null;
    }

posted on 2012-12-03 18:39  luckistmaomao  阅读(500)  评论(0)  编辑  收藏  举报

导航