RandomAccessFile读取大文件时效率很低,现进行改进---BufferedRandomAccessFile

  /**
   * Licensed to the Apache Software Foundation (ASF) under one
   * or more contributor license agreements.  See the NOTICE file
   * distributed with this work for additional information
   * regarding copyright ownership.  The ASF licenses this file
   * to you under the Apache License, Version 2.0 (the
   * "License"); you may not use this file except in compliance
   * with the License.  You may obtain a copy of the License at
   *
   *     http://www.apache.org/licenses/LICENSE-2.0
   *
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  
  import java.io.File;
  import java.io.FileNotFoundException;
  import java.io.IOException;
  import java.io.RandomAccessFile;
  import java.util.Arrays;
  
  /**
   * A <code>BufferedRandomAccessFile</code> is like a
   * <code>RandomAccessFile</code>, but it uses a private buffer so that most
   * operations do not require a disk access.
   * <P>
   * 
   * Note: The operations on this class are unmonitored. Also, the correct
   * functioning of the <code>RandomAccessFile</code> methods that are not
   * overridden here relies on the implementation of those methods in the
   * superclass.
   */
  
  public final class BufferedRandomAccessFile extends RandomAccessFile
  {
      static final int LogBuffSz_ = 16; // 64K buffer
      public static final int BuffSz_ = (1 << LogBuffSz_);
      static final long BuffMask_ = ~(((long) BuffSz_) - 1L);
  
      private String path_;
      
      /*
       * This implementation is based on the buffer implementation in Modula-3's
       * "Rd", "Wr", "RdClass", and "WrClass" interfaces.
       */
      private boolean dirty_; // true iff unflushed bytes exist
      private boolean syncNeeded_; // dirty_ can be cleared by e.g. seek, so track sync separately
      private long curr_; // current position in file
      private long lo_, hi_; // bounds on characters in "buff"
      private byte[] buff_; // local buffer
      private long maxHi_; // this.lo + this.buff.length
      private boolean hitEOF_; // buffer contains last file block?
      private long diskPos_; // disk position
  
      /*
      * To describe the above fields, we introduce the following abstractions for
      * the file "f":
      *
      * len(f) the length of the file curr(f) the current position in the file
      * c(f) the abstract contents of the file disk(f) the contents of f's
      * backing disk file closed(f) true iff the file is closed
      *
      * "curr(f)" is an index in the closed interval [0, len(f)]. "c(f)" is a
      * character sequence of length "len(f)". "c(f)" and "disk(f)" may differ if
      * "c(f)" contains unflushed writes not reflected in "disk(f)". The flush
      * operation has the effect of making "disk(f)" identical to "c(f)".
      *
      * A file is said to be *valid* if the following conditions hold:
      *
      * V1. The "closed" and "curr" fields are correct:
      *
      * f.closed == closed(f) f.curr == curr(f)
      *
      * V2. The current position is either contained in the buffer, or just past
      * the buffer:
      *
      * f.lo <= f.curr <= f.hi
      *
      * V3. Any (possibly) unflushed characters are stored in "f.buff":
      *
      * (forall i in [f.lo, f.curr): c(f)[i] == f.buff[i - f.lo])
      *
      * V4. For all characters not covered by V3, c(f) and disk(f) agree:
      *
      * (forall i in [f.lo, len(f)): i not in [f.lo, f.curr) => c(f)[i] ==
      * disk(f)[i])
      *
      * V5. "f.dirty" is true iff the buffer contains bytes that should be
      * flushed to the file; by V3 and V4, only part of the buffer can be dirty.
      *
      * f.dirty == (exists i in [f.lo, f.curr): c(f)[i] != f.buff[i - f.lo])
      *
      * V6. this.maxHi == this.lo + this.buff.length
      *
      * Note that "f.buff" can be "null" in a valid file, since the range of
      * characters in V3 is empty when "f.lo == f.curr".
      *
      * A file is said to be *ready* if the buffer contains the current position,
      * i.e., when:
      *
      * R1. !f.closed && f.buff != null && f.lo <= f.curr && f.curr < f.hi
      *
      * When a file is ready, reading or writing a single byte can be performed
      * by reading or writing the in-memory buffer without performing a disk
      * operation.
      */
      
      /**
       * Open a new <code>BufferedRandomAccessFile</code> on <code>file</code>
       * in mode <code>mode</code>, which should be "r" for reading only, or
       * "rw" for reading and writing.
       */
      public BufferedRandomAccessFile(File file, String mode) throws IOException
      {
          this(file, mode, 0);
      }
      
      public BufferedRandomAccessFile(File file, String mode, int size) throws IOException
      {
          super(file, mode);
          path_ = file.getAbsolutePath();
          this.init(size);
      }
      
      /**
       * Open a new <code>BufferedRandomAccessFile</code> on the file named
       * <code>name</code> in mode <code>mode</code>, which should be "r" for
       * reading only, or "rw" for reading and writing.
       */
      public BufferedRandomAccessFile(String name, String mode) throws IOException
      {
          this(name, mode, 0);
      }
      
      public BufferedRandomAccessFile(String name, String mode, int size) throws FileNotFoundException
      {
          super(name, mode);
          path_ = name;
          this.init(size);
      }
      
      private void init(int size)
      {
          this.dirty_ = false;
          this.lo_ = this.curr_ = this.hi_ = 0;
          this.buff_ = (size > BuffSz_) ? new byte[size] : new byte[BuffSz_];
          this.maxHi_ = (long) BuffSz_;
          this.hitEOF_ = false;
          this.diskPos_ = 0L;
      }
  
      public String getPath()
      {
          return path_;
      }
  
      public void sync() throws IOException
      {
          if (syncNeeded_)
          {
              flush();
              getChannel().force(true);
              syncNeeded_ = false;
          }
      }
  
//      public boolean isEOF() throws IOException
//      {
//          assert getFilePointer() <= length();
//          return getFilePointer() == length();
//      }
  
      public void close() throws IOException
      {
          this.flush();
          this.buff_ = null;
          super.close();
      }
      
      /**
       * Flush any bytes in the file's buffer that have not yet been written to
       * disk. If the file was created read-only, this method is a no-op.
       */
      public void flush() throws IOException
      {        
          this.flushBuffer();
      }
      
      /* Flush any dirty bytes in the buffer to disk. */
      private void flushBuffer() throws IOException
      {   
          if (this.dirty_)
          {
              if (this.diskPos_ != this.lo_)
                  super.seek(this.lo_);
              int len = (int) (this.curr_ - this.lo_);
              super.write(this.buff_, 0, len);
              this.diskPos_ = this.curr_;             
              this.dirty_ = false;
          }
      }
      
      /*
       * Read at most "this.buff.length" bytes into "this.buff", returning the
       * number of bytes read. If the return result is less than
       * "this.buff.length", then EOF was read.
       */
      private int fillBuffer() throws IOException
      {
          int cnt = 0;
          int rem = this.buff_.length;
          while (rem > 0)
          {
              int n = super.read(this.buff_, cnt, rem);
              if (n < 0)
                  break;
              cnt += n;
              rem -= n;
          }
          if ( (cnt < 0) && (this.hitEOF_ = (cnt < this.buff_.length)) )
          {
              // make sure buffer that wasn't read is initialized with -1
              Arrays.fill(this.buff_, cnt, this.buff_.length, (byte) 0xff);
          }
          this.diskPos_ += cnt;
          return cnt;
      }
      
      /*
       * This method positions <code>this.curr</code> at position <code>pos</code>.
       * If <code>pos</code> does not fall in the current buffer, it flushes the
       * current buffer and loads the correct one.<p>
       * 
       * On exit from this routine <code>this.curr == this.hi</code> iff <code>pos</code>
       * is at or past the end-of-file, which can only happen if the file was
       * opened in read-only mode.
       */
      public void seek(long pos) throws IOException
      {
          if (pos >= this.hi_ || pos < this.lo_)
          {
              // seeking outside of current buffer -- flush and read             
              this.flushBuffer();
              this.lo_ = pos & BuffMask_; // start at BuffSz boundary
              this.maxHi_ = this.lo_ + (long) this.buff_.length;
              if (this.diskPos_ != this.lo_)
              {
                  super.seek(this.lo_);
                  this.diskPos_ = this.lo_;
              }
              int n = this.fillBuffer();
              this.hi_ = this.lo_ + (long) n;
          }
          else
          {
              // seeking inside current buffer -- no read required
              if (pos < this.curr_)
              {
                  // if seeking backwards, we must flush to maintain V4
                  this.flushBuffer();
              }
          }
          this.curr_ = pos;
      }
      
      public long getFilePointer()
      {
          return this.curr_;
      }
  
      public long length() throws IOException
      {
          // max accounts for the case where we have written past the old file length, but not yet flushed our buffer
          return Math.max(this.curr_, super.length());
      }
  
      public int read() throws IOException
      {
          if (this.curr_ >= this.hi_)
          {
              // test for EOF
              // if (this.hi < this.maxHi) return -1;
              if (this.hitEOF_)
                  return -1;
              
              // slow path -- read another buffer
              this.seek(this.curr_);
              if (this.curr_ == this.hi_)
                  return -1;
          }
          byte res = this.buff_[(int) (this.curr_ - this.lo_)];
          this.curr_++;
          return ((int) res) & 0xFF; // convert byte -> int
      }
      
      public int read(byte[] b) throws IOException
      {
          return this.read(b, 0, b.length);
      }
      
      public int read(byte[] b, int off, int len) throws IOException
      {
          if (this.curr_ >= this.hi_)
          {
              // test for EOF
              // if (this.hi < this.maxHi) return -1;
              if (this.hitEOF_)
                  return -1;
              
              // slow path -- read another buffer
              this.seek(this.curr_);
              if (this.curr_ == this.hi_)
                  return -1;
          }
          len = Math.min(len, (int) (this.hi_ - this.curr_));
          int buffOff = (int) (this.curr_ - this.lo_);
          System.arraycopy(this.buff_, buffOff, b, off, len);
          this.curr_ += len;
          return len;
      }
      
      public void write(int b) throws IOException
      {
          if (this.curr_ >= this.hi_)
          {
              if (this.hitEOF_ && this.hi_ < this.maxHi_)
              {
                  // at EOF -- bump "hi"
                  this.hi_++;
              }
              else
              {
                  // slow path -- write current buffer; read next one
                  this.seek(this.curr_);
                  if (this.curr_ == this.hi_)
                  {
                      // appending to EOF -- bump "hi"
                      this.hi_++;
                  }
              }
          }
          this.buff_[(int) (this.curr_ - this.lo_)] = (byte) b;
          this.curr_++;
          this.dirty_ = true;
          syncNeeded_ = true;
      }
      
      public void write(byte[] b) throws IOException
      {
          this.write(b, 0, b.length);
      }
      
      public void write(byte[] b, int off, int len) throws IOException
      {        
          while (len > 0)
          {              
              int n = this.writeAtMost(b, off, len);
              off += n;
              len -= n;
              this.dirty_ = true;
              syncNeeded_ = true;
          }
      }
      
      /*
       * Write at most "len" bytes to "b" starting at position "off", and return
       * the number of bytes written.
       */
      private int writeAtMost(byte[] b, int off, int len) throws IOException
      {        
          if (this.curr_ >= this.hi_)
          {
              if (this.hitEOF_ && this.hi_ < this.maxHi_)
              {
                  // at EOF -- bump "hi"
                  this.hi_ = this.maxHi_;
              }
              else
              {                                
                  // slow path -- write current buffer; read next one                
                  this.seek(this.curr_);
                  if (this.curr_ == this.hi_)
                  {
                      // appending to EOF -- bump "hi"
                      this.hi_ = this.maxHi_;
                  }
              }
          }
          len = Math.min(len, (int) (this.hi_ - this.curr_));
          int buffOff = (int) (this.curr_ - this.lo_);
          System.arraycopy(b, off, this.buff_, buffOff, len);
          this.curr_ += len;
          return len;
      }
  }

仿照:http://www.ibm.com/developerworks/cn/java/l-javaio/

 转载 : http://blog.csdn.net/silentbalanceyh/article/details/5252285

posted @ 2013-08-18 01:41  编程小爬虫  阅读(3593)  评论(0编辑  收藏  举报