Java读取大文件

原文地址:http://wgslucky.blog.163.com/blog/static/97562532201332324639689/ 

Java 读取一个巨大的文本文件:既要保证内存不溢出,又要保证性能

 1 import java.io.BufferedReader;
 2 import java.io.File;
 3 import java.io.FileReader;
 4 import java.io.RandomAccessFile;
 5 import java.nio.ByteBuffer;
 6 import java.nio.MappedByteBuffer;
 7 import java.nio.channels.FileChannel;
 8 
 9 public class ReadBig {
10 public static String fff = "C:\\mq\\read\\from.xml";
11 
12 public static void main1(String[] args) throws Exception {
13 
14   final int BUFFER_SIZE = 0x300000;// 缓冲区大小为3M
15 
16   File f = new File(fff);
17 
18   /**
19    * 
20    * map(FileChannel.MapMode mode,long position, long size)
21    * 
22    * mode - 根据是按只读、读取/写入或专用(写入时拷贝)来映射文件,分别为 FileChannel.MapMode 类中所定义的
23    * READ_ONLY、READ_WRITE 或 PRIVATE 之一
24    * 
25    * position - 文件中的位置,映射区域从此位置开始;必须为非负数
26    * 
27    * size - 要映射的区域大小;必须为非负数且不大于 Integer.MAX_VALUE
28    * 
29    * 所以若想读取文件后半部分内容,如例子所写;若想读取文本后1/8内容,需要这样写map(FileChannel.MapMode.READ_ONLY,
30    * f.length()*7/8,f.length()/8)
31    * 
32    * 想读取文件所有内容,需要这样写map(FileChannel.MapMode.READ_ONLY, 0,f.length())
33    * 
34    */
35 
36   MappedByteBuffer inputBuffer = new RandomAccessFile(f, "r")
37     .getChannel().map(FileChannel.MapMode.READ_ONLY,
38       f.length() / 2, f.length() / 2);
39 
40   byte[] dst = new byte[BUFFER_SIZE];// 每次读出3M的内容
41 
42   long start = System.currentTimeMillis();
43 
44   for (int offset = 0; offset < inputBuffer.capacity(); offset += BUFFER_SIZE) {
45 
46    if (inputBuffer.capacity() - offset >= BUFFER_SIZE) {
47 
48     for (int i = 0; i < BUFFER_SIZE; i++)
49 
50      dst[i] = inputBuffer.get(offset + i);
51 
52    } else {
53 
54     for (int i = 0; i < inputBuffer.capacity() - offset; i++)
55 
56      dst[i] = inputBuffer.get(offset + i);
57 
58    }
59 
60    int length = (inputBuffer.capacity() % BUFFER_SIZE == 0) ? BUFFER_SIZE
61      : inputBuffer.capacity() % BUFFER_SIZE;
62 
63    System.out.println(new String(dst, 0, length));// new
64    // String(dst,0,length)这样可以取出缓存保存的字符串,可以对其进行操作
65 
66   }
67 
68   long end = System.currentTimeMillis();
69 
70   System.out.println("读取文件文件一半内容花费:" + (end - start) + "毫秒");
71 
72 }

 

 1 public static void main2(String[] args) throws Exception {
 2   int bufSize = 1024;
 3   byte[] bs = new byte[bufSize];
 4   ByteBuffer byteBuf = ByteBuffer.allocate(1024);
 5   FileChannel channel = new RandomAccessFile(fff, "r").getChannel();
 6   while (channel.read(byteBuf) != -1) {
 7    int size = byteBuf.position();
 8    byteBuf.rewind();
 9    byteBuf.get(bs); // 把文件当字符串处理,直接打印做为一个例子。
10    System.out.print(new String(bs, 0, size));
11    byteBuf.clear();
12   }
13 
14 }

 

1 public static void main(String[] args) throws Exception {
2   BufferedReader br = new BufferedReader(new FileReader(fff));
3   String line = null;
4   while ((line = br.readLine()) != null) {
5    System.out.println(line);
6   }
7 }

 

 1 public static void main(String[] args) throws Exception {
 2     int bufSize = 1024;
 3     byte[] bs = new byte[bufSize];
 4     ByteBuffer byteBuf = ByteBuffer.allocate(1024);
 5     FileChannel channel = new RandomAccessFile("d:\\filename","r").getChannel();
 6     while(channel.read(byteBuf) != -1) {
 7       int size = byteBuf.position();
 8       byteBuf.rewind();
 9       byteBuf.get(bs);
10       // 把文件当字符串处理,直接打印做为一个例子。
11       System.out.print(new String(bs, 0, size));
12       byteBuf.clear();
13     }
14   }
15 
16 }

 

Java 读取大容量文件时内存溢出?怎样每次只读取若干行,分多次读完

 1 import java.io.BufferedReader;
 2 import java.io.FileNotFoundException;
 3 import java.io.FileReader;
 4 import java.io.IOException;
 5 import java.io.RandomAccessFile;
 6 import java.util.Scanner;
 7 
 8 public class TestPrint {
 9     public static void main(String[] args) throws IOException {
10         String path = "你要读的文件的路径";
11         RandomAccessFile br=new RandomAccessFile(path,"rw");//这里rw看你了。要是之都就只写r
12         String str = null, app = null;
13         int i=0;
14         while ((str = br.readLine()) != null) {
15             i++;
16             app=app+str;
17             if(i>=100){//假设读取100行
18                 i=0;
19 //                这里你先对这100行操作,然后继续读
20 app=null;
21             }
22         }
23         br.close();
24     }
25 
26 }

 

 

当逐行读写大于2G的文本文件时推荐使用以下代码

 

 1 void largeFileIO(String inputFile, String outputFile) {
 2         try {
 3             BufferedInputStream bis = new BufferedInputStream(new FileInputStream(new File(inputFile)));
 4             BufferedReader in = new BufferedReader(new InputStreamReader(bis, "utf-8"), 10 * 1024 * 1024);//10M缓存
 5             FileWriter fw = new FileWriter(outputFile);
 6             while (in.ready()) {
 7                 String line = in.readLine();
 8                 fw.append(line + " ");
 9             }
10             in.close();
11             fw.flush();
12             fw.close();
13         } catch (IOException ex) {
14             ex.printStackTrace();
15         }

JDK本身就支持超大文件的读写

  网上的文章基本分为两大类:一类是使用BufferedReader类读写超大文件;另一类是使用RandomAccessFile类读取。经过比较,最后使用了前一种方式进行超大文件的读取。下面是相关代码,其实很简单:

1 File file = new File(filepath);   
2 BufferedInputStream fis = new BufferedInputStream(new FileInputStream(file));    
3 BufferedReader reader = new BufferedReader(new InputStreamReader(fis,"utf-8"),5*1024*1024);// 用5M的缓冲读取文本文件  
4   
5 String line = "";
6 while((line = reader.readLine()) != null){
7 //TODO: write your business
8 }

 

posted on 2014-12-03 17:46  linksky1018  阅读(458)  评论(0)  编辑  收藏  举报