将一个无法一次读入内存的大文件排序
弄了一整天才弄出来，直接上代码：
package com.test;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.PriorityQueue;
import java.util.Random;

/**
 * External sort demo: sorts the integers of a large text file without holding
 * the whole file in memory at once.
 *
 * <p>Steps performed by {@link #main(String[])}:
 * <ol>
 *   <li>{@link #createData()} writes random non-negative integers, one per line.</li>
 *   <li>{@link #separateFile()} distributes the lines round-robin over the chunk files.</li>
 *   <li>{@link #everySingleFileSort()} sorts each chunk file, one chunk at a time.</li>
 *   <li>{@link #mergeFile()} k-way merges the sorted chunks into the result file.</li>
 * </ol>
 *
 * @author wangyuyuan
 */
public class LargeDataSortTest {

    /** Number of chunk files the input is split into. */
    private static final int CHUNK_COUNT = 20;

    /** Number of random integers written by {@link #createData()}. */
    private static final int RECORD_COUNT = 1000000;

    /** Line terminator used for every file this class writes. */
    private static final String LINE_SEPARATOR = "\r\n";

    /** Unsorted input file. */
    static File file = new File("E:" + File.separator + "dataTest" + File.separator + "data.txt");

    /** Sorted output file. */
    static File file1 = new File("E:" + File.separator + "dataTest" + File.separator + "dataSorted.txt");

    public static void main(String[] args) throws Exception {
        createData();
        System.out.println("大文件写入成功");
        separateFile();
        System.out.println("文件拆分成功");

        everySingleFileSort();
        System.out.println("小文件排序完成");
        mergeFile();
        System.out.println("所有排序都已完成");
    }

    /**
     * Writes {@code RECORD_COUNT} random integers in {@code [0, Integer.MAX_VALUE)}
     * to {@link #file}, one per line, creating the parent directory if needed.
     *
     * @throws IOException if the directory cannot be created or the file cannot be written
     */
    public static void createData() throws IOException {
        File parent = file.getParentFile();
        // mkdirs() returns false when the directory already exists, hence the isDirectory() re-check.
        if (parent != null && !parent.mkdirs() && !parent.isDirectory()) {
            throw new IOException("Cannot create directory " + parent);
        }
        Random random = new Random();
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(file))) {
            for (int i = 0; i < RECORD_COUNT; i++) {
                writer.write(random.nextInt(Integer.MAX_VALUE) + LINE_SEPARATOR);
            }
        }
    }

    /**
     * Splits {@link #file} into {@code CHUNK_COUNT} chunk files by handing out the
     * input lines round-robin: line {@code k} (0-based) goes to chunk {@code k % CHUNK_COUNT}.
     *
     * <p>End of input is detected by {@code readLine()} returning {@code null}, so an
     * input whose line count is not a multiple of {@code CHUNK_COUNT} is handled
     * correctly and no chunk receives a spurious {@code "null"} line.
     *
     * @throws IOException if the input cannot be read or a chunk cannot be written
     */
    public static void separateFile() throws IOException {
        List<BufferedWriter> writers = new ArrayList<>(CHUNK_COUNT);
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            for (int i = 0; i < CHUNK_COUNT; i++) {
                writers.add(new BufferedWriter(new FileWriter(chunkPath(i))));
            }
            int target = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                writers.get(target).write(line + LINE_SEPARATOR);
                target = (target + 1) % CHUNK_COUNT;
            }
        } finally {
            // Runs even when opening or writing failed, so already-opened chunks are released.
            closeAll(writers);
        }
    }

    /**
     * Sorts every chunk file in ascending order, in place. Only one chunk is
     * loaded at a time.
     *
     * @throws IOException if a chunk cannot be read or rewritten
     * @throws NumberFormatException if a chunk contains a line that is not an integer
     */
    public static void everySingleFileSort() throws Exception {
        for (int i = 0; i < CHUNK_COUNT; i++) {
            String path = chunkPath(i);
            List<Integer> numbers = new ArrayList<>();
            // The reader is closed before the same file is reopened for writing.
            try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    numbers.add(Integer.parseInt(line));
                }
            }
            Collections.sort(numbers);
            writeNumbers(numbers, path);
        }
    }

    /**
     * Writes the given numbers to {@code path}, one per line, replacing any
     * existing content of that file.
     *
     * @param numbers the values to write, in iteration order
     * @param path the file to (over)write
     * @throws IOException if the file cannot be written
     */
    public static void numbersWrite(LinkedList<Integer> numbers, String path) throws IOException {
        writeNumbers(numbers, path);
    }

    /**
     * Merges the sorted chunk files into {@link #file1} with a k-way merge.
     *
     * <p>The queue holds at most one pending value per chunk. The smallest value
     * is written out and replaced by the next value from the chunk it came from,
     * until every chunk is exhausted.
     *
     * @throws IOException if a chunk cannot be read or the output cannot be written
     * @throws NumberFormatException if a chunk contains a line that is not an integer
     */
    public static void mergeFile() throws Exception {
        List<BufferedReader> readers = new ArrayList<>(CHUNK_COUNT);
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(file1))) {
            for (int i = 0; i < CHUNK_COUNT; i++) {
                readers.add(new BufferedReader(new FileReader(chunkPath(i))));
            }

            PriorityQueue<Obj> queue = new PriorityQueue<>(CHUNK_COUNT, new Obj());
            // Seed the queue with the first (smallest) value of every non-empty chunk.
            for (int i = 0; i < CHUNK_COUNT; i++) {
                offerNext(queue, readers.get(i), i);
            }

            while (!queue.isEmpty()) {
                Obj smallest = queue.poll();
                writer.write(smallest.a + LINE_SEPARATOR);
                // Refill from the chunk that just gave up its head value.
                offerNext(queue, readers.get(smallest.b), smallest.b);
            }
        } finally {
            closeAll(readers);
        }
    }

    /** Returns the path of the chunk file with the given index. */
    private static String chunkPath(int index) {
        return "E:" + File.separator + "dataTest" + File.separator + "data" + index + ".txt";
    }

    /** Writes each number on its own line to {@code path}, overwriting the file. */
    private static void writeNumbers(List<Integer> numbers, String path) throws IOException {
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(path))) {
            for (Integer number : numbers) {
                writer.write(number + LINE_SEPARATOR);
            }
        }
    }

    /** Reads the next line of a chunk, if any, and queues it tagged with the chunk index. */
    private static void offerNext(PriorityQueue<Obj> queue, BufferedReader reader, int chunkIndex)
            throws IOException {
        String line = reader.readLine();
        if (line != null) {
            queue.add(new Obj(Integer.parseInt(line), chunkIndex));
        }
    }

    /**
     * Closes every resource, continuing past failures. The first failure is
     * rethrown afterwards with any later ones attached as suppressed.
     */
    private static void closeAll(List<? extends Closeable> resources) throws IOException {
        IOException failure = null;
        for (Closeable resource : resources) {
            try {
                resource.close();
            } catch (IOException e) {
                if (failure == null) {
                    failure = e;
                } else {
                    failure.addSuppressed(e);
                }
            }
        }
        if (failure != null) {
            throw failure;
        }
    }
}

/**
 * A value read from a chunk file together with the index of that chunk.
 * Also serves as its own comparator (ascending by value) for the merge queue.
 */
class Obj implements Comparator<Obj> {
    /** {@code a} is the value; {@code b} is the index of the chunk it was read from. */
    int a, b;

    /** Creates an instance intended to be used only as a comparator. */
    Obj() {}

    Obj(int a, int b) {
        this.a = a;
        this.b = b;
    }

    /** Orders by value; uses {@code Integer.compare} because subtraction can overflow. */
    @Override
    public int compare(Obj o1, Obj o2) {
        return Integer.compare(o1.a, o2.a);
    }
}
posted on 2014-04-14 00:30 yaoboyyao 阅读(2760) 评论(0) 编辑 收藏 举报