最完整的合并相交集合的Java代码(并查集)
这个是自己写的算法,如果有大牛,麻烦帮我并行化。初学者则可以学到不少东西。
产生测试用例:
import java.io.*; import java.util.Random;

/**
 * Generates the random input file consumed by the set-merging program.
 *
 * <p>Every output line describes one set: zero to twenty elements, each a
 * string of one to nine upper-case ASCII letters, each followed by a single
 * space. A line may be empty, which stands for an empty set.
 */
public class ProduceCase {

    /** Output file used when no path is supplied on the command line. */
    private static final String DEFAULT_OUTPUT_PATH = "D:\\YounG\\TestCases\\MySet\\test.txt";

    /** Number of lines written when no count is supplied on the command line. */
    private static final int DEFAULT_LINE_COUNT = 500000;

    /** Exclusive upper bound for the number of elements drawn per line (0..20). */
    private static final int ELEMENT_COUNT_BOUND = 21;

    /** Exclusive upper bound for the length of one generated string (0..9). */
    private static final int ELEMENT_LENGTH_BOUND = 10;

    /** Number of distinct letters an element may contain ('A'..'Z'). */
    private static final int ALPHABET_SIZE = 26;

    /** One shared generator; creating a fresh Random per line/string is wasteful. */
    private static final Random RANDOM = new Random();

    /**
     * Writes the test-case file.
     *
     * @param argvs optional arguments: {@code argvs[0]} is the output path and
     *              {@code argvs[1]} the number of lines to write; either may be
     *              omitted, in which case the built-in default is used
     * @throws NumberFormatException if {@code argvs[1]} is present but is not an integer
     */
    public static void main(String[] argvs) {
        String outputPath = DEFAULT_OUTPUT_PATH;
        int lineCount = DEFAULT_LINE_COUNT;
        if (argvs != null && argvs.length > 0) {
            outputPath = argvs[0];
        }
        if (argvs != null && argvs.length > 1) {
            lineCount = Integer.parseInt(argvs[1]);
        }

        // try-with-resources flushes and closes the writer on every path and,
        // unlike a manual finally block, cannot dereference a null writer when
        // the file fails to open.
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(outputPath))) {
            for (int i = 0; i < lineCount; i++) {
                writer.write(randomLine());
                writer.newLine();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Builds one line: a random number of non-empty elements, each followed by a space. */
    private static String randomLine() {
        StringBuilder line = new StringBuilder();
        // Draw the element count once. Re-drawing it inside the loop condition
        // changes the bound on every iteration and biases lines towards being short.
        int elementCount = RANDOM.nextInt(ELEMENT_COUNT_BOUND);
        for (int j = 0; j < elementCount; j++) {
            String element = getRandomString();
            if (element.isEmpty()) {
                // An empty element would only contribute a stray separator.
                continue;
            }
            line.append(element).append(' ');
        }
        return line.toString();
    }

    /**
     * Returns a random string of upper-case ASCII letters.
     *
     * @return a string of length 0 to 9 (inclusive) made only of 'A'..'Z'
     */
    public static String getRandomString( ) {
        int length = RANDOM.nextInt(ELEMENT_LENGTH_BOUND);
        StringBuilder sb = new StringBuilder(length);
        for (int i = 0; i < length; i++) {
            sb.append((char) ('A' + RANDOM.nextInt(ALPHABET_SIZE)));
        }
        return sb.toString();
    }
}
合并集合:
import java.io.*; import java.util.*;

/**
 * Merges every group of sets that share at least one element.
 *
 * <p>The input file holds one set per line, elements separated by whitespace.
 * Sets that intersect, directly or through a chain of other sets, are combined
 * into one. Each resulting non-empty set is written on its own line using the
 * {@code [a, b, c]} format produced by {@link AbstractCollection#toString()}.
 */
public class MapTest {

    /** Input file used when no path is supplied on the command line. */
    private static final String DEFAULT_INPUT_PATH = "D:\\YounG\\TestCases\\MySet\\test.txt";

    /** Output file used when no path is supplied on the command line. */
    private static final String DEFAULT_OUTPUT_PATH = "D:\\YounG\\TestCases\\MySet\\testACK.txt";

    /**
     * Reads the sets, merges the intersecting ones, reports the merge time on
     * standard output and writes the merged sets.
     *
     * @param argvs optional arguments: {@code argvs[0]} is the input path and
     *              {@code argvs[1]} the output path; either may be omitted, in
     *              which case the built-in default is used
     */
    public static void main(String[] argvs) {
        String inputPath = DEFAULT_INPUT_PATH;
        String outputPath = DEFAULT_OUTPUT_PATH;
        if (argvs != null && argvs.length > 0) {
            inputPath = argvs[0];
        }
        if (argvs != null && argvs.length > 1) {
            outputPath = argvs[1];
        }

        List<Set<String>> mySets = readSets(inputPath);

        long startTime = System.currentTimeMillis();
        mergeIntersectingSets(mySets);
        long endTime = System.currentTimeMillis();
        System.out.println("程序运行时间: " + (endTime - startTime) + "ms");

        writeSets(mySets, outputPath);
    }

    /**
     * Reads one set per line. Empty lines are skipped because an empty set can
     * neither intersect anything nor appear in the output.
     * On an I/O error the stack trace is printed and the sets read so far are returned.
     */
    private static List<Set<String>> readSets(String inputPath) {
        List<Set<String>> sets = new ArrayList<>();
        try (BufferedReader reader = new BufferedReader(new FileReader(inputPath))) {
            String line;
            while ((line = reader.readLine()) != null) {
                Set<String> elements = new HashSet<>();
                try (Scanner scanner = new Scanner(line)) {
                    while (scanner.hasNext()) {
                        elements.add(scanner.next());
                    }
                }
                if (!elements.isEmpty()) {
                    sets.add(elements);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return sets;
    }

    /**
     * Merges intersecting sets in place. After the call, each group of connected
     * sets lives in the member with the lowest index and the other members of
     * the group are empty.
     *
     * <p>A union-find structure over set indices replaces a full scan of the
     * index table on every merge, so the running time is close to linear in the
     * total number of elements instead of quadratic in the number of sets.
     */
    private static void mergeIntersectingSets(List<Set<String>> sets) {
        int setCount = sets.size();
        int[] parent = new int[setCount];
        for (int i = 0; i < setCount; i++) {
            parent[i] = i;
        }

        // Element -> index of the first set in which it was seen.
        Map<String, Integer> firstOwner = new HashMap<>();
        for (int setId = 0; setId < setCount; setId++) {
            // No set is modified in this phase, so direct iteration is safe.
            for (String element : sets.get(setId)) {
                Integer owner = firstOwner.get(element);
                if (owner == null) {
                    firstOwner.put(element, setId);
                } else {
                    union(parent, owner, setId);
                }
            }
        }

        // The root of a group is always its lowest index, so when a non-root set
        // is visited its root has not been emptied (roots are never cleared).
        for (int setId = 0; setId < setCount; setId++) {
            int root = find(parent, setId);
            if (root != setId) {
                sets.get(root).addAll(sets.get(setId));
                sets.get(setId).clear();
            }
        }
    }

    /** Returns the root of {@code id}'s group, compressing the path along the way. */
    private static int find(int[] parent, int id) {
        int root = id;
        while (parent[root] != root) {
            root = parent[root];
        }
        while (parent[id] != root) {
            int next = parent[id];
            parent[id] = root;
            id = next;
        }
        return root;
    }

    /** Joins the groups of {@code a} and {@code b}; the lower root index stays the root. */
    private static void union(int[] parent, int a, int b) {
        int rootA = find(parent, a);
        int rootB = find(parent, b);
        if (rootA == rootB) {
            return;
        }
        if (rootA < rootB) {
            parent[rootB] = rootA;
        } else {
            parent[rootA] = rootB;
        }
    }

    /**
     * Writes every non-empty set on its own line in {@code toString()} format.
     * On an I/O error the stack trace is printed.
     */
    private static void writeSets(List<Set<String>> sets, String outputPath) {
        // try-with-resources closes the writer on every path and cannot hit a
        // null writer when the file fails to open.
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(outputPath))) {
            for (Set<String> set : sets) {
                if (!set.isEmpty()) {
                    writer.write(set.toString());
                    writer.newLine();
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
验证输出结果:
import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.util.*; import java.util.regex.Pattern;

/**
 * Created by Young on 2015/12/25.
 *
 * <p>Validates the output of the set-merging program against its input.
 * A correct result satisfies:
 * <ol>
 *   <li>no element appears in more than one output set (checked with a hash set);</li>
 *   <li>no element of the input is missing from the output;</li>
 *   <li>no illegal merge happened, i.e. sets without a common element were not
 *       combined. This property is NOT checked here; the original author
 *       relies on the merge program to guarantee it.</li>
 * </ol>
 */
public class Validaty {

    /** Merged-output file used when no path is supplied on the command line. */
    private static final String DEFAULT_ACK_PATH = "D:\\YounG\\TestCases\\MySet\\testACK.txt";

    /** Original input file used when no path is supplied on the command line. */
    private static final String DEFAULT_INPUT_PATH = "D:\\YounG\\TestCases\\MySet\\test.txt";

    /**
     * Runs checks 1 and 2 and prints the verdict on standard output:
     * {@code Right ACK} on success, otherwise a {@code Wrong ACK ...} line
     * naming the first offending element. If either file cannot be read the
     * stack trace is printed and no verdict is given, because the validation
     * could not actually be performed.
     *
     * @param args optional arguments: {@code args[0]} is the merged-output path
     *             and {@code args[1]} the original input path; either may be
     *             omitted, in which case the built-in default is used
     */
    public static void main(String[] args) {
        String ackPath = DEFAULT_ACK_PATH;
        String inputPath = DEFAULT_INPUT_PATH;
        if (args != null && args.length > 0) {
            ackPath = args[0];
        }
        if (args != null && args.length > 1) {
            inputPath = args[1];
        }

        Set<String> allSet = new HashSet<>();
        if (!collectMergedElements(ackPath, allSet)) {
            return;
        }
        if (!coversAllInputElements(inputPath, allSet)) {
            return;
        }
        System.out.println("Right ACK");
    }

    /**
     * Reads the merged output (one {@code [a, b, c]} line per set) into
     * {@code allSet} and reports the first element that occurs twice.
     *
     * @return {@code true} if the file was read completely without a repeated element
     */
    private static boolean collectMergedElements(String ackPath, Set<String> allSet) {
        try (BufferedReader reader = new BufferedReader(new FileReader(ackPath))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.length() < 2) {
                    // Too short to hold the surrounding brackets (e.g. a blank
                    // line); stripping them would throw StringIndexOutOfBoundsException.
                    continue;
                }
                String[] elements = line.substring(1, line.length() - 1).split(", ");
                for (String element : elements) {
                    if (element.isEmpty()) {
                        // As before, an empty token ends the processing of this line.
                        break;
                    }
                    // add() returns false when the element is already present.
                    if (!allSet.add(element)) {
                        System.out.println("Wrong ACK for \"" + element + "\" is repeated");
                        return false;
                    }
                }
            }
            return true;
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Checks that every whitespace-separated element of the original input is
     * contained in {@code allSet} and reports the first one that is missing.
     *
     * @return {@code true} if the file was read completely and nothing is missing
     */
    private static boolean coversAllInputElements(String inputPath, Set<String> allSet) {
        // try-with-resources closes this reader (and the per-line scanner) on
        // every path, including the early return on a missing element.
        try (BufferedReader reader = new BufferedReader(new FileReader(inputPath))) {
            String line;
            while ((line = reader.readLine()) != null) {
                try (Scanner scanner = new Scanner(line)) {
                    while (scanner.hasNext()) {
                        String element = scanner.next();
                        if (!allSet.contains(element)) {
                            System.out.println("Wrong ACK for lost elem " + element);
                            return false;
                        }
                    }
                }
            }
            return true;
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
    }
}