最完整的合并相交集合的Java代码(并查集)

这个是自己写的算法,如果有大牛,麻烦帮我并行化。初学者则可以学到不少东西。

产生测试用例

import java.io.*;
import java.util.Random;

/**
 * Generates the random test-case file read by the set-merging program.
 *
 * <p>Every output line describes one set: zero or more words separated (and
 * followed) by a single space, each word consisting of upper-case ASCII
 * letters. A line may be empty.
 */
public class ProduceCase {

    /** File the generated test case is written to. */
    private static final String OUTPUT_PATH = "D:\\YounG\\TestCases\\MySet\\test.txt";

    /** Number of lines (sets) to generate. */
    private static final int LINE_COUNT = 500000;

    /** Upper bound (inclusive) on the number of words drawn for one line. */
    private static final int MAX_WORDS_PER_LINE = 20;

    /** Upper bound (inclusive) on the length of one generated word. */
    private static final int MAX_WORD_LENGTH = 9;

    /** One shared generator instead of a fresh instance per line and per word. */
    private static final Random RANDOM = new Random();

    /**
     * Writes {@code LINE_COUNT} random lines to {@code OUTPUT_PATH}.
     *
     * @param argvs ignored
     */
    public static void main(String[] argvs) {
        // try-with-resources flushes and closes the writer on every path and
        // cannot hit a null reference when the file could not be opened.
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(OUTPUT_PATH))) {
            for (int i = 0; i < LINE_COUNT; i++) {
                StringBuilder line = new StringBuilder();
                // Draw the word count once per line; re-drawing it in the loop
                // condition would skew the distribution towards short lines.
                int wordCount = RANDOM.nextInt(MAX_WORDS_PER_LINE + 1);
                for (int j = 0; j < wordCount; j++) {
                    String word = getRandomString();
                    if (word.isEmpty()) {
                        continue; // a zero-length word would only add a stray space
                    }
                    line.append(word).append(' ');
                }
                writer.write(line.toString());
                writer.newLine();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Builds a random word of upper-case ASCII letters.
     *
     * @return a string of 0 to 9 characters, each in the range {@code 'A'..'Z'};
     *         may be empty
     */
    public static String getRandomString() {
        int length = RANDOM.nextInt(MAX_WORD_LENGTH + 1);
        StringBuilder sb = new StringBuilder(length);
        for (int i = 0; i < length; i++) {
            sb.append((char) ('A' + RANDOM.nextInt(26)));
        }
        return sb.toString();
    }
}

  合并集合:

import java.io.*;
import java.util.*;


/**
 * Merges every group of input sets that (transitively) share an element.
 *
 * <p>Input: one set per line, elements separated by whitespace. Output: one
 * merged set per line in {@link java.util.AbstractCollection#toString()}
 * format, e.g. {@code [A, B, C]}. Merged groups are written in the order of
 * the first input set that belongs to them.
 */
public class MapTest {

    /** Input file: one whitespace-separated set per line. */
    private static final String INPUT_PATH = "D:\\YounG\\TestCases\\MySet\\test.txt";

    /** Output file: one merged set per line. */
    private static final String OUTPUT_PATH = "D:\\YounG\\TestCases\\MySet\\testACK.txt";

    /** Initial capacity hint for the list of input sets. */
    private static final int EXPECTED_SET_COUNT = 500000;

    /**
     * Reads the input sets, merges the intersecting ones, prints the time the
     * merge took and writes the result.
     *
     * <p>If reading or writing fails the stack trace is printed; a read
     * failure means no output file is written from partial data.
     *
     * @param argvs ignored
     */
    public static void main(String[] argvs) {
        try {
            List<Set<String>> mySets = readSets(INPUT_PATH);

            long startTime = System.currentTimeMillis();
            mergeIntersecting(mySets);
            long endTime = System.currentTimeMillis();
            System.out.println("程序运行时间: " + (endTime - startTime) + "ms");

            writeSets(mySets, OUTPUT_PATH);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Parses each line of {@code path} into a set of its whitespace-separated tokens, skipping empty lines. */
    private static List<Set<String>> readSets(String path) throws IOException {
        List<Set<String>> sets = new ArrayList<>(EXPECTED_SET_COUNT);
        try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
            String line;
            while ((line = reader.readLine()) != null) {
                Set<String> set = new HashSet<>();
                try (Scanner scanner = new Scanner(line)) {
                    while (scanner.hasNext()) {
                        set.add(scanner.next());
                    }
                }
                // Empty sets can never intersect anything and are never
                // written out, so they are not kept at all.
                if (!set.isEmpty()) {
                    sets.add(set);
                }
            }
        }
        return sets;
    }

    /**
     * Merges, in place, all sets of {@code sets} that are connected through
     * shared elements.
     *
     * <p>After the call each group of connected sets is stored as one union
     * at the smallest list index of the group; every other set of that group
     * is left empty. The list size does not change.
     *
     * @param sets mutable, distinct set instances; must not contain
     *             {@code null}
     */
    static void mergeIntersecting(List<Set<String>> sets) {
        int count = sets.size();

        // parent[i] == i marks i as the representative of its group.
        int[] parent = new int[count];
        for (int i = 0; i < count; i++) {
            parent[i] = i;
        }

        // Phase 1: only record which sets belong together. The sets are not
        // modified here, so they can be iterated directly, and no pass over
        // all sets is needed when two groups are joined.
        Map<String, Integer> firstOwner = new HashMap<>();
        for (int setId = 0; setId < count; setId++) {
            for (String elem : sets.get(setId)) {
                Integer owner = firstOwner.get(elem);
                if (owner == null) {
                    firstOwner.put(elem, setId);
                } else {
                    union(parent, owner, setId);
                }
            }
        }

        // Phase 2: move every set's elements into its representative once.
        for (int setId = 0; setId < count; setId++) {
            int root = find(parent, setId);
            if (root != setId) {
                Set<String> absorbed = sets.get(setId);
                sets.get(root).addAll(absorbed);
                absorbed.clear();
            }
        }
    }

    /** Returns the representative of {@code x}, shortening the path to it along the way. */
    private static int find(int[] parent, int x) {
        while (parent[x] != x) {
            parent[x] = parent[parent[x]];
            x = parent[x];
        }
        return x;
    }

    /** Joins the groups of {@code a} and {@code b}; the smaller index stays the representative. */
    private static void union(int[] parent, int a, int b) {
        int rootA = find(parent, a);
        int rootB = find(parent, b);
        if (rootA == rootB) {
            return;
        }
        if (rootA < rootB) {
            parent[rootB] = rootA;
        } else {
            parent[rootA] = rootB;
        }
    }

    /** Writes every non-empty set of {@code sets} to {@code path}, one per line. */
    private static void writeSets(List<Set<String>> sets, String path) throws IOException {
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(path))) {
            for (Set<String> set : sets) {
                if (!set.isEmpty()) {
                    writer.write(set.toString());
                    writer.newLine();
                }
            }
        }
    }
}

验证输出结果:

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.*;
import java.util.regex.Pattern;

/**
 * Created by Young on 2015/12/25.
 *
 * <p>Validates the merged output file against the original input file:
 * <ol>
 *   <li>no element appears more than once across all merged sets;</li>
 *   <li>no element of the input is missing from the merged output.</li>
 * </ol>
 * Whether two sets were merged without sharing an element is not checked
 * here; that property has to be guaranteed by the merge program itself.
 */
public class Validaty {

    /** Merged output to validate: one set per line, formatted like {@code [A, B, C]}. */
    private static final String ACK_PATH = "D:\\YounG\\TestCases\\MySet\\testACK.txt";

    /** Original input: one whitespace-separated set per line. */
    private static final String INPUT_PATH = "D:\\YounG\\TestCases\\MySet\\test.txt";

    /**
     * Runs both checks and prints the verdict. If either file cannot be read
     * the stack trace is printed and no verdict is given.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Set<String> allSet = new HashSet<>();

        // Pass 1: collect every output element, failing on the first repeat.
        try (BufferedReader reader = new BufferedReader(new FileReader(ACK_PATH))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String repeated = findRepeated(line, allSet);
                if (repeated != null) {
                    System.out.println("Wrong ACK for \"" + repeated + "\" is repeated");
                    return;
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
            return;
        }

        // Pass 2: every input element must have been seen in the output.
        try (BufferedReader reader = new BufferedReader(new FileReader(INPUT_PATH))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String lost = findLost(line, allSet);
                if (lost != null) {
                    System.out.println("Wrong ACK for lost elem " + lost);
                    return;
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
            return; // an unreadable input must not be reported as "Right ACK"
        }

        System.out.println("Right ACK");
    }

    /**
     * Adds the elements of one output line to {@code seen}.
     *
     * @param ackLine one line of the merged output; its first and last
     *                character are dropped (the surrounding brackets) and the
     *                rest is split on {@code ", "}
     * @param seen    elements collected from earlier lines; updated in place
     * @return the first element of the line that was already in {@code seen},
     *         or {@code null} if there is none
     */
    static String findRepeated(String ackLine, Set<String> seen) {
        if (ackLine.length() < 2) {
            return null; // too short to hold the two brackets: nothing to parse
        }
        String body = ackLine.substring(1, ackLine.length() - 1);
        for (String elem : body.split(", ")) {
            if (elem.isEmpty()) {
                continue; // "[]" yields a single empty token
            }
            if (!seen.add(elem)) {
                return elem;
            }
        }
        return null;
    }

    /**
     * Looks for an input element that is missing from the output.
     *
     * @param inputLine one line of the original input, elements separated by
     *                  whitespace
     * @param known     all elements found in the merged output
     * @return the first element of the line not contained in {@code known},
     *         or {@code null} if all are present
     */
    static String findLost(String inputLine, Set<String> known) {
        try (Scanner scanner = new Scanner(inputLine)) {
            while (scanner.hasNext()) {
                String elem = scanner.next();
                if (!known.contains(elem)) {
                    return elem;
                }
            }
        }
        return null;
    }
}

 

posted on 2016-01-04 18:19  1的哲学  阅读(2516)  评论(0)  编辑  收藏  举报

导航