统计两组数据的交集和差集(相对补集),即新旧数据的差异比较算法,每组数据只遍历一次


旧数据A = {}
新数据B = {}

新增项:B - A = { x | x∈B且x∉A}
删除项:A - B = { x | x∈A且x∉B}
共有项:B ∩ A = { x | x∈B且x∈A}

 

import java.io.BufferedReader;
import java.io.Closeable;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

public class RecordDiff {
    /**
     * Demo entry point: compares a hard-coded old and new key set with both diff implementations
     * and prints the shared, added and removed items.
     *
     * @param args
     *          ignored
     */
    public static void main(String[] args) {

        // The sample items stand for auto-increment integer primary keys: unique and sorted ascending.
        Integer[] as = new Integer[] { 1, 2, 3, 8, 9, 12 };
        Integer[] bs = new Integer[] { 1, 2, 3, 9, 10, 12, 18, 22 };
        System.out.println("旧项:" + Arrays.toString(as));
        System.out.println("新项:" + Arrays.toString(bs));
        System.out.println("====================");
        List<Integer> aList = Arrays.asList(as);// old data
        List<Integer> bList = Arrays.asList(bs);// new data

        // Multi-pass variant: first argument is the old data, second the new data.
        Diff<Integer> diff1 = Diff.difference(aList, bList);

        System.out.println("都有项:" + Arrays.toString(diff1.unionList.toArray()));
        System.out.println("新增项:" + Arrays.toString(diff1.addedList.toArray()));
        System.out.println("删除项:" + Arrays.toString(diff1.removedList.toArray()));
        System.out.println("====================");

        // Single-pass variant writing into caller-supplied result lists.
        List<Integer> fromOnlyList = new ArrayList<Integer>();
        List<Integer> againstOnlyList = new ArrayList<Integer>();
        List<Integer> bothList = new ArrayList<Integer>();
        Diff.diff(aList, bList, fromOnlyList, againstOnlyList, bothList);
        System.out.println("都有项:" + Arrays.toString(bothList.toArray()));
        System.out.println("新增项:" + Arrays.toString(againstOnlyList.toArray()));
        System.out.println("删除项:" + Arrays.toString(fromOnlyList.toArray()));
    }

    /**
     * Result holder and algorithms for comparing an old and a new data set: items present in both,
     * items added by the new data, and items removed from the old data.
     * 
     * @author fangss
     *
     * @param <T>
     *          item type
     */
    public static class Diff<T extends Comparable<T>> {
        /** Items present in both inputs. Despite the field name this is the intersection. */
        List<T> unionList;
        /** Items present only in the new data. */
        List<T> addedList;
        /** Items present only in the old data. */
        List<T> removedList;

        /**
         * Wraps three result lists; the lists are stored as given, not copied.
         * 
         * @param unionList
         *          receives the items present in both inputs
         * @param addedList
         *          receives the added items
         * @param removedList
         *          receives the removed items
         */
        public Diff(List<T> unionList, List<T> addedList, List<T> removedList) {
            super();
            this.unionList = unionList;
            this.addedList = addedList;
            this.removedList = removedList;
        }

        /**
         * Single-pass merge diff: each input is iterated exactly once and only forwards, so the
         * same scheme also suits forward-only sources such as lines read from a file.
         * <p>
         * At most one item per side is held back ("pending") while the other side catches up. When
         * one side runs out, the pending item of the other side and everything still unread on it
         * are flushed to that side's "only" list, so no item is lost.
         * 
         * @param fromList
         *          sorted ascending without null items, usually the old data
         * @param againstList
         *          sorted ascending without null items, usually the new data
         * @param fromOnlyList
         *          receives the items found only in {@code fromList}
         * @param againstOnlyList
         *          receives the items found only in {@code againstList}
         * @param bothList
         *          receives the items found in both lists
         */
        public static <T extends Comparable<T>> void diff(List<T> fromList, List<T> againstList, List<T> fromOnlyList,
                List<T> againstOnlyList, List<T> bothList) {
            Iterator<T> fromIterator = fromList.iterator();
            Iterator<T> againstIterator = againstList.iterator();

            T from = null, against = null;
            // A pending flag means the variable holds an item that has not been put in any result list yet.
            boolean fromPending = false, againstPending = false;
            while (true) {
                if (!fromPending && fromIterator.hasNext()) {
                    from = fromIterator.next();
                    fromPending = true;
                }
                if (!againstPending && againstIterator.hasNext()) {
                    against = againstIterator.next();
                    againstPending = true;
                }
                if (!fromPending || !againstPending) {
                    // One side is exhausted; whatever is left on the other side is unique to it.
                    break;
                }

                int cmpResult = from.compareTo(against);
                if (0 == cmpResult) {
                    bothList.add(from);
                    fromPending = false;
                    againstPending = false;
                } else if (0 > cmpResult) {
                    // from < against: from cannot appear later in againstList, so it is from-only.
                    fromOnlyList.add(from);
                    fromPending = false;
                } else {
                    // from > against: against cannot appear later in fromList, so it is against-only.
                    againstOnlyList.add(against);
                    againstPending = false;
                }
            }
            flushRemaining(fromPending, from, fromIterator, fromOnlyList);
            flushRemaining(againstPending, against, againstIterator, againstOnlyList);
        }

        /** Appends the pending item (if any) and then every unread item of {@code rest} to {@code sink}. */
        private static <T> void flushRemaining(boolean hasPending, T pending, Iterator<T> rest, List<T> sink) {
            if (hasPending) {
                sink.add(pending);
            }
            while (rest.hasNext()) {
                sink.add(rest.next());
            }
        }

        /**
         * Reports whether {@code hasNext} has another item. If it has none, the rival side is
         * flushed into {@code list}: first {@code rivalCurVal} when it is non-null, then every
         * remaining item of {@code rival}.
         * 
         * @param hasNext
         *          iterator to test
         * @param rivalCurVal
         *          rival item not yet stored in any result list, or null when there is none
         * @param rival
         *          the other iterator, positioned at its unread remainder
         * @param list
         *          result list receiving the rival's items
         * @return true if {@code hasNext} has another item, false after the rival has been flushed
         */
        public static <T extends Comparable<T>> boolean hasNextOrExhaustRival(Iterator<T> hasNext, T rivalCurVal,
                Iterator<T> rival, List<T> list) {
            if (hasNext.hasNext()) {
                return true;
            }
            if (null != rivalCurVal) {
                list.add(rivalCurVal);
            }
            while (rival.hasNext()) {
                list.add(rival.next());
            }
            return false;
        }

        /**
         * Moves every item that occurs in both {@code addedList} and {@code removedList} into
         * {@code unionList}, removing it from the other two lists.
         * 
         * @param unionList
         *          receives the shared items
         * @param addedList
         *          initially all new items; on return only the added ones
         * @param removedList
         *          initially all old items; on return only the removed ones
         */
        private static <T> void innerDifference(List<T> unionList, List<T> addedList, List<T> removedList) {
            for (Iterator<T> iterator = removedList.iterator(); iterator.hasNext();) {
                T item = iterator.next();
                if (addedList.contains(item)) {
                    unionList.add(item);
                    // Remove through the iterator to avoid a ConcurrentModificationException.
                    iterator.remove();
                    addedList.remove(item);
                }
            }
        }

        /**
         * Multi-pass diff that returns a new {@link Diff}; the inputs are copied, not modified.
         * <p>
         * Argument order is old first, new second: the first list seeds the removed items and the
         * second list seeds the added items.
         * 
         * @param oldList
         *          old data
         * @param newList
         *          new data
         * @return the shared, added and removed items
         */
        public static <T extends Comparable<T>> Diff<T> difference(List<T> oldList, List<T> newList) {
            List<T> unionList = new ArrayList<T>();
            List<T> addedList = new ArrayList<T>(newList);
            List<T> removedList = new ArrayList<T>(oldList);
            innerDifference(unionList, addedList, removedList);
            return new Diff<T>(unionList, addedList, removedList);
        }

        /**
         * Multi-pass diff writing into caller-supplied lists.
         * <p>
         * Argument order is new first, old second, which is the reverse of
         * {@link #difference(List, List)}.
         * 
         * @param cursorList
         *          new data
         * @param baseList
         *          old data
         * @param unionList
         *          receives the shared items
         * @param addedList
         *          receives the items only in {@code cursorList}
         * @param removedList
         *          receives the items only in {@code baseList}
         */
        public static <T extends Comparable<T>> void difference(List<T> cursorList, List<T> baseList, List<T> unionList,
                List<T> addedList, List<T> removedList) {
            addedList.addAll(cursorList);
            removedList.addAll(baseList);
            innerDifference(unionList, addedList, removedList);
        }

    }

    /**
     * Unfinished file-based diff. It currently only opens both input files and closes them again;
     * no comparison is performed and {@code resultFilePath} is not used.
     * 
     * @param aFilePath
     *          path of the first input file
     * @param bFilePath
     *          path of the second input file
     * @param resultFilePath
     *          currently unused
     * @return always null
     * @throws FileNotFoundException
     *           if either input file cannot be opened
     */
    @SuppressWarnings("rawtypes")
    public List diff(String aFilePath, String bFilePath, String resultFilePath) throws FileNotFoundException {
        BufferedReader aReader = null, bReader = null;
        try {
            aReader = new BufferedReader(new FileReader(aFilePath));
            bReader = new BufferedReader(new FileReader(bFilePath));
            // TODO: compare the two files line by line; not implemented yet.
        } finally {
            closeQuietly(aReader);
            closeQuietly(bReader);
        }
        return null;
    }

    /**
     * Closes {@code c} if it is non-null, ignoring any {@link IOException} raised by the close.
     * 
     * @param c
     *          resource to close, may be null
     * @return always null
     */
    public static <T extends Closeable> T closeQuietly(T c) {
        if (null != c) {
            try {
                c.close();
            } catch (IOException ignored) {
                // Best-effort close: a failure here is deliberately not reported.
            }
        }
        return null;
    }
}

 如下情况:

        Integer[] as = new Integer[] { 1, 2, 3, 6, 12 };
        Integer[] bs = new Integer[] { 1, 2, 3, 8, 10, 22, 26 };

输出:

旧项:[1, 2, 3, 6, 12]
新项:[1, 2, 3, 8, 10, 22, 26]
都有项:[1, 2, 3]
新增项:[8, 10, 26]
删除项:[6, 12]

结果有误:新增项中漏掉了 22。旧数据先遍历完时,新数据当前持有的、尚未入结果集的那一项(22)被丢弃,只追加了其后的剩余项(26)。

 

当只移动其中一组时,该组取出的值都小于另一组(不移动的那组)当前停留位置上的值;把这个尚未入结果集的较大值记为 base(即下面代码中的 baseBigger)。

 

修改后代码(diff方法):

/**
     * Single-pass diff of two ascending lists; each list is iterated once and only forwards, which
     * suits sources that cannot be rewound, such as lines read from a file.
     * 
     * @param fromList
     *          sorted ascending, usually the old data; the list to compare from
     * @param againstList
     *          sorted ascending, usually the new data; the list to compare against
     * @param fromOnlyList
     *          receives the items found only in {@code fromList}
     * @param againstOnlyList
     *          receives the items found only in {@code againstList}
     * @param bothList
     *          receives the items found in both lists
     */
    public static <T extends Comparable<T>> void diff(List<T> fromList, List<T> againstList, List<T> fromOnlyList,
            List<T> againstOnlyList, List<T> bothList) {
        final Iterator<T> fromIterator = fromList.iterator();
        final Iterator<T> againstIterator = againstList.iterator();

        // Which side(s) must step forward in the next round; both step at the start and after a match.
        boolean stepFrom = true;
        boolean stepAgainst = true;
        // The larger item of the last comparison: it belongs to the side that stays put and has not
        // been stored in any result list yet. Null after a match, when nothing is held back.
        T parked = null;
        T from = null;
        T against = null;

        for (;;) {
            // Check before stepping: if a side that must step is exhausted, the helper flushes the
            // parked item and the rest of the other side into that side's result list.
            if (stepFrom && !hasNextOrExhaustRival(fromIterator, parked, againstIterator, againstOnlyList)) {
                return;
            }
            if (stepAgainst && !hasNextOrExhaustRival(againstIterator, parked, fromIterator, fromOnlyList)) {
                return;
            }

            // Both checks passed, so stepping is safe.
            if (stepFrom) {
                from = fromIterator.next();
            }
            if (stepAgainst) {
                against = againstIterator.next();
            }

            final int order = from.compareTo(against);
            if (order < 0) {
                // from is smaller: record it and keep stepping from; against stays parked.
                parked = against;
                stepFrom = true;
                stepAgainst = false;
                fromOnlyList.add(from);
            } else if (order > 0) {
                // against is smaller: record it and keep stepping against; from stays parked.
                parked = from;
                stepFrom = false;
                stepAgainst = true;
                againstOnlyList.add(against);
            } else {
                // Equal: shared item, both sides step and nothing is parked.
                stepFrom = true;
                stepAgainst = true;
                bothList.add(from);
                parked = null;
            }
        }
    }

    /**
     * Tests whether {@code hasNext} can deliver another item. When it cannot, the rival side is
     * flushed into {@code list}: first {@code rivalCurVal} if it is non-null, then every item still
     * unread on {@code rival}.
     * 
     * @param hasNext
     *          iterator to test for a next item
     * @param rivalCurVal
     *          rival item not yet stored in a result list, or null when there is none
     * @param rival
     *          the other iterator, positioned at its unread remainder
     * @param list
     *          result list receiving the rival's items
     * @return true if {@code hasNext} has a next item; false after the rival has been flushed
     */
    public static <T extends Comparable<T>> boolean hasNextOrExhaustRival(Iterator<T> hasNext, T rivalCurVal, Iterator<T> rival,
            List<T> list) {
        final boolean more = hasNext.hasNext();
        if (!more) {
            if (rivalCurVal != null) {
                list.add(rivalCurVal);
            }
            for (; rival.hasNext();) {
                list.add(rival.next());
            }
        }
        return more;
    }

旧项:[1, 2, 3, 6, 12]
新项:[1, 2, 3, 8, 10, 22, 26]
都有项:[1, 2, 3]
新增项:[8, 10, 22, 26]
删除项:[6, 12]

    public static <T extends Comparable<T>> void diff(List<T> fromList, List<T> againstList, List<T> fromOnlyList,
            List<T> againstOnlyList, List<T> bothList) {
        final Iterator<T> fromIterator = fromList.iterator();
        final Iterator<T> againstIterator = againstList.iterator();
        // A null slot means that side must fetch its next item; a non-null slot holds an item that
        // has not been stored in any result list yet. Both slots start out null.
        T fromItem = null;
        T againstItem = null;
        for (;;) {
            final boolean fetchFrom = fromItem == null;
            final boolean fetchAgainst = againstItem == null;

            // Look before stepping. If the side that must fetch is exhausted, the helper flushes the
            // other side's held item (if any) plus its unread remainder, and the diff is complete.
            if (fetchFrom && !hasNextOrExhaustRival(fromIterator, againstItem, againstIterator, againstOnlyList)) {
                return;
            }
            if (fetchAgainst && !hasNextOrExhaustRival(againstIterator, fromItem, fromIterator, fromOnlyList)) {
                return;
            }

            // Both sides are known to be able to deliver; fetch the missing items.
            if (fetchFrom) {
                fromItem = fromIterator.next();
            }
            if (fetchAgainst) {
                againstItem = againstIterator.next();
            }

            final int order = fromItem.compareTo(againstItem);
            if (order < 0) {
                // from is smaller: it is from-only; against stays held for the next comparison.
                fromOnlyList.add(fromItem);
                fromItem = null;
            } else if (order > 0) {
                // against is smaller: it is against-only; from stays held for the next comparison.
                againstOnlyList.add(againstItem);
                againstItem = null;
            } else {
                // Equal: shared item, both slots are released.
                bothList.add(fromItem);
                fromItem = null;
                againstItem = null;
            }
        }
    }

 Diff

        // Old and new data, both sorted ascending.
        List<Integer> aList = Arrays.asList(1, 2, 3, 6, 12);
        List<Integer> bList = Arrays.asList(1, 2, 3, 8, 10, 22, 26);

        // Callback that orders items by their natural order and prints every reported difference.
        Diff<Integer> diff = new Diff<Integer>() {

            @Override
            public void diff(Integer from, Integer against, int diff) {
                // The third argument is printed as a character.
                System.out.println((char) diff + ": " + from + " vs " + against);
            }

            @Override
            public int compare(Integer left, Integer right) {
                return left.compareTo(right);
            }
        };

        Diff.diff(aList.iterator(), bList.iterator(), diff);

diff

/**
     * Callback-driven single-pass diff of two ascending sequences. The implementation supplies the
     * ordering through {@link Comparator#compare} and receives every item through
     * {@link #diff(Object, Object, int)}.
     * <p>
     * Items should not be null, because the callback uses null to mean "absent on this side".
     * <p>
     * Usage:
     * 
     * <pre>
     * Diff&lt;Integer&gt; diff = new Diff&lt;Integer&gt;() {
     * 
     *     &#064;Override
     *     public int compare(Integer o1, Integer o2) {
     *         return o1.compareTo(o2);
     *     }
     * 
     *     &#064;Override
     *     public void diff(Integer from, Integer against, int diff) {
     *         System.out.println((char) diff + &quot;: &quot; + from + &quot; vs &quot; + against);
     *     }
     * };
     * List&lt;Integer&gt; aList = Arrays.asList(new Integer[] { 1, 2, 3, 6, 12 });// old data
     * List&lt;Integer&gt; bList = Arrays.asList(new Integer[] { 1, 2, 3, 8, 10, 22, 26 });// new data
     * Diff.diff(aList.iterator(), bList.iterator(), diff);
     * </pre>
     * 
     * @param <T>
     *          item type
     */
    public static interface Diff<T> extends Comparator<T> {
        /**
         * Receives one diff event.
         * 
         * @param from
         *          the item from the "from" sequence, or null when the item exists only in "against"
         * @param against
         *          the item from the "against" sequence, or null when the item exists only in "from"
         * @param diff
         *          {@code '*'} for an item in both sequences, {@code '-'} for a from-only item,
         *          {@code '+'} for an against-only item
         */
        public void diff(T from, T against, int diff);

        /** Sign of a comparison result after which only the "from" side advances. */
        static final int MOVE_FROM = -1;
        /** Sign of a comparison result after which only the "against" side advances. */
        static final int MOVE_AGAINST = 1;
        /** Comparison result after which both sides advance. */
        static final int MOVE_FROM_AND_AGAINST = 0;

        /**
         * Walks both iterators once, in step, and reports every item to {@code diffcallback}.
         * <p>
         * Which side advances is decided by the sign of the comparison only, so comparators that
         * return values other than -1, 0 and 1 are handled correctly.
         * 
         * @param fromIterator
         *          ascending items, usually the old data
         * @param againstIterator
         *          ascending items, usually the new data
         * @param diffcallback
         *          supplies the ordering and receives the diff events
         */
        public static <T> void diff(Iterator<T> fromIterator, Iterator<T> againstIterator, Diff<T> diffcallback) {
            T from = null, against = null;
            // The side that does not advance keeps holding an item that has not been reported yet.
            boolean advanceFrom = true, advanceAgainst = true;
            while (true) {
                // Check before advancing: if the side that must advance is exhausted, report the
                // other side's held item (if any) and its unread remainder, then stop.
                if (advanceFrom && !fromIterator.hasNext()) {
                    if (!advanceAgainst) {
                        diffcallback.diff(null, against, '+');
                    }
                    while (againstIterator.hasNext()) {
                        diffcallback.diff(null, againstIterator.next(), '+');
                    }
                    break;
                }
                if (advanceAgainst && !againstIterator.hasNext()) {
                    if (!advanceFrom) {
                        diffcallback.diff(from, null, '-');
                    }
                    while (fromIterator.hasNext()) {
                        diffcallback.diff(fromIterator.next(), null, '-');
                    }
                    break;
                }

                // Both checks passed, so advancing is safe.
                if (advanceFrom) {
                    from = fromIterator.next();
                }
                if (advanceAgainst) {
                    against = againstIterator.next();
                }
                int cmpResult = diffcallback.compare(from, against);

                // The smaller item is reported and its side advances; equal items advance both sides.
                if (0 == cmpResult) {
                    diffcallback.diff(from, against, '*');
                    advanceFrom = true;
                    advanceAgainst = true;
                } else if (0 > cmpResult) {
                    // from < against: against stays held until from catches up or passes it.
                    diffcallback.diff(from, null, '-');
                    advanceFrom = true;
                    advanceAgainst = false;
                } else {
                    // from > against: from stays held until against catches up or passes it.
                    diffcallback.diff(null, against, '+');
                    advanceFrom = false;
                    advanceAgainst = true;
                }
            }
        }
    }

 让"读到 null 才知道结束"的读取方式(如 readLine)具备与 Iterator 相同的迭代行为

/**
 * Adapts a "read the next item" source, similar to {@link BufferedReader#readLine}, to the
 * {@link Iterator} interface by reading one item ahead. Subclasses implement {@link #readNext()}.
 */
    public static abstract class IterateAdapter<T> implements Iterator<T> {
        /** True while {@link #value} holds a look-ahead result that next() has not handed out yet. */
        private boolean hasReadNext;
        /** The item most recently produced by {@link #readNext()}. */
        public T value;
        /** Set by {@link #readNext()}: whether that read produced an item. */
        protected boolean hasNext;

        /**
         * Reads one item ahead if that has not been done yet and reports whether an item is
         * available.
         */
        @Override
        public boolean hasNext() {
            if (!hasReadNext) {
                value = readNext();
                hasReadNext = true;
            }
            return hasNext;
        }

        /**
         * Reads the next item from the underlying source. Implementations must set the protected
         * field {@link #hasNext} on every call, because a null return value alone cannot signal the
         * end when null is a legal item.
         * 
         * @return the next item; the value is ignored when {@link #hasNext} was set to false
         */
        public abstract T readNext();

        /**
         * Returns the next item.
         * 
         * @throws NoSuchElementException
         *           if the source is exhausted, including when a previous hasNext() call already
         *           returned false
         */
        @Override
        public T next() {
            // Go through hasNext() so an exhausted look-ahead is never handed out as an item.
            if (!hasNext()) {
                throw new NoSuchElementException();
            }
            hasReadNext = false;
            return value;
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException("remove");
        }
    }

    /** Usage example: an {@link IterateAdapter} that reads its items from another {@link Iterator}. */
    public static class IterableAdapter<T> extends IterateAdapter<T> {
        private Iterator<T> delegate;

        /**
         * @param iterator
         *          the iterator whose items are passed through
         */
        public IterableAdapter(Iterator<T> iterator) {
            delegate = iterator;
        }

        /** Records in {@code hasNext} whether the wrapped iterator has an item and returns it, or null. */
        @Override
        public T readNext() {
            hasNext = delegate.hasNext();
            if (hasNext) {
                return delegate.next();
            }
            return null;
        }
    }

 

posted @ 2015-08-27 11:05  山岚的一缺  阅读(911)  评论(0编辑  收藏  举报
喜欢
评论
收藏
顶部