统计两组数据的交集和差集(新旧数据的差异比较算法),两组数据各只遍历一次。
旧数据A = {}
新数据B = {}
新增项:B - A = { x | x∈B且x∉A}
删除项:A - B = { x | x∈A且x∉B}
共有项:B ∩ A = { x | x∈B且x∈A}
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

/**
 * Demonstrates two ways of comparing an old and a new sorted data set and splitting the items
 * into "present in both", "added" (new only) and "removed" (old only).
 */
public class RecordDiff {

    /**
     * Runs both comparison strategies on a small sample and prints the three result lists.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Summarise how the new data changed the old data. The sample items stand for values of an
        // auto-increment integer key column: distinct integers, already sorted ascending.
        Integer[] as = new Integer[] { 1, 2, 3, 8, 9, 12 };
        Integer[] bs = new Integer[] { 1, 2, 3, 9, 10, 12, 18, 22 };
        // Alternative input where the new data is a prefix-like subset: { 1, 2, 3, 9 }
        System.out.println("旧项:" + Arrays.toString(as));
        System.out.println("新项:" + Arrays.toString(bs));
        System.out.println("====================");

        List<Integer> aList = Arrays.asList(as); // old data
        List<Integer> bList = Arrays.asList(bs); // new data

        // Strategy 1: copy both lists, then move the common items out (may scan lists repeatedly).
        Diff<Integer> diff1 = Diff.difference(aList, bList);
        System.out.println("都有项:" + Arrays.toString(diff1.unionList.toArray()));
        System.out.println("新增项:" + Arrays.toString(diff1.addedList.toArray()));
        System.out.println("删除项:" + Arrays.toString(diff1.removedList.toArray()));
        System.out.println("====================");

        // Strategy 2: merge-style walk that visits every item of both lists exactly once.
        List<Integer> fromOnlyList = new ArrayList<Integer>();
        List<Integer> againstOnlyList = new ArrayList<Integer>();
        List<Integer> bothList = new ArrayList<Integer>();
        Diff.diff(aList, bList, fromOnlyList, againstOnlyList, bothList);
        System.out.println("都有项:" + Arrays.toString(bothList.toArray()));
        System.out.println("新增项:" + Arrays.toString(againstOnlyList.toArray()));
        System.out.println("删除项:" + Arrays.toString(fromOnlyList.toArray()));
    }

    /**
     * Result holder and algorithms for comparing two data sets: items in both, added items and
     * removed items.
     *
     * @author fangss
     *
     * @param <T> item type
     */
    public static class Diff<T extends Comparable<T>> {
        /** Items present in both inputs (this is the intersection, despite the field name). */
        List<T> unionList;
        /** Items present only in the new data / only in the old data. */
        List<T> addedList, removedList;

        /**
         * @param unionList
         *            buffer that holds the items present in both inputs
         * @param addedList
         *            buffer that holds the added items
         * @param removedList
         *            buffer that holds the removed items
         */
        public Diff(List<T> unionList, List<T> addedList, List<T> removedList) {
            super();
            this.unionList = unionList;
            this.addedList = addedList;
            this.removedList = removedList;
        }

        /**
         * Walks both lists once, in step, always advancing the side that holds the smaller value
         * (both sides on a tie). Suitable for sources that can only be read forward once, such as
         * the lines of a file.
         * <p>
         * Known defect, intentionally left in place because the text following this listing
         * analyses it: when the advancing side runs out while the other side's current value has
         * not been recorded yet, that value is dropped. For example old {1, 2, 3, 6, 12} against
         * new {1, 2, 3, 8, 10, 22, 26} loses 22. The revised {@code diff} shown later in this
         * article fixes this.
         *
         * @param fromList
         *            sorted ascending, usually the old data; the list to compare from
         * @param againstList
         *            sorted ascending, usually the new data; the list to compare against
         * @param fromOnlyList
         *            receives items found only in {@code fromList}
         * @param againstOnlyList
         *            receives items found only in {@code againstList}
         * @param bothList
         *            receives items found in both lists
         */
        public static <T extends Comparable<T>> void diff(List<T> fromList, List<T> againstList,
                List<T> fromOnlyList, List<T> againstOnlyList, List<T> bothList) {
            // 'b' - advance both, 'f' - advance from, 'a' - advance against. Mirrors the comparison
            // result: equal values advance both sides, otherwise the smaller side advances.
            int whoMakeWay = 'b';
            Iterator<T> fromIterator = fromList.iterator();
            Iterator<T> againstIterator = againstList.iterator();
            T from = null, against = null;
            while (true) {
                T fromNext = null;
                if ('a' != whoMakeWay) {
                    if (hasNextOrExhaustRival(fromIterator, null, againstIterator, againstOnlyList)) {
                        from = fromIterator.next();
                        fromNext = from;
                    } else {
                        return;
                    }
                }
                if ('f' != whoMakeWay) {
                    // fromNext is passed so that a value just taken from the "from" side is not
                    // lost when the "against" side turns out to be exhausted.
                    if (hasNextOrExhaustRival(againstIterator, fromNext, fromIterator, fromOnlyList)) {
                        against = againstIterator.next();
                    } else {
                        return;
                    }
                }
                int cmpResult = from.compareTo(against);
                if (0 == cmpResult) {
                    whoMakeWay = 'b';
                    bothList.add(from);
                } else if (0 > cmpResult) {
                    // from < against: keep advancing "from" until it catches up or overtakes.
                    whoMakeWay = 'f';
                    fromOnlyList.add(from);
                } else {
                    // from > against: keep advancing "against" until it catches up or overtakes.
                    whoMakeWay = 'a';
                    againstOnlyList.add(against);
                }
            }
        }

        /**
         * Returns {@code true} when {@code hasNext} has another item. Otherwise appends
         * {@code rivalCurVal} (if not {@code null}) and every remaining item of {@code rival} to
         * {@code list} and returns {@code false}.
         *
         * @param hasNext
         *            iterator to test
         * @param rivalCurVal
         *            a value of the other side to append first, or {@code null} for none
         * @param rival
         *            iterator over the other side's remaining items
         * @param list
         *            receives the other side's items when {@code hasNext} is exhausted
         * @return whether {@code hasNext} has another item
         */
        public static <T extends Comparable<T>> boolean hasNextOrExhaustRival(Iterator<T> hasNext,
                T rivalCurVal, Iterator<T> rival, List<T> list) {
            if (hasNext.hasNext()) {
                return true;
            }
            if (null != rivalCurVal) {
                list.add(rivalCurVal);
            }
            while (rival.hasNext()) {
                list.add(rival.next());
            }
            return false;
        }

        /**
         * Moves every item that occurs in both {@code removedList} and {@code addedList} into
         * {@code unionList}, removing it from the other two. Uses {@link List#contains}, so the
         * lists may be scanned more than once.
         *
         * @param unionList
         *            receives the common items
         * @param addedList
         *            initially all new items; afterwards only the added ones
         * @param removedList
         *            initially all old items; afterwards only the removed ones
         */
        private static <T> void innerDifference(List<T> unionList, List<T> addedList,
                List<T> removedList) {
            for (Iterator<T> iterator = removedList.iterator(); iterator.hasNext();) {
                T item = iterator.next();
                if (addedList.contains(item)) {
                    unionList.add(item);
                    iterator.remove();
                    addedList.remove(item);
                }
            }
        }

        /**
         * Compares two lists and returns the common, added and removed items. The lists may be
         * scanned more than once.
         * <p>
         * Argument order is (old, new); note that this is the opposite of the five-argument
         * overload.
         *
         * @param oldList
         *            the old data; items found only here end up in {@code removedList}
         * @param newList
         *            the new data; items found only here end up in {@code addedList}
         * @return a {@code Diff} holding the three result lists
         */
        public static <T extends Comparable<T>> Diff<T> difference(List<T> oldList, List<T> newList) {
            List<T> unionList = new ArrayList<T>();
            List<T> addedList = new ArrayList<T>(newList);
            List<T> removedList = new ArrayList<T>(oldList);
            innerDifference(unionList, addedList, removedList);
            return new Diff<T>(unionList, addedList, removedList);
        }

        /**
         * Compares two lists and appends the results to caller-supplied buffers. The lists may be
         * scanned more than once. {@code removedList} must support {@link Iterator#remove()}.
         *
         * @param cursorList
         *            the new data
         * @param baseList
         *            the old data
         * @param unionList
         *            receives the items present in both lists
         * @param addedList
         *            receives the items present only in {@code cursorList}
         * @param removedList
         *            receives the items present only in {@code baseList}
         */
        public static <T extends Comparable<T>> void difference(List<T> cursorList, List<T> baseList,
                List<T> unionList, List<T> addedList, List<T> removedList) {
            addedList.addAll(cursorList);
            removedList.addAll(baseList);
            innerDifference(unionList, addedList, removedList);
        }
    }

    /**
     * Unfinished stub for a file-based comparison: it opens both files, reads the first line of
     * the first file, closes both readers and returns {@code null}.
     *
     * @param aFilePath
     *            path of the first (old) file
     * @param bFilePath
     *            path of the second (new) file
     * @param resultFilePath
     *            currently unused
     * @return always {@code null}
     * @throws FileNotFoundException
     *             if either input file cannot be opened
     */
    public List diff(String aFilePath, String bFilePath, String resultFilePath)
            throws FileNotFoundException {
        BufferedReader aReader = null, bReader = null;
        try {
            aReader = new BufferedReader(new FileReader(aFilePath));
            // Open the second file, not the first one a second time.
            bReader = new BufferedReader(new FileReader(bFilePath));
            if (null != aReader.readLine()) {
                // TODO: compare the two files line by line.
            }
        } catch (FileNotFoundException e) {
            // Declared in the signature: let the caller know an input is missing.
            throw e;
        } catch (IOException e) {
            // TODO: decide how read failures should be reported once the comparison exists.
        } finally {
            closeQuietly(aReader);
            closeQuietly(bReader);
        }
        return null;
    }

    /**
     * Closes {@code c} if it is not {@code null}, ignoring any {@link IOException}.
     *
     * @param c
     *            resource to close, may be {@code null}
     * @return always {@code null}, so the call can be used to clear the caller's reference
     */
    public static <T extends Closeable> T closeQuietly(T c) {
        if (null != c) {
            try {
                c.close();
            } catch (IOException ignored) {
                // Best-effort close: there is nothing useful to do if closing fails.
            }
        }
        return null;
    }
}
如下情况:
Integer[] as = new Integer[] { 1, 2, 3, 6, 12 }; Integer[] bs = new Integer[] { 1, 2, 3, 8, 10, 22, 26 };
输出:
旧项:[1, 2, 3, 6, 12]
新项:[1, 2, 3, 8, 10, 22, 26]
都有项:[1, 2, 3]
新增项:[8, 10, 26]
删除项:[6, 12]
结果有误:新增项应为[8, 10, 22, 26],但22丢失了,即新项中还有未加入结果集的剩余值。
原因:当只移动其中一组时,被移动的值始终小于不移动那组当前位置的值(记为base);如果移动的那组先遍历完,base还没有加入结果集就被丢弃了。
修改后代码(diff方法):
/** * 新旧数据列表两个都只遍历一次,适用于数据只能向前滚动一次,如读文件行 <br> * A B 6 1 7 \ 2 8 \ 3 9 \ 5 10 . 6 11 . 10 12 16 17 18 * * @param fromList * 必须有序,且升序,一般是旧数据, The List to compare from * @param againstList * 必须有序,且升序,一般是新数据 A List to compare against * @param fromOnlyList * 补集 * @param againstOnlyList * 补集 * @param bothList * 交集 */ public static <T extends Comparable<T>> void diff(List<T> fromList, List<T> againstList, List<T> fromOnlyList, List<T> againstOnlyList, List<T> bothList) { // 0 - both, 'f' - from, 'a' - against, 和比较结果一致:一样大小都移动,否则谁小谁移动 int whoMakeWay = 'b'; Iterator<T> fromIterator = fromList.iterator(); Iterator<T> againstIterator = againstList.iterator(); // 本次循环的大值,他是不移动那组数据里的值,易知还没入结果集的项,也许为null这时是等于(两个都应该继续移动)时 T baseBigger = null; T from = null, against = null; while (true) { // 预判能不能移动 if ('a' != whoMakeWay && !hasNextOrExhaustRival(fromIterator, baseBigger, againstIterator, againstOnlyList)) {// 'f' or // 'b' break; } if ('f' != whoMakeWay && !hasNextOrExhaustRival(againstIterator, baseBigger, fromIterator, fromOnlyList)) {// 'a' or 'b' break; } // 真正开始移动 if ('a' != whoMakeWay) {// 'f' or 'b' from = fromIterator.next(); } if ('f' != whoMakeWay) {// 'a' or 'b' against = againstIterator.next(); } int cmpResult = from.compareTo(against); // 谁小移动谁,一样就都移动。 // 入结果集的都是较小值或等值,而大值baseBigger在未来比较时如果小了或等了才入, // 故而如果由于某一组数据结束了,就不走以下语句,从而入结果集需要检查这种情况。 if (0 == cmpResult) { whoMakeWay = 'b'; bothList.add(from); baseBigger = null; } else if (0 > cmpResult) { // from < against: fromIterator continue until 持平0或超过1 if ('f' != whoMakeWay) { whoMakeWay = 'f'; baseBigger = against; } fromOnlyList.add(from); } else { // from > against: againstIterator continue until 持平0或超过1 if ('a' != whoMakeWay) { whoMakeWay = 'a'; baseBigger = from; } againstOnlyList.add(against); } } } /** * 如果入参hasNext还有下一项,返回true;否则追加还未入结果集的rivalCurVal以及另一组数据的剩余项到结果集 * * @param hasNext * 带检查是否有下一项 * @param rivalCurVal * 还未入结果集的项 * @param rival * 另一组数据的剩余项开始位置 * @param list * 结果集 * @return */ public static <T extends 
Comparable<T>> boolean hasNextOrExhaustRival(Iterator<T> hasNext, T rivalCurVal, Iterator<T> rival, List<T> list) { if (hasNext.hasNext()) { return true; } if (null != rivalCurVal) { list.add(rivalCurVal); } while (rival.hasNext()) { list.add(rival.next()); } return false; }
旧项:[1, 2, 3, 6, 12]
新项:[1, 2, 3, 8, 10, 22, 26]
都有项:[1, 2, 3]
新增项:[8, 10, 22, 26]
删除项:[6, 12]
public static <T extends Comparable<T>> void diff(List<T> fromList, List<T> againstList, List<T> fromOnlyList, List<T> againstOnlyList, List<T> bothList) { Iterator<T> fromIterator = fromList.iterator(); Iterator<T> againstIterator = againstList.iterator(); // 本次循环的大值就是对方值(本次循环不移动的那组数据,还未入结果集),如果相等则两者都被设置为null,这也是初始值 T from = null, against = null; while (true) { // 预判能不能移动,既然是我该移动,说明我是小值或等值。所以判断有下一个时,除了第一入参其他都是对方相关的参数。 // 在等于时,即两者都移动,from和against中是不存在未入结果集的数据项,两者应该是null; //否则需要考虑未入结果集,即,只有一方移动时却没有下一项而跳出循环,大值没入结果集: if (null == from && !hasNextOrExhaustRival(fromIterator, against, againstIterator, againstOnlyList)) {// 'f' or 'b' break; } if (null == against && !hasNextOrExhaustRival(againstIterator, from, fromIterator, fromOnlyList)) {// 'a' or 'b' break; } // 真正开始移动,获得值 if (null == from) {// 'f' or 'b' from = fromIterator.next(); } if (null == against) {// 'a' or 'b' against = againstIterator.next(); } int cmpResult = from.compareTo(against); // 谁小移动谁,一样就都移动。 // 入结果集的都是较小值或等值,而大值baseBigger在未来比较时如果小了或等了才入, // 故而如果由于某一组数据结束了,就不走以下语句,从而入结果集需要检查这种情况。 if (0 == cmpResult) { bothList.add(from); from = null; against = null; } else if (0 > cmpResult) { // from < against: fromIterator continue until 持平0或超过1 fromOnlyList.add(from); from = null; } else { // from > against: againstIterator continue until 持平0或超过1 againstOnlyList.add(against); against = null; } } }
Diff
Diff<Integer> diff = new Diff<Integer>() { @Override public int compare(Integer o1, Integer o2) { // TODO Auto-generated method stub return o1.compareTo(o2); } @Override public void diff(Integer from, Integer against, int diff) { System.out.println((char) diff + ": " + from + " vs " + against); } }; List<Integer> aList = Arrays.asList(new Integer[] { 1, 2, 3, 6, 12 });// old data List<Integer> bList = Arrays.asList(new Integer[] { 1, 2, 3, 8, 10, 22, 26 });// new data Diff.diff(aList.iterator(), bList.iterator(), diff);
diff
/**
 * Callback-based, single-pass comparison of two ascending sequences. The implementor supplies
 * the ordering (via {@link Comparator#compare}) and receives one {@link #diff(Object, Object, int)}
 * call per item.
 * <p>
 * Usage:
 *
 * <pre>{@code
 * Diff<Integer> diff = new Diff<Integer>() {
 *     public int compare(Integer o1, Integer o2) {
 *         return o1.compareTo(o2);
 *     }
 *
 *     public void diff(Integer from, Integer against, int diff) {
 *         System.out.println((char) diff + ": " + from + " vs " + against);
 *     }
 * };
 * List<Integer> aList = Arrays.asList(new Integer[] { 1, 2, 3, 6, 12 }); // old data
 * List<Integer> bList = Arrays.asList(new Integer[] { 1, 2, 3, 8, 10, 22, 26 }); // new data
 * Diff.diff(aList.iterator(), bList.iterator(), diff);
 * }</pre>
 *
 * @param <T>
 *            item type; items must not be {@code null}, because {@code null} is used internally
 *            to mean "no value pending"
 */
public interface Diff<T> extends Comparator<T> {

    /**
     * Receives one comparison result.
     *
     * @param from
     *            the item from the "from" side, or {@code null} when the item exists only on the
     *            "against" side
     * @param against
     *            the item from the "against" side, or {@code null} when the item exists only on
     *            the "from" side
     * @param diff
     *            {@code '*'} when the item is on both sides, {@code '-'} when it is only on the
     *            "from" side, {@code '+'} when it is only on the "against" side
     */
    void diff(T from, T against, int diff);

    /** Normalised comparison result: "from" was smaller, so only "from" advances next. */
    int MOVE_FROM = -1;
    /** Normalised comparison result: "against" was smaller, so only "against" advances next. */
    int MOVE_AGAINST = 1;
    /** Normalised comparison result: both were equal, so both sides advance next. */
    int MOVE_FROM_AND_AGAINST = 0;

    /**
     * Walks both iterators once, in step, and reports every item to {@code diffcallback}.
     *
     * @param fromIterator
     *            ascending items, usually the old data
     * @param againstIterator
     *            ascending items, usually the new data
     * @param diffcallback
     *            supplies the ordering and receives the results
     */
    static <T> void diff(Iterator<T> fromIterator, Iterator<T> againstIterator, Diff<T> diffcallback) {
        // The value of the side that is standing still is the larger one and has not been
        // reported yet. After a tie both are null, which is also the initial state.
        T from = null, against = null;
        int cmpResult = MOVE_FROM_AND_AGAINST;
        while (true) {
            // Look before stepping. The side that must advance held the smaller (or equal) value,
            // so if it is exhausted the other side's pending value and remainder are flushed.
            if (MOVE_AGAINST != cmpResult && !fromIterator.hasNext()) {
                if (null != against) {
                    diffcallback.diff(null, against, '+');
                }
                while (againstIterator.hasNext()) {
                    diffcallback.diff(null, againstIterator.next(), '+');
                }
                break;
            }
            if (MOVE_FROM != cmpResult && !againstIterator.hasNext()) {
                if (null != from) {
                    diffcallback.diff(from, null, '-');
                }
                while (fromIterator.hasNext()) {
                    diffcallback.diff(fromIterator.next(), null, '-');
                }
                break;
            }
            // Now actually step and fetch the values.
            if (MOVE_AGAINST != cmpResult) {
                from = fromIterator.next();
            }
            if (MOVE_FROM != cmpResult) {
                against = againstIterator.next();
            }
            // A comparator may return any negative or positive number, not just -1 or 1.
            // Normalise the result so the MOVE_* checks above recognise which side is pending;
            // otherwise both sides would advance and the pending value would be lost.
            cmpResult = Integer.signum(diffcallback.compare(from, against));
            if (0 == cmpResult) {
                diffcallback.diff(from, against, '*');
                from = null;
                against = null;
            } else if (0 > cmpResult) {
                // from < against: keep advancing "from" until it catches up or overtakes.
                diffcallback.diff(from, null, '-');
                from = null;
            } else {
                // from > against: keep advancing "against" until it catches up or overtakes.
                diffcallback.diff(null, against, '+');
                against = null;
            }
        }
    }
}
迭代相同行为
/**
 * Adapts a "read the next item" style source, similar to {@link BufferedReader#readLine} which
 * has no way to ask whether another item exists, to the {@link Iterator} interface by reading
 * one item ahead. Rarely needed.
 */
public static abstract class IterateAdapter<T> implements Iterator<T> {
    /** Whether {@link #value} already holds the read-ahead result for the current position. */
    private boolean hasReadNext;
    /** The most recently read item; it follows the cursor as hasNext and next are called. */
    public T value;
    /** Set by {@link #readNext()}: whether the last read actually produced an item. */
    protected boolean hasNext;

    @Override
    public boolean hasNext() {
        if (!hasReadNext) {
            value = readNext();
            hasReadNext = true;
        }
        return hasNext;
    }

    /**
     * Reads the next item from the underlying source. Implementations must set the protected
     * field {@code hasNext} to say whether an item was read; returning {@code null} alone is not
     * a reliable end marker because {@code null} may be a legitimate item.
     *
     * @return the item read, or any value when the source is exhausted
     */
    public abstract T readNext();

    /**
     * Returns the next item.
     *
     * @throws NoSuchElementException
     *             if the source is exhausted, including when {@link #hasNext()} has already
     *             reported {@code false}
     */
    @Override
    public T next() {
        // Go through hasNext() so the end-of-data check is applied whether or not the item was
        // already read ahead, and so an exhausted source is not read again.
        if (!hasNext()) {
            throw new NoSuchElementException();
        }
        hasReadNext = false;
        return value;
    }

    @Override
    public void remove() {
        throw new UnsupportedOperationException("remove");
    }
}

/** Usage example: wraps an ordinary {@link Iterator} in an {@link IterateAdapter}. */
public static class IterableAdapter<T> extends IterateAdapter<T> {
    private final Iterator<T> iterator;

    public IterableAdapter(Iterator<T> iterator) {
        this.iterator = iterator;
    }

    @Override
    public T readNext() {
        // Record availability first, then read only when an item exists.
        return (hasNext = iterator.hasNext()) ? iterator.next() : null;
    }
}