尝试了很多方法都不行，后来在网上看到了这个方法，亲测有效。
原网站：http://blog.csdn.net/b45bobo/article/details/77534819
/* * Diff Match and Patch * * Copyright 2006 Google Inc. * http://code.google.com/p/google-diff-match-patch/ * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.io.UnsupportedEncodingException; import java.net.URLDecoder; import java.net.URLEncoder; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.ListIterator; import java.util.Map; import java.util.Set; import java.util.Stack; import java.util.regex.Matcher; import java.util.regex.Pattern; /* * Functions for diff, match and patch. * Computes the difference between two texts to create a patch. * Applies the patch onto another text, allowing for errors. * * @author fraser@google.com (Neil Fraser) */ /** * Class containing the diff, match and patch methods. * Also contains the behaviour settings. */ public class Diff_match_patch { // Defaults. // Set these on your diff_match_patch instance to override the defaults. /** * Number of seconds to map a diff before giving up (0 for infinity). */ public float Diff_Timeout = 1.0f; /** * Cost of an empty edit operation in terms of edit characters. */ public short Diff_EditCost = 4; /** * The size beyond which the double-ended diff activates. * Double-ending is twice as fast, but less accurate. */ public short Diff_DualThreshold = 32; /** * At what point is no match declared (0.0 = perfection, 1.0 = very loose). 
*/ public float Match_Threshold = 0.5f; /** * How far to search for a match (0 = exact location, 1000+ = broad match). * A match this many characters away from the expected location will add * 1.0 to the score (0.0 is a perfect match). */ public int Match_Distance = 1000; /** * When deleting a large block of text (over ~64 characters), how close does * the contents have to match the expected contents. (0.0 = perfection, * 1.0 = very loose). Note that Match_Threshold controls how closely the * end points of a delete need to match. */ public float Patch_DeleteThreshold = 0.5f; /** * Chunk size for context length. */ public short Patch_Margin = 4; /** * The number of bits in an int. */ private int Match_MaxBits = 32; /** * Internal class for returning results from diff_linesToChars(). * Other less paranoid languages just use a three-element array. */ protected static class LinesToCharsResult { protected String chars1; protected String chars2; protected List<String> lineArray; protected LinesToCharsResult(String chars1, String chars2, List<String> lineArray) { this.chars1 = chars1; this.chars2 = chars2; this.lineArray = lineArray; } } // DIFF FUNCTIONS /** * The data structure representing a diff is a Linked list of Diff objects: * {Diff(Operation.DELETE, "Hello"), Diff(Operation.INSERT, "Goodbye"), * Diff(Operation.EQUAL, " world.")} * which means: delete "Hello", add "Goodbye" and keep " world." */ public enum Operation { DELETE, INSERT, EQUAL } /** * Find the differences between two texts. * Run a faster slightly less optimal diff * This method allows the 'checklines' of diff_main() to be optional. * Most of the time checklines is wanted, so default to true. * @param text1 Old string to be diffed. * @param text2 New string to be diffed. * @return Linked List of Diff objects. */ public LinkedList<Diff> diff_main(String text1, String text2) { return diff_main(text1, text2, true); } /** * Find the differences between two texts. 
Simplifies the problem by * stripping any common prefix or suffix off the texts before diffing. * @param text1 Old string to be diffed. * @param text2 New string to be diffed. * @param checklines Speedup flag. If false, then don't run a * line-level diff first to identify the changed areas. * If true, then run a faster slightly less optimal diff * @return Linked List of Diff objects. */ public LinkedList<Diff> diff_main(String text1, String text2, boolean checklines) { // Check for equality (speedup) LinkedList<Diff> diffs; if (text1.equals(text2)) { diffs = new LinkedList<Diff>(); diffs.add(new Diff(Operation.EQUAL, text1)); return diffs; } // Trim off common prefix (speedup) int commonlength = diff_commonPrefix(text1, text2); String commonprefix = text1.substring(0, commonlength); text1 = text1.substring(commonlength); text2 = text2.substring(commonlength); // Trim off common suffix (speedup) commonlength = diff_commonSuffix(text1, text2); String commonsuffix = text1.substring(text1.length() - commonlength); text1 = text1.substring(0, text1.length() - commonlength); text2 = text2.substring(0, text2.length() - commonlength); // Compute the diff on the middle block diffs = diff_compute(text1, text2, checklines); // Restore the prefix and suffix if (commonprefix.length() != 0) { diffs.addFirst(new Diff(Operation.EQUAL, commonprefix)); } if (commonsuffix.length() != 0) { diffs.addLast(new Diff(Operation.EQUAL, commonsuffix)); } diff_cleanupMerge(diffs); return diffs; } /** * Find the differences between two texts. Assumes that the texts do not * have any common prefix or suffix. * @param text1 Old string to be diffed. * @param text2 New string to be diffed. * @param checklines Speedup flag. If false, then don't run a * line-level diff first to identify the changed areas. * If true, then run a faster slightly less optimal diff * @return Linked List of Diff objects. 
*/ protected LinkedList<Diff> diff_compute(String text1, String text2,boolean checklines) { LinkedList<Diff> diffs = new LinkedList<Diff>(); if (text1.length() == 0) { // Just add some text (speedup) diffs.add(new Diff(Operation.INSERT, text2)); return diffs; } if (text2.length() == 0) { // Just delete some text (speedup) diffs.add(new Diff(Operation.DELETE, text1)); return diffs; } String longtext = text1.length() > text2.length() ? text1 : text2; String shorttext = text1.length() > text2.length() ? text2 : text1; int i = longtext.indexOf(shorttext); if (i != -1) { // Shorter text is inside the longer text (speedup) Operation op = (text1.length() > text2.length()) ? Operation.DELETE : Operation.INSERT; diffs.add(new Diff(op, longtext.substring(0, i))); diffs.add(new Diff(Operation.EQUAL, shorttext)); diffs.add(new Diff(op, longtext.substring(i + shorttext.length()))); return diffs; } longtext = shorttext = null; // Garbage collect. // Check to see if the problem can be split in two. String[] hm = diff_halfMatch(text1, text2); if (hm != null) { // A half-match was found, sort out the return data. String text1_a = hm[0]; String text1_b = hm[1]; String text2_a = hm[2]; String text2_b = hm[3]; String mid_common = hm[4]; // Send both pairs off for separate processing. LinkedList<Diff> diffs_a = diff_main(text1_a, text2_a, checklines); LinkedList<Diff> diffs_b = diff_main(text1_b, text2_b, checklines); // Merge the results. diffs = diffs_a; diffs.add(new Diff(Operation.EQUAL, mid_common)); diffs.addAll(diffs_b); return diffs; } // Perform a real diff. if (checklines && (text1.length() < 100 || text2.length() < 100)) { checklines = false; // Too trivial for the overhead. } List<String> linearray = null; if (checklines) { // Scan the text on a line-by-line basis first. LinesToCharsResult b = diff_linesToChars(text1, text2); text1 = b.chars1; text2 = b.chars2; linearray = b.lineArray; } diffs = diff_map(text1, text2); if (diffs == null) { // No acceptable result. 
diffs = new LinkedList<Diff>(); diffs.add(new Diff(Operation.DELETE, text1)); diffs.add(new Diff(Operation.INSERT, text2)); } if (checklines) { // Convert the diff back to original text. diff_charsToLines(diffs, linearray); // Eliminate freak matches (e.g. blank lines) diff_cleanupSemantic(diffs); // Rediff any replacement blocks, this time character-by-character. // Add a dummy entry at the end. diffs.add(new Diff(Operation.EQUAL, "")); int count_delete = 0; int count_insert = 0; String text_delete = ""; String text_insert = ""; ListIterator<Diff> pointer = diffs.listIterator(); Diff thisDiff = pointer.next(); while (thisDiff != null) { switch (thisDiff.operation) { case INSERT: count_insert++; text_insert += thisDiff.text; break; case DELETE: count_delete++; text_delete += thisDiff.text; break; case EQUAL: // Upon reaching an equality, check for prior redundancies. if (count_delete >= 1 && count_insert >= 1) { // Delete the offending records and add the merged ones. pointer.previous(); for (int j = 0; j < count_delete + count_insert; j++) { pointer.previous(); pointer.remove(); } for (Diff newDiff : diff_main(text_delete, text_insert, false)) { pointer.add(newDiff); } } count_insert = 0; count_delete = 0; text_delete = ""; text_insert = ""; break; } thisDiff = pointer.hasNext() ? pointer.next() : null; } diffs.removeLast(); // Remove the dummy entry at the end. } return diffs; } /** * Split two texts into a list of strings. Reduce the texts to a string of * hashes where each Unicode character represents one line. * @param text1 First string. * @param text2 Second string. * @return An object containing the encoded text1, the encoded text2 and * the List of unique strings. The zeroth element of the List of * unique strings is intentionally blank. */ protected LinesToCharsResult diff_linesToChars(String text1, String text2) { List<String> lineArray = new ArrayList<String>(); Map<String, Integer> lineHash = new HashMap<String, Integer>(); // e.g. 
linearray[4] == "Hello\n" // e.g. linehash.get("Hello\n") == 4 // "\x00" is a valid character, but various debuggers don't like it. // So we'll insert a junk entry to avoid generating a null character. lineArray.add(""); String chars1 = diff_linesToCharsMunge(text1, lineArray, lineHash); String chars2 = diff_linesToCharsMunge(text2, lineArray, lineHash); return new LinesToCharsResult(chars1, chars2, lineArray); } /** * Split a text into a list of strings. Reduce the texts to a string of * hashes where each Unicode character represents one line. * @param text String to encode. * @param lineArray List of unique strings. * @param lineHash Map of strings to indices. * @return Encoded string. */ private String diff_linesToCharsMunge(String text, List<String> lineArray, Map<String, Integer> lineHash) { int lineStart = 0; int lineEnd = -1; String line; StringBuilder chars = new StringBuilder(); // Walk the text, pulling out a substring for each line. // text.split('\n') would would temporarily double our memory footprint. // Modifying text would create many large strings to garbage collect. while (lineEnd < text.length() - 1) { lineEnd = text.indexOf('\n', lineStart); if (lineEnd == -1) { lineEnd = text.length() - 1; } line = text.substring(lineStart, lineEnd + 1); lineStart = lineEnd + 1; if (lineHash.containsKey(line)) { chars.append(String.valueOf((char) (int) lineHash.get(line))); }