概述
CharMatcher提供了多种对字符串处理的方法, 它的主要意图有:
1. 找到匹配的字符
2. 处理匹配的字符
CharMatcher内部主要实现包括两部分:
1. 实现了大量公用内部类, 用来方便用户对字符串做匹配: 例如 JAVA_DIGIT 匹配数字, JAVA_LETTER 匹配字母等等.
2. 实现了大量处理字符串的方法, 使用特定的CharMatcher可以对匹配到的字符串做出多种处理, 例如 remove(), replace(), trim(), retain()等等
CharMatcher本身是一个抽象类, 其中一些操作方法是抽象方法, 他主要依靠内部继承CharMatcher的内部子类来实现抽象方法和重写一些操作方法, 因为不同的匹配规则的这些操作方法具有不同的实现要求
常用方法介绍
默认实现类
CharMatcher本身提供了很多CharMatcher实现类,如下:
ANY: 匹配任何字符
ASCII: 匹配是否是ASCII字符
BREAKING_WHITESPACE: 匹配所有可换行的空白字符(不包括非换行空白字符,例如"\u00a0")
DIGIT: 匹配ASCII数字
INVISIBLE: 匹配所有看不见的字符
JAVA_DIGIT: 匹配UNICODE数字, 使用 Character.isDigit() 实现
JAVA_ISO_CONTROL: 匹配ISO控制字符, 使用 Charater.isISOControl() 实现
JAVA_LETTER: 匹配字母, 使用 Charater.isLetter() 实现
JAVA_LETTER_OR_DIGET: 匹配数字或字母
JAVA_LOWER_CASE: 匹配小写
JAVA_UPPER_CASE: 匹配大写
NONE: 不匹配所有字符
SINGLE_WIDTH: 匹配单字宽字符, 如中文字就是双字宽
WHITESPACE: 匹配所有空白字符
常用操作方法
CharMatcher is(char match): 返回匹配指定字符的Matcher
CharMatcher isNot(char match): 返回不匹配指定字符的Matcher
CharMatcher anyOf(CharSequence sequence): 返回匹配sequence中任意字符的Matcher
CharMatcher noneOf(CharSequence sequence): 返回不匹配sequence中任何一个字符的Matcher
CharMatcher inRange(char startInclusive, char endIncludesive): 返回匹配范围内任意字符的Matcher
CharMatcher forPredicate(Predicate<? super Charater> predicate): 返回使用predicate的apply()判断匹配的Matcher
CharMatcher negate(): 返回以当前Matcher判断规则相反的Matcher
CharMatcher and(CharMatcher other): 返回与other匹配条件组合做与来判断的Matcher
CharMatcher or(CharMatcher other): 返回与other匹配条件组合做或来判断的Matcher
boolean matchesAnyOf(CharSequence sequence): 只要sequence中有任意字符能匹配Matcher,返回true
boolean matchesAllOf(CharSequence sequence): sequence中所有字符都能匹配Matcher,返回true
boolean matchesNoneOf(CharSequence sequence): sequence中所有字符都不能匹配Matcher,返回true
int indexIn(CharSequence sequence): 返回sequence中匹配到的第一个字符的坐标
int indexIn(CharSequence sequence, int start): 返回从start开始,在sequence中匹配到的第一个字符的坐标
int lastIndexIn(CharSequence sequence): 返回sequence中最后一次匹配到的字符的坐标
int countIn(CharSequence sequence): 返回sequence中匹配到的字符计数
String removeFrom(CharSequence sequence): 删除sequence中匹配到到的字符并返回
String retainFrom(CharSequence sequence): 保留sequence中匹配到的字符并返回
String replaceFrom(CharSequence sequence, char replacement): 替换sequence中匹配到的字符并返回
String trimFrom(CharSequence sequence): 删除首尾匹配到的字符并返回
String trimLeadingFrom(CharSequence sequence): 删除首部匹配到的字符
String trimTrailingFrom(CharSequence sequence): 删除尾部匹配到的字符
String collapseFrom(CharSequence sequence, char replacement): 将匹配到的组(连续匹配的字符)替换成replacement
String trimAndCollapseFrom(CharSequence sequence, char replacement): 先trim在replace
部分实现源码介绍
下面对CharMatcher的常用的操作方法实现做一些介绍
/** * 返回一个与当前Matcher匹配规则相反的Matcher */ public CharMatcher negate() { final CharMatcher original = this; return new CharMatcher(original + ".negate()") { @Override public boolean matches(char c) { return !original.matches(c); } @Override public boolean matchesAllOf(CharSequence sequence) { return original.matchesNoneOf(sequence); } @Override public boolean matchesNoneOf(CharSequence sequence) { return original.matchesAllOf(sequence); } @Override public int countIn(CharSequence sequence) { return sequence.length() - original.countIn(sequence); } @Override public CharMatcher negate() { return original; } }; } /** * 返回一个具有组合规则链的Matcher */ public CharMatcher and(CharMatcher other) { return new And(this, checkNotNull(other)); } /** * And的实现和Ordering的Compound是一样的 * 使用一个内部子类继承Matcher,然后内部使用组合的方式将 * 多个Matcher组合在一起,调用操作方法的时候依次调用这些 * Matcher的同名操作方法即可 */ private static class And extends CharMatcher { final CharMatcher first; final CharMatcher second; And(CharMatcher a, CharMatcher b) { this(a, b, "CharMatcher.and(" + a + ", " + b + ")"); } And(CharMatcher a, CharMatcher b, String description) { super(description); first = checkNotNull(a); second = checkNotNull(b); } @Override public CharMatcher and(CharMatcher other) { return new And(this, other); } @Override public boolean matches(char c) { return first.matches(c) && second.matches(c); } @Override CharMatcher withToString(String description) { return new And(first, second, description); } } /** * Or的实现与And一样,不再赘述 */ public CharMatcher or(CharMatcher other) { return new Or(this, checkNotNull(other)); } private static class Or extends CharMatcher { final CharMatcher first; final CharMatcher second; Or(CharMatcher a, CharMatcher b, String description) { super(description); first = checkNotNull(a); second = checkNotNull(b); } Or(CharMatcher a, CharMatcher b) { this(a, b, "CharMatcher.or(" + a + ", " + b + ")"); } @Override public CharMatcher or(CharMatcher other) { return new Or(this, checkNotNull(other)); } @Override public boolean matches(char c) { return first.matches(c) || second.matches(c); } @Override CharMatcher withToString(String description) { return new Or(first, second, description); } } /** * Returns a {@code char} matcher functionally equivalent to this one, but which may be faster to * query than the original; your mileage may vary. Precomputation takes time and is likely to be * worthwhile only if the precomputed matcher is queried many thousands of times. * * <p>This method has no effect (returns {@code this}) when called in GWT: it's unclear whether a * precomputed matcher is faster, but it certainly consumes more memory, which doesn't seem like a * worthwhile tradeoff in a browser. */ public CharMatcher precomputed() { return Platform.precomputeCharMatcher(this); } /** * 使用最慢的方式来返回字符全集中所有能被Matcher匹配的字符 * 最慢的方式?! */ char[] slowGetChars() { char[] allChars = new char[65536]; int size = 0; for (int c = Character.MIN_VALUE; c <= Character.MAX_VALUE; c++) { if (matches((char) c)) { allChars[size++] = (char) c; } } char[] retValue = new char[size]; System.arraycopy(allChars, 0, retValue, 0, size); return retValue; } /** * 只要sequence有任意字符匹配Matcher,则返回true */ public boolean matchesAnyOf(CharSequence sequence) { return !matchesNoneOf(sequence); } /** * 如果sequence所有字符都匹配Matcher,则返回true */ public boolean matchesAllOf(CharSequence sequence) { for (int i = sequence.length() - 1; i >= 0; i--) { if (!matches(sequence.charAt(i))) { return false; } } return true; } /** * 如果sequence所有字符都不匹配Matcher,则返回true */ public boolean matchesNoneOf(CharSequence sequence) { return indexIn(sequence) == -1; } /** * 返回Matcher在sequence中匹配到的第一个字符的坐标 * 没有匹配则返回 -1 */ public int indexIn(CharSequence sequence) { int length = sequence.length(); for (int i = 0; i < length; i++) { if (matches(sequence.charAt(i))) { return i; } } return -1; } /** * 返回Matcher在sequence中从start开始的匹配到的第一个字符的坐标 */ public int indexIn(CharSequence sequence, int start) { int length = sequence.length(); Preconditions.checkPositionIndex(start, length); for (int i = start; i < length; i++) { if (matches(sequence.charAt(i))) { return i; } } return -1; } /** * 返回sequence最后一次匹配到Matcher的坐标 */ public int lastIndexIn(CharSequence sequence) { for (int i = sequence.length() - 1; i >= 0; i--) { if (matches(sequence.charAt(i))) { return i; } } return -1; } /** * 返回Sequence匹配到Matcher的次数 */ public int countIn(CharSequence sequence) { int count = 0; for (int i = 0; i < sequence.length(); i++) { if (matches(sequence.charAt(i))) { count++; } } return count; } /** * 删除sequence中匹配到的所有字符并返回 */ @CheckReturnValue public String removeFrom(CharSequence sequence) { String string = sequence.toString(); int pos = indexIn(string); if (pos == -1) { return string; } char[] chars = string.toCharArray(); int spread = 1; // This unusual loop comes from extensive benchmarking // 位移删除算法, 使用了双层循环和break OUT 写法 OUT: while (true) { pos++; while (true) { if (pos == chars.length) { break OUT; } if (matches(chars[pos])) { break; } chars[pos - spread] = chars[pos]; pos++; } spread++; } return new String(chars, 0, pos - spread); } /** * 保留所有匹配的Matcher的字符并返回 * 使用逆向的Matcher的removeFrom()实现 */ @CheckReturnValue public String retainFrom(CharSequence sequence) { return negate().removeFrom(sequence); } /** * 将所有匹配到Matcher的字符换成指定字符 */ @CheckReturnValue public String replaceFrom(CharSequence sequence, char replacement) { String string = sequence.toString(); int pos = indexIn(string); if (pos == -1) { return string; } char[] chars = string.toCharArray(); chars[pos] = replacement; for (int i = pos + 1; i < chars.length; i++) { if (matches(chars[i])) { chars[i] = replacement; } } return new String(chars); } /** * 将所有可以匹配到的字符换成指定字符串 * 他的实现与替换成字符不相同,他是使用indexIn和StringBuilder实现的 */ @CheckReturnValue public String replaceFrom(CharSequence sequence, CharSequence replacement) { int replacementLen = replacement.length(); if (replacementLen == 0) { return removeFrom(sequence); } if (replacementLen == 1) { return replaceFrom(sequence, replacement.charAt(0)); } String string = sequence.toString(); int pos = indexIn(string); if (pos == -1) { return string; } int len = string.length(); StringBuilder buf = new StringBuilder((len * 3 / 2) + 16); int oldpos = 0; do { buf.append(string, oldpos, pos); buf.append(replacement); oldpos = pos + 1; pos = indexIn(string, oldpos); } while (pos != -1); buf.append(string, oldpos, len); return buf.toString(); } /** * 去除sequence首尾所有这个Matcher匹配的字符 */ @CheckReturnValue public String trimFrom(CharSequence sequence) { int len = sequence.length(); int first; int last; for (first = 0; first < len; first++) { if (!matches(sequence.charAt(first))) { break; } } for (last = len - 1; last > first; last--) { if (!matches(sequence.charAt(last))) { break; } } return sequence.subSequence(first, last + 1).toString(); } /** * 去掉sequence开头的所有Matcher能匹配的字符 */ @CheckReturnValue public String trimLeadingFrom(CharSequence sequence) { int len = sequence.length(); int first; for (first = 0; first < len; first++) { if (!matches(sequence.charAt(first))) { break; } } return sequence.subSequence(first, len).toString(); } /** * 删除字符串尾部所有能匹配Matcher的字符 */ @CheckReturnValue public String trimTrailingFrom(CharSequence sequence) { int len = sequence.length(); int last; for (last = len - 1; last >= 0; last--) { if (!matches(sequence.charAt(last))) { break; } } return sequence.subSequence(0, last + 1).toString(); } /** * 将所有能被Matcher匹配的组(连续匹配的字串)替换成指定字符 */ @CheckReturnValue public String collapseFrom(CharSequence sequence, char replacement) { int first = indexIn(sequence); if (first == -1) { return sequence.toString(); } // TODO(kevinb): see if this implementation can be made faster StringBuilder builder = new StringBuilder(sequence.length()) .append(sequence.subSequence(0, first)) .append(replacement); boolean in = true; for (int i = first + 1; i < sequence.length(); i++) { char c = sequence.charAt(i); if (matches(c)) { if (!in) { builder.append(replacement); in = true; } } else { builder.append(c); in = false; } } return builder.toString(); } /** * 先trim再Collapse */ @CheckReturnValue public String trimAndCollapseFrom(CharSequence sequence, char replacement) { int first = negate().indexIn(sequence); if (first == -1) { return ""; // everything matches. nothing's left. } StringBuilder builder = new StringBuilder(sequence.length()); boolean inMatchingGroup = false; for (int i = first; i < sequence.length(); i++) { char c = sequence.charAt(i); if (matches(c)) { inMatchingGroup = true; } else { if (inMatchingGroup) { builder.append(replacement); inMatchingGroup = false; } builder.append(c); } } return builder.toString(); } // Predicate interface /** * matches()的异名方法 */ @Override public boolean apply(Character character) { return matches(character); }
补完:
1. 提供的默认实现CharMatcher功能及介绍
2. 操作方法签名及功能列表
3. 使用代码示例