Java源码赏析(五)再识 String 类
在 Java源码赏析(三)初识 String 类 中,我们已经大概理解了String的接口,接下来我们描述一下String的常用工具方法。
/**
* 为了精简的String结构,之前提到的方法省略,重点介绍剩余的方法(省略构造方法、indexOf、lastIndexOf、valueOf) */ public final class String implements java.io.Serializable, Comparable<String>, CharSequence { private final char value[]; private int hash; // Default to 0 private static final long serialVersionUID = -6849794470754667710L; /** 指明需要实例化的字段 */ private static final ObjectStreamField[] serialPersistentFields = new ObjectStreamField[0]; /** 省略CaseInsensitiveComparator()的实现,主要是用于按ASCII码的排序规则进行排序 */ public static final Comparator<String> CASE_INSENSITIVE_ORDER = new CaseInsensitiveComparator(); /** 实现Comparable<String> */ public int compareTo(String anotherString) { ... } /** 覆盖hashCode() */ public int hashCode() { ... } /** 覆盖父类equal() */ public boolean equals(Object anObject) { ... } /** 覆盖父类toString() */ public String toString() { return this; } /** * 实现CharSequence接口 * 共有length(), charAt(int index), subSequence(int beginIndex, int endIndex) 等 * 在实现subSequence()方法时使用了String中substring()方法 */ /** 获取字符串长度 */ public int length() { return value.length; } /** 获取index位置的字符 */ public char charAt(int index) { ... } /** 获取子字符序列 */ public CharSequence subSequence(int beginIndex, int endIndex) { return this.substring(beginIndex, endIndex); } /** 获取子串 */ public String substring(int beginIndex, int endIndex) { ... } /** 将字符串存入常量池并返回在常量池的引用 */ public native String intern(); /**分隔线*/
将字符串str连接至末尾,创建一个新字符串*/ public String concat(String str) { int otherLen = str.length(); if (otherLen == 0) { return this; } int len = value.length; char buf[] = Arrays.copyOf(value, len + otherLen); str.getChars(buf, len); return new String(buf, true); } /** 将oldChar字符替换为newChar字符*/ public String replace(char oldChar, char newChar) { if (oldChar != newChar) { int len = value.length; int i = -1; char[] val = value; /* avoid getfield opcode */ while (++i < len) { if (val[i] == oldChar) { break; } } if (i < len) { char buf[] = new char[len]; for (int j = 0; j < i; j++) { buf[j] = val[j]; } while (i < len) { char c = val[i]; buf[i] = (c == oldChar) ? newChar : c; i++; } return new String(buf, true); } } return this; } /** 返回是否被regex匹配*/ public boolean matches(String regex) { return Pattern.matches(regex, this); } /** 查看字符串s是否被包含*/ public boolean contains(CharSequence s) { return indexOf(s.toString()) > -1; } /** 替换第一个根据regex匹配到的字符串为replacement*/ public String replaceFirst(String regex, String replacement) { return Pattern.compile(regex).matcher(this).replaceFirst(replacement); } /** 将regex匹配到的内容全部替换为replacement*/ public String replaceAll(String regex, String replacement) { return Pattern.compile(regex).matcher(this).replaceAll(replacement); } /** 将字符串target替换为replacement*/ public String replace(CharSequence target, CharSequence replacement) { return Pattern.compile(target.toString(), Pattern.LITERAL).matcher( this).replaceAll(Matcher.quoteReplacement(replacement.toString())); } /** 根据regex分割limit次字符串*/ public String[] split(String regex, int limit) { /* fastpath if the regex is a (1)one-char String and this character is not one of the RegEx's meta characters ".$|()[{^?*+\\", or (2)two-char String and the first char is the backslash and the second is not the ascii digit or ascii letter. */ char ch = 0; if (((regex.value.length == 1 && ".$|()[{^?*+\\".indexOf(ch = regex.charAt(0)) == -1) || (regex.length() == 2 && regex.charAt(0) == '\\' && (((ch = regex.charAt(1))-'0')|('9'-ch)) < 0 && ((ch-'a')|('z'-ch)) < 0 && ((ch-'A')|('Z'-ch)) < 0)) && (ch < Character.MIN_HIGH_SURROGATE || ch > Character.MAX_LOW_SURROGATE)) { int off = 0; int next = 0; boolean limited = limit > 0; ArrayList<String> list = new ArrayList<>(); while ((next = indexOf(ch, off)) != -1) { if (!limited || list.size() < limit - 1) { list.add(substring(off, next)); off = next + 1; } else { // last one //assert (list.size() == limit - 1); list.add(substring(off, value.length)); off = value.length; break; } } // If no match was found, return this if (off == 0) return new String[]{this}; // Add remaining segment if (!limited || list.size() < limit) list.add(substring(off, value.length)); // Construct result int resultSize = list.size(); if (limit == 0) { while (resultSize > 0 && list.get(resultSize - 1).length() == 0) { resultSize--; } } String[] result = new String[resultSize]; return list.subList(0, resultSize).toArray(result); } return Pattern.compile(regex).split(this, limit); } /** 根据regex分隔字符串*/ public String[] split(String regex) { return split(regex, 0); } /** 将elements字符串数组中插入delimiter字符串 比如 String.join(",", {"a", "b", "c"}) == "a,b,c" */ public static String join(CharSequence delimiter, CharSequence... elements) { Objects.requireNonNull(delimiter); Objects.requireNonNull(elements); // Number of elements not likely worth Arrays.stream overhead. StringJoiner joiner = new StringJoiner(delimiter); for (CharSequence cs: elements) { joiner.add(cs); } return joiner.toString(); } /** 同上,数组改为了集合*/ public static String join(CharSequence delimiter, Iterable<? extends CharSequence> elements) { Objects.requireNonNull(delimiter); Objects.requireNonNull(elements); StringJoiner joiner = new StringJoiner(delimiter); for (CharSequence cs: elements) { joiner.add(cs); } return joiner.toString(); } /** 全部转化为小写字母*/ public String toLowerCase(Locale locale) { if (locale == null) { throw new NullPointerException(); } int firstUpper; final int len = value.length; /* Now check if there are any characters that need to be changed. */ scan: { for (firstUpper = 0 ; firstUpper < len; ) { char c = value[firstUpper]; if ((c >= Character.MIN_HIGH_SURROGATE) && (c <= Character.MAX_HIGH_SURROGATE)) { int supplChar = codePointAt(firstUpper); if (supplChar != Character.toLowerCase(supplChar)) { break scan; } firstUpper += Character.charCount(supplChar); } else { if (c != Character.toLowerCase(c)) { break scan; } firstUpper++; } } return this; } char[] result = new char[len]; int resultOffset = 0; /* result may grow, so i+resultOffset * is the write location in result */ /* Just copy the first few lowerCase characters. */ System.arraycopy(value, 0, result, 0, firstUpper); String lang = locale.getLanguage(); boolean localeDependent = (lang == "tr" || lang == "az" || lang == "lt"); char[] lowerCharArray; int lowerChar; int srcChar; int srcCount; for (int i = firstUpper; i < len; i += srcCount) { srcChar = (int)value[i]; if ((char)srcChar >= Character.MIN_HIGH_SURROGATE && (char)srcChar <= Character.MAX_HIGH_SURROGATE) { srcChar = codePointAt(i); srcCount = Character.charCount(srcChar); } else { srcCount = 1; } if (localeDependent || srcChar == '\u03A3' || // GREEK CAPITAL LETTER SIGMA srcChar == '\u0130') { // LATIN CAPITAL LETTER I WITH DOT ABOVE lowerChar = ConditionalSpecialCasing.toLowerCaseEx(this, i, locale); } else { lowerChar = Character.toLowerCase(srcChar); } if ((lowerChar == Character.ERROR) || (lowerChar >= Character.MIN_SUPPLEMENTARY_CODE_POINT)) { if (lowerChar == Character.ERROR) { lowerCharArray = ConditionalSpecialCasing.toLowerCaseCharArray(this, i, locale); } else if (srcCount == 2) { resultOffset += Character.toChars(lowerChar, result, i + resultOffset) - srcCount; continue; } else { lowerCharArray = Character.toChars(lowerChar); } /* Grow result if needed */ int mapLen = lowerCharArray.length; if (mapLen > srcCount) { char[] result2 = new char[result.length + mapLen - srcCount]; System.arraycopy(result, 0, result2, 0, i + resultOffset); result = result2; } for (int x = 0; x < mapLen; ++x) { result[i + resultOffset + x] = lowerCharArray[x]; } resultOffset += (mapLen - srcCount); } else { result[i + resultOffset] = (char)lowerChar; } } return new String(result, 0, len + resultOffset); } /** 全部转化为小写字母*/ public String toLowerCase() { return toLowerCase(Locale.getDefault()); } /** 全部转化为大写字母*/ public String toUpperCase(Locale locale) { if (locale == null) { throw new NullPointerException(); } int firstLower; final int len = value.length; /* Now check if there are any characters that need to be changed. */ scan: { for (firstLower = 0 ; firstLower < len; ) { int c = (int)value[firstLower]; int srcCount; if ((c >= Character.MIN_HIGH_SURROGATE) && (c <= Character.MAX_HIGH_SURROGATE)) { c = codePointAt(firstLower); srcCount = Character.charCount(c); } else { srcCount = 1; } int upperCaseChar = Character.toUpperCaseEx(c); if ((upperCaseChar == Character.ERROR) || (c != upperCaseChar)) { break scan; } firstLower += srcCount; } return this; } /* result may grow, so i+resultOffset is the write location in result */ int resultOffset = 0; char[] result = new char[len]; /* may grow */ /* Just copy the first few upperCase characters. */ System.arraycopy(value, 0, result, 0, firstLower); String lang = locale.getLanguage(); boolean localeDependent = (lang == "tr" || lang == "az" || lang == "lt"); char[] upperCharArray; int upperChar; int srcChar; int srcCount; for (int i = firstLower; i < len; i += srcCount) { srcChar = (int)value[i]; if ((char)srcChar >= Character.MIN_HIGH_SURROGATE && (char)srcChar <= Character.MAX_HIGH_SURROGATE) { srcChar = codePointAt(i); srcCount = Character.charCount(srcChar); } else { srcCount = 1; } if (localeDependent) { upperChar = ConditionalSpecialCasing.toUpperCaseEx(this, i, locale); } else { upperChar = Character.toUpperCaseEx(srcChar); } if ((upperChar == Character.ERROR) || (upperChar >= Character.MIN_SUPPLEMENTARY_CODE_POINT)) { if (upperChar == Character.ERROR) { if (localeDependent) { upperCharArray = ConditionalSpecialCasing.toUpperCaseCharArray(this, i, locale); } else { upperCharArray = Character.toUpperCaseCharArray(srcChar); } } else if (srcCount == 2) { resultOffset += Character.toChars(upperChar, result, i + resultOffset) - srcCount; continue; } else { upperCharArray = Character.toChars(upperChar); } /* Grow result if needed */ int mapLen = upperCharArray.length; if (mapLen > srcCount) { char[] result2 = new char[result.length + mapLen - srcCount]; System.arraycopy(result, 0, result2, 0, i + resultOffset); result = result2; } for (int x = 0; x < mapLen; ++x) { result[i + resultOffset + x] = upperCharArray[x]; } resultOffset += (mapLen - srcCount); } else { result[i + resultOffset] = (char)upperChar; } } return new String(result, 0, len + resultOffset); } /** 全部转化为大写字母*/ public String toUpperCase() { return toUpperCase(Locale.getDefault()); } /** 去除字符串前后的空格*/ public String trim() { int len = value.length; int st = 0; char[] val = value; /* avoid getfield opcode */ while ((st < len) && (val[st] <= ' ')) { st++; } while ((st < len) && (val[len - 1] <= ' ')) { len--; } return ((st > 0) || (len < value.length)) ? substring(st, len) : this; } /** 转化为字符数组*/ public char[] toCharArray() { // Cannot use Arrays.copyOf because of class initialization order issues char result[] = new char[value.length]; System.arraycopy(value, 0, result, 0, value.length); return result; } /** 格式化字符串,便于输出*/ public static String format(String format, Object... args) { return new Formatter().format(format, args).toString(); } /** 使用指定的地区l,格式化字符串,便于输出*/ public static String format(Locale l, String format, Object... args) { return new Formatter(l).format(format, args).toString(); } /** 根据data字符数组从offset开始复制count个字符*/ public static String valueOf(char data[], int offset, int count) { return new String(data, offset, count); } /** 根据data字符数组从offset开始拷贝count个字符*/ public static String copyValueOf(char data[], int offset, int count) { return new String(data, offset, count); } /** 拷贝字符串的值*/ public static String copyValueOf(char data[]) { return new String(data); } /** 省略大部分的类型转化为字符串*/ public static String valueOf(boolean b) { return b ? "true" : "false"; } ... }