分享一个string.Join()的源码分析
首先看下源码:
// Concatenates count elements of value, beginning at startIndex, and places separator
// between each adjacent pair. A null separator is treated as the empty string and null
// elements add nothing to the computed length.
//
[System.Security.SecuritySafeCritical]  // auto-generated
public unsafe static String Join(String separator, String[] value, int startIndex, int count) {
    // Validate the requested window against the array before touching any element.
    if (value == null) {
        throw new ArgumentNullException("value");
    }
    if (startIndex < 0) {
        throw new ArgumentOutOfRangeException("startIndex", Environment.GetResourceString("ArgumentOutOfRange_StartIndex"));
    }
    if (count < 0) {
        throw new ArgumentOutOfRangeException("count", Environment.GetResourceString("ArgumentOutOfRange_NegativeCount"));
    }
    if (startIndex > value.Length - count) {
        throw new ArgumentOutOfRangeException("startIndex", Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
    }
    Contract.EndContractBlock();

    // A null separator behaves like an empty one.
    separator = separator ?? String.Empty;

    // Zero elements would distort the (count - 1) arithmetic below, so answer immediately.
    if (count == 0) {
        return String.Empty;
    }

    // One past the last element to join.
    int stopIndex = startIndex + count;

    // First pass: sum the lengths of the non-null elements.
    int totalLength = 0;
    for (int index = startIndex; index < stopIndex; index++) {
        if (value[index] != null) {
            totalLength += value[index].Length;
        }
    }

    // Plus one separator between each pair of neighbours.
    totalLength += (count - 1) * separator.Length;

    // This does not catch every overflow: the sum may have wrapped past the 4gb range and
    // landed back in the positive range, and other threads may modify the array. The
    // appends below are expected to re-check, so this is only an early exit.
    if ((totalLength < 0) || ((totalLength + 1) < 0)) {
        throw new OutOfMemoryException();
    }

    // Nothing to copy: every element and the separator are empty.
    if (totalLength == 0) {
        return String.Empty;
    }

    string result = FastAllocateString(totalLength);
    fixed (char* resultChars = &result.m_firstChar) {
        UnSafeCharBuffer writer = new UnSafeCharBuffer(resultChars, totalLength);

        // Second pass: the first element goes in bare, each later one follows a separator.
        writer.AppendString(value[startIndex]);
        int next = startIndex + 1;
        while (next < stopIndex) {
            writer.AppendString(separator);
            writer.AppendString(value[next]);
            next++;
        }
        Contract.Assert(*(resultChars + writer.Length) == '\0', "String must be null-terminated!");
    }

    return result;
}
其他重载函数:
// Concatenates every string produced by values, placing separator between neighbours.
// A null separator is treated as the empty string; null elements are skipped, but the
// separator that precedes them is still written.
[ComVisible(false)]
public static String Join(String separator, IEnumerable<String> values) {
    if (values == null) {
        throw new ArgumentNullException("values");
    }
    Contract.Ensures(Contract.Result<String>() != null);
    Contract.EndContractBlock();

    separator = separator ?? String.Empty;

    using (IEnumerator<String> cursor = values.GetEnumerator()) {
        // An empty sequence is answered before a builder is taken from the cache.
        if (!cursor.MoveNext()) {
            return String.Empty;
        }

        StringBuilder buffer = StringBuilderCache.Acquire();

        // The first element is written without a leading separator.
        if (cursor.Current != null) {
            buffer.Append(cursor.Current);
        }

        // Each later element is written after a separator, so no trailing separator
        // ever has to be removed again.
        for (bool hasMore = cursor.MoveNext(); hasMore; hasMore = cursor.MoveNext()) {
            buffer.Append(separator);
            if (cursor.Current != null) {
                buffer.Append(cursor.Current);
            }
        }

        return StringBuilderCache.GetStringAndRelease(buffer);
    }
}
自己写的方法:
/// <summary>
/// Joins the string form of every item in <paramref name="sourceList"/>, placing
/// <paramref name="seperator"/> between neighbouring items.
/// </summary>
/// <remarks>
/// An earlier version appended the separator after every item and then called
/// TrimEnd(seperator) on the result. That allocated a second string and also removed
/// separators that belonged to the output (an empty last item, or an item that itself
/// ends with the separator). It also returned an empty string whenever the separator was
/// a space, and its null check dereferenced the null sequence instead of reporting it.
/// Writing the separator before every item except the first avoids all of these.
/// </remarks>
/// <typeparam name="T">Element type; each item is appended via StringBuilder.Append(object).</typeparam>
/// <param name="seperator">Character written between neighbouring items.</param>
/// <param name="sourceList">Items to join. Null items contribute an empty string.</param>
/// <returns>The joined text, or an empty string when the sequence has no items.</returns>
/// <exception cref="ArgumentNullException"><paramref name="sourceList"/> is null.</exception>
public static String MyJoin<T>(char seperator, IEnumerable<T> sourceList) {
    if (sourceList == null) {
        throw new ArgumentNullException("sourceList");
    }

    using (IEnumerator<T> items = sourceList.GetEnumerator()) {
        // No items: nothing to build.
        if (!items.MoveNext()) {
            return string.Empty;
        }

        StringBuilder sb = new StringBuilder();
        sb.Append(items.Current);

        // Separator first, then the item, so the result never ends with a stray separator.
        while (items.MoveNext()) {
            sb.Append(seperator);
            sb.Append(items.Current);
        }

        return sb.ToString();
    }
}
这些内容来自其他博客。对比之前手动实现的方法,发现自己写的代码看起来很挫,这就是差距。在String的Join方法中,我们可以看到以下几个地方值得注意:
- 在方法的开始处,使用了Contract 这个类来进行验证协助代码的编写,这个在之前的文章中有所介绍,这里使用了后置条件判断,表示方法的返回值需要是string类型,并且不为空;还有就是在方法开始处做必要的参数合法性验证;在方法中及时判断,及时返回。
- 在实现中,使用了枚举器,C#中的foreach语句其实就是这种枚举器的语法糖,所以这里没有什么好说的。值得一提的是,while循环中的判断语句while (en.MoveNext())很好地避免了我们的方法中先在字符串末尾添加多余的分隔符、最后还要调用TrimEnd把它去掉的这种无谓的内存开销。这其实也是do{…}while(…)和while(…){…}这两种循环体的差异体现。
- 实现中,没有直接new一个StringBuilder,在返回字符串时也没有直接使用ToString方法,而是使用了StringBuilderCache这个类,这个类在之前翻译的《.NET程序的性能要领和优化建议》这篇文章中有所介绍。
这个类一看就是对StringBuilder的缓存,因为对于一些小的字符串,创建StringBuilder也是一笔开销。StringBuilderCache的实现见文末的完整源码。
再看一下TrimEnd的实现及其内存开销:
// Strips characters from the end of this string. With a null or empty trimChars the
// single-argument TrimHelper overload is used (presumably trimming white space; that
// overload is not shown here); otherwise only the characters listed in trimChars are removed.
public String TrimEnd(params char[] trimChars) {
    bool noExplicitSet = (null == trimChars) || (trimChars.Length == 0);
    return noExplicitSet
        ? TrimHelper(TrimTail)
        : TrimHelper(trimChars, TrimTail);
}
// Finds the sub-range of this string left after removing the characters in trimChars
// from the head and/or the tail, as selected by trimType, and returns that range
// through CreateTrimmedString.
[System.Security.SecuritySafeCritical]  // auto-generated
private String TrimHelper(char[] trimChars, int trimType) {
    // start: index of the first character kept on the left.
    // end:   index of the last character kept on the right.
    int start = 0;
    int end = this.Length - 1;

    // Advance over leading trim characters unless only the tail was requested.
    if (trimType != TrimTail) {
        while (start < this.Length && IsInTrimSet(trimChars, this[start])) {
            start++;
        }
    }

    // Walk back over trailing trim characters unless only the head was requested.
    if (trimType != TrimHead) {
        end = Length - 1;
        while (end >= start && IsInTrimSet(trimChars, this[end])) {
            end--;
        }
    }

    return CreateTrimmedString(start, end);
}

// Returns true when candidate equals one of the characters in trimChars.
private static bool IsInTrimSet(char[] trimChars, char candidate) {
    for (int i = 0; i < trimChars.Length; i++) {
        if (trimChars[i] == candidate) {
            return true;
        }
    }
    return false;
}
// Produces the string for the inclusive range [start, end] found by trimming.
[System.Security.SecurityCritical]  // auto-generated
private String CreateTrimmedString(int start, int end) {
    int keptLength = end - start + 1;

    // Nothing was trimmed: hand back this instance rather than allocating a copy.
    if (keptLength == this.Length) {
        return this;
    }

    // Everything was trimmed: use the shared empty string; otherwise copy the range out.
    return (keptLength == 0)
        ? String.Empty
        : InternalSubString(start, keptLength);
}
// Allocates a new string of the given length and copies that many characters into it,
// starting at startIndex of this string. The range is checked only by Contract.Assert.
[System.Security.SecurityCritical]  // auto-generated
unsafe string InternalSubString(int startIndex, int length) {
    Contract.Assert( startIndex >= 0 && startIndex <= this.Length, "StartIndex is out of range!");
    Contract.Assert( length >= 0 && startIndex <= this.Length - length, "length is out of range!");

    String copy = FastAllocateString(length);

    // Pin both strings while the characters are copied.
    fixed (char* target = &copy.m_firstChar) {
        fixed (char* origin = &this.m_firstChar) {
            wstrcpy(target, origin + startIndex, length);
        }
    }

    return copy;
}
StringBuilderCache.GetStringAndRelease(result):
// Takes the text out of sb, then passes the builder to Release so it may be cached
// for reuse. The string is materialised before the builder is released.
public static string GetStringAndRelease(StringBuilder sb) {
    string text = sb.ToString();
    Release(sb);
    return text;
}
Release(sb):
// Stores sb in the cache slot for later reuse, unless its capacity exceeds
// MAX_BUILDER_SIZE, in which case it is simply not cached.
public static void Release(StringBuilder sb) {
    if (sb.Capacity > MAX_BUILDER_SIZE) {
        return;
    }

    StringBuilderCache.CachedInstance = sb;
}
关于StringBuilderCache.CachedInstance:
// The cache slot: the single StringBuilder kept for reuse. [ThreadStatic] gives each
// thread its own value for this static field.
[ThreadStatic] private static StringBuilder CachedInstance;
这个是完整源码:
/*============================================================
**
** Class:  StringBuilderCache
**
** Purpose: Keeps one reusable StringBuilder per thread. This is
**          an optimization that reduces the number of builder
**          instances constructed and collected.
**
**  Acquire - Obtains a builder of at least the requested size.
**            May be called any number of times. If a builder is
**            in the cache it is returned and the cache is
**            emptied; later calls get a new builder.
**
**            The cached builder lives in thread-local storage,
**            so there is one per thread.
**
**  Release - Puts the given builder into the cache if it is not
**            too big. The builder should not be used after it
**            has been released. Unbalanced releases are fine:
**            they only mean that the next Acquire creates a new
**            builder.
**
**  GetStringAndRelease
**          - Calls ToString() on the builder, releases it to the
**            cache and returns the resulting string.
**
===========================================================*/
using System.Threading;

namespace System.Text {
    internal static class StringBuilderCache {
        // 360 was chosen in discussion with performance experts as a compromise between
        // using as little memory (per thread) as possible and still covering a large part
        // of the short-lived StringBuilder creations on the startup path of VS designers.
        private const int MAX_BUILDER_SIZE = 360;

        // The per-thread cache slot.
        [ThreadStatic]
        private static StringBuilder CachedInstance;

        // Returns the cached builder, cleared, when one exists that is big enough for the
        // request; otherwise returns a freshly constructed builder of the requested capacity.
        public static StringBuilder Acquire(int capacity = StringBuilder.DefaultCapacity) {
            // Requests above the cacheable size never consult the cache.
            if (capacity > MAX_BUILDER_SIZE) {
                return new StringBuilder(capacity);
            }

            StringBuilder cached = StringBuilderCache.CachedInstance;

            // Ask for a new builder when the cache is empty, or when the request is larger
            // than the cached capacity (this avoids stringbuilder block fragmentation).
            if (cached == null || capacity > cached.Capacity) {
                return new StringBuilder(capacity);
            }

            // Empty the slot before handing the instance out.
            StringBuilderCache.CachedInstance = null;
            cached.Clear();
            return cached;
        }

        // Caches sb for reuse unless its capacity exceeds MAX_BUILDER_SIZE.
        public static void Release(StringBuilder sb) {
            if (sb.Capacity > MAX_BUILDER_SIZE) {
                return;
            }

            StringBuilderCache.CachedInstance = sb;
        }

        // Takes the text out of sb, releases the builder to the cache and returns the text.
        public static string GetStringAndRelease(StringBuilder sb) {
            string text = sb.ToString();
            Release(sb);
            return text;
        }
    }
}