分享一个string.Join()的源码分析

首先看下源码:

        // Joins an array of strings together as one string with a separator between each original string.
        //
        [System.Security.SecuritySafeCritical]  // auto-generated
        public unsafe static String Join(String separator, String[] value, int startIndex, int count) {
            //Range check the array
            if (value == null)
                throw new ArgumentNullException("value");
 
            if (startIndex < 0)
                throw new ArgumentOutOfRangeException("startIndex", Environment.GetResourceString("ArgumentOutOfRange_StartIndex"));
            if (count < 0)
                throw new ArgumentOutOfRangeException("count", Environment.GetResourceString("ArgumentOutOfRange_NegativeCount"));
 
            if (startIndex > value.Length - count)
                throw new ArgumentOutOfRangeException("startIndex", Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
            Contract.EndContractBlock();
 
            //Treat null as empty string.
            if (separator == null) {
                separator = String.Empty;
            }
 
            //If count is 0, that skews a whole bunch of the calculations below, so just special case that.
            if (count == 0) {
                return String.Empty;
            }
            
            int jointLength = 0;
            //Figure out the total length of the strings in value
            int endIndex = startIndex + count - 1;
            for (int stringToJoinIndex = startIndex; stringToJoinIndex <= endIndex; stringToJoinIndex++) {
                if (value[stringToJoinIndex] != null) {
                    jointLength += value[stringToJoinIndex].Length;
                }
            }
            
            //Add enough room for the separator.
            jointLength += (count - 1) * separator.Length;
 
            // Note that we may not catch all overflows with this check (since we could have wrapped around the 4gb range any number of times
            // and landed back in the positive range.) The input array might be modifed from other threads, 
            // so we have to do an overflow check before each append below anyway. Those overflows will get caught down there.
            if ((jointLength < 0) || ((jointLength + 1) < 0) ) {
                throw new OutOfMemoryException();
            }
 
            //If this is an empty string, just return.
            if (jointLength == 0) {
                return String.Empty;
            }
 
            string jointString = FastAllocateString( jointLength );
            fixed (char * pointerToJointString = &jointString.m_firstChar) {
                UnSafeCharBuffer charBuffer = new UnSafeCharBuffer( pointerToJointString, jointLength);                
                
                // Append the first string first and then append each following string prefixed by the separator.
                charBuffer.AppendString( value[startIndex] );
                for (int stringToJoinIndex = startIndex + 1; stringToJoinIndex <= endIndex; stringToJoinIndex++) {
                    charBuffer.AppendString( separator );
                    charBuffer.AppendString( value[stringToJoinIndex] );
                }
                Contract.Assert(*(pointerToJointString + charBuffer.Length) == '\0', "String must be null-terminated!");
            }
 
            return jointString;
        }

其他重载函数:

       [ComVisible(false)]
        public static String Join(String separator, IEnumerable<String> values) {
            if (values == null)
                throw new ArgumentNullException("values");
            Contract.Ensures(Contract.Result<String>() != null);
            Contract.EndContractBlock();
 
            if (separator == null)
                separator = String.Empty;
 
 
            using(IEnumerator<String> en = values.GetEnumerator()) {
                if (!en.MoveNext())
                    return String.Empty;
 
                StringBuilder result = StringBuilderCache.Acquire();
                if (en.Current != null) {
                    result.Append(en.Current);
                }
 
                while (en.MoveNext()) {
                    result.Append(separator);
                    if (en.Current != null) {
                        result.Append(en.Current);
                    }
                }            
                return StringBuilderCache.GetStringAndRelease(result);
            }           
        }
自己写的方法:
public static String MyJoin<T>(char seperator, IEnumerable<T> sourceList)
    {
        StringBuilder sb = new StringBuilder();
        if (sourceList == null && sourceList.Count() > 0)
        {
            throw new ArgumentNullException("sourceList");
        }
        if (seperator == ' ')
        {
            return string.Empty;
        }
        foreach (var item in sourceList)
        {
            sb.Append(item);
            sb.Append(seperator);
        }
        return sb.ToString().TrimEnd(seperator);
    }

这些是其他博客来的。对比之前手动实现的方法,发现自己写的代码看起来很挫,这个就是差距,String的Join方法中我们可以看到一下几个地方值得注意:

  1. 在方法的开始处,使用了Contract 这个类来进行验证协助代码的编写,这个在之前的文章中有所介绍,这里使用了后置条件判断,表示方法的返回值需要是string类型,并且不为空;还有就是在方法开始处做必要的参数合法性验证;在方法中及时判断,及时返回。
  2. 在实现中,使用了枚举器,C#中的foreach语句其实就是这种枚举器的语法糖,所以这里没有什么好说的,值得一提的是在while循环中的判断语句while(en.MoveNext) 很好的避免了我们方法中在字符串末尾添加多余的字符串,最后还要调用TrimEnd的这种无谓的内存开销。这其实也是do{…}while(..),和while(…){…}这两种循环体的差异体现。
  3. 实现中,没有直接new直接分配StringBuilder,在返回字符串时也没有直接使用ToString方法,而是使用了StringBuilderCache这个类,这个在之前翻译的.NET程序的性能要领和优化建议 这篇文章中有所介绍。

这个类一看就是对StringBuilder的缓存,因为对于一些小的字符串,创建StringBuilder也是一笔开销。StringBuilder的实现如下:

看下trimEnd的实现,内存开销
 // Removes a string of characters from the end of this string.
        public String TrimEnd(params char[] trimChars) {
            if (null==trimChars || trimChars.Length == 0) {
                return TrimHelper(TrimTail);
            }
            return TrimHelper(trimChars,TrimTail);
        }
[System.Security.SecuritySafeCritical]  // auto-generated
        private String TrimHelper(char[] trimChars, int trimType) {
            //end will point to the first non-trimmed character on the right
            //start will point to the first non-trimmed character on the Left
            int end = this.Length-1;
            int start=0;
 
            //Trim specified characters.
            if (trimType !=TrimTail)  {
                for (start=0; start < this.Length; start++) {
                    int i = 0;
                    char ch = this[start];
                    for( i = 0; i < trimChars.Length; i++) {
                        if( trimChars[i] == ch) break;
                    }
                    if( i == trimChars.Length) { // the character is not white space
                        break;  
                    }
                }
            }
            
            if (trimType !=TrimHead) {
                for (end= Length -1; end >= start;  end--) {
                    int i = 0;    
                    char ch = this[end];                    
                    for(i = 0; i < trimChars.Length; i++) {
                        if( trimChars[i] == ch) break;
                    }
                    if( i == trimChars.Length) { // the character is not white space
                        break;  
                    }                    
                }
            }
 
            return CreateTrimmedString(start, end);
        }
       [System.Security.SecurityCritical]  // auto-generated
        private String CreateTrimmedString(int start, int end) {
            //Create a new STRINGREF and initialize it from the range determined above.
            int len = end -start + 1;
            if (len == this.Length) {
                // Don't allocate a new string as the trimmed string has not changed.
                return this;
            }
 
            if( len == 0) {
                return String.Empty;
            }
            return InternalSubString(start, len);
        }
[System.Security.SecurityCritical]  // auto-generated
        unsafe string InternalSubString(int startIndex, int length) {
            Contract.Assert( startIndex >= 0 && startIndex <= this.Length, "StartIndex is out of range!");
            Contract.Assert( length >= 0 && startIndex <= this.Length - length, "length is out of range!");            
            
            String result = FastAllocateString(length);
 
            fixed(char* dest = &result.m_firstChar)
                fixed(char* src = &this.m_firstChar) {
                    wstrcpy(dest, src + startIndex, length);
                }
 
            return result;
        }

 



StringBuilderCache.GetStringAndRelease(result):
 public static string GetStringAndRelease(StringBuilder sb)
        {
            string result = sb.ToString();
            Release(sb);
            return result;
        }
Release(sb):
 public static void Release(StringBuilder sb)
        {
            if (sb.Capacity <= MAX_BUILDER_SIZE)
            {
                StringBuilderCache.CachedInstance = sb;
            }
        }

关于StringBuilderCache.CachedInstance:

 [ThreadStatic]
 private static StringBuilder CachedInstance;

这个是完整源码:

/*============================================================
**
** Class:  StringBuilderCache
**

  **目的:提供stringbuilder的缓存可重用实例
  **每个线程都是一种优化,可以减少
  **构造和收集的实例数。

**
**  Acquire - is used to get a string builder to use of a 
**            particular size.  It can be called any number of 
**            times, if a stringbuilder is in the cache then
**            it will be returned and the cache emptied.
**            subsequent calls will return a new stringbuilder.
**
**            A StringBuilder instance is cached in 
**            Thread Local Storage and so there is one per thread
**
**  Release - Place the specified builder in the cache if it is 
**            not too big.
**            The stringbuilder should not be used after it has 
**            been released.
**            Unbalanced Releases are perfectly acceptable.  It
**            will merely cause the runtime to create a new 
**            stringbuilder next time Acquire is called.
**
**  GetStringAndRelease
**          - ToString() the stringbuilder, Release it to the 
**            cache and return the resulting string
**
===========================================================*/
using System.Threading;
 
namespace System.Text
{
    internal static class StringBuilderCache
    {
        // The value 360 was chosen in discussion with performance experts as a compromise between using
        // as litle memory (per thread) as possible and still covering a large part of short-lived
        // StringBuilder creations on the startup path of VS designers.
        private const int MAX_BUILDER_SIZE = 360;
 
        [ThreadStatic]
        private static StringBuilder CachedInstance;
 
        public static StringBuilder Acquire(int capacity = StringBuilder.DefaultCapacity)
        {
            if(capacity <= MAX_BUILDER_SIZE)
            {
                StringBuilder sb = StringBuilderCache.CachedInstance;
                if (sb != null)
                {
                    // Avoid stringbuilder block fragmentation by getting a new StringBuilder
                    // when the requested size is larger than the current capacity
                    if(capacity <= sb.Capacity)
                    {
                        StringBuilderCache.CachedInstance = null;
                        sb.Clear();
                        return sb;
                    }
                }
            }
            return new StringBuilder(capacity);
        }
 
        public static void Release(StringBuilder sb)
        {
            if (sb.Capacity <= MAX_BUILDER_SIZE)
            {
                StringBuilderCache.CachedInstance = sb;
            }
        }
 
        public static string GetStringAndRelease(StringBuilder sb)
        {
            string result = sb.ToString();
            Release(sb);
            return result;
        }
    }
}

 

 
posted @ 2021-11-17 10:27  vba是最好的语言  阅读(170)  评论(0编辑  收藏  举报