判断字符串中是否存在重复字符，以及一个更有意义一点的变体：找出出现次数最多的字符

   /// <summary>
   /// A set of exercises around repeated characters in a string: several ways to
   /// decide whether a string contains a duplicated character, followed by the
   /// (arguably more useful) problem of finding the most frequently used character(s).
   /// All algorithms assume every character code is in the range 0-255.
   /// </summary>
   class Program
   {
       /// <summary>Number of distinct character codes the algorithms accept (codes 0-255).</summary>
       private const int CharSetSize = 256;

       /// <summary>Number of bits in one <see cref="int"/> word of a bitmap.</summary>
       private const int BitsPerWord = 32;

       /// <summary>
       /// Demo entry point: runs <see cref="FindMostFrequentlyUsedChars"/> on a sample
       /// string and then waits for a key press.
       /// </summary>
       static void Main(string[] args)
       {
           Program p = new Program();
           List<char> result = p.FindMostFrequentlyUsedChars("hjkluuiuikjk");

           Console.ReadKey();
       }

       /// <summary>
       /// Determines whether no character occurs more than once in <paramref name="s"/>,
       /// using one boolean flag per possible character code.
       /// Time is O(n) with n = s.Length; extra space is a fixed array of 256 flags.
       /// See <see cref="IsStringWithNoDuplicatedChar_Implemenation2"/> for a variant
       /// that uses less space.
       /// </summary>
       /// <param name="s">The string to inspect; every character code must be 0-255.</param>
       /// <returns><c>true</c> if all characters are distinct; <c>false</c> otherwise.</returns>
       /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
       /// <exception cref="ArgumentException">A character code above 255 is encountered.</exception>
       public bool IsStringWithNoDuplicatedChar_Implemenation1(string s)
       {
           if (s == null)
           {
               throw new ArgumentNullException("s");
           }

           bool[] flags = new bool[CharSetSize];
           foreach (char c in s)
           {
               // The flag table only covers codes 0-255; anything else is invalid input.
               if (c >= CharSetSize)
               {
                   throw new ArgumentException("string contains UNICODE char");
               }

               if (flags[c])
               {
                   return false;
               }
               flags[c] = true;
           }
           return true;
       }

       /// <summary>
       /// Bitmap variant of <see cref="IsStringWithNoDuplicatedChar_Implemenation1"/>:
       /// the 256 flags are packed into eight 32-bit words, one bit per character code.
       /// </summary>
       /// <param name="s">The string to inspect; every character code must be 0-255.</param>
       /// <returns><c>true</c> if all characters are distinct; <c>false</c> otherwise.</returns>
       /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
       /// <exception cref="ArgumentException">A character code above 255 is encountered.</exception>
       public bool IsStringWithNoDuplicatedChar_Implemenation2(string s)
       {
           if (s == null)
           {
               throw new ArgumentNullException("s");
           }

           // In C#, int is always System.Int32 regardless of the machine word size,
           // so 8 words * 32 bits cover exactly the 256 accepted codes.
           int[] flags = new int[CharSetSize / BitsPerWord];
           foreach (char c in s)
           {
               // The range check is still required: the bitmap has no slot for larger codes.
               if (c >= CharSetSize)
               {
                   throw new ArgumentException("string contains UNICODE char");
               }

               int index = c / BitsPerWord;
               int mask = 1 << (c % BitsPerWord);

               // Compare against zero, not "> 0": for bit 31 the mask is negative
               // (sign bit), so a "> 0" test would never see that bit as set.
               if ((flags[index] & mask) != 0)
               {
                   return false;
               }
               flags[index] |= mask;
           }
           return true;
       }

       /// <summary>
       /// Further-restricted variant: assumes every character is a lowercase letter
       /// a-z, so the 26 flags fit into a single 32-bit integer.
       /// </summary>
       /// <param name="s">The string to inspect; every character must be in 'a'..'z'.</param>
       /// <returns><c>true</c> if all characters are distinct; <c>false</c> otherwise.</returns>
       /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
       /// <exception cref="ArgumentException">A character outside 'a'..'z' is encountered.</exception>
       public bool IsStringWithNoDuplicatedChar_Implemenation3(string s)
       {
           if (s == null)
           {
               throw new ArgumentNullException("s");
           }

           int flag = 0;
           foreach (char c in s)
           {
               // Reject anything outside a-z; otherwise the shift below would map
               // unrelated characters onto the same bits and report false duplicates.
               if (c < 'a' || c > 'z')
               {
                   throw new ArgumentException("string contains char out of a-z");
               }

               int mask = 1 << (c - 'a');
               if ((flag & mask) != 0)
               {
                   return false;
               }
               flag |= mask;
           }
           return true;
       }

       /// <summary>
       /// Adds a length shortcut in front of
       /// <see cref="IsStringWithNoDuplicatedChar_Implemenation2"/>: with only 256
       /// accepted character codes, a string longer than 256 characters either
       /// contains a duplicate or contains invalid input, so it is answered with
       /// <c>false</c> without scanning. As a consequence, invalid characters in such
       /// long strings are not reported via an exception.
       /// </summary>
       /// <param name="s">The string to inspect.</param>
       /// <returns><c>true</c> if all characters are distinct; <c>false</c> otherwise.</returns>
       /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
       /// <exception cref="ArgumentException">
       /// The string has at most 256 characters and one of them has a code above 255.
       /// </exception>
       public bool IsStringWithNoDuplicatedChar_Implemenation4(string s)
       {
           if (s == null)
           {
               throw new ArgumentNullException("s");
           }

           if (s.Length > CharSetSize)
           {
               return false;
           }

           return IsStringWithNoDuplicatedChar_Implemenation2(s);
       }

       /// <summary>
       /// Finds the character that occurs most often in <paramref name="s"/>.
       /// When several characters share the highest count, the one that reached that
       /// count first while scanning left to right is returned; use
       /// <see cref="FindMostFrequentlyUsedChars"/> to get all of them.
       /// </summary>
       /// <param name="s">The string to inspect; every character code must be 0-255.</param>
       /// <returns>The most frequent character, or <c>null</c> for an empty string.</returns>
       /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
       /// <exception cref="ArgumentException">A character code above 255 is encountered.</exception>
       public char? FindMostFrequentlyUsedChar(string s)
       {
           if (s == null)
           {
               throw new ArgumentNullException("s");
           }

           int[] counters = new int[CharSetSize];
           int most_used_times = 0;
           char? most_used_char = null;
           foreach (char c in s)
           {
               // The counter table only covers codes 0-255.
               if (c >= CharSetSize)
               {
                   throw new ArgumentException("string contains UNICODE char");
               }

               counters[c]++;

               // Strictly greater: a later character that merely ties does not replace the leader.
               if (counters[c] > most_used_times)
               {
                   most_used_times = counters[c];
                   most_used_char = c;
               }
           }
           return most_used_char;
       }

       /// <summary>
       /// Finds every character that shares the highest occurrence count in
       /// <paramref name="s"/>, covering the tie case that
       /// <see cref="FindMostFrequentlyUsedChar"/> cannot report. Runs in a single
       /// pass over the string.
       /// </summary>
       /// <param name="s">The string to inspect; every character code must be 0-255.</param>
       /// <returns>
       /// The characters with the highest count, in the order in which they reached
       /// that count; an empty list for an empty string.
       /// </returns>
       /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
       /// <exception cref="ArgumentException">A character code above 255 is encountered.</exception>
       public List<char> FindMostFrequentlyUsedChars(string s)
       {
           if (s == null)
           {
               throw new ArgumentNullException("s");
           }

           List<char> most_used_chars = new List<char>();
           int[] counters = new int[CharSetSize];
           int most_used_times = 0;
           foreach (char c in s)
           {
               // The counter table only covers codes 0-255.
               if (c >= CharSetSize)
               {
                   throw new ArgumentException("string contains UNICODE char");
               }

               counters[c]++;

               if (counters[c] > most_used_times)
               {
                   // New record: every previously collected character is now behind.
                   most_used_chars.Clear();
                   most_used_times = counters[c];
                   most_used_chars.Add(c);
               }
               else if (counters[c] == most_used_times)
               {
                   // Tie with the current record: this character joins the leaders.
                   most_used_chars.Add(c);
               }
           }
           return most_used_chars;
       }
   }

posted @ 2013-12-24 20:34 Dance With Automation Views(255) Comments(0) 收藏举报

刷新页面返回顶部

Dance With Automation