/// <summary>
/// Character-frequency exercises: several ways to decide whether a string contains a
/// repeated character, and two ways to find the most frequently used character(s).
/// Unless stated otherwise, every algorithm assumes a single-byte character set
/// (character codes 0-255) and rejects anything outside it.
/// </summary>
class Program
{
    /// <summary>Number of distinct character codes the single-byte algorithms accept (0-255).</summary>
    private const int CharsetSize = 256;

    /// <summary>Number of flag bits stored in one <see cref="int"/> word.</summary>
    private const int BitsPerWord = 32;

    static void Main(string[] args)
    {
        Program p = new Program();

        // Demo call only: the result is not printed, inspect it in a debugger.
        List<char> result = p.FindMostFrequentlyUsedChars("hjkluuiuikjk");

        // Keep the console window open until a key is pressed.
        Console.ReadKey();
    }

    /// <summary>Throws when the input string is null.</summary>
    private static void EnsureNotNull(string s)
    {
        if (s == null)
        {
            throw new ArgumentNullException("s");
        }
    }

    /// <summary>Throws when the character code is outside the supported 0-255 range.</summary>
    private static void EnsureInCharset(char c)
    {
        if (c >= CharsetSize)
        {
            throw new ArgumentException("string contains UNICODE char");
        }
    }

    /// <summary>
    /// Determines whether a string is free of repeated characters, using one boolean
    /// flag per possible character code.
    /// Time is O(n) with n = s.Length; extra space is a fixed array of 256 flags.
    /// See <see cref="IsStringWithNoDuplicatedChar_Implemenation2"/> for a variant that
    /// uses less space.
    /// </summary>
    /// <param name="s">The string to inspect; every character code must be 255 or lower.</param>
    /// <returns><c>true</c> when no character occurs twice; otherwise <c>false</c>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// A character above code 255 is met before a duplicate is found.
    /// </exception>
    public bool IsStringWithNoDuplicatedChar_Implemenation1(string s)
    {
        EnsureNotNull(s);

        bool[] seen = new bool[CharsetSize];
        foreach (char c in s)
        {
            EnsureInCharset(c);

            if (seen[c])
            {
                return false;
            }
            seen[c] = true;
        }
        return true;
    }

    /// <summary>
    /// Bitmap variant of <see cref="IsStringWithNoDuplicatedChar_Implemenation1"/>:
    /// the 256 flags are packed into eight 32-bit words instead of 256 booleans.
    /// </summary>
    /// <param name="s">The string to inspect; every character code must be 255 or lower.</param>
    /// <returns><c>true</c> when no character occurs twice; otherwise <c>false</c>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// A character above code 255 is met before a duplicate is found.
    /// </exception>
    public bool IsStringWithNoDuplicatedChar_Implemenation2(string s)
    {
        EnsureNotNull(s);

        // In C# "int" is always an alias for System.Int32 (32 bits), on every platform.
        int[] flags = new int[CharsetSize / BitsPerWord];
        foreach (char c in s)
        {
            // The range check is still required: the bitmap only has room for codes 0-255.
            EnsureInCharset(c);

            int index = c / BitsPerWord;
            int mask = 1 << (c % BitsPerWord);

            // Compare with "!= 0", not "> 0": for bit 31 the mask is int.MinValue, so the
            // AND result is negative and a "> 0" test would miss the duplicate.
            if ((flags[index] & mask) != 0)
            {
                return false;
            }
            flags[index] |= mask;
        }
        return true;
    }

    /// <summary>
    /// Narrower variant that assumes every character is a lowercase letter a-z, so the
    /// 26 flags fit into a single 32-bit integer.
    /// </summary>
    /// <param name="s">The string to inspect; it may contain only the letters a-z.</param>
    /// <returns><c>true</c> when no letter occurs twice; otherwise <c>false</c>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// A character outside a-z is met before a duplicate is found.
    /// </exception>
    public bool IsStringWithNoDuplicatedChar_Implemenation3(string s)
    {
        EnsureNotNull(s);

        int flag = 0;
        foreach (char c in s)
        {
            // Reject anything outside a-z. Without this check the shift below would use a
            // negative or oversized count and alias unrelated characters onto letter bits.
            if (c < 'a' || c > 'z')
            {
                throw new ArgumentException("string contains char out of a-z");
            }

            int mask = 1 << (c - 'a');
            if ((flag & mask) != 0)
            {
                return false;
            }
            flag |= mask;
        }
        return true;
    }

    /// <summary>
    /// Adds a pigeonhole shortcut on top of
    /// <see cref="IsStringWithNoDuplicatedChar_Implemenation2"/>: a string longer than the
    /// character set (256) either repeats a character or contains an unsupported one,
    /// so it is reported as "has duplicates" without being scanned.
    /// </summary>
    /// <param name="s">The string to inspect.</param>
    /// <returns>
    /// <c>false</c> when the string is longer than 256 characters or contains a repeated
    /// character; otherwise <c>true</c>.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// The string has at most 256 characters and a character above code 255 is met before
    /// a duplicate is found. Longer strings are not scanned, so they never raise this.
    /// </exception>
    public bool IsStringWithNoDuplicatedChar_Implemenation4(string s)
    {
        EnsureNotNull(s);

        if (s.Length > CharsetSize)
        {
            return false;
        }

        return IsStringWithNoDuplicatedChar_Implemenation2(s);
    }

    /// <summary>
    /// Finds the most frequently used character of a string in a single pass.
    /// When several characters share the highest count, the one that reached that count
    /// first while scanning left to right is returned; use
    /// <see cref="FindMostFrequentlyUsedChars"/> to get all of them.
    /// </summary>
    /// <param name="s">The string to inspect; every character code must be 255 or lower.</param>
    /// <returns>The most frequent character, or <c>null</c> for an empty string.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
    /// <exception cref="ArgumentException">The string contains a character above code 255.</exception>
    public char? FindMostFrequentlyUsedChar(string s)
    {
        EnsureNotNull(s);

        int[] counters = new int[CharsetSize];
        int highestCount = 0;
        char? winner = null;
        foreach (char c in s)
        {
            EnsureInCharset(c);

            counters[c]++;

            // Strictly greater: a later character that merely ties does not replace the winner.
            if (counters[c] > highestCount)
            {
                highestCount = counters[c];
                winner = c;
            }
        }
        return winner;
    }

    /// <summary>
    /// Finds every character that shares the highest occurrence count, covering the tie
    /// case that <see cref="FindMostFrequentlyUsedChar"/> cannot report.
    /// Still a single O(n) pass over the string.
    /// </summary>
    /// <param name="s">The string to inspect; every character code must be 255 or lower.</param>
    /// <returns>
    /// The characters with the highest count, in the order in which each reached that
    /// count; an empty list for an empty string.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
    /// <exception cref="ArgumentException">The string contains a character above code 255.</exception>
    public List<char> FindMostFrequentlyUsedChars(string s)
    {
        EnsureNotNull(s);

        List<char> leaders = new List<char>();
        int[] counters = new int[CharsetSize];
        int highestCount = 0;
        foreach (char c in s)
        {
            EnsureInCharset(c);

            counters[c]++;

            if (counters[c] > highestCount)
            {
                // A new record: every previous leader is now behind.
                leaders.Clear();
                highestCount = counters[c];
                leaders.Add(c);
            }
            else if (counters[c] == highestCount)
            {
                // This character has just caught up with the current record.
                leaders.Add(c);
            }
        }
        return leaders;
    }
}