字符编码转换(内附类似 string.Format 的字符串格式化的具体实现)
下面的代码用来测试字符编码转换。
namespace CA_Encoding
{
    using System;
    using System.Text;

    /// <summary>
    /// Console demo: encodes a sample string with each of several encodings and
    /// then decodes the resulting bytes with every encoding in the same list,
    /// printing each result so that mismatched pairs can be inspected.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Runs the encode/decode cross test and prints the results to the console.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // 1. The encodings that take part in the cross test.
            Encoding[] candidates = new Encoding[]
            {
                Encoding.Unicode,
                Encoding.UTF8,
                Encoding.UTF7,
                Encoding.UTF32,
                Encoding.ASCII,
                Encoding.BigEndianUnicode,
                Encoding.Default
            };

            // 2. The sample text.
            // Alternative multilingual sample:
            // "我是一棵小草, I am a Grass, Unë jam një Gras(阿尔巴尼亚语), 私は、グラス午前。"
            string sample = "我是一棵小草(I am a Grass)";

            // 3. Show the sample text in blue, then restore the original color.
            ConsoleColor originalColor = Console.ForegroundColor;
            Console.ForegroundColor = ConsoleColor.Blue;
            Console.WriteLine("The text is : {0}", sample);
            Console.ForegroundColor = originalColor;

            // 4. Encode with each encoding, then decode those bytes with every encoding.
            foreach (Encoding source in candidates)
            {
                // Heading: the encoding used to produce the bytes.
                Console.ForegroundColor = ConsoleColor.Cyan;
                Console.WriteLine("{0}", source.EncodingName);

                byte[] encoded = source.GetBytes(sample);
                foreach (Encoding target in candidates)
                {
                    // Label in yellow; the decoded text itself in the original color.
                    Console.ForegroundColor = ConsoleColor.Yellow;
                    Console.Write("\t-> {0}:", target.EncodingName);
                    Console.ForegroundColor = originalColor;

                    string decoded = target.GetString(encoded);
                    Console.WriteLine("{0}", decoded);
                }
            }
        }
    }
}
49
2 {
3 using System;
4 using System.Text;
5
/// <summary>
/// Console demo: encodes a sample string with each of several encodings and
/// then decodes the resulting bytes with every encoding in the same list,
/// printing each result so that mismatched pairs can be inspected.
/// </summary>
class Program
{
    /// <summary>
    /// Runs the encode/decode cross test and prints the results to the console.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args)
    {
        // 1. The encodings that take part in the cross test.
        Encoding[] candidates = new Encoding[]
        {
            Encoding.Unicode,
            Encoding.UTF8,
            Encoding.UTF7,
            Encoding.UTF32,
            Encoding.ASCII,
            Encoding.BigEndianUnicode,
            Encoding.Default
        };

        // 2. The sample text.
        // Alternative multilingual sample:
        // "我是一棵小草, I am a Grass, Unë jam një Gras(阿尔巴尼亚语), 私は、グラス午前。"
        string sample = "我是一棵小草(I am a Grass)";

        // 3. Show the sample text in blue, then restore the original color.
        ConsoleColor originalColor = Console.ForegroundColor;
        Console.ForegroundColor = ConsoleColor.Blue;
        Console.WriteLine("The text is : {0}", sample);
        Console.ForegroundColor = originalColor;

        // 4. Encode with each encoding, then decode those bytes with every encoding.
        foreach (Encoding source in candidates)
        {
            // Heading: the encoding used to produce the bytes.
            Console.ForegroundColor = ConsoleColor.Cyan;
            Console.WriteLine("{0}", source.EncodingName);

            byte[] encoded = source.GetBytes(sample);
            foreach (Encoding target in candidates)
            {
                // Label in yellow; the decoded text itself in the original color.
                Console.ForegroundColor = ConsoleColor.Yellow;
                Console.Write("\t-> {0}:", target.EncodingName);
                Console.ForegroundColor = originalColor;

                string decoded = target.GetString(encoded);
                Console.WriteLine("{0}", decoded);
            }
        }
    }
}
48 }
49
下面则是以上代码的输出:
结论:
只要是编码不匹配的,或多或少都会出现问题。
另:
调用Windows API,可以查看当前系统语言设置。
下面的设置将影响代码的输出结果:
C++代码:
1 // Encoding.cpp : Defines the entry point for the console application.
2 //
3
4 #include "stdafx.h"
5 #include <iostream>
6 #include "windows.h"
7 #include <string>
8 // #include "mlang.h"
9
10 using namespace std;
11 int _tmain(int argc, _TCHAR* argv[])
12 {
13 LANGID langId = :: GetSystemDefaultLangID();
14 string displayName;
15 string defaultCharSet;
16 // http://msdn.microsoft.com/zh-cn/library/ms776260(en-us,VS.85).aspx
17 // http://msdn.microsoft.com/en-us/library/bb165625(VS.80).aspx
18 cout<<langId<<endl;
19 switch(langId){
20 case 0x804:
21 displayName = "Chinese (Simplified)";
22 defaultCharSet = "GB2312";
23 break;
24 }
25
26 cout << "DisplayName = " << displayName << ";" << endl << "Default CharSet = " << defaultCharSet << endl;
27 return 0;
28 }
29
30
2 //
3
4 #include "stdafx.h"
5 #include <iostream>
6 #include "windows.h"
7 #include <string>
8 // #include "mlang.h"
9
10 using namespace std;
11 int _tmain(int argc, _TCHAR* argv[])
12 {
13 LANGID langId = :: GetSystemDefaultLangID();
14 string displayName;
15 string defaultCharSet;
16 // http://msdn.microsoft.com/zh-cn/library/ms776260(en-us,VS.85).aspx
17 // http://msdn.microsoft.com/en-us/library/bb165625(VS.80).aspx
18 cout<<langId<<endl;
19 switch(langId){
20 case 0x804:
21 displayName = "Chinese (Simplified)";
22 defaultCharSet = "GB2312";
23 break;
24 }
25
26 cout << "DisplayName = " << displayName << ";" << endl << "Default CharSet = " << defaultCharSet << endl;
27 return 0;
28 }
29
30
最后,我们更换我们的C#代码,然后输出全部的Encoding(下载附件)
因为全部编码两两组合后的输出量很大,直接打印到命令行可能导致控制台卡死,所以这里改造代码,将结果写入文本文件。
namespace CA_Encoding
{
    using System;
    using System.Text;
    using System.IO;

    /// <summary>
    /// Encodes a sample string with every encoding returned by
    /// <c>Encoding.GetEncodings()</c>, decodes the resulting bytes with every
    /// encoding in turn, and appends all results to a dated log file
    /// (<c>logyyyyMMdd.txt</c>) instead of the console.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Runs the full encode/decode cross test and writes it to the log file.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // 1. Collect every encoding the runtime reports and resolve each
            //    Encoding object once, rather than on every pass of the inner loop.
            EncodingInfo[] encodingInfos = Encoding.GetEncodings();
            Encoding[] resolved = new Encoding[encodingInfos.Length];
            for (int i = 0; i < encodingInfos.Length; ++i)
            {
                resolved[i] = encodingInfos[i].GetEncoding();
            }

            // 2. The sample text.
            // Alternative multilingual sample:
            // "我是一棵小草, I am a Grass, Unë jam një Gras(阿尔巴尼亚语), 私は、グラス午前。"
            string text = "我是一棵小草(I am a Grass)";

            // 3. Output the text.
            WriteMeLine("The text is : {0}", text);

            // 4. Encode with each encoding, then decode those bytes with every encoding.
            for (int i = 0; i < resolved.Length; ++i)
            {
                WriteMeLine("{0},{1},{2},{3}",
                    resolved[i].EncodingName,
                    encodingInfos[i].CodePage,
                    encodingInfos[i].DisplayName,
                    encodingInfos[i].Name);

                byte[] textBytes = resolved[i].GetBytes(text);
                for (int j = 0; j < resolved.Length; ++j)
                {
                    WriteMe("\t-> {0}:", resolved[j].EncodingName);
                    string decodingText = resolved[j].GetString(textBytes);
                    WriteMeLine("{0}", decodingText);
                }
            }
        }

        /// <summary>
        /// Formats a message like <see cref="WriteMe"/> and appends "\r\n" to it.
        /// </summary>
        /// <param name="format">Format string with <c>{n}</c> placeholders.</param>
        /// <param name="args">Values substituted for the placeholders.</param>
        static void WriteMeLine(string format, params object[] args)
        {
            WriteMe(format + "\r\n", args);
        }

        /// <summary>
        /// Formats a message with a simplified string.Format-like substitution
        /// and appends it to the log file.
        /// </summary>
        /// <param name="format">Format string with <c>{n}</c> placeholders.</param>
        /// <param name="args">Values substituted for the placeholders.</param>
        static void WriteMe(string format, params object[] args)
        {
            WriteLog(FormatMessage(format, args));
        }

        /// <summary>
        /// Replaces every <c>{n}</c> placeholder whose <c>n</c> is a valid index
        /// into <paramref name="args"/> with the text of that argument.
        /// </summary>
        /// <remarks>
        /// Anything that is not a valid placeholder is copied through unchanged:
        /// non-numeric or out-of-range tokens (including negative indices) and a
        /// '{' that has no closing '}'. When no arguments are supplied the format
        /// string is returned as is.
        /// </remarks>
        /// <param name="format">Format string; null is treated as empty.</param>
        /// <param name="args">Substitution values; may be null or empty.</param>
        /// <returns>The formatted text (never null).</returns>
        static string FormatMessage(string format, object[] args)
        {
            if (string.IsNullOrEmpty(format))
            {
                return string.Empty;
            }
            if (args == null || args.Length == 0)
            {
                // Nothing to substitute: keep the text instead of dropping it.
                return format;
            }

            StringBuilder sb = new StringBuilder(format.Length);
            int position = 0;
            while (position < format.Length)
            {
                char current = format[position];
                if (current != '{')
                {
                    sb.Append(current);
                    ++position;
                    continue;
                }

                int close = format.IndexOf('}', position + 1);
                if (close < 0)
                {
                    // No closing brace anywhere after this point, so the rest of
                    // the string cannot contain a placeholder: copy it literally.
                    sb.Append(format, position, format.Length - position);
                    break;
                }

                string token = format.Substring(position + 1, close - position - 1);
                int index;
                if (int.TryParse(token, out index) && index >= 0 && index < args.Length)
                {
                    // Append(object) appends nothing for a null argument.
                    sb.Append(args[index]);
                }
                else
                {
                    // Not a usable placeholder: keep "{token}" verbatim.
                    sb.Append(format, position, close - position + 1);
                }
                position = close + 1;
            }
            return sb.ToString();
        }

        // Path of the log file; computed on the first write and reused afterwards.
        static string logFile = null;

        /// <summary>
        /// Appends <paramref name="msg"/> to the log file, creating the file if needed.
        /// </summary>
        /// <remarks>
        /// The file is written as UTF-8 so that characters produced by decoding
        /// with an arbitrary encoding are not replaced by '?' as they would be
        /// with the system ANSI code page. Logging is best effort: I/O failures
        /// are reported on standard error and do not stop the program.
        /// </remarks>
        /// <param name="msg">Text to append; null is ignored.</param>
        static void WriteLog(string msg)
        {
            if (msg == null)
            {
                return;
            }

            // Optional timestamp prefix, kept from the original for reference:
            // msg = string.Format("[{0}] {1}\r\n", System.DateTime.Now.ToString("yyyyMMdd HH:mm:ss.fff"), msg);

            try
            {
                if (string.IsNullOrEmpty(logFile))
                {
                    logFile = string.Format("log{0}.txt", System.DateTime.Now.ToString("yyyyMMdd"));
                }

                // Creates the file when it does not exist, appends otherwise,
                // and always closes the file handle.
                File.AppendAllText(logFile, msg, Encoding.UTF8);
            }
            catch (IOException ex)
            {
                ReportLogFailure(ex);
            }
            catch (UnauthorizedAccessException ex)
            {
                ReportLogFailure(ex);
            }
        }

        /// <summary>
        /// Reports a failed log write on standard error.
        /// </summary>
        /// <param name="ex">The exception raised by the failed write.</param>
        static void ReportLogFailure(Exception ex)
        {
            Console.Error.WriteLine("Failed to write log file '{0}': {1}", logFile, ex.Message);
        }
    }
}
158
2 {
3 using System;
4 using System.Text;
5 using System.IO;
6
/// <summary>
/// Encodes a sample string with every encoding returned by
/// <c>Encoding.GetEncodings()</c>, decodes the resulting bytes with every
/// encoding in turn, and appends all results to a dated log file
/// (<c>logyyyyMMdd.txt</c>) instead of the console.
/// </summary>
class Program
{
    /// <summary>
    /// Runs the full encode/decode cross test and writes it to the log file.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args)
    {
        // 1. Collect every encoding the runtime reports and resolve each
        //    Encoding object once, rather than on every pass of the inner loop.
        EncodingInfo[] encodingInfos = Encoding.GetEncodings();
        Encoding[] resolved = new Encoding[encodingInfos.Length];
        for (int i = 0; i < encodingInfos.Length; ++i)
        {
            resolved[i] = encodingInfos[i].GetEncoding();
        }

        // 2. The sample text.
        // Alternative multilingual sample:
        // "我是一棵小草, I am a Grass, Unë jam një Gras(阿尔巴尼亚语), 私は、グラス午前。"
        string text = "我是一棵小草(I am a Grass)";

        // 3. Output the text.
        WriteMeLine("The text is : {0}", text);

        // 4. Encode with each encoding, then decode those bytes with every encoding.
        for (int i = 0; i < resolved.Length; ++i)
        {
            WriteMeLine("{0},{1},{2},{3}",
                resolved[i].EncodingName,
                encodingInfos[i].CodePage,
                encodingInfos[i].DisplayName,
                encodingInfos[i].Name);

            byte[] textBytes = resolved[i].GetBytes(text);
            for (int j = 0; j < resolved.Length; ++j)
            {
                WriteMe("\t-> {0}:", resolved[j].EncodingName);
                string decodingText = resolved[j].GetString(textBytes);
                WriteMeLine("{0}", decodingText);
            }
        }
    }

    /// <summary>
    /// Formats a message like <see cref="WriteMe"/> and appends "\r\n" to it.
    /// </summary>
    /// <param name="format">Format string with <c>{n}</c> placeholders.</param>
    /// <param name="args">Values substituted for the placeholders.</param>
    static void WriteMeLine(string format, params object[] args)
    {
        WriteMe(format + "\r\n", args);
    }

    /// <summary>
    /// Formats a message with a simplified string.Format-like substitution
    /// and appends it to the log file.
    /// </summary>
    /// <param name="format">Format string with <c>{n}</c> placeholders.</param>
    /// <param name="args">Values substituted for the placeholders.</param>
    static void WriteMe(string format, params object[] args)
    {
        WriteLog(FormatMessage(format, args));
    }

    /// <summary>
    /// Replaces every <c>{n}</c> placeholder whose <c>n</c> is a valid index
    /// into <paramref name="args"/> with the text of that argument.
    /// </summary>
    /// <remarks>
    /// Anything that is not a valid placeholder is copied through unchanged:
    /// non-numeric or out-of-range tokens (including negative indices) and a
    /// '{' that has no closing '}'. When no arguments are supplied the format
    /// string is returned as is.
    /// </remarks>
    /// <param name="format">Format string; null is treated as empty.</param>
    /// <param name="args">Substitution values; may be null or empty.</param>
    /// <returns>The formatted text (never null).</returns>
    static string FormatMessage(string format, object[] args)
    {
        if (string.IsNullOrEmpty(format))
        {
            return string.Empty;
        }
        if (args == null || args.Length == 0)
        {
            // Nothing to substitute: keep the text instead of dropping it.
            return format;
        }

        StringBuilder sb = new StringBuilder(format.Length);
        int position = 0;
        while (position < format.Length)
        {
            char current = format[position];
            if (current != '{')
            {
                sb.Append(current);
                ++position;
                continue;
            }

            int close = format.IndexOf('}', position + 1);
            if (close < 0)
            {
                // No closing brace anywhere after this point, so the rest of
                // the string cannot contain a placeholder: copy it literally.
                sb.Append(format, position, format.Length - position);
                break;
            }

            string token = format.Substring(position + 1, close - position - 1);
            int index;
            if (int.TryParse(token, out index) && index >= 0 && index < args.Length)
            {
                // Append(object) appends nothing for a null argument.
                sb.Append(args[index]);
            }
            else
            {
                // Not a usable placeholder: keep "{token}" verbatim.
                sb.Append(format, position, close - position + 1);
            }
            position = close + 1;
        }
        return sb.ToString();
    }

    // Path of the log file; computed on the first write and reused afterwards.
    static string logFile = null;

    /// <summary>
    /// Appends <paramref name="msg"/> to the log file, creating the file if needed.
    /// </summary>
    /// <remarks>
    /// The file is written as UTF-8 so that characters produced by decoding
    /// with an arbitrary encoding are not replaced by '?' as they would be
    /// with the system ANSI code page. Logging is best effort: I/O failures
    /// are reported on standard error and do not stop the program.
    /// </remarks>
    /// <param name="msg">Text to append; null is ignored.</param>
    static void WriteLog(string msg)
    {
        if (msg == null)
        {
            return;
        }

        // Optional timestamp prefix, kept from the original for reference:
        // msg = string.Format("[{0}] {1}\r\n", System.DateTime.Now.ToString("yyyyMMdd HH:mm:ss.fff"), msg);

        try
        {
            if (string.IsNullOrEmpty(logFile))
            {
                logFile = string.Format("log{0}.txt", System.DateTime.Now.ToString("yyyyMMdd"));
            }

            // Creates the file when it does not exist, appends otherwise,
            // and always closes the file handle.
            File.AppendAllText(logFile, msg, Encoding.UTF8);
        }
        catch (IOException ex)
        {
            ReportLogFailure(ex);
        }
        catch (UnauthorizedAccessException ex)
        {
            ReportLogFailure(ex);
        }
    }

    /// <summary>
    /// Reports a failed log write on standard error.
    /// </summary>
    /// <param name="ex">The exception raised by the failed write.</param>
    static void ReportLogFailure(Exception ex)
    {
        Console.Error.WriteLine("Failed to write log file '{0}': {1}", logFile, ex.Message);
    }
}
157 }
158
See also:
1、《Language Identifier Constants and Strings》http://msdn.microsoft.com/zh-cn/library/dd318693(en-us,VS.85).aspx
2、《如何取得 charset》http://topic.csdn.net/u/20091129/05/a897fdb7-6792-41a1-b703-81e408fb430b.html
3、《Locale Identifier Constants and Strings》http://msdn.microsoft.com/zh-cn/library/ms776260(en-us,VS.85).aspx
Download:
posted on 2009-12-18 03:03 volnet(可以叫我大V) 阅读(1608) 评论(0) 编辑 收藏 举报