Delphi操作Unicode字符

===================================Delphi:===================================================
当前版本(2007)中的默认状态下, String 就是 AnsiString
在 Delphi 2009 中:
string = UnicodeString; (同样: PString = PUnicodeString;)
Char = WideChar; (同样: PChar = PWideChar;)

// Writes a string to a file through a TStrings list with an explicit encoding,
// reads it back with the same encoding, and shows the result in Memo2.
// NOTE(review): the procedure header has no name or parameter list in this excerpt,
// and `str`, `FilePath` and `Memo2` are not declared here - presumably they belong
// to the surrounding form unit; confirm before reusing this snippet.
procedure
var
List: TStrings;
begin
List := TStringList.Create;
List.Text := str;
// Save and reload with the same explicit encoding.
// NOTE(review): ASCII presumably cannot represent Chinese text - verify.
List.SaveToFile(FilePath, TEncoding.ASCII);
List.LoadFromFile(FilePath, TEncoding.ASCII); // alternative noted by the author: TEncoding.UTF8
Memo2.Lines := List;
// The list is freed without try/finally, so it is not released if a call above raises.
List.Free;
end;

请教下万老师:我发现delphi2009里用idhttp来获取网页,如果网页源码是utf8的话可以直接用htmsrc:=idhttp.get(url),而如果是gb码的话只能用stream方式。但是用stream取得的gb编码网页怎样把他转成utf呢?如果不转的话在非中文操作系统下会乱码。
{ Dumps the bytes of Memo1's text twice into Memo1: first encoded with code
  page 54936, then re-encoded as UTF-8, so the two encodings can be compared.

  Sender: the control that triggered the click (unused).

  Each byte is rendered with the '%2x ' pattern, as in the original snippet. }
procedure TForm1.Button1Click(Sender: TObject);
var
  stream1, stream2: TStringStream;
  b: Byte;
  bs: string;
begin
  stream2 := nil; // lets the finally block free it safely if creation never happens
  { First stream: Memo text encoded with code page 54936 }
  stream1 := TStringStream.Create(Memo1.Text, 54936);
  try
    { Show the hex bytes of the first stream in the Memo }
    bs := '';
    // Append the formatted byte instead of using the accumulated text as the
    // format pattern itself.
    for b in stream1.Bytes do bs := bs + Format('%2x ', [b]);
    Memo1.Lines.Add(bs);

    { Second stream: the same text, decoded from the first stream, as UTF-8 }
    stream2 := TStringStream.Create(stream1.DataString, TEncoding.UTF8);

    { Show the hex bytes of the second stream in the Memo }
    bs := '';
    for b in stream2.Bytes do bs := bs + Format('%2x ', [b]);
    Memo1.Lines.Add(bs);
  finally
    // Release both streams even when an exception interrupts the dump.
    stream2.Free;
    stream1.Free;
  end;
end;

 

{ Converts text to the hexadecimal form of its wide-character codes.

  text: the string to encode.
  Returns: for every WideChar of the text, its ordinal value written as four
  uppercase hex digits, all concatenated; an empty string for empty input. }
function Str_Gb2UniCode(text: string): String;
var
  wide: WideString;
  idx: Integer;
begin
  Result := '';
  wide := text; // implicit conversion to a wide string
  for idx := 1 to Length(wide) do
    Result := Result + Format('%4.4X', [Ord(wide[idx])]);
end;


{ Rebuilds text from a string of hexadecimal wide-character codes.

  text: concatenated hex groups, four digits per character.
  Returns: the characters whose ordinals are given by the groups.
  Raises: whatever StrToInt raises when a group is not valid hex. }
function Unicode_str(text: string):string;
var
  decoded: WideString;
  groupIdx, groupCount: Integer;
begin
  decoded := '';
  // Number of start offsets 1, 5, 9, ... that are strictly below Length(text).
  groupCount := (Length(text) + 2) div 4;
  for groupIdx := 0 to groupCount - 1 do
    decoded := decoded + Widechar(StrToInt('$' + Copy(text, groupIdx * 4 + 1, 4)));
  Result := decoded;
end;


{ Demonstrates the round trip between text and its hex code form.

  Sender: the control that triggered the click (unused).

  The sample literal was mis-decoded in the original snippet; it is restored to
  the two characters whose codes are $4E07 and $4E00, matching the expected
  output noted beside each call. }
procedure TForm1.Button1Click(Sender: TObject);
begin
  ShowMessage(Str_Gb2UniCode('万一')); // shows 4E074E00
  ShowMessage(Unicode_str('4E074E00')); // shows the two characters U+4E07 U+4E00
end;


Unicode和字符相互转化的函数
2009-11-09 14:52
// 将字符转化成Unicode
{ Converts a string to the hexadecimal form of its wide-character codes.

  Ansi: the text to encode.
  Returns: four uppercase hex digits per wide character (high byte first),
  concatenated; an empty string for empty input.

  The text is converted to a WideString and read one character at a time, so
  the result does not depend on whether Char is one or two bytes wide, and the
  input is not limited by a fixed-size buffer. }
function AnsiToUnicode(Ansi: string):string;
var
  ws: WideString;
  i: Integer;
begin
  Result := '';
  ws := Ansi;
  for i := 1 to Length(ws) do
    Result := Result + IntToHex(Ord(ws[i]), 4);
end;

// 将Unicode转化成字符
{ Parses a string of hexadecimal digits (without any prefix) into an Integer.

  AString: the hex digits.
  Returns: the numeric value.
  Raises: whatever StrToInt raises when the digits are not valid hex. }
function ReadHex(AString:string):integer;
const
  HexPrefix = '$';
begin
  Result := StrToInt(HexPrefix + AString);
end;

{ Rebuilds text from a string of hexadecimal wide-character codes.

  Unicode: concatenated groups of four hex digits, one group per character,
           high byte first.
  Returns: the decoded text; an empty string for empty input.
  Raises: whatever StrToInt raises when a group is not valid hex.

  The loop line of the original snippet was garbled in this copy; it is
  rebuilt here. Each group is decoded straight into a WideChar rather than
  being packed byte by byte into Char values, so the result does not depend on
  the size of Char. A trailing group of fewer than four digits is ignored. }
function UnicodeToAnsi(Unicode: string):string;
var
  ws: WideString;
  i: Integer;
begin
  ws := '';
  i := 1;
  while i + 3 <= Length(Unicode) do
  begin
    ws := ws + WideChar(StrToInt('$' + Copy(Unicode, i, 4)));
    i := i + 4;
  end;
  Result := ws;
end;


//WideChar 兼容了 AnsiChar 的 #0..#255; 但占用了 2 字节大小
//UniCode 字符 WideChar; 和 AnsiChar 不同, WideChar 是占 2 字节大小.
var
  c: WideChar; { WideChar ranges over #0..#65535, written in hex as #$0..#$FFFF }
begin
  { WideChar covers the AnsiChar range #0..#255, but each value occupies 2 bytes }
  c := #65;
  ShowMessage(c); { A }
  ShowMessage(IntToStr(Length(c))); { 1: the length in characters }
  ShowMessage(IntToStr(SizeOf(c))); { 2: the storage size in bytes }
end; { closing keyword was missing from the snippet }

Navigation: 问与答 >

汉字与多字节编码的转换


汉字与多字节编码的转换 - 回复 "不知道" 的问题

问题来源:

TEncoding.Default 编码中的十六进制字节 CE D2 C3 C7 如何转成汉字呢?

汉字为'我们';

--------------------------------------------------------------------------------
Delphi 2009 默认的编码是多字节编码(MBCS), Delphi 这样表示它: TEncoding.Default.

下面是多字节编码与汉字之间转换的例子:

--------------------------------------------------------------------------------

 


{汉字到多字节编码}

{ Shows the bytes of a two-character Chinese literal as encoded by
  TEncoding.Default, printed as space-separated hex.

  Sender: the control that triggered the click (unused).

  The declaration of `s` and the argument list of the Format call were
  truncated in the original snippet; both are restored here. }
procedure TForm1.Button1Click(Sender: TObject);
var
  stream: TStringStream;
  b: Byte;
  s: string;
begin
  stream := TStringStream.Create('我们', TEncoding.Default);
  try
    s := '';
    for b in stream.Bytes do s := Format('%s%x ', [s, b]);
    ShowMessage(s); {CE D2 C3 C7}
  finally
    // Release the stream even if formatting or the dialog raises.
    stream.Free;
  end;
end;

{多字节编码到汉字}

{ Fills a string stream with four fixed bytes and shows the stream's text.

  Sender: the control that triggered the click (unused). }
procedure TForm1.Button2Click(Sender: TObject);
const
  SampleBytes: array[0..3] of Byte = ($CE, $D2, $C3, $C7);
var
  buffer: TStringStream;
  idx: Integer;
begin
  buffer := TStringStream.Create;
  buffer.Size := Length(SampleBytes);
  // Copy the sample bytes into the stream one position at a time.
  for idx := Low(SampleBytes) to High(SampleBytes) do
    buffer.Bytes[idx] := SampleBytes[idx];
  ShowMessage(buffer.DataString);
  buffer.Free;
end;

 

{把多字节编码的字符串转换到汉字}

{ Decodes a string of hex byte pairs into a string stream and shows the
  stream's text.

  Sender: the control that triggered the click (unused).

  The original snippet used `str` without declaring it and had `1to` written
  as one token; both are corrected. A pair that is not valid hex is stored as
  0, as before (StrToIntDef with a default of 0). }
procedure TForm1.Button3Click(Sender: TObject);
var
  str: string;
  stream: TStringStream;
  i: Integer;
begin
  str := 'CED2C3C7';
  stream := TStringStream.Create;
  try
    stream.Size := Length(str) div 2;
    // Byte i comes from the two hex digits starting at string index 2*i + 1.
    for i := 0 to Length(str) div 2 - 1 do
      stream.Bytes[i] := StrToIntDef('$' + Copy(str, 2 * i + 1, 2), 0);
    ShowMessage(stream.DataString); { the two characters of the sample text }
  finally
    // Release the stream even if decoding or the dialog raises.
    stream.Free;
  end;
end;
end.


获取所有汉字与 Unicode 的对照表
{ Writes a "code=character" table for every code point from $4E00 to $9FA5
  to c:\temp\Unicode-Hz.txt, one entry per line.

  The List.Add call was truncated in the original snippet; it is completed so
  that each line is the '$'-prefixed hex code, '=', and the character. }
var
  w: WideString;
  i: Integer;
  s: string;
  List: TStringList;
begin
  List := TStringList.Create;
  try
    for i := $4e00 to $9fa5 do
    begin
      s := #36 + IntToHex(i,4); { #36 is the '$' character }
      w := WideChar(i);
      List.Add(s + '=' + w);
    end;

    List.SaveToFile('c:\temp\Unicode-Hz.txt');
  finally
    // Release the list even if building or saving the table raises.
    List.Free;
  end;
end;

汉字与 Unicode 转换
{感谢 robin(xuebin418@163.com)提供}

//转换

{ Converts text to the hexadecimal form of its wide-character codes.

  text: the string to encode.
  Returns: for every WideChar of the text, its ordinal value written as four
  uppercase hex digits, all concatenated; an empty string for empty input.

  The function header was garbled in the original snippet and one statement
  lacked its semicolon; both are restored. The name matches the call in the
  test snippet that follows this routine in the file. }
function Str_Gb2UniCode(text: string): String;
var
  i,len: Integer;
  cur: Integer;
  t: String;
  ws: WideString;
begin
  Result := '';
  ws := text;
  len := Length(ws);
  i := 1;
  while i <= len do
  begin
    cur := Ord(ws[i]);
    FmtStr(t,'%4.4X',[cur]);
    Result := Result + t;
    Inc(i);
  end;
end;

 

//恢复
{ Rebuilds text from a string of hexadecimal wide-character codes.

  text: concatenated hex groups, four digits per character.
  Returns: the characters whose ordinals are given by the groups.
  Raises: whatever StrToInt raises when a group is not valid hex.

  The `function` keyword was missing from the header in the original snippet. }
function Unicode_str(text: string):string;
var
  i,len: Integer;
  ws: WideString;
begin
  ws := '';
  i := 1;
  len := Length(text);
  while i < len do
  begin
    ws := ws + Widechar(StrToInt('$' + Copy(text,i,4)));
    i := i+4;
  end;
  Result := ws;
end;

 

//测试
{ Shows the hex code form of a sample text, then the text decoded from that
  hex form.

  Sender: the control that triggered the click (unused). }
procedure TForm1.Button1Click(Sender: TObject);
var
  hexForm, textForm: string;
begin
  hexForm := Str_Gb2UniCode('万一');
  ShowMessage(hexForm); // 4E074E00
  textForm := Unicode_str('4E074E00');
  ShowMessage(textForm); // the sample text again
end;

Windows API 中的字符串对应着 Delphi 的 PChar(PAnsiChar); 在 API 中使用 Delphi 的字符串还是比较灵活的.
定长字符串不是 #0 结束的, 和 API 不好兼容, 一般不用于 API 中.
// Assignment method 1: pass a string literal directly to the API call

begin
SetWindowText(Handle, '新标题');
end;

--------------------------------------------------------------------------------

 

// Assignment method 2: declare a variable of the type the API call takes
var
p: PChar;
begin
p := '新标题'; // the literal is assigned to the PChar variable
SetWindowText(Handle, p);
end;

--------------------------------------------------------------------------------

 

// Assignment method 3: cast a Delphi string to the type the API call takes
var
str: string;
begin
str := '新标题';
SetWindowText(Handle, PChar(str)); // explicit cast from string to PChar
end;

// Assignment method 4: use a character array
var
arr: array[0..255] of Char;
begin
arr := '新标题'; // the literal is copied into the fixed-size array
SetWindowText(Handle, arr);
end;

--------------------------------------------------------------------------------

 

posted @ 2011-11-05 14:17  findumars  Views(19285)  Comments(3)  Edit  收藏  举报