Lazarus 字符集转换 Utf8ToAnsi,UTF8ToWinCP,UTF8ToSys,UTF8ToConsole
由于 Lazarus 从 1.2 版开始默认字符集就是 UTF8,如果要让文本在系统中正常显示或保存,就必须对字符集进行转换。为此 Lazarus 提供了很多函数,即标题中列出的这几个。
那么这里面有什么关系呢?
UTF8ToSys 需要启用编译参数 -dDisableUTF8RTL 才会真正转码,否则返回的仍是 UTF8;转码时调用的还是 Utf8ToAnsi,只是事先做了基本的判断处理(NeedRTLAnsi 为真且字符串不是纯 ASCII 时才转换)。
对应代码
// Returns s in the system encoding.
// When UTF8_RTL is defined the input is handed back untouched. Otherwise the
// string is passed through UTF8ToAnsi, but only if NeedRTLAnsi is true and
// IsASCII(s) is false; in every other case the input is returned as-is.
function UTF8ToSys(const s: string): string;
begin
  // Default outcome for every path: the caller's string, unchanged.
  Result := s;
  {$IFNDEF UTF8_RTL}
  if NeedRTLAnsi and (not IsASCII(s)) then
    Result := UTF8ToAnsi(s);
  {$ENDIF}
end;
Utf8ToAnsi 是 Free Pascal 自带的函数,用于转码到 Ansi,声明在 ustringh.inc 文件里面(实现位于 ustrings.inc)
// Decodes the UTF-8 input with Utf8Decode and returns the decoded text
// cast to RawByteString.
function Utf8ToAnsi(const s : RawByteString) : RawByteString;{$ifdef SYSTEMINLINE}inline;{$endif}
var
  decoded: UnicodeString;
begin
  decoded := Utf8Decode(s);
  Result := RawByteString(decoded);
end;
// RawByteString declaration: with FPC_HAS_CPSTRING defined it is a distinct AnsiString type tagged with code page CP_NONE; otherwise it is a plain alias of ansistring.
{$ifdef FPC_HAS_CPSTRING} RawByteString = type AnsiString(CP_NONE); {$else FPC_HAS_CPSTRING} RawByteString = ansistring; {$endif FPC_HAS_CPSTRING}
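UTF8ToWinCP 是 Lazarus 针对 Windows 的功能封装:它用 WideCharToMultiByte 把字符编码转换为系统 ANSI 代码页(CP_ACP)的字节,然后调用 SetCodePage(…, CP_ACP, False) 只给结果字符串打上代码页标记,而不再触发自动的代码页转换。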
{$ifdef WinCe}
// Windows CE build: simply delegates to Utf8ToSys.
function UTF8ToWinCP(const s: string): string; inline;
begin
  Result := Utf8ToSys(s);
end;
{$else}
// Converts a UTF-8 string to the Windows ANSI code page (CP_ACP).
// The returned string is tagged with code page CP_ACP, except when
// UTF8Decode yields an empty string or the conversion produces no bytes.
function UTF8ToWinCP(const s: string): string;
var
  Wide: UnicodeString;
  ByteCount: LongInt;
begin
  Result := s;

  // When IsASCII reports true the bytes are kept as they are; only the
  // code page tag is updated.
  if IsASCII(Result) then
  begin
    {$ifdef FPC_HAS_CPSTRING}
    // Convert=False: retag only, so the RTL does not transcode the bytes.
    SetCodePage(RawByteString(Result), CP_ACP, False);
    {$endif}
    exit;
  end;

  // Nothing decoded: hand back the input untouched.
  Wide := UTF8Decode(s);
  if Wide = '' then
    exit;

  // First call, with no output buffer, asks for the required byte count.
  ByteCount := WideCharToMultiByte(CP_ACP, 0, PUnicodeChar(Wide), length(Wide),
    nil, 0, nil, nil);
  SetLength(Result, ByteCount);
  if ByteCount <= 0 then
    exit;

  // Second call writes the converted bytes straight into Result.
  WideCharToMultiByte(CP_ACP, 0, PUnicodeChar(Wide), length(Wide),
    @Result[1], length(Result), nil, nil);
  {$ifdef FPC_HAS_CPSTRING}
  // Convert=False: retag only, so the RTL does not transcode the bytes.
  SetCodePage(RawByteString(Result), CP_ACP, False);
  {$endif}
end;
{$endif not wince}
UTF8ToConsole 是 Lazarus 针对 Windows 的功能封装:先用 UTF8ToWinCP 转码,再通过 CharToOEM 转成控制台使用的 OEM 编码,最后把结果字符串的代码页标记为 CP_OEMCP。
{$ifdef WinCe}
// Converts UTF8 to console string (used by Write, WriteLn).
// Windows CE build: simply delegates to UTF8ToSys.
function UTF8ToConsole(const s: string): string;
begin
  Result := UTF8ToSys(s);
end;
{$else}
// Converts UTF8 to console string (used by Write, WriteLn).
// The input is first converted with UTF8ToWinCP (or UTF8ToSys when
// NO_CP_RTL is defined), then passed through CharToOEM. If CharToOEM
// fails, the intermediate string is returned. Unless NO_CP_RTL is defined,
// the result is tagged with code page CP_OEMCP.
function UTF8ToConsole(const s: string): string;
var
  OemBuf: PChar;
begin
  {$ifndef NO_CP_RTL}
  Result := UTF8ToWinCP(s);
  {$else NO_CP_RTL}
  Result := UTF8ToSys(s); // Kept for compatibility
  {$endif NO_CP_RTL}

  // Scratch buffer for the OEM text: one slot per source char plus the
  // terminating #0 (AllocMem zero-fills it).
  OemBuf := AllocMem((Length(Result) + 1) * SizeOf(Char));
  try
    if CharToOEM(PChar(Result), OemBuf) then
      Result := StrPas(OemBuf);
  finally
    // Release the buffer even if StrPas raises (e.g. out of memory);
    // previously the buffer leaked on that path.
    FreeMem(OemBuf);
  end;

  {$ifndef NO_CP_RTL}
  // Convert=False: retag only, the bytes are already in OEM encoding.
  SetCodePage(RawByteString(Result), CP_OEMCP, False);
  {$endif NO_CP_RTL}
end;
{$endif not WinCE}