原版的fpc/lazarus不支持UTF8(中文)变量、过程和函数,经debug发现,只需修改12个文件(fpcsrc7个文件:scanner.pas、sysstr.inc、parser.inc、options.pas、objcutil.pas、assemble.pas和dfmreader.pp,lazarus5个文件:keywordfunclists.pas、lresources.pp、lazstringutils.pas、propedits.pp和CustomCodeTool.pas,synhighlighterpas.pp)28处(原理很简单,将字符范围扩大)就能实现fpc/lazarus支持UTF8(中文)变量/过程/函数/中文控件名称,按以下步骤修改就可以。
经过多次改进,fpc/lazarus支持UTF8(中文)变量/过程/函数/UTF8(中文)控件名称已相对完善。
中文变量等的功能给需要的人多个选择,不喜勿喷!
2024.10.10 【原创】修复lazarus for windows的debug模式为GDB时不能正确显示中文字符的问题
2024.10.09 【原创】今天终于解决lazarus debug模式为GDB时中文变量显示出错的问题
2024.09.23 lazarus使用拼音首字母实现中文变量等快速代码补全
2024.09.20 实现lazarus中文代码补全功能
2024.09.19 修正中文控件名称时IDE代码提示出错的Bug
2024.09.18 为lazarus的IDE代码提示增加中文变量等拼音首字母模糊搜索中文的功能
2024.09.13 修复lazarus断点时不能显示中文变量的值和未定义的中文变量在Messages显示乱码的2个Bug
2023.6.01 终于解决了中文显示红色的问题
2023.2.21 增加支持中文控件名称
测试环境:
FPC:3.2.2,3.3.1 trunk
lazarus:Lazarus 2.2.2、2.2.4、2.2.6、3.0、3.2、3.4和3.6
第一步:修改FPC源码:
修改fpcsrc\compiler\scanner.pas以下4个位置。
1、第4054行:
procedure tscannerfile.readstring; var i : longint; err : boolean; begin err:=false; i:=0; repeat case c of '_', '0'..'9', 'A'..'Z' : begin if i<255 then begin
将'A'..'Z':修改为:'A'..'Z',#$80..#255 :
case c of '_', '0'..'9', 'A'..'Z',#$80..#255 ://2022.11.26 LBZ begin if i<255 then
2、第4729行:
{ Save current token position, for EOF its already loaded } if c<>#26 then gettokenpos; { Check first for a identifier/keyword, this is 20+% faster (PFV) } if c in ['A'..'Z','a'..'z','_'] then begin readstring; token:=_ID; idtoken:=_ID; { keyword or any other known token, pattern is always uppercased } if (pattern[1]<>'_') and (length(pattern) in [tokenlenmin..tokenlenmax]) then begin low:=ord(tokenidx^[length(pattern),pattern[1]].first); high:=ord(tokenidx^[length(pattern),pattern[1]].last); while low<high do begin
改为:
if (c in ['A'..'Z','a'..'z','_',#$80..#255]) then //2022.11.26 LBZ begin readstring; token:=_ID; idtoken:=_ID;
3、第4822行:
将
'&' : begin if [m_fpc,m_delphi] * current_settings.modeswitches <> [] then begin readnumber; if length(pattern)=1 then begin { does really an identifier follow? } if not (c in ['_','A'..'Z','a'..'z') then message2(scan_f_syn_expected,tokeninfo^[_ID].str,c); readstring; token:=_ID; idtoken:=_ID; end else token:=_INTCONST; goto exit_label; end else if m_mac in current_settings.modeswitches then begin readchar; token:=_AMPERSAND; goto exit_label; end else Illegal_Char(c); end;
改为:
if not ((c in ['_','A'..'Z','a'..'z',#$80..#255])) then //2022.11.26 LBZ message2(scan_f_syn_expected,tokeninfo^[_ID].str,c);
4、第5456行:
将
function tscannerfile.readpreproc:ttoken; var low,high,mid: longint; optoken: ttoken; begin skipspace; case c of '_', 'A'..'Z', 'a'..'z': begin readstring; optoken:=_ID; if (pattern[1]<>'_') and (length(pattern) in [tokenlenmin..tokenlenmax]) then begin low:=ord(tokenidx^[length(pattern),pattern[1]].first); high:=ord(tokenidx^[length(pattern),pattern[1]].last); while low<high do
改为:
case c of '_', 'A'..'Z', 'a'..'z' , #$80..#FF://2022.11.26 LBZ begin readstring; optoken:=_ID;
打开\fpcsrc\compiler\assemble.pas
修改第1321行:
'.','_', 'A'..'Z', 'a'..'z': begin pstart:=p; while not(p^ in [#0,' ','-','+']) do inc(p); len:=p-pstart; if len>255 then internalerror(200509187); hs[0]:=chr(len); move(pstart^,hs[1],len); sym:=objdata.symbolref(hs); { Second symbol? }
改为:
'a'..'z',#$80..#$FF :
打开fpcsrc\compiler\objcutil.pas
{ no special characters other than ':' } for i:=0 to len-1 do if (value_str[i] = ':') then gotcolon:=true else if not(value_str[i] in ['_','A'..'Z','a'..'z','0'..'9',':']) then exit; { if there is at least one colon, the final character must also be a colon (in case it's only one character that is a colon, this was already checked before the above loop) } if gotcolon and
修改第96行:
else if not(value_str[i] in ['_','A'..'Z','a'..'z','0'..'9',':',#$80..#$FF]) then
打开fpcsrc\compiler\options.pas
function is_identifier(const s: TCmdStr): boolean; var i: longint; begin result:=false; if (s='') or not (s[1] in ['A'..'Z','a'..'z','_']) then exit; for i:=2 to length(s) do if not (s[I] in ['A'..'Z','a'..'z','0'..'9','_']) then exit; result:=true; end;
修改第181行:
if (s='') or not (s[1] in ['A'..'Z','a'..'z','_',#$80..#$FF]) then
修改第184行:
if not (s[I] in ['A'..'Z','a'..'z','0'..'9','_',#$80..#$FF]) then
修改第2714行:
function GetName(var fn:TPathStr):TPathStr; var i : longint; begin i:=0; while (i<length(fn)) and (fn[i+1] in ['a'..'z','A'..'Z','0'..'9','_','-']) do inc(i); GetName:=Copy(fn,1,i); Delete(fn,1,i); end;
改为:
while (i<length(fn)) and (fn[i+1] in ['a'..'z','A'..'Z','0'..'9','_','-',#$80..#$FF]) do
打开fpcsrc\rtl\objpas\sysutils\sysstr.inc
修改第815行
将:
function IsValidIdent(const Ident: string; AllowDots: Boolean = False; StrictDots: Boolean = False): Boolean;
const
Alpha = ['A'..'Z', 'a'..'z', '_'];
AlphaNum = Alpha + ['0'..'9'];
Dot = '.';
var
First: Boolean;
I, Len: Integer;
begin
Len := Length(Ident);
改为:
Alpha = ['A'..'Z', 'a'..'z', '_',#$80..#$FF];
打开fpcsrc\rtl\objpas\classes\parser.inc
修改第82行:
function TParser.IsAlpha: boolean; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE} begin Result:=fBuf[fPos] in ['_','A'..'Z','a'..'z']; end;
改为:
Result:=fBuf[fPos] in ['_','A'..'Z','a'..'z',#$80..#$FF];
修改第444行:
function TParser.NextToken: Char; begin SkipWhiteSpace; if fEofReached then HandleEof else case fBuf[fPos] of '_','A'..'Z','a'..'z': HandleAlphaNum; '$' : HandleHexNumber; '-' : HandleMinus; '0'..'9' : HandleNumber; '''','#' : HandleString; else HandleUnknown; end; Result:=fToken; end;
改为:
'_','A'..'Z','a'..'z',#$80..#$FF : HandleAlphaNum;
打开fpcsrc\packages\fcl-res\src\dfmreader.pp
修改第75行:
function TDfmResourceReader.IsAlpha: boolean; begin Result:=pchar(fLine)[fLinePos] in ['_','A'..'Z','a'..'z']; end;
改为:
Result:=pchar(fLine)[fLinePos] in ['_','A'..'Z','a'..'z',#$80..#$FF];
第二步:并重新编译fpcsrc源码
我用FPCUPdeluxe重新编译FPC,重新编译前需要在setup+设置FPC/Laz rebuild only打勾,然后点击Only FPC编译就可以。
第三步:修改lazarus源码
1、修改lazarus\components\codetools\keywordfunclists.pas
将第851、852行:
procedure InternalInit; var c: char; w: word; begin for c:=Low(char) to High(char) do begin case c of 'a'..'z':CharToIHash[c]:=ord(c)-ord('a')+1; 'A'..'Z':CharToIHash[c]:=ord(c)-ord('A')+1; else CharToIHash[c]:=ord(c); end; UpChars[c]:=upcase(c); IsLineEndChar[c]:=c in [#10,#13]; IsSpaceChar[c]:=c in [#0..#32]; IsIdentStartChar[c]:=c in ['a'..'z','A'..'Z','_']; IsIdentChar[c]:=c in ['a'..'z','A'..'Z','_','0'..'9']; IsDottedIdentChar[c]:=c in ['.','a'..'z','A'..'Z','_','0'..'9']; IsNumberChar[c]:=c in ['0'..'9']; IsNumberOrSepChar[c]:=c in ['0'..'9','_']; IsCommentStartChar[c]:=c in ['/','{','(']; IsCommentEndChar[c]:=c in ['}',')',#13,#10]; IsHexNumberChar[c]:=c in ['0'..'9','a'..'f','A'..'F']; IsOctNumberChar[c]:=c in ['0'..'7']; IsEqualOperatorStartChar[c]:=c in [':','+','-','/','*','<','>']; IsWordChar[c]:=c in ['a'..'z','A'..'Z']; IsNonWordChar[c]:=(c in [#0..#127]) and (not IsIdentChar[c]); IsAfterFloatPointChar[c]:=c in ['0'..'9','e','E']; end;
改为:
procedure InternalInit; var c: char; w: word; begin for c:=Low(char) to High(char) do begin case c of 'a'..'z':CharToIHash[c]:=ord(c)-ord('a')+1; 'A'..'Z':CharToIHash[c]:=ord(c)-ord('A')+1; else CharToIHash[c]:=ord(c); end; UpChars[c]:=upcase(c); IsLineEndChar[c]:=c in [#10,#13]; IsSpaceChar[c]:=c in [#0..#32]; IsIdentStartChar[c]:=c in ['a'..'z','A'..'Z','_',#$80..#$FF];//2022.02.19 LBZ IsIdentChar[c]:=c in ['a'..'z','A'..'Z','_','0'..'9',#$80..#$FF];//2023.02.19 LBZ IsDottedIdentChar[c]:=c in ['.','a'..'z','A'..'Z','_','0'..'9']; IsNumberChar[c]:=c in ['0'..'9']; IsCommentStartChar[c]:=c in ['/','{','(']; IsCommentEndChar[c]:=c in ['}',')',#13,#10]; IsHexNumberChar[c]:=c in ['0'..'9','a'..'f','A'..'F']; IsOctNumberChar[c]:=c in ['0'..'7']; IsEqualOperatorStartChar[c]:=c in [':','+','-','/','*','<','>']; IsWordChar[c]:=c in ['a'..'z','A'..'Z']; IsNonWordChar[c]:=(c in [#0..#127]) and (not IsIdentChar[c]); IsAfterFloatPointChar[c]:=c in ['0'..'9','e','E']; end;
2、修改lazarus\components\codetools\CustomCodeTool.pas
将1167行:
CurPos.StartPos:=p-PChar(Src)+1; CurPos.EndPos:=CurPos.StartPos; // read atom c1:=p^; case c1 of #0: ; '_','A'..'Z','a'..'z': begin inc(p); while IsIdentChar[p^] do inc(p); CurPos.Flag:=cafWord; CurPos.EndPos:=p-PChar(Src)+1; case c1 of 'e','E': if (CurPos.EndPos-CurPos.StartPos=3)
改为:
'_','A'..'Z','a'..'z',#128..#255://2023.2.19 LBZ
将1341~1365注释
{ #192..#255: begin // read UTF8 character inc(p); if ((ord(c1) and %11100000) = %11000000) then begin // could be 2 byte character if (ord(p[0]) and %11000000) = %10000000 then inc(p); end else if ((ord(c1) and %11110000) = %11100000) then begin // could be 3 byte character if ((ord(p[0]) and %11000000) = %10000000) and ((ord(p[1]) and %11000000) = %10000000) then begin inc(p,2); end; end else if ((ord(c1) and %11111000) = %11110000) then begin // could be 4 byte character if ((ord(p[0]) and %11000000) = %10000000) and ((ord(p[1]) and %11000000) = %10000000) and ((ord(p[2]) and %11000000) = %10000000) then inc(p,3); end; CurPos.EndPos:=p-PChar(Src)+1; end; } else
第1621行:
LastAtoms.AddReverse(CurPos); exit; end; c2:=Src[CurPos.StartPos]; case c2 of 'A'..'Z','a'..'z': begin // identifier or keyword or hexnumber while (CurPos.StartPos>1) do begin if (IsIdentChar[Src[CurPos.StartPos-1]]) then dec(CurPos.StartPos) else begin case UpChars[Src[CurPos.StartPos-1]] of '@': if (CurPos.StartPos>2) and (Src[CurPos.StartPos-2]='@') then
改为:
'A'..'Z','a'..'z',#$80..#$FF:
3、修改lazarus\lcl\lresources.pp
第720行:
procedure TReaderUniqueNamer.OnSetName(Reader: TReader; Component: TComponent; var Name: string); procedure MakeValidIdentifier; var i: Integer; begin for i:=length(Name) downto 1 do if not (Name[i] in ['0'..'9','_','a'..'z','A'..'Z']) then System.Delete(Name,i,1); if (Name<>'') and (Name[1] in ['0'..'9']) then Name:='_'+Name; end;
改为:
if not (Name[i] in ['0'..'9','_','a'..'z','A'..'Z',#$80..#$FF]) then
第1344行:
function FindLFMClassName(LFMStream:TStream):ansistring; { examples: object Form1: TForm1 inherited AboutBox2: TAboutBox2 -> the classname is the last word of the first line } var c:char; StartPos, EndPos: Int64; begin Result:=''; StartPos:=-1; c:=' '; // read till end of line repeat // remember last non identifier char position if (not (c in ['a'..'z','A'..'Z','0'..'9','_'])) then StartPos:=LFMStream.Position; if LFMStream.Read(c,1)<>1 then exit; if LFMStream.Position>1000 then exit; until c in [#10,#13];
改为:
if (not (c in ['a'..'z','A'..'Z','0'..'9','_',#$80..#$FF])) then
第2039行:
procedure ReadLFMHeader(LFMStream: TStream; out LFMType, LFMComponentName, LFMClassName: String); var c:char; Token: String; begin { examples: object Form1: TForm1 inherited AboutBox2: TAboutBox2 } LFMComponentName:=''; LFMClassName := ''; LFMType := ''; Token := ''; while (LFMStream.Read(c,1)=1) and (LFMStream.Position<1000) do begin if c in ['a'..'z','A'..'Z','0'..'9','_'] then Token := Token + c else begin if Token<>'' then begin if LFMType = '' then LFMType := Token
改为:
if c in ['a'..'z','A'..'Z','0'..'9','_',#$80..#$FF] then
第2083行:
procedure ReadLFMHeader(const LFMSource: string; out LFMType, LFMComponentName, LFMClassName: String); var p: Integer; StartPos: LongInt; begin { examples: object Form1: TForm1 inherited AboutBox2: TAboutBox2 - LFMType is the first word on the line - LFMComponentName is the second word - LFMClassName is the fourth token } // read first word => LFMType p:=1; while (p<=length(LFMSource)) and (LFMSource[p] in ['a'..'z','A'..'Z','0'..'9','_']) do inc(p); LFMType:=copy(LFMSource,1,p-1); // read second word => LFMComponentName while (p<=length(LFMSource)) and (LFMSource[p] in [' ',#9]) do inc(p); StartPos:=p; while (p<=length(LFMSource)) and (LFMSource[p] in ['a'..'z','A'..'Z','0'..'9','_']) do inc(p); LFMComponentName:=copy(LFMSource,StartPos,p-StartPos); // read third word => LFMClassName while (p<=length(LFMSource)) and (LFMSource[p] in [' ',#9,':']) do inc(p); StartPos:=p; while (p<=length(LFMSource)) and (LFMSource[p] in ['a'..'z','A'..'Z','0'..'9','_']) do inc(p); LFMClassName:=copy(LFMSource,StartPos,p-StartPos); end;
第2083行:
and (LFMSource[p] in ['a'..'z','A'..'Z','0'..'9','_',#$80..#$FF]) do
第2091行:
and (LFMSource[p] in ['a'..'z','A'..'Z','0'..'9','_',#$80..#$FF]) do
第2099行:
and (LFMSource[p] in ['a'..'z','A'..'Z','0'..'9','_',#$80..#$FF]) do
第5465行:
function TUTF8Parser.IsAlpha: boolean; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE} begin Result:=fBuf[fPos] in ['_','A'..'Z','a'..'z']; end;
改为:
Result:=fBuf[fPos] in ['_','A'..'Z','a'..'z',#$80..#$FF];
第5777行:
function TUTF8Parser.NextToken: Char; begin SkipWhiteSpace; if fEofReached then HandleEof else case fBuf[fPos] of '_','A'..'Z','a'..'z': HandleAlphaNum; '$' : HandleHexNumber; '-' : HandleMinus; '0'..'9' : HandleNumber; '''','#' : HandleString else HandleUnknown; end; Result:=fToken; end;
改为:
'_','A'..'Z','a'..'z',#$80..#$FF : HandleAlphaNum;
4、修改lazarus\components\lazutils\lazstringutils.pas
第1420行:
function LazIsValidIdent(const Ident: string; AllowDots: Boolean = False; StrictDots: Boolean = False): Boolean; // This is a copy of IsValidIdent from FPC 3.1. // ToDo: Switch to using IsValidIdent from FPC 3.2 when it is the minimum requirement. const Alpha = ['A'..'Z', 'a'..'z', '_']; AlphaNum = Alpha + ['0'..'9']; var First: Boolean; I, Len: Integer; begin Len := Length(Ident);
改为:
Alpha = ['A'..'Z', 'a'..'z', '_',#$80..#$FF];
5、lazarus\components\ideintf\propedits.pp
第4927行:
function TrimNonAscii(const Txt: String): String; // ToDo: Find a similar function from FPC libs and use it instead. var I: Integer; begin Result := Txt; for I := Length(Result) downto 1 do if not ( (Result[I] in ['a'..'z', 'A'..'Z', '_']) or (I > 1) and (Result[I] in ['0'..'9']) ) then Delete(Result, I, 1); end;
改为:
(Result[I] in ['a'..'z', 'A'..'Z', '_',#$80..#$FF]) or
最后,处理中文显示红色的问题
打开lazarus\components\synedit\synhighlighterpas.pp
将procedure MakeIdentTable的(第711行)
'_', '0'..'9', 'a'..'z', 'A'..'Z': Identifiers[I] := True;
改为:
'_', '0'..'9', 'a'..'z', 'A'..'Z',#$80..#$FF: Identifiers[I] := True;
procedure MakeIdentTable; var I, J: Char; begin for I := #0 to #255 do begin case I of '_', '0'..'9', 'a'..'z', 'A'..'Z',#$80..#$FF: Identifiers[I] := True;else Identifiers[I] := False; end; J := UpCase(I); case I of 'a'..'z', 'A'..'Z': mHashTable[I] := Ord(J) - 64; '_': mHashTable[I] := 27; // after Z '0'..'9': mHashTable[I] := Ord(J) - 48 + 28; // after "_" else mHashTable[Char(I)] := 0; end; IsIntegerChar[I]:=(I in ['0'..'9', 'A'..'F', 'a'..'f']); IsNumberChar[I]:=(I in ['0'..'9']); IsSpaceChar[I]:=(I in [#1..#9, #11, #12, #14..#32]); IsUnderScoreOrNumberChar[I]:=(I in ['_','0'..'9']); IsLetterChar[I]:=(I in ['a'..'z','A'..'Z']); end; end;
将procedure TSynPasSyn.MakeMethodTables(第2446行);
'A'..'Z', 'a'..'z', '_':
改为:
'A'..'Z', 'a'..'z', '_',#$80..#$FF:
procedure TSynPasSyn.MakeMethodTables; var I: Char; begin for I := #0 to #255 do case I of #0: fProcTable[I] := @NullProc; #10: fProcTable[I] := @LFProc; #13: fProcTable[I] := @CRProc; #1..#9, #11, #12, #14..#32: fProcTable[I] := @SpaceProc; '#': fProcTable[I] := @AsciiCharProc; '$': fProcTable[I] := @HexProc; '%': fProcTable[I] := @BinaryProc; '&': fProcTable[I] := @OctalProc; #39: fProcTable[I] := @StringProc; '0'..'9': fProcTable[I] := @NumberProc; 'A'..'Z', 'a'..'z', '_',#$80..#$FF: fProcTable[I] := @IdentProc; '^': fProcTable[I] := @CaretProc; '{': fProcTable[I] := @BraceOpenProc; '}', '!', '"', '('..'/', ':'..'@', '[', ']', '\', '`', '~': begin case I of '(': fProcTable[I] := @RoundOpenProc; ')': fProcTable[I] := @RoundCloseProc; '[': fProcTable[I] := @SquareOpenProc; ']': fProcTable[I] := @SquareCloseProc; '=': fProcTable[I] := @EqualSignProc; '.': fProcTable[I] := @PointProc; ';': fProcTable[I] := @SemicolonProc; //mh 2000-10-08 '/': fProcTable[I] := @SlashProc; ':': fProcTable[I] := @ColonProc; '>': fProcTable[I] := @GreaterProc; '<': fProcTable[I] := @LowerProc; '@': fProcTable[I] := @AddressOpProc; else fProcTable[I] := @SymbolProc; end; end; else fProcTable[I] := @UnknownProc; end; end;
第四步:重新编译lazarus
最后重新编译lazarus就可以支持中文变量/过程/函数。
第五:中文使用效果
2023.06.01已修正中文变量、过程和函数显示为红色的问题:
program project1; {$mode objfpc}{$H+} uses Classes, SysUtils; var 中文变量测试: string; カンスウ: string; begin 中文变量测试:='中文变量测试:Chinese Test!'; カンスウ:='カンスウ Test!'; writeln(utf8toansi(中文变量测试)); writeln(utf8toansi(カンスウ)); readln; end.