秋·风

  博客园 :: 首页 :: 博问 :: 闪存 :: 新随笔 :: 联系 :: 订阅 订阅 :: 管理 ::

       原版的fpc/lazarus不支持中文变量、过程和函数,经debug发现,只需修改12个文件(fpcsrc7个文件:scanner.pas、sysstr.inc、parser.inc、options.pas、objcutil.pas、assemble.pas和dfmreader.pp,lazarus5个文件:keywordfunclists.pas、lresources.pp、lazstringutils.pas、propedits.pp和CustomCodeTool.pas,synhighlighterpas.pp)28处(原理很简单,将字符范围扩大)就能实现fpc/lazarus支持中文变量/过程/函数/中文控件名称,按以下步骤修改就可以,不过修改后的中文变量等会用红色显示
       让fpc/lazarus支持中文变量/过程/函数/中文控件名称给需要的人多个选择,不喜勿喷!
2023.2.21增加支持中文控件名称
2023.6.01 终于解决了中文显示红色的问题

测试环境:
FPC:3.2.2
lazarus:Lazarus 2.2.2、2.2.4和2.2.6
第一步:修改FPC源码:
修改fpcsrc/compiler/scanner.pas以下4个位置。 

1、第4054行:

    procedure tscannerfile.readstring;
      var
        i : longint;
        err : boolean;
      begin
        err:=false;
        i:=0;
        repeat
          case c of
            '_',
            '0'..'9',
            'A'..'Z' :
              begin
                if i<255 then
                 begin
将'A'..'Z':修改为:'A'..'Z',#$80..#255 :
          case c of
            '_',
            '0'..'9',
            'A'..'Z',#$80..#255 ://2022.11.26 LBZ
              begin
                if i<255 then

2、第4729行:

      { Save current token position, for EOF its already loaded }
        if c<>#26 then
          gettokenpos;

      { Check first for a identifier/keyword, this is 20+% faster (PFV) }
        if c in ['A'..'Z','a'..'z','_'] then
         begin
           readstring;
           token:=_ID;
           idtoken:=_ID;
         { keyword or any other known token,
           pattern is always uppercased }
           if (pattern[1]<>'_') and (length(pattern) in [tokenlenmin..tokenlenmax]) then
            begin
              low:=ord(tokenidx^[length(pattern),pattern[1]].first);
              high:=ord(tokenidx^[length(pattern),pattern[1]].last);
              while low<high do
               begin

改为:

        if (c in ['A'..'Z','a'..'z','_',#$80..#255]) then //2022.11.26 LBZ
         begin
           readstring;
           token:=_ID;
           idtoken:=_ID;

3、第4822行:

             '&' :
               begin
                 if [m_fpc,m_delphi] * current_settings.modeswitches <> [] then
                  begin
                    readnumber;
                    if length(pattern)=1 then
                      begin
                        { does really an identifier follow? }
                        if not (c in ['_','A'..'Z','a'..'z') then 
                          message2(scan_f_syn_expected,tokeninfo^[_ID].str,c);
                        readstring;
                        token:=_ID;
                        idtoken:=_ID;
                      end
                    else
                      token:=_INTCONST;
                    goto exit_label;
                  end
                 else if m_mac in current_settings.modeswitches then
                  begin
                    readchar;
                    token:=_AMPERSAND;
                    goto exit_label;
                  end
                 else
                  Illegal_Char(c);
               end;

改为:

 if not ((c in ['_','A'..'Z','a'..'z',#$80..#255])) then    //2022.11.26 LBZ
    message2(scan_f_syn_expected,tokeninfo^[_ID].str,c);

4、第5456行:

    function tscannerfile.readpreproc:ttoken;
      var
        low,high,mid: longint;
        optoken: ttoken;
      begin
         skipspace;
         case c of
           '_',
           'A'..'Z',
           'a'..'z':
             begin
               readstring;
               optoken:=_ID;
               if (pattern[1]<>'_') and (length(pattern) in [tokenlenmin..tokenlenmax]) then
                begin
                  low:=ord(tokenidx^[length(pattern),pattern[1]].first);
                  high:=ord(tokenidx^[length(pattern),pattern[1]].last);
                  while low<high do

改为:

         case c of
           '_',
           'A'..'Z',
           'a'..'z' , #$80..#FF://2022.11.26 LBZ
             begin
               readstring;
               optoken:=_ID;

打开\fpcsrc\compiler\assemble.pas

修改第1321行:

              '.','_',
              'A'..'Z',
              'a'..'z':
                begin
                  pstart:=p;
                  while not(p^ in [#0,' ','-','+']) do
                    inc(p);
                  len:=p-pstart;
                  if len>255 then
                    internalerror(200509187);
                  hs[0]:=chr(len);
                  move(pstart^,hs[1],len);
                  sym:=objdata.symbolref(hs);
                  { Second symbol? }

改为:

              'a'..'z',#$80..#$FF :

打开fpcsrc\compiler\objcutil.pas

      { no special characters other than ':'
      }
      for i:=0 to len-1 do
        if (value_str[i] = ':') then
          gotcolon:=true
        else if not(value_str[i] in ['_','A'..'Z','a'..'z','0'..'9',':']) then
          exit;

      { if there is at least one colon, the final character must
        also be a colon (in case it's only one character that is
        a colon, this was already checked before the above loop)
      }
      if gotcolon and

修改第96行:

        else if not(value_str[i] in ['_','A'..'Z','a'..'z','0'..'9',':',#$80..#$FF]) then

打开fpcsrc\compiler\options.pas

function is_identifier(const s: TCmdStr): boolean;
var
  i: longint;
begin
  result:=false;
  if (s='') or not (s[1] in ['A'..'Z','a'..'z','_']) then
    exit;
  for i:=2 to length(s) do
    if not (s[I] in ['A'..'Z','a'..'z','0'..'9','_']) then
      exit;
  result:=true;
end;

修改第181行:

  if (s='') or not (s[1] in ['A'..'Z','a'..'z','_',#$80..#$FF]) then

修改第184行:

    if not (s[I] in ['A'..'Z','a'..'z','0'..'9','_',#$80..#$FF]) then

修改第2714行:

  function GetName(var fn:TPathStr):TPathStr;
  var
    i : longint;
  begin
    i:=0;
    while (i<length(fn)) and (fn[i+1] in ['a'..'z','A'..'Z','0'..'9','_','-']) do
     inc(i);
    GetName:=Copy(fn,1,i);
    Delete(fn,1,i);
  end;

改为:

    while (i<length(fn)) and (fn[i+1] in ['a'..'z','A'..'Z','0'..'9','_','-',#$80..#$FF]) do

打开fpcsrc\rtl\objpas\sysutils\sysstr.inc
修改第815行

将:

function IsValidIdent(const Ident: string; AllowDots: Boolean = False; StrictDots: Boolean = False): Boolean;
const
  Alpha = ['A'..'Z', 'a'..'z', '_'];
  AlphaNum = Alpha + ['0'..'9'];
  Dot = '.';
var
  First: Boolean;
  I, Len: Integer;
begin
  Len := Length(Ident);

改为:

Alpha = ['A'..'Z', 'a'..'z', '_',#$80..#$FF];

打开fpcsrc\rtl\objpas\classes\parser.inc

修改第82行:

function TParser.IsAlpha: boolean; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
begin
  Result:=fBuf[fPos] in ['_','A'..'Z','a'..'z'];
end;

改为:

  Result:=fBuf[fPos] in ['_','A'..'Z','a'..'z',#$80..#$FF];

修改第444行:

function TParser.NextToken: Char;

begin
  SkipWhiteSpace;
  if fEofReached then
    HandleEof
  else
    case fBuf[fPos] of
      '_','A'..'Z','a'..'z': HandleAlphaNum;
      '$'                   : HandleHexNumber;
      '-'                   : HandleMinus;
      '0'..'9'              : HandleNumber;
      '''','#'              : HandleString;
      else
        HandleUnknown;
    end;
  Result:=fToken;
end;

改为:

      '_','A'..'Z','a'..'z',#$80..#$FF : HandleAlphaNum;

打开fpcsrc\packages\fcl-res\src\dfmreader.pp
修改第75行:

function TDfmResourceReader.IsAlpha: boolean;
begin
  Result:=pchar(fLine)[fLinePos] in ['_','A'..'Z','a'..'z'];
end;

 改为:

  Result:=pchar(fLine)[fLinePos] in ['_','A'..'Z','a'..'z',#$80..#$FF];

第二步:并重新编译fpcsrc源码

我用FPCUPdeluxe重新编译FPC,重新编译前需要在setup+设置FPC/Laz rebuild only打勾,然后点击Only FPC编译就可以。

第三步:修改lazarus源码
1、修改lazarus\components\codetools\keywordfunclists.pas
将第851、852行:

procedure InternalInit;
var
  c: char;
  w: word;
begin
  for c:=Low(char) to High(char) do begin
    case c of
    'a'..'z':CharToIHash[c]:=ord(c)-ord('a')+1;
    'A'..'Z':CharToIHash[c]:=ord(c)-ord('A')+1;
    else CharToIHash[c]:=ord(c);
    end;
    UpChars[c]:=upcase(c);
    IsLineEndChar[c]:=c in [#10,#13];
    IsSpaceChar[c]:=c in [#0..#32];
    IsIdentStartChar[c]:=c in ['a'..'z','A'..'Z','_'];
    IsIdentChar[c]:=c in ['a'..'z','A'..'Z','_','0'..'9'];
    IsDottedIdentChar[c]:=c in ['.','a'..'z','A'..'Z','_','0'..'9'];
    IsNumberChar[c]:=c in ['0'..'9'];
    IsNumberOrSepChar[c]:=c in ['0'..'9','_'];
    IsCommentStartChar[c]:=c in ['/','{','('];
    IsCommentEndChar[c]:=c in ['}',')',#13,#10];
    IsHexNumberChar[c]:=c in ['0'..'9','a'..'f','A'..'F'];
    IsOctNumberChar[c]:=c in ['0'..'7'];
    IsEqualOperatorStartChar[c]:=c in [':','+','-','/','*','<','>'];
    IsWordChar[c]:=c in ['a'..'z','A'..'Z'];
    IsNonWordChar[c]:=(c in [#0..#127]) and (not IsIdentChar[c]);
    IsAfterFloatPointChar[c]:=c in ['0'..'9','e','E'];
  end;

改为:

procedure InternalInit;
var
  c: char;
  w: word;
begin
  for c:=Low(char) to High(char) do begin
    case c of
    'a'..'z':CharToIHash[c]:=ord(c)-ord('a')+1;
    'A'..'Z':CharToIHash[c]:=ord(c)-ord('A')+1;
    else CharToIHash[c]:=ord(c);
    end;
    UpChars[c]:=upcase(c);
    IsLineEndChar[c]:=c in [#10,#13];
    IsSpaceChar[c]:=c in [#0..#32];
    IsIdentStartChar[c]:=c in ['a'..'z','A'..'Z','_',#$80..#$FF];//2022.02.19 LBZ
    IsIdentChar[c]:=c in ['a'..'z','A'..'Z','_','0'..'9',#$80..#$FF];//2023.02.19 LBZ
    IsDottedIdentChar[c]:=c in ['.','a'..'z','A'..'Z','_','0'..'9'];
    IsNumberChar[c]:=c in ['0'..'9'];
    IsCommentStartChar[c]:=c in ['/','{','('];
    IsCommentEndChar[c]:=c in ['}',')',#13,#10];
    IsHexNumberChar[c]:=c in ['0'..'9','a'..'f','A'..'F'];
    IsOctNumberChar[c]:=c in ['0'..'7'];
    IsEqualOperatorStartChar[c]:=c in [':','+','-','/','*','<','>'];
    IsWordChar[c]:=c in ['a'..'z','A'..'Z'];
    IsNonWordChar[c]:=(c in [#0..#127]) and (not IsIdentChar[c]);
    IsAfterFloatPointChar[c]:=c in ['0'..'9','e','E'];
  end;

2、修改lazarus\components\codetools\CustomCodeTool.pas
将1167行:

  CurPos.StartPos:=p-PChar(Src)+1;
  CurPos.EndPos:=CurPos.StartPos;
  // read atom
  c1:=p^;
  case c1 of
  #0: ;
  '_','A'..'Z','a'..'z':
    begin
      inc(p);
      while IsIdentChar[p^] do
        inc(p);
      CurPos.Flag:=cafWord;
      CurPos.EndPos:=p-PChar(Src)+1;
      case c1 of
      'e','E':
        if (CurPos.EndPos-CurPos.StartPos=3)

改为:

'_','A'..'Z','a'..'z',#128..#255://2023.2.19 LBZ

将1341~1365注释

{
  #192..#255:
    begin
      // read UTF8 character
      inc(p);
      if ((ord(c1) and %11100000) = %11000000) then begin
        // could be 2 byte character
        if (ord(p[0]) and %11000000) = %10000000 then
          inc(p);
      end
      else if ((ord(c1) and %11110000) = %11100000) then begin
        // could be 3 byte character
        if ((ord(p[0]) and %11000000) = %10000000)
        and ((ord(p[1]) and %11000000) = %10000000) then begin
          inc(p,2);
        end;
      end
      else if ((ord(c1) and %11111000) = %11110000) then begin
        // could be 4 byte character
        if ((ord(p[0]) and %11000000) = %10000000)
        and ((ord(p[1]) and %11000000) = %10000000)
        and ((ord(p[2]) and %11000000) = %10000000) then
          inc(p,3);
      end;
      CurPos.EndPos:=p-PChar(Src)+1;
    end;
}
    else

第1621行:

    LastAtoms.AddReverse(CurPos);
    exit;
  end;
  c2:=Src[CurPos.StartPos];
  case c2 of
    'A'..'Z','a'..'z':
      begin
        // identifier or keyword or hexnumber
        while (CurPos.StartPos>1) do begin
          if (IsIdentChar[Src[CurPos.StartPos-1]]) then
            dec(CurPos.StartPos)
          else begin
            case UpChars[Src[CurPos.StartPos-1]] of
            '@':
              if (CurPos.StartPos>2)
              and (Src[CurPos.StartPos-2]='@') then

 改为:

    'A'..'Z','a'..'z',#$80..#$FF:

3、修改lazarus\lcl\lresources.pp

第720行:

procedure TReaderUniqueNamer.OnSetName(Reader: TReader; Component: TComponent;
  var Name: string);
  
  procedure MakeValidIdentifier;
  var
    i: Integer;
  begin
    for i:=length(Name) downto 1 do
      if not (Name[i] in ['0'..'9','_','a'..'z','A'..'Z']) then
        System.Delete(Name,i,1);
    if (Name<>'') and (Name[1] in ['0'..'9']) then
      Name:='_'+Name;
  end;

 改为:

      if not (Name[i] in ['0'..'9','_','a'..'z','A'..'Z',#$80..#$FF]) then

 第1344行:

function FindLFMClassName(LFMStream:TStream):ansistring;
{ examples:
  object Form1: TForm1
  inherited AboutBox2: TAboutBox2

  -> the classname is the last word of the first line
}
var c:char;
  StartPos, EndPos: Int64;
begin
  Result:='';
  StartPos:=-1;
  c:=' ';
  // read till end of line
  repeat
    // remember last non identifier char position
    if (not (c in ['a'..'z','A'..'Z','0'..'9','_'])) then
      StartPos:=LFMStream.Position;
    if LFMStream.Read(c,1)<>1 then exit;
    if LFMStream.Position>1000 then exit;
  until c in [#10,#13];

改为:

    if (not (c in ['a'..'z','A'..'Z','0'..'9','_',#$80..#$FF])) then

第2039行:

procedure ReadLFMHeader(LFMStream: TStream;
  out LFMType, LFMComponentName, LFMClassName: String);
var
  c:char;
  Token: String;
begin
  { examples:
    object Form1: TForm1
    inherited AboutBox2: TAboutBox2
  }
  LFMComponentName:='';
  LFMClassName := '';
  LFMType := '';
  Token := '';
  while (LFMStream.Read(c,1)=1) and (LFMStream.Position<1000) do begin
    if c in ['a'..'z','A'..'Z','0'..'9','_'] then
      Token := Token + c
    else begin
      if Token<>'' then begin
        if LFMType = '' then
          LFMType := Token

改为:

    if c in ['a'..'z','A'..'Z','0'..'9','_',#$80..#$FF] then

第2083行:

procedure ReadLFMHeader(const LFMSource: string; out LFMType, LFMComponentName,
  LFMClassName: String);
var
  p: Integer;
  StartPos: LongInt;
begin
  { examples:
    object Form1: TForm1
    inherited AboutBox2: TAboutBox2

    - LFMType is the first word on the line
    - LFMComponentName is the second word
    - LFMClassName is the fourth token
  }

  // read first word => LFMType
  p:=1;
  while (p<=length(LFMSource))
  and (LFMSource[p] in ['a'..'z','A'..'Z','0'..'9','_']) do
    inc(p);
  LFMType:=copy(LFMSource,1,p-1);

  // read second word => LFMComponentName
  while (p<=length(LFMSource)) and (LFMSource[p] in [' ',#9]) do inc(p);
  StartPos:=p;
  while (p<=length(LFMSource))
  and (LFMSource[p] in ['a'..'z','A'..'Z','0'..'9','_']) do
    inc(p);
  LFMComponentName:=copy(LFMSource,StartPos,p-StartPos);

  // read third word => LFMClassName
  while (p<=length(LFMSource)) and (LFMSource[p] in [' ',#9,':']) do inc(p);
  StartPos:=p;
  while (p<=length(LFMSource))
  and (LFMSource[p] in ['a'..'z','A'..'Z','0'..'9','_']) do
    inc(p);
  LFMClassName:=copy(LFMSource,StartPos,p-StartPos);
end;

第2083行:

  and (LFMSource[p] in ['a'..'z','A'..'Z','0'..'9','_',#$80..#$FF]) do

第2091行:

  and (LFMSource[p] in ['a'..'z','A'..'Z','0'..'9','_',#$80..#$FF]) do

第2099行:

  and (LFMSource[p] in ['a'..'z','A'..'Z','0'..'9','_',#$80..#$FF]) do

第5465行:

function TUTF8Parser.IsAlpha: boolean; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
begin
  Result:=fBuf[fPos] in ['_','A'..'Z','a'..'z'];
end; 

改为:

  Result:=fBuf[fPos] in ['_','A'..'Z','a'..'z',#$80..#$FF];

第5777行:

function TUTF8Parser.NextToken: Char;

begin
  SkipWhiteSpace;
  if fEofReached then
    HandleEof
  else
    case fBuf[fPos] of
      '_','A'..'Z','a'..'z': HandleAlphaNum;
      '$'                   : HandleHexNumber;
      '-'                   : HandleMinus;
      '0'..'9'              : HandleNumber;
      '''','#'              : HandleString
      else
        HandleUnknown;
    end;
  Result:=fToken;
end;

改为:

      '_','A'..'Z','a'..'z',#$80..#$FF : HandleAlphaNum;

4、修改lazarus\components\lazutils\lazstringutils.pas
第1420行:

function LazIsValidIdent(const Ident: string; AllowDots: Boolean = False;
                         StrictDots: Boolean = False): Boolean;
// This is a copy of IsValidIdent from FPC 3.1.
// ToDo: Switch to using IsValidIdent from FPC 3.2 when it is the minimum requirement.
const
  Alpha = ['A'..'Z', 'a'..'z', '_'];
  AlphaNum = Alpha + ['0'..'9'];
var
  First: Boolean;
  I, Len: Integer;
begin
  Len := Length(Ident);

 

改为:

  Alpha = ['A'..'Z', 'a'..'z', '_',#$80..#$FF];

 5、lazarus\components\ideintf\propedits.pp
第4927行:

function TrimNonAscii(const Txt: String): String;
// ToDo: Find a similar function from FPC libs and use it instead.
var
  I: Integer;
begin
  Result := Txt;
  for I := Length(Result) downto 1 do
    if not (
            (Result[I] in ['a'..'z', 'A'..'Z', '_']) or
            (I > 1) and (Result[I] in ['0'..'9'])
           )
    then
      Delete(Result, I, 1);
end;

改为:

            (Result[I] in ['a'..'z', 'A'..'Z', '_',#$80..#$FF]) or

最后,处理中文显示红色的问题
打开lazarus\components\synedit\synhighlighterpas.pp
procedure MakeIdentTable的(第711行)

      '_', '0'..'9', 'a'..'z', 'A'..'Z': Identifiers[I] := True;

改为:

      '_', '0'..'9', 'a'..'z', 'A'..'Z',#$80..#$FF: Identifiers[I] := True;
procedure MakeIdentTable;
var
  I, J: Char;
begin
  for I := #0 to #255 do
  begin
    case I of
      '_', '0'..'9', 'a'..'z', 'A'..'Z',#$80..#$FF: Identifiers[I] := True;else Identifiers[I] := False;
    end;
    J := UpCase(I);
    case I of
      'a'..'z', 'A'..'Z': mHashTable[I] := Ord(J) - 64;
      '_': mHashTable[I] := 27; // after Z
      '0'..'9': mHashTable[I] := Ord(J) - 48 + 28; // after "_"
    else
      mHashTable[Char(I)] := 0;
    end;
    IsIntegerChar[I]:=(I in ['0'..'9', 'A'..'F', 'a'..'f']);
    IsNumberChar[I]:=(I in ['0'..'9']);
    IsSpaceChar[I]:=(I in [#1..#9, #11, #12, #14..#32]);
    IsUnderScoreOrNumberChar[I]:=(I in ['_','0'..'9']);
    IsLetterChar[I]:=(I in ['a'..'z','A'..'Z']);
  end;
end;

 

将procedure TSynPasSyn.MakeMethodTables(第2446行);

      'A'..'Z', 'a'..'z', '_':

改为:

      'A'..'Z', 'a'..'z', '_',#$80..#$FF:
procedure TSynPasSyn.MakeMethodTables;
var
  I: Char;
begin
  for I := #0 to #255 do
    case I of
      #0: fProcTable[I] := @NullProc;
      #10: fProcTable[I] := @LFProc;
      #13: fProcTable[I] := @CRProc;
      #1..#9, #11, #12, #14..#32:
        fProcTable[I] := @SpaceProc;
      '#': fProcTable[I] := @AsciiCharProc;
      '$': fProcTable[I] := @HexProc;
      '%': fProcTable[I] := @BinaryProc;
      '&': fProcTable[I] := @OctalProc;
      #39: fProcTable[I] := @StringProc;
      '0'..'9': fProcTable[I] := @NumberProc;
      'A'..'Z', 'a'..'z', '_',#$80..#$FF:
        fProcTable[I] := @IdentProc;
      '^': fProcTable[I] := @CaretProc;
      '{': fProcTable[I] := @BraceOpenProc;
      '}', '!', '"', '('..'/', ':'..'@', '[', ']', '\', '`', '~':
        begin
          case I of
            '(': fProcTable[I] := @RoundOpenProc;
            ')': fProcTable[I] := @RoundCloseProc;
            '[': fProcTable[I] := @SquareOpenProc;
            ']': fProcTable[I] := @SquareCloseProc;
            '=': fProcTable[I] := @EqualSignProc;
            '.': fProcTable[I] := @PointProc;
            ';': fProcTable[I] := @SemicolonProc;                                //mh 2000-10-08
            '/': fProcTable[I] := @SlashProc;
            ':': fProcTable[I] := @ColonProc;
            '>': fProcTable[I] := @GreaterProc;
            '<': fProcTable[I] := @LowerProc;
            '@': fProcTable[I] := @AddressOpProc;
          else
            fProcTable[I] := @SymbolProc;
          end;
        end;
    else
      fProcTable[I] := @UnknownProc;
    end;
end;

第四步:重新编译lazarus

最后重新编译lazarus就可以支持中文变量/过程/函数。

 第五:中文使用效果

2023.06.01已修正中文变量、过程和函数显示为红色的问题:

 

posted on 2022-12-03 09:43  秋·风  阅读(1281)  评论(4编辑  收藏  举报