判断一个文本文件的编码类型是 Unicode、ANSI 还是 UTF-8

{ Shows the text encoding of a file, judged by its byte-order mark (BOM).

  The leading bytes of the file are compared against the known signatures and
  the outcome is displayed with ShowMessage:
    FF FE     -> 'unicode'
    FE FF     -> 'unicode big endian'
    EF BB BF  -> 'utf-8'
    otherwise -> 'ansi'
  An empty file produces no message at all.

  FileName: path of the file to inspect.
  Returns:  always the string '111'.
            NOTE(review): this looks like a placeholder value; it is kept
            unchanged so existing callers are not affected.
  Any exception raised while opening the file propagates to the caller. }
function  TForm22.SearchFileType(const FileName: string): string;
var
  ms: TFileStream;
  bom: array [0 .. 2] of Byte; // the longest signature checked here is 3 bytes
  got: Integer;                // number of bytes actually read
begin
  Result := '111';

  ms := TFileStream.Create(FileName, fmOpenRead or fmShareDenyNone);
  try
    // Read reports how many bytes were available, so short files are handled
    // without touching uninitialized memory.
    got := ms.Read(bom, SizeOf(bom));
  finally
    ms.Free; // released even if reading raises
  end;

  if got = 0 then
    Exit; // empty file: nothing to report

  if (got >= 2) and (bom[0] = $FF) and (bom[1] = $FE) then
    ShowMessage('unicode')
  else if (got >= 3) and (bom[0] = $EF) and (bom[1] = $BB) and (bom[2] = $BF) then
    ShowMessage('utf-8')
  else if (got >= 2) and (bom[0] = $FE) and (bom[1] = $FF) then
    ShowMessage('unicode big endian')
  else
    ShowMessage('ansi');
end;

 

第二种方法

type
  { Text file formats that GetTextType can report. }
  TTextFormat = (tfAnsi, tfUnicode, tfUnicodeBigEndian, tfUtf8);

const
  { Two-byte signature associated with each format, indexed by TTextFormat. }
  TextFormatFlag: array [TTextFormat] of Word = (
    $0000,  // tfAnsi
    $FFFE,  // tfUnicode
    $FEFF,  // tfUnicodeBigEndian
    $EFBB   // tfUtf8
  );

{ Determines the text format of a file from its first two bytes.

  FileName: path of the file to inspect.
  Returns:  tfUnicode, tfUnicodeBigEndian or tfUtf8 when the first two bytes
            match the corresponding TextFormatFlag entry; tfAnsi otherwise,
            including when the file holds fewer than two bytes.
  Any exception raised while opening the file propagates to the caller. }
function TForm22.GetTextType(const FileName: string): TTextFormat;
var
  w: word;
  stream: TFileStream;
begin
  Result := TTextFormat.tfAnsi;

  stream := TFileStream.Create(FileName, fmOpenRead or fmShareDenyNone);
  try
    w := 0;
    // A file shorter than two bytes cannot carry any of the signatures.
    if stream.Read(w, SizeOf(w)) < SizeOf(w) then
      Exit;
  finally
    stream.Free;
  end;

  // Reading two bytes straight into a Word on a little-endian CPU puts the
  // first file byte in the low half (file bytes FF FE arrive as $FEFF).
  // The TextFormatFlag constants are written in file order, so the two halves
  // are exchanged before comparing. Swap replaces the former inline assembly.
  w := Swap(w);

  if w = TextFormatFlag[tfUnicode] then
    Result := TTextFormat.tfUnicode
  else if w = TextFormatFlag[tfUnicodeBigEndian] then
    Result := TTextFormat.tfUnicodeBigEndian
  else if w = TextFormatFlag[tfUtf8] then
    Result := TTextFormat.tfUtf8;
end;

第三种方法

LoadFromStream(getStream, AEncoding);  //自带的方法

{ Determines the encoding of a text file from its preamble (byte-order mark).

  Only as many leading bytes as the longest checked preamble are read; the
  rest of the file is never loaded.

  FileName: path of the file to inspect.
  Returns:  TEncoding.Unicode, TEncoding.BigEndianUnicode or TEncoding.UTF8
            when the file starts with that encoding's preamble; otherwise
            TEncoding.Default (this includes empty files).
  Any exception raised while opening the file propagates to the caller. }
function TForm22.GetTextEncoding(const FileName: String): TEncoding;

  { True when Buffer begins with every byte of Signature. }
  function ContainsPreamble(const Buffer, Signature: TBytes): Boolean;
  var
    I: Integer;
  begin
    Result := Length(Buffer) >= Length(Signature);
    if Result then
      for I := 0 to High(Signature) do
        if Buffer[I] <> Signature[I] then
        begin
          Result := False;
          Break;
        end;
  end;

var
  ms: TFileStream;
  Utf16LeSig, Utf16BeSig, Utf8Sig: TBytes;
  HeadLen, Got: Integer;
  Buffer: TBytes;
begin
  Utf16LeSig := TEncoding.Unicode.GetPreamble;
  Utf16BeSig := TEncoding.BigEndianUnicode.GetPreamble;
  Utf8Sig := TEncoding.UTF8.GetPreamble;

  // Number of leading bytes needed: the longest of the three signatures.
  HeadLen := Length(Utf8Sig);
  if Length(Utf16LeSig) > HeadLen then
    HeadLen := Length(Utf16LeSig);
  if Length(Utf16BeSig) > HeadLen then
    HeadLen := Length(Utf16BeSig);

  SetLength(Buffer, HeadLen);
  Got := 0;
  // Same open mode as the other detection routines in this unit.
  ms := TFileStream.Create(FileName, fmOpenRead or fmShareDenyNone);
  try
    if HeadLen > 0 then
      Got := ms.Read(Buffer[0], HeadLen);
  finally
    ms.Free; // released even if reading raises
  end;
  // Keep only the bytes that were really read (the file may be shorter).
  SetLength(Buffer, Got);

  if ContainsPreamble(Buffer, Utf16LeSig) then
    Result := TEncoding.Unicode
  else if ContainsPreamble(Buffer, Utf16BeSig) then
    Result := TEncoding.BigEndianUnicode
  else if ContainsPreamble(Buffer, Utf8Sig) then
    Result := TEncoding.UTF8
  else
    Result := TEncoding.Default;
end;

posted @ 2012-10-17 10:20  邹晟  阅读(450)  评论(0)  编辑  收藏  举报