delphi XE7 HttpEncode 编码问题
近期在做网址编码相关的工作,发现用 XE5 编译时一切正常,拿到 XE7 下结果就错了。百度、谷歌了一下,有人提出过同样的问题,但我没有找到答案;也许大家都没有碰到这个问题,也许都已经自己默默解决了。在此小记一下,方便后人,也方便自己查询。
例子 : 原字符 "过年"
httpencode('过年') 结果 :
XE5为 %B9%FD%C4%EA
XE7 调用相同函数的结果为 %E8%BF%87%E5%B9%B4
百思不得其解啊,折腾了很长时间,后来终于想到是不是 此函数 官方更新修改了,(没办法,人比较笨)
于是查看源码:
XE5 的 Web.HTTPApp 中(分 string 和 AnsiString 两个版本,我用 AnsiString 版本得到了期望的结果):
// Listing quoted from the XE5 Web.HTTPApp unit. The leading numbers (1, 2, 3, ...)
// are listing line numbers from the original paste, not part of the code.
//
// HTTPEncode (string overload): walks the input with a PChar until #0; characters
// whose ordinal is in NoConversion are copied, ' ' becomes '+', and every other
// character is written as '%' followed by Ord(Sp^) formatted with '%.2x'.
//
// HTTPDecode: the reverse mapping. '+' becomes ' ', '%%' becomes '%', and '%XX' is
// converted with StrToInt('$XX'). Raises EWebBrokerException when the input ends in
// the middle of an escape, and re-raises EConvertError with sInvalidURLEncodedChar
// when the two characters after '%' are not valid hex.
//
// HTTPEncode (AnsiString overload): same algorithm over PAnsiChar, so each byte of
// the AnsiString that is not in NoConversion and is not ' ' becomes one '%XX' triple.
1 function HTTPEncode(const AStr: string): string; 2 // The NoConversion set contains characters as specificed in RFC 1738 and 3 // should not be modified unless the standard changes. 4 const 5 NoConversion = [Ord('A')..Ord('Z'), Ord('a')..Ord('z'), Ord('*'), Ord('@'), 6 Ord('.'), Ord('_'), Ord('-'), Ord('0')..Ord('9'), Ord('$'), 7 Ord('!'), Ord(''''), Ord('('), Ord(')')]; 8 var 9 Sp, Rp: PChar; 10 begin 11 SetLength(Result, Length(AStr) * 3); 12 Sp := PChar(AStr); 13 Rp := PChar(Result); 14 while Sp^ <> #0 do 15 begin 16 if Ord(Sp^) in NoConversion then 17 Rp^ := Sp^ 18 else 19 if Sp^ = ' ' then 20 Rp^ := '+' 21 else 22 begin 23 FormatBuf(Rp, 3, string('%%%.2x'), 6, [Ord(Sp^)]); 24 Inc(Rp,2); 25 end; 26 Inc(Rp); 27 Inc(Sp); 28 end; 29 SetLength(Result, Rp - PChar(Result)); 30 end; 31 32 function HTTPDecode(const AStr: string): string; 33 var 34 Sp, Rp, Cp: PChar; 35 S: string; 36 begin 37 SetLength(Result, Length(AStr)); 38 Sp := PChar(AStr); 39 Rp := PChar(Result); 40 Cp := Sp; 41 try 42 while Sp^ <> #0 do 43 begin 44 case Sp^ of 45 '+': Rp^ := ' '; 46 '%': begin 47 // Look for an escaped % (%%) or %<hex> encoded character 48 Inc(Sp); 49 if Sp^ = '%' then 50 Rp^ := '%' 51 else 52 begin 53 Cp := Sp; 54 Inc(Sp); 55 if (Cp^ <> #0) and (Sp^ <> #0) then 56 begin 57 S := Char('$') + Cp^ + Sp^; 58 Rp^ := Char(StrToInt(string(S))); 59 end 60 else 61 raise EWebBrokerException.CreateFmt(sErrorDecodingURLText, [Cp - PChar(AStr)]); 62 end; 63 end; 64 else 65 Rp^ := Sp^; 66 end; 67 Inc(Rp); 68 Inc(Sp); 69 end; 70 except 71 on E:EConvertError do 72 raise EConvertError.CreateFmt(sInvalidURLEncodedChar, 73 [Char('%') + Cp^ + Sp^, Cp - PChar(AStr)]) 74 end; 75 SetLength(Result, Rp - PChar(Result)); 76 end; 77 78 function HTTPEncode(const AStr: AnsiString): AnsiString; 79 // The NoConversion set contains characters as specificed in RFC 1738 and 80 // should not be modified unless the standard changes. 
81 const 82 NoConversion = ['A'..'Z','a'..'z','*','@','.','_','-', 83 '0'..'9','$','!','''','(',')']; 84 var 85 Sp, Rp: PAnsiChar; 86 begin 87 SetLength(Result, Length(AStr) * 3); 88 Sp := PAnsiChar(AStr); 89 Rp := PAnsiChar(Result); 90 while Sp^ <> #0 do 91 begin 92 if Sp^ in NoConversion then 93 Rp^ := Sp^ 94 else 95 if Sp^ = ' ' then 96 Rp^ := '+' 97 else 98 begin 99 System.AnsiStrings.FormatBuf(Rp^, 3, AnsiString('%%%.2x'), 6, [Ord(Sp^)]); 100 Inc(Rp,2); 101 end; 102 Inc(Rp); 103 Inc(Sp); 104 end; 105 SetLength(Result, Rp - PAnsiChar(Result)); 106 end;
在XE7中
web.httpapp:
// XE7 Web.HTTPApp: the string overload of HTTPEncode no longer encodes by itself;
// it forwards AStr to TNetEncoding.URL.Encode and returns that result unchanged.
function HTTPEncode(const AStr: string): string; begin Result := TNetEncoding.URL.Encode(AStr); end;
查看 system.netencoding
找到
// Byte-array overload of DoEncode: turns Input into a string with
// TEncoding.UTF8.GetString, passes it to the string overload of DoEncode, and
// returns the UTF-8 bytes of the encoded string.
function TNetEncoding.DoEncode(const Input: array of Byte): TBytes; begin Result := TEncoding.UTF8.GetBytes(DoEncode(TEncoding.UTF8.GetString(@Input[0]))); end;
查看类定义:
// TURLEncoding derives from TNetEncoding and overrides the string overloads of
// DoDecode and DoEncode; both are protected.
TURLEncoding = class(TNetEncoding) protected function DoDecode(const Input: string): string; overload; override; function DoEncode(const Input: string): string; overload; override; end;
查看函数代码:
// URL-encodes Input. Walks the string with a PChar until #0:
//   - characters whose ordinal is in NoConversion are copied unchanged;
//   - ' ' is written as '+';
//   - any other character below 128 is written as one '%XX' triple (AppendByte);
//   - characters at 128 or above are converted with TEncoding.UTF8.GetBytes and each
//     resulting byte is written as '%XX'.
// The result buffer is pre-sized to Length(Input) * 4 * 3 and trimmed at the end.
// NOTE(review): GetBytes is called with a single Char at a time, so a UTF-16
// surrogate pair is presumably converted as two separate code units rather than one
// 4-byte UTF-8 sequence -- confirm against the RTL behaviour.
function TURLEncoding.DoEncode(const Input: string): string; // The NoConversion set contains characters as specificed in RFC 1738 and // should not be modified unless the standard changes. const NoConversion = [Ord('A')..Ord('Z'), Ord('a')..Ord('z'), Ord('*'), Ord('@'), Ord('.'), Ord('_'), Ord('-'), Ord('0')..Ord('9'), Ord('$'), Ord('!'), Ord(''''), Ord('('), Ord(')')]; procedure AppendByte(B: Byte; var Buffer: PChar); const Hex = '0123456789ABCDEF'; begin Buffer[0] := '%'; Buffer[1] := Hex[B shr 4 + Low(string)]; Buffer[2] := Hex[B and $F + Low(string)]; Inc(Buffer, 3); end; var Sp, Rp: PChar; MultibyteChar: TBytes; I, ByteCount: Integer; begin // Characters that require more than 1 byte are translated as "percent-encoded byte" // which will be encoded with 3 chars per byte -> %XX // Example: ?character // Multibyte representation: C391 (2 bytes) // URL encode representation: %C3%91 // // So the worst case is 4 bytes(max) per Char, and 3 characters to represent each byte SetLength(Result, Length(Input) * 4 * 3); Sp := PChar(Input); Rp := PChar(Result); SetLength(MultibyteChar, 4); while Sp^ <> #0 do begin if Ord(Sp^) in NoConversion then begin Rp^ := Sp^; Inc(Rp) end else if Sp^ = ' ' then begin Rp^ := '+'; Inc(Rp) end else begin if (Ord(Sp^) < 128) then // Single byte char AppendByte(Ord(Sp^), Rp) else begin // Multi byte char ByteCount := TEncoding.UTF8.GetBytes([Sp^], 0, 1, MultibyteChar, 0); for I := 0 to ByteCount - 1 do AppendByte(MultibyteChar[I], Rp); end end; Inc(Sp); end; SetLength(Result, Rp - PChar(Result)); end;
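两者确实不同:XE5 的 AnsiString 版本直接对 AnsiString 中的字节(中文 Windows 下为系统 ANSI 代码页,即 GBK)逐字节做百分号编码,所以“过年”得到 %B9%FD%C4%EA;而 XE7 的 TURLEncoding.DoEncode 会先把每个非 ASCII 字符转换成 UTF-8 字节再编码,所以得到 %E8%BF%87%E5%B9%B4。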
目前的做法是自己建立一个函数,把 XE5 版本的代码复制到 XE7 里调用,从而得到期望的结果。
代码:
function MyHTTPEncode(const AStr: AnsiString): AnsiString;
// URL-encodes the raw bytes of AStr, one byte at a time.
//
//   - Bytes in NoConversion are copied unchanged.
//   - A space is written as '+'.
//   - Every other byte is written as '%' followed by two upper-case hex digits.
//
// Because the input is an AnsiString, the bytes that get encoded are whatever the
// string holds in its ANSI code page; a Unicode string passed by the caller is
// converted to AnsiString by the compiler before this function sees it.
//
// The whole string is processed by length, so an embedded #0 byte is encoded as
// '%00' instead of silently ending the output at that position.
//
// The NoConversion set contains characters as specified in RFC 1738 and
// should not be modified unless the standard changes.
const
  NoConversion = ['A'..'Z', 'a'..'z', '*', '@', '.', '_', '-',
                  '0'..'9', '$', '!', '''', '(', ')'];
  HexDigits: array[0..15] of AnsiChar = '0123456789ABCDEF';
var
  Src, Dst: PAnsiChar;
  I: Integer;
  B: Byte;
begin
  // Worst case: every input byte expands to a three-character '%XX' escape.
  SetLength(Result, Length(AStr) * 3);
  Src := PAnsiChar(AStr);
  Dst := PAnsiChar(Result);
  for I := 0 to Length(AStr) - 1 do
  begin
    if Src[I] in NoConversion then
    begin
      Dst^ := Src[I];
      Inc(Dst);
    end
    else if Src[I] = ' ' then
    begin
      Dst^ := '+';
      Inc(Dst);
    end
    else
    begin
      B := Ord(Src[I]);
      Dst[0] := '%';
      Dst[1] := HexDigits[B shr 4];
      Dst[2] := HexDigits[B and $0F];
      Inc(Dst, 3);
    end;
  end;
  // Trim the buffer down to the number of characters actually written.
  SetLength(Result, Dst - PAnsiChar(Result));
end;