代码
#region 判断Url参数是否UTF8编码
/// <summary>
/// Heuristically decides whether the percent-escaped bytes found in a URL are UTF-8 encoded.
/// </summary>
/// <param name="url">URL or query string to inspect; may be null or empty.</param>
/// <returns>
/// The result of <c>IsTextUTF8</c> applied to the bytes extracted by
/// <c>GetUrlCodingToBytes</c>; <c>false</c> when <paramref name="url"/> is null or empty.
/// </returns>
public static bool IsUTF8(string url)
{
    // Without any text there are no escaped bytes to classify; answer "not UTF-8"
    // (the same answer an empty string already produced) instead of failing on null.
    if (string.IsNullOrEmpty(url))
    {
        return false;
    }

    byte[] escapedBytes = GetUrlCodingToBytes(url);
    return IsTextUTF8(escapedBytes);
}
/// <summary>
/// Checks whether a byte sequence looks like UTF-8 text built from three-byte characters.
/// </summary>
/// <remarks>
/// This is intentionally narrower than a general UTF-8 validator: ASCII bytes are skipped,
/// every non-ASCII lead byte must open a three-byte sequence (bit pattern 1110xxxx), and
/// each such lead byte must be followed by two continuation bytes (10xxxxxx).
/// The buffer is only read; it is never modified.
/// </remarks>
/// <param name="buf">Bytes to inspect.</param>
/// <returns>
/// <c>true</c> when the buffer contains at least one non-ASCII byte and all non-ASCII bytes
/// form complete three-byte sequences; <c>false</c> otherwise, including for empty or
/// pure-ASCII input.
/// </returns>
private static bool IsTextUTF8(byte[] buf)
{
    int pendingContinuations = 0; // continuation bytes still owed to the current character
    bool allAscii = true;

    foreach (byte current in buf)
    {
        if ((current & 0x80) != 0)
        {
            allAscii = false;
        }

        if (pendingContinuations == 0)
        {
            if (current >= 0x80)
            {
                // Only a 1110xxxx lead byte (start of a three-byte sequence) is accepted.
                // Testing with a mask avoids shifting the caller's array in place.
                if ((current & 0xF0) != 0xE0)
                {
                    return false;
                }

                pendingContinuations = 2;
            }
        }
        else
        {
            // Inside a multi-byte character every byte must look like 10xxxxxx.
            if ((current & 0xC0) != 0x80)
            {
                return false;
            }

            pendingContinuations--;
        }
    }

    // A sequence cut off at the end of the buffer is invalid.
    if (pendingContinuations > 0)
    {
        return false;
    }

    // Pure ASCII (or empty) input gives no evidence of UTF-8.
    return !allAscii;
}
/// <summary>
/// Collects the bytes written as percent escapes (<c>%XX</c>) in a URL.
/// </summary>
/// <param name="url">URL text to scan; null is treated like an empty string.</param>
/// <returns>
/// The decoded bytes in order of appearance, or an empty array when the text contains
/// no well-formed percent escape.
/// </returns>
private static byte[] GetUrlCodingToBytes(string url)
{
    if (string.IsNullOrEmpty(url))
    {
        return new byte[0];
    }

    System.Collections.Generic.List<byte> decoded = new System.Collections.Generic.List<byte>();
    int position = 0;
    while (position < url.Length)
    {
        // A well-formed escape is '%' followed by exactly two hex digits. Anything else
        // (a literal '%', a truncated escape at the end of the text) is skipped rather
        // than raising FormatException.
        bool isEscape = url[position] == '%'
            && position + 2 < url.Length
            && System.Uri.IsHexDigit(url[position + 1])
            && System.Uri.IsHexDigit(url[position + 2]);

        if (isEscape)
        {
            int high = System.Uri.FromHex(url[position + 1]);
            int low = System.Uri.FromHex(url[position + 2]);
            decoded.Add((byte)((high << 4) | low));
            position += 3;
        }
        else
        {
            position++;
        }
    }

    return decoded.ToArray();
}
#endregion 判断Url参数是否UTF8编码
/// <summary>
/// Heuristically decides whether the percent-escaped bytes found in a URL are UTF-8 encoded.
/// </summary>
/// <param name="url">URL or query string to inspect; may be null or empty.</param>
/// <returns>
/// The result of <c>IsTextUTF8</c> applied to the bytes extracted by
/// <c>GetUrlCodingToBytes</c>; <c>false</c> when <paramref name="url"/> is null or empty.
/// </returns>
public static bool IsUTF8(string url)
{
    // Without any text there are no escaped bytes to classify; answer "not UTF-8"
    // (the same answer an empty string already produced) instead of failing on null.
    if (string.IsNullOrEmpty(url))
    {
        return false;
    }

    byte[] escapedBytes = GetUrlCodingToBytes(url);
    return IsTextUTF8(escapedBytes);
}
/// <summary>
/// Checks whether a byte sequence looks like UTF-8 text built from three-byte characters.
/// </summary>
/// <remarks>
/// This is intentionally narrower than a general UTF-8 validator: ASCII bytes are skipped,
/// every non-ASCII lead byte must open a three-byte sequence (bit pattern 1110xxxx), and
/// each such lead byte must be followed by two continuation bytes (10xxxxxx).
/// The buffer is only read; it is never modified.
/// </remarks>
/// <param name="buf">Bytes to inspect.</param>
/// <returns>
/// <c>true</c> when the buffer contains at least one non-ASCII byte and all non-ASCII bytes
/// form complete three-byte sequences; <c>false</c> otherwise, including for empty or
/// pure-ASCII input.
/// </returns>
private static bool IsTextUTF8(byte[] buf)
{
    int pendingContinuations = 0; // continuation bytes still owed to the current character
    bool allAscii = true;

    foreach (byte current in buf)
    {
        if ((current & 0x80) != 0)
        {
            allAscii = false;
        }

        if (pendingContinuations == 0)
        {
            if (current >= 0x80)
            {
                // Only a 1110xxxx lead byte (start of a three-byte sequence) is accepted.
                // Testing with a mask avoids shifting the caller's array in place.
                if ((current & 0xF0) != 0xE0)
                {
                    return false;
                }

                pendingContinuations = 2;
            }
        }
        else
        {
            // Inside a multi-byte character every byte must look like 10xxxxxx.
            if ((current & 0xC0) != 0x80)
            {
                return false;
            }

            pendingContinuations--;
        }
    }

    // A sequence cut off at the end of the buffer is invalid.
    if (pendingContinuations > 0)
    {
        return false;
    }

    // Pure ASCII (or empty) input gives no evidence of UTF-8.
    return !allAscii;
}
/// <summary>
/// Collects the bytes written as percent escapes (<c>%XX</c>) in a URL.
/// </summary>
/// <param name="url">URL text to scan; null is treated like an empty string.</param>
/// <returns>
/// The decoded bytes in order of appearance, or an empty array when the text contains
/// no well-formed percent escape.
/// </returns>
private static byte[] GetUrlCodingToBytes(string url)
{
    if (string.IsNullOrEmpty(url))
    {
        return new byte[0];
    }

    System.Collections.Generic.List<byte> decoded = new System.Collections.Generic.List<byte>();
    int position = 0;
    while (position < url.Length)
    {
        // A well-formed escape is '%' followed by exactly two hex digits. Anything else
        // (a literal '%', a truncated escape at the end of the text) is skipped rather
        // than raising FormatException.
        bool isEscape = url[position] == '%'
            && position + 2 < url.Length
            && System.Uri.IsHexDigit(url[position + 1])
            && System.Uri.IsHexDigit(url[position + 2]);

        if (isEscape)
        {
            int high = System.Uri.FromHex(url[position + 1]);
            int low = System.Uri.FromHex(url[position + 2]);
            decoded.Add((byte)((high << 4) | low));
            position += 3;
        }
        else
        {
            position++;
        }
    }

    return decoded.ToArray();
}
#endregion 判断Url参数是否UTF8编码
UTF-8编码规则参考
http://blog.csdn.net/sandyen/archive/2006/08/23/1108168.aspx
上面的代码最初是从网上找到的,但原版在大多数情况下无法正确识别。后来根据"常用汉字在 UTF-8 中编码为 3 个字节"这一原则
对代码做了修改,现在大部分情况下都能正确识别。