Perl中文/unicode/utf8/GB2312之间的转换

在 Perl 中,中文/Unicode/UTF-8/GB2312 编码之间的转换是一件很让人头疼的事情。我总结了以下几种主要的转换方法,供大家参考。

# author: jiangyujie  
use utf8;  
use Encode;  
use URI::Escape;  
  
# Set the output record separator so that every print() below ends its
# output with a newline. printf() is not affected by this setting.
$\ = "\n";  
  
# Unicode escape (%uXXXX) -> UTF-8 bytes, shown as hex.
#
# unicode_escape_to_utf8_hex($escaped)
#   $escaped - text in which characters may be written as "%u" followed by
#              exactly four hex digits (e.g. '%u6536'); everything else is
#              passed through unchanged.
#   Returns the UTF-8 encoding of the unescaped text as an upper-case hex
#   string, two hex digits per byte.
sub unicode_escape_to_utf8_hex {
    my ($escaped) = @_;

    # Work on a copy so the caller's string is left untouched; chr(hex ...)
    # turns each four-digit escape into the character with that code point.
    ( my $chars = $escaped ) =~ s/%u([0-9a-fA-F]{4})/chr( hex $1 )/eg;

    return uc unpack 'H*', encode( 'utf8', $chars );
}

print unicode_escape_to_utf8_hex('%u6536');    # E694B6
  
# Unicode escape (%uXXXX) -> GB2312 bytes, shown as hex.
#
# unicode_escape_to_gb2312_hex($escaped)
#   $escaped - text in which characters may be written as "%u" followed by
#              exactly four hex digits (e.g. '%u6536'); everything else is
#              passed through unchanged.
#   Returns the GB2312 encoding of the unescaped text as an upper-case hex
#   string, two hex digits per byte.
#
# encode() is called without a CHECK argument, so a character that GB2312
# cannot represent is replaced by Encode's substitution character instead of
# raising an error.
sub unicode_escape_to_gb2312_hex {
    my ($escaped) = @_;

    # Work on a copy so the caller's string is left untouched.
    ( my $chars = $escaped ) =~ s/%u([0-9a-fA-F]{4})/chr( hex $1 )/eg;

    return uc unpack 'H*', encode( 'gb2312', $chars );
}

print unicode_escape_to_gb2312_hex('%u6536');    # CAD5
  
# Chinese text <-> URL-escaped UTF-8, and Chinese text -> Perl code points.
#
# Because of the "use utf8" at the top of the file, a Chinese string literal
# is already a decoded character string, so no utf8::decode() is needed
# before looking at its code points.
#
# NOTE(review): the sample literal is empty in this copy of the script, so the
# three demos below only print blank lines. Presumably the original Chinese
# sample text was lost -- put real text here to see useful output.
my $text = "";

# Chinese text -> URL-escaped UTF-8.
# uri_escape_utf8() encodes the characters as UTF-8 before escaping them;
# plain uri_escape() croaks on any character with a code above 255.
my $escaped = uri_escape_utf8($text);
print $escaped;

# URL-escaped UTF-8 -> Chinese text.
# Unescape the string produced above. uri_unescape() yields the UTF-8 bytes,
# which are printed as they are.
print uri_unescape($escaped);

# Chinese text -> Perl code points, in hex, space-separated on a single line.
print join ' ', map { sprintf '%x', ord } split //, $text;
  
# Chinese text -> standard \uXXXX escapes.
#
# text_to_unicode_escapes($text)
#   $text - a decoded character string (under "use utf8" a string literal
#           already is one, so it must NOT be passed through decode() again:
#           Encode croaks with "Cannot decode string with wide characters").
#   Returns one "\u" + lower-case hex code point per character, zero-padded
#   to at least four digits, concatenated without separators.
#
# NOTE(review): characters above U+FFFF come out with more than four hex
# digits; confirm whether the consumer expects surrogate pairs instead.
sub text_to_unicode_escapes {
    my ($text) = @_;
    return join '', map { sprintf '\\u%04x', ord } split //, $text;
}

# Printed with a single print() so the escapes stay on one line.
print text_to_unicode_escapes("汉语");    # \u6c49\u8bed
  
# Unicode escape (%uXXXX) -> readable Chinese (UTF-8 bytes).
#
# unicode_escape_to_utf8($escaped)
#   $escaped - text in which characters may be written as "%u" followed by
#              exactly four hex digits (e.g. '%u6536'); everything else is
#              passed through unchanged.
#   Returns the unescaped text encoded as a UTF-8 byte string, ready to be
#   printed to a UTF-8 terminal or file.
sub unicode_escape_to_utf8 {
    my ($escaped) = @_;

    # Work on a copy so the caller's string is left untouched.
    ( my $chars = $escaped ) =~ s/%u([0-9a-fA-F]{4})/chr( hex $1 )/eg;

    return encode( 'utf8', $chars );
}

print unicode_escape_to_utf8('%u6536');
  
# Perl character string -> readable Chinese.
# The string is written with \x{...} code point escapes; it is turned into
# UTF-8 bytes first, and those bytes are then printed.
my $unicode    = "\x{505c}\x{8f66}";
my $utf8_bytes = encode_utf8($unicode);
print $utf8_bytes;
posted @ 2012-07-25 13:12  deaconx  阅读(313)  评论(0)  编辑  收藏  举报