正则表达式-汉字的匹配方法
2013-07-27 23:26 江湖么名 阅读(2927) 评论(0) 编辑 收藏 举报
unicode : ([\u4e00-\u9fa5]+)
unicode : ([\u2E80-\u9FFF]+)
utf-8 : ([\x80-\xff]+)
# encoding: utf-8
"""Demonstrate three regex character classes for matching Chinese text.

Each section searches for ``digits + captured run + digits`` and prints the
whole match followed by the captured run (or ``None`` twice when nothing
matches).  The code runs unchanged on Python 2.6+ and Python 3.3+.
"""
import re


def _print_match(match, encoding=None):
    """Print group 0 and group 1 of *match*, then a blank separator.

    Args:
        match: The result of ``pattern.search(...)``; may be ``None``.
        encoding: When given, the matched values are byte strings and are
            decoded with this encoding before printing.
    """
    for group_index in (0, 1):
        if match is None:
            print(None)
            continue
        value = match.group(group_index)
        if encoding is not None:
            value = value.decode(encoding)
        print(value)
    # Printing '\n' emits two line breaks, which separates the sections.
    print('\n')


def main():
    """Run the four demonstration searches and print their results."""
    ascii_only = u'ab123kk123'
    chinese_text = u'ab123汉字123'

    # ([\u4e00-\u9fa5]+): the basic CJK Unified Ideographs range.
    # The pattern is a non-raw unicode literal on purpose: the Python parser
    # expands the \u escapes, which also works on Python 2 where the re
    # module itself does not understand \uXXXX.
    basic_cjk = re.compile(u'[0-9]+([\u4e00-\u9fa5]+)[0-9]+')
    _print_match(basic_cjk.search(ascii_only))      # no Chinese -> None
    _print_match(basic_cjk.search(chinese_text))

    # ([\x80-\xff]+): operates on UTF-8 *bytes*.  Every byte of a non-ASCII
    # character is >= 0x80, so this matches any run of non-ASCII bytes (not
    # only Chinese).  Both subject and pattern must be byte strings, so they
    # are spelled explicitly as bytes instead of relying on Python 2's
    # str-is-bytes behaviour.
    utf8_bytes = chinese_text.encode('utf-8')
    non_ascii_bytes = re.compile(b'[0-9]+([\x80-\xff]+)[0-9]+')
    _print_match(non_ascii_bytes.search(utf8_bytes), encoding='utf-8')

    # ([\u2E80-\u9FFF]+): a wider range that starts at the CJK radicals and
    # runs through the end of the CJK Unified Ideographs block, so it also
    # covers CJK punctuation, kana and related blocks in between.
    wide_cjk = re.compile(u'[0-9]+([\u2E80-\u9FFF]+)[0-9]+')
    _print_match(wide_cjk.search(chinese_text))


if __name__ == '__main__':
    main()