python 正则表达式的处理
1.基本用法
#!/usr/bin/env python
# coding=utf-8
# Basic usage of the `re` module: split, findall, search and sub.
import re

# example 1: split a string on runs of whitespace (spaces and tabs)
text = "fjsk test\t fjskd bar\t \ttest"
regex = re.compile(r'\s+')  # raw string, so the backslash reaches the regex engine
print(regex.split(text))

# example 2: locate e-mail-like substrings in a block of text
email = """ jfksdfasm@qq.com test@test.com.cn jfdskf@163.com jkmiao@yahoo.123com """
pattern = r'[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z.0-9]{2,6}'
# IGNORECASE lets the upper-case character classes match lower-case input.
regex = re.compile(pattern, flags=re.IGNORECASE)

# get all: findall() returns every non-overlapping match as a list of strings
print(regex.findall(email))

# get the first one: search() returns a match object, or None when nothing matches.
# NOTE: this used to search `text`, which contains no e-mail address, so the
# result was always None; the addresses live in `email`.
m = regex.search(email)
print(email[m.start():m.end()] if m is not None else None)

# replace: substitute every match with a fixed token
print(regex.sub('RECORD', email))
显示:
jkmiao@sysucis:~/workplace/python/test$ python regex.py ['fjsk', 'test', 'fjskd', 'bar', 'test'] ['jfksdfasm@qq.com', 'test@test.com.cn', 'jfdskf@163.com', 'jkmiao@yahoo.123com'] None RECORD RECORD RECORD RECORD
2. 分组,返回元组
# example 3: capture groups -- each match comes back as a tuple
# Uses `re` and `email` from the earlier examples.
# Groups: (user name)@(domain).(suffix). The user-name class now ends in '-'
# (it had '=' there), matching the other patterns so hyphenated names match.
pattern = r'([A-Z0-9._%+-]+)@([A-Z0-9.-]+)\.([A-Z.]{2,5})'
regex = re.compile(pattern, flags=re.IGNORECASE)

# match() anchors at the start of the string; the suffix group accepts at most
# five characters, so 'suffix' is captured as 'suffi'.
m = regex.match('name@domain.suffix')
print(m.groups())

# With groups in the pattern, findall() yields one tuple of groups per match.
print(regex.findall(email))
# output
('name', 'domain', 'suffi')
[('jfksdfasm', 'qq', 'com'), ('test', 'test.com', 'cn'), ('jfdskf', '163', 'com')]
3.给分组加名称,返回字典
# example 4: named groups -- groupdict() maps each group name to its text
# Uses `re` and `email` from the earlier examples.
# re.VERBOSE ignores whitespace inside the pattern, so it can be laid out
# one component per line.
# NOTE(review): the group name 'userame' looks like a typo for 'username';
# it is kept unchanged because it is the key returned by groupdict().
regex = re.compile(r"""
    (?P<userame>[A-Z0-9._%+-]+)
    @(?P<domain>[A-Z0-9.-]+)
    \.
    (?P<suffix>[A-Z0-9.]{2,4})
""", flags=re.IGNORECASE | re.VERBOSE)

m = regex.match("jkmaio@sysu.com")
print(m.groupdict())

# findall() still returns plain tuples, even when the groups are named.
print(regex.findall(email))
# output
jkmiao@sysucis:~/workplace/python/test$ python regex.py
{'domain': 'sysu', 'userame': 'jkmaio', 'suffix': 'com'}
[('jfksdfasm', 'qq', 'com'), ('test', 'test.com', 'cn'), ('jfdskf', '163', 'com'), ('jkmiao', 'yahoo', '123c')]
每天一小步,人生一大步!Good luck~