# Author: Sure Feng

"""Worked examples for the standard-library re module.

Every print call is followed by a comment showing what it prints.  All
patterns are raw strings so that regex escapes reach the regex engine
unchanged instead of being read as (invalid) string escapes.
"""

import re


# The most commonly used matching functions
# 1. re.match only matches at the very beginning of the string; if the
#    pattern does not match there, the result is None.
print(re.match(r"s+", "asddf123fssa"))  # None
print(re.match(r".", "asddf123fssa"))  # <re.Match object; span=(0, 1), match='a'>

# 2. re.search scans the string from start to end and returns the first match.
print(re.search(r"s+", "asddf123fssa"))  # <re.Match object; span=(1, 2), match='s'>

# 3. re.findall returns a list with every non-overlapping match.
print(re.findall(r"s+", "asddf123fssa"))  # ['s', 'ss']

# 4. re.sub replaces the matched text and returns a new string.
# re.sub(pattern, repl, string, count=0, flags=0): count is the maximum number
# of replacements; the default 0 replaces every match.  count is passed by
# keyword because passing it positionally is deprecated since Python 3.13.
print(re.sub(r"s+", "&", "asddf123fssa", count=1))  # a&ddf123fssa, stops after one replacement


# Commonly used regular-expression syntax
# '.' matches any single character except \n; with the DOTALL flag it matches
# any character, newline included.
print(re.search(r"v.", "asddf123fsvsa"))  # <re.Match object; span=(10, 12), match='vs'>

# '^' and '\A' match at the start of the string (redundant with re.match,
# which is already anchored there).
# With flags=re.MULTILINE, '^' also matches right after each newline, e.g.
# (r"^a", "\nabc\neee", flags=re.MULTILINE); '\A' is not affected by the flag.
print(re.search(r"^v.", "asddf123fsvsa"))  # None
print(re.search(r"\Av.", "asddf123fsvsa"))  # None

# '$' and '\Z' match at the end of the string ('$' also matches just before a
# trailing newline).  With MULTILINE, '$' matches at the end of every line:
# re.search("foo$", "bfoo\nsdfsf", flags=re.MULTILINE).group() also works.
print(re.search(r".a$", "asddf123fsvsa"))  # <re.Match object; span=(11, 13), match='sa'>
print(re.search(r".a\Z", "asddf123fsvsa"))  # <re.Match object; span=(11, 13), match='sa'>

# '*' matches the preceding character 0 or more times.
print(re.findall(r"ab*", "cabb3abcbbac"))  # ['abb', 'ab', 'a'], matches a/ab/abb/abbb...

# '+' matches the preceding character 1 or more times;
# re.findall("ab+", "ab+cd+abb+bba") gives ['ab', 'abb'].
print(re.findall(r"ab+", "cabb3abcbbac"))  # ['abb', 'ab'], matches ab/abb/abbb...

# '?' matches the preceding character 0 or 1 times.
print(re.findall(r"ab?", "cabb3abcbbac"))  # ['ab', 'ab', 'a'], matches a/ab

# '{m}' matches the preceding character exactly m times.
print(re.findall(r"ab{2}", "abb abc abbcbbb"))  # ['abb', 'abb'], matches abb only

# '{n,m}' matches the preceding character between n and m times.
print(re.findall(r"ab{1,3}", "abbb abc abbcbbb"))  # ['abbb', 'ab', 'abb'], matches ab/abb/abbb

# '|' matches either the expression on its left or the one on its right.
print(re.search(r"abc|ABC", "ABCBabcCD").group())  # ABC, group() returns the matched text

# '(...)' groups part of a pattern.
print(re.search(r"(abc){2}a(123|456)c", "abcabca456c").group())  # abcabca456c

# '(?P<name>...)' is a named group; groupdict() maps each name to its text.
print(
    re.search(
        r"(?P<province>[0-9]{4})(?P<city>[0-9]{2})(?P<birthday>[0-9]{4})",
        "371481199306143242",
    ).groupdict()
)
# Result: {'province': '3714', 'city': '81', 'birthday': '1993'}


# '\d' matches a decimal digit ([0-9]; for str patterns also other Unicode
#      digits unless re.ASCII is set)
# '\D' matches any character that is not a digit
# '\w' matches a word character ([A-Za-z0-9_]; for str patterns also other
#      Unicode word characters unless re.ASCII is set)
# '\W' matches any character that is not a word character
# '\s' matches whitespace: space, \t, \n, \r, \f, \v
print(re.search(r"\s+", 'ab\tc1\n3').group())  # result: '\t'

# re.I (re.IGNORECASE): ignore case (the long name is given in parentheses,
# same below).
print(re.search(r"[a-z]+", "AFJa", flags=re.I))  # <re.Match object; span=(0, 4), match='AFJa'>

# M (MULTILINE): multi-line mode, changes the behaviour of '^' and '$' so
# they also match at the start and end of every line.
print(re.search(r"^e.", "\nabc\neee", flags=re.M))  # <re.Match object; span=(5, 7), match='ee'>

# S (DOTALL): makes '.' match any character, including newline.
print(re.search(r"v.+", "asddf123fsvsa\n", flags=re.S))  # <re.Match object; span=(10, 14), match='vsa\n'>
 1 None
 2 <re.Match object; span=(0, 1), match='a'>
 3 <re.Match object; span=(1, 2), match='s'>
 4 ['s', 'ss']
 5 a&ddf123fssa
 6 <re.Match object; span=(10, 12), match='vs'>
 7 None
 8 None
 9 <re.Match object; span=(11, 13), match='sa'>
10 <re.Match object; span=(11, 13), match='sa'>
11 ['abb', 'ab', 'a']
12 ['abb', 'ab']
13 ['ab', 'ab', 'a']
14 ['abb', 'abb']
15 ['abbb', 'ab', 'abb']
16 ABC
17 abcabca456c
18 {'province': '3714', 'city': '81', 'birthday': '1993'}
19     
20 <re.Match object; span=(0, 4), match='AFJa'>
21 <re.Match object; span=(5, 7), match='ee'>
22 <re.Match object; span=(10, 14), match='vsa\n'>
answer

 

posted on 2018-10-04 15:19  sure_feng  阅读(123)  评论(0)  编辑  收藏  举报