# Author: Sure Feng

"""Demonstrations of the re module: core functions, pattern syntax and flags.

Every example prints its result; the expected output is shown in the
comment next to (or right below) the call.  All patterns are written as raw
strings so that regex escapes are never parsed as Python string escapes.
"""

import re


# ---------------------------------------------------------------------------
# Most commonly used matching functions
# ---------------------------------------------------------------------------
# 1. re.match only matches at the beginning of the string; otherwise None.
print(re.match(r"s+", "asddf123fssa"))  # None
print(re.match(r".", "asddf123fssa"))  # <re.Match object; span=(0, 1), match='a'>

# 2. re.search scans the string from start to end and returns the first match.
print(re.search(r"s+", "asddf123fssa"))  # <re.Match object; span=(1, 2), match='s'>

# 3. re.findall returns every non-overlapping match as a list.
print(re.findall(r"s+", "asddf123fssa"))  # ['s', 'ss']

# 4. re.sub replaces the matches and returns a new string.
#    Signature: re.sub(pattern, repl, string, count=0, flags=0); count is the
#    maximum number of replacements, 0 (the default) replaces all of them.
#    count is passed by keyword because passing it positionally is deprecated
#    since Python 3.13.
print(re.sub(r"s+", "&", "asddf123fssa", count=1))  # a&ddf123fssa (stops after one replacement)


# ---------------------------------------------------------------------------
# Commonly used regular-expression syntax
# ---------------------------------------------------------------------------
# '.' matches any single character except "\n"; with the DOTALL flag it
# matches any character, including the newline.
print(re.search(r"v.", "asddf123fsvsa"))  # <re.Match object; span=(10, 12), match='vs'>

# '^' and '\A' match at the start of the string (not needed with re.match,
# which is already anchored at the start).
# With flags=re.MULTILINE, '^' also matches right after each newline, e.g.
# re.search(r"^a", "\nabc\neee", flags=re.MULTILINE).
print(re.search(r"^v.", "asddf123fsvsa"))  # None
print(re.search(r"\Av.", "asddf123fsvsa"))  # None

# '$' and '\Z' match at the end of the string.  With flags=re.MULTILINE, '$'
# also matches before each newline, e.g.
# re.search(r"foo$", "bfoo\nsdfsf", flags=re.MULTILINE).group().
print(re.search(r".a$", "asddf123fsvsa"))  # <re.Match object; span=(11, 13), match='sa'>
print(re.search(r".a\Z", "asddf123fsvsa"))  # <re.Match object; span=(11, 13), match='sa'>

# '*' matches the preceding character 0 or more times.
print(re.findall(r"ab*", "cabb3abcbbac"))  # ['abb', 'ab', 'a']; matches a / ab / abb / abbb ...

# '+' matches the preceding character 1 or more times, e.g.
# re.findall(r"ab+", "ab+cd+abb+bba") gives ['ab', 'abb'].
print(re.findall(r"ab+", "cabb3abcbbac"))  # ['abb', 'ab']; matches ab / abb / abbb ...

# '?' matches the preceding character 0 or 1 time.
print(re.findall(r"ab?", "cabb3abcbbac"))  # ['ab', 'ab', 'a']; matches a / ab

# '{m}' matches the preceding character exactly m times.
print(re.findall(r"ab{2}", "abb abc abbcbbb"))  # ['abb', 'abb']; matches abb only

# '{n,m}' matches the preceding character from n to m times.
print(re.findall(r"ab{1,3}", "abbb abc abbcbbb"))  # ['abbb', 'ab', 'abb']; matches ab / abb / abbb

# '|' matches either the expression on its left or the one on its right.
print(re.search(r"abc|ABC", "ABCBabcCD").group())  # ABC; group() returns the matched text

# '(...)' groups a sub-pattern.
print(re.search(r"(abc){2}a(123|456)c", "abcabca456c").group())  # abcabca456c

# '(?P<name>...)' is a named group; groupdict() maps each name to its text.
print(
    re.search(
        r"(?P<province>[0-9]{4})(?P<city>[0-9]{2})(?P<birthday>[0-9]{4})",
        "371481199306143242",
    ).groupdict()
)
# Result: {'province': '3714', 'city': '81', 'birthday': '1993'}


# '\d' matches a decimal digit (0-9)
# '\D' matches any character that is not a digit
# '\w' matches a word character: [A-Za-z0-9_] (note the underscore; for str
#      patterns other Unicode word characters match too unless re.ASCII is set)
# '\W' matches any character that is not a word character
# '\s' matches whitespace: space, \t, \n, \r, \f, \v
print(re.search(r"\s+", 'ab\tc1\n3').group())  # Result: '\t' (prints a literal tab)


# ---------------------------------------------------------------------------
# Flags
# ---------------------------------------------------------------------------
# re.I (re.IGNORECASE): case-insensitive matching (long name in parentheses,
# same below).
print(re.search(r"[a-z]+", "AFJa", flags=re.I))  # <re.Match object; span=(0, 4), match='AFJa'>

# re.M (re.MULTILINE): multi-line mode, changes the behaviour of '^' and '$'
# so that they also match at the start / end of every line.
print(re.search(r"^e.", "\nabc\neee", flags=re.M))  # <re.Match object; span=(5, 7), match='ee'>

# re.S (re.DOTALL): makes '.' match every character, including the newline.
print(re.search(r"v.+", "asddf123fsvsa\n", flags=re.S))  # <re.Match object; span=(10, 14), match='vsa\n'>
# Expected output of the script above (one result per line):
# None
# <re.Match object; span=(0, 1), match='a'>
# <re.Match object; span=(1, 2), match='s'>
# ['s', 'ss']
# a&ddf123fssa
# <re.Match object; span=(10, 12), match='vs'>
# None
# None
# <re.Match object; span=(11, 13), match='sa'>
# <re.Match object; span=(11, 13), match='sa'>
# ['abb', 'ab', 'a']
# ['abb', 'ab']
# ['ab', 'ab', 'a']
# ['abb', 'abb']
# ['abbb', 'ab', 'abb']
# ABC
# abcabca456c
# {'province': '3714', 'city': '81', 'birthday': '1993'}
# (a line containing only a tab character)
# <re.Match object; span=(0, 4), match='AFJa'>
# <re.Match object; span=(5, 7), match='ee'>
# <re.Match object; span=(10, 14), match='vsa\n'>