re正则模块

  1 '.'     : 默认匹配除\n之外的任意字符
  2 '^'     : 匹配字符开头
  3 '$'     : 匹配字符结尾
  4 '*'     : 匹配 * 号前的字符 0 到多次
  5 '+'     : 匹配 + 号前的字符 1 到多次
  6 '?'     : 匹配 ? 号前一个字符 0 或 1 次
  7 '{m}'   : 匹配前一个字符 m 次
  8 '{n,m}' : 匹配前一个字符 n 到 m 次
  9 '\'     : 转义字符,使其后的特殊字符按字面匹配(如 \. 匹配 .),或引入特殊序列(如 \d)
 10 '(...)' : 分组匹配
 11 
 12 '\A'    : 匹配字符开头
 13 '\Z'    : 匹配字符结尾
 14 '\d'    : 匹配数字 0 - 9
 15 '\D'    : 匹配非数字
 16 '\w'    : 匹配 [A-Za-z0-9_]
 17 '\W'    : 匹配非 [A-Za-z0-9_]
 18 '\s'    : 匹配空白字符,如空格、\t、\n、\r
 19 
 20 re.match    --   从头开始匹配,匹配一次
 21 re.search   --   在整个字符串中查找,返回第一个匹配结果
 22 re.findall  --   匹配所有结果并将结果以列表形式返回
 23 re.split    --   以匹配到的字符用作列表分隔符
 24 re.sub      --   对匹配到的字符进行替换
 25 
 26 # 匹配字符或数字开头 1 次到多次
 27 import re
 28 >>> aa = re.match("\w+","inet 192.168.10.51  netmask 255.255.255.0  broadcast 192.168.10.255")
 29 >>> print(aa.group())
 30 inet
 31 # 匹配除 \n 的任意字符
 32 >>> aa = re.match(".","inet 192.168.10.51  netmask 255.255.255.0  broadcast 192.168.10.255")
 33 >>> print(aa.group())
 34 i
 35 
 36 # 匹配除 \n 的任意字符 1 次到多次
 37 >>> aa = re.match(".+","inet 192.168.10.51  netmask 255.255.255.0  broadcast 192.168.10.255")
 38 >>> print(aa.group())
 39 inet 192.168.10.51  netmask 255.255.255.0  broadcast 192.168.10.255
 40 
 41 # 匹配除 \n 的任意字符 0 次到多次
 42 >>> aa = re.match(".*","inet 192.168.10.51  netmask 255.255.255.0  broadcast 192.168.10.255")
 43 >>> print(aa.group())
 44 inet 192.168.10.51  netmask 255.255.255.0  broadcast 192.168.10.255
 45 
 46 # 匹配字母或数字 0 次到多次,文本内容以 % 开头时匹配到 0 次,返回空字符串(而不是 None)
 47 >>> aa = re.match("\w*","%inet 192.168.10.51  netmask 255.255.255.0  broadcast 192.168.10.255")
 48 >>> print(aa.group())
 49 >>>
 50 
 51 # 匹配字母或数字 1 次到多次,文本内容以 % 开头会匹配不到,re.match 返回 None,调用 group() 时程序会报错
 52 >>> aa = re.match("\w+","%inet 192.168.10.51  netmask 255.255.255.0  broadcast 192.168.10.255")
 53 >>> print(aa.group())
 54 Traceback (most recent call last):
 55   File "<stdin>", line 1, in <module>
 56 AttributeError: 'NoneType' object has no attribute 'group'
 57 
 58 # 匹配字母或数字 0 次或 1 次,文本内容以 % 开头时匹配到 0 次,返回空字符串(而不是 None)
 59 >>> aa = re.match("\w?","%inet 192.168.10.51  netmask 255.255.255.0  broadcast 192.168.10.255")
 60 >>> print(aa.group())
 61 
 62 # 匹配字符或数字开头的 0 次或 1 次,文本内容以字母开头
 63 >>> aa = re.match("\w?","inet 192.168.10.51  netmask 255.255.255.0  broadcast 192.168.10.255")
 64 >>> print(aa.group())
 65 i
 66 
 67 # 匹配字母或数字开头 3 次
 68 >>> aa = re.match("\w{3}","inet 192.168.10.51  netmask 255.255.255.0  broadcast 192.168.10.255")
 69 >>> print(aa.group())
 70 ine
 71 
 72 # 匹配字母或数字至少 1 次,最多 4 次;开头只有 4 个连续的字母或数字(inet),所以 {5,8} 匹配不到,会报错
 73 >>> aa = re.match("\w{1,4}","inet 192.168.10.51  netmask 255.255.255.0  broadcast 192.168.10.255")
 74 >>> print(aa.group())
 75 inet
 76 >>> aa = re.match("\w{5,8}","inet 192.168.10.51  netmask 255.255.255.0  broadcast 192.168.10.255")
 77 >>> print(aa.group())
 78 Traceback (most recent call last):
 79   File "<stdin>", line 1, in <module>
 80 AttributeError: 'NoneType' object has no attribute 'group'
 81 
 82 # 匹配字母 inet 或 INET 开头
 83 >>> aa = re.match("inet|INET","inet 192.168.10.51  netmask 255.255.255.0  broadcast 192.168.10.255")
 84 >>> print(aa.group())
 85 inet
 86 
 87 上面的语句都可以使用search来替换,如:
 88 >>> re.search("inet|INET","inet 192.168.10.51  netmask 255.255.255.0  broadcast 192.168.10.255").group()
 89 'inet'
 90 
 91 # 使用()组合匹配,组合匹配用groups
 92 >>> re.search("(\w{6}a(123|456)c)","abcabca456c").groups()
 93 ('abcabca456c', '456')
 94 >>>
 95 >>> re.search("(\w{5}a(123|456)c)","abcabca456c").groups()
 96 ('bcabca456c', '456')
 97 
 98 >>> re.search("(\d{2})(\d{2})(\d{2})(\d{4})","110119194910011234,'guoqingsheng'").groups()
 99 ('11', '01', '19', '1949')
100 
101 # 匹配数字开头数字结尾,$ 符号和 \Z
102 >>> re.search("^\d.*\d$","110119194910011234,guoqingsheng1").group()
103 '110119194910011234,guoqingsheng1'
104 >>> re.search("^\d.*\d\Z","110119194910011234,guoqingsheng1").group()
105 '110119194910011234,guoqingsheng1'
106 
107 # 分组采用列表和字典形式
108 >>> re.search("(?P<province>[0-9]{4})(?P<city>[0-9]{2})(?P<birthday>[0-9]{8})","110119194910011234").groups()
109 ('1101', '19', '19491001')
110 >>> re.search("(?P<province>[0-9]{4})(?P<city>[0-9]{2})(?P<birthday>[0-9]{8})","110119194910011234").groupdict()
111 {'province': '1101', 'city': '19', 'birthday': '19491001'}
112 
113 # 从文本对象中匹配 IP 地址
114 >>> re.search("\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}","inet 192.168.10.51  netmask 255.255.255.0  broadcast 192.168.10.255").group()
115 '192.168.10.51'
116 >>>
117 >>> re.search("(\d{1,3}\.){1,3}\d{1,3}","inet 192.168.10.51  netmask 255.255.255.0  broadcast 192.168.10.255").group()
118 '192.168.10.51'
119 
120 # 匹配所有的数字--findall
121 >>> re.findall("\d+","inet 192.168.10.51  netmask 255.255.255.0  broadcast 192.168.10.255")
122 ['192', '168', '10', '51', '255', '255', '255', '0', '192', '168', '10', '255']
123 
124 # 以数字为分隔符--split
125 >>> re.split("\d+","inet 192.168.10.51  netmask 255.255.255.0  broadcast 192.168.10.255")
126 ['inet ', '.', '.', '.', '  netmask ', '.', '.', '.', '  broadcast ', '.', '.', '.', '']
127 
128 # 替换--sub,查找docker(不区分大小写)并替换成container,加上count指定替换次数
129 >>> re.sub("docker","container","After successfully installing Docker and starting Docker",flags=re.I)
130 'After successfully installing container and starting container'
131 >>> re.sub("docker","container","After successfully installing Docker and starting Docker",flags=re.I,count=1)
132 'After successfully installing container and starting Docker'
133 
134 # 反斜杠匹配,python 普通字符串中需要使用 4 个 \ 来匹配一个反斜杠(使用原始字符串时写作 r"\\" 即可)
135 >>> re.split("\\\\",r"D:\PycharmProjects\second")
136 ['D:', 'PycharmProjects', 'second']
137 
138 # 忽略大小写-- flags=re.I
139 >>> re.findall("pycharm",r"D:\PycharmProjects\second",flags=re.I)
140 ['Pycharm']
141 
142 # 匹配包括换行符\n 在内的字符 -- flags=re.S
143 >>> re.search(".*","\nABDdlskwe403d21sf31dsf\nsfwii4f34ldf3d\nfewkorio133ld43",flags=re.S).group()
144 '\nABDdlskwe403d21sf31dsf\nsfwii4f34ldf3d\nfewkorio133ld43'
145 
146 # 取非 -- ^ 在中括号中是代表取非的意思
147 >>> a = "2 - 1 * ( 20 - 13 + ( 12/2+2-36/9) - 27 * ( (53 -18)/7) - (90 - 72)/9 ) + 70/(60 - 15 * 2) * 3"
148 >>> re.search(r'\([^()]+\)',a).group()
149 '( 12/2+2-36/9)'

 

posted @ 2017-08-19 16:23  炉山假面目  阅读(170)  评论(0编辑  收藏  举报