Python用法速查@正则表达式

re 模块

方法

pattern=re.compile(regular_expressions)

当同一个正则表达式需要被多次使用时，先用 compile 将其编译成 Pattern 对象比较合适：编译一次、调用多次，可减少重复编译的开销

import re

# 将正则表达式编译成 Pattern 对象 
pattern = re.compile(r'\d+')

match_obj=pattern.match(string)

从第一个字符开始匹配，不成功则结束
只要一个匹配结果

>>> import re
>>> pattern = re.compile(r'\d+')                    # 用于匹配至少一个数字
>>> m = pattern.match('one12twothree34four')        # 查找头部，没有匹配
>>> print m
None
>>> m = pattern.match('one12twothree34four', 2, 10) # 从'e'的位置开始匹配，没有匹配
>>> print m
None
>>> m = pattern.match('one12twothree34four', 3, 10) # 从'1'的位置开始匹配，正好匹配
>>> print m                                         # 返回一个 Match 对象
<_sre.SRE_Match object at 0x10a42aac0>
>>> m.group(0)   # 可省略 0
'12'
>>> m.start(0)   # 可省略 0
3
>>> m.end(0)     # 可省略 0
5
>>> m.span(0)    # 可省略 0
(3, 5)

match_obj.start()

match_obj.end()

match_obj.span()

match_obj.group(n)


>>> import re
>>> pattern = re.compile(r'([a-z]+) ([a-z]+)', re.I)   # re.I 表示忽略大小写
>>> m = pattern.match('Hello World Wide Web')
>>> print m                               # 匹配成功，返回一个 Match 对象
<_sre.SRE_Match object at 0x10bea83e8>
>>> m.group(0)                            # 返回匹配成功的整个子串
'Hello World'
>>> m.span(0)                             # 返回匹配成功的整个子串的索引
(0, 11)
>>> m.group(1)                            # 返回第一个分组匹配成功的子串
'Hello'
>>> m.span(1)                             # 返回第一个分组匹配成功的子串的索引
(0, 5)
>>> m.group(2)                            # 返回第二个分组匹配成功的子串
'World'
>>> m.span(2)                             # 返回第二个分组匹配成功的子串的索引
(6, 11)
>>> m.groups()                            # 等价于 (m.group(1), m.group(2), ...)
('Hello', 'World')
>>> m.group(3)                            # 不存在第三个分组
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
IndexError: no such group

pattern.search(string)

第一个字符不符合，可继续向后匹配
只要一个匹配结果

>>> import re
>>> pattern = re.compile('\d+')
>>> m = pattern.search('one12twothree34four')  # 这里如果使用 match 方法则不匹配
>>> m
<_sre.SRE_Match object at 0x10cc03ac0>
>>> m.group()
'12'
>>> m = pattern.search('one12twothree34four', 10, 30)  # 指定字符串区间
>>> m
<_sre.SRE_Match object at 0x10cc03b28>
>>> m.group()
'34'
>>> m.span()
(13, 15)

# -*- coding: utf-8 -*-

import re
 
# 将正则表达式编译成 Pattern 对象
pattern = re.compile(r'\d+') 
 
# 使用 search() 查找匹配的子串，不存在匹配的子串时将返回 None 
# 这里使用 match() 无法成功匹配 
m = pattern.search('hello 123456 789') 
 
if m: 
    # 使用 Match 获得分组信息 
    print 'matching string:',m.group()
    print 'position:',m.span()
#结果
matching string: 123456
position: (6, 12)

pattern.findall(string)

要所有匹配结果

import re
 
pattern = re.compile(r'\d+')   # 查找数字
result1 = pattern.findall('hello 123456 789')
result2 = pattern.findall('one1two2three3four4', 0, 10)
 
print result1
print result2

#结果
['123456', '789']
['1', '2']

pattern.finditer(string)

获得所有匹配结果,返回一个迭代器

# -*- coding: utf-8 -*-

import re
 
pattern = re.compile(r'\d+')

result_iter1 = pattern.finditer('hello 123456 789')
result_iter2 = pattern.finditer('one1two2three3four4', 0, 10)

print type(result_iter1)
print type(result_iter2)

print 'result1...'
for m1 in result_iter1:   # m1 是 Match 对象
    print 'matching string: {}, position: {}'.format(m1.group(), m1.span())

print 'result2...'
for m2 in result_iter2:
    print 'matching string: {}, position: {}'.format(m2.group(), m2.span())

#结果

<type 'callable-iterator'>
<type 'callable-iterator'>
result1...
matching string: 123456, position: (6, 12)
matching string: 789, position: (13, 16)
result2...
matching string: 1, position: (3, 4)
matching string: 2, position: (7, 8)

pattern.split(string[, maxsplit])

maxsplit可指定最大分割次数

import re
 
p = re.compile(r'[\s\,\;]+')#按 空格 逗号 分号 分割字符串
print p.split('a,b;; c   d')

#结果
['a', 'b', 'c', 'd']

pattern.sub(func, string[, maxsub])

第一个参数既可以是替换字符串（其中可用 \id 形式引用分组），也可以是接收 Match 对象并返回替换字符串的函数

import re
 
p = re.compile(r'(\w+) (\w+)')
s = 'hello 123, hello 456'

def func(m):
    """Return the replacement text for a match: 'hi', a space, then group 2."""
    second_word = m.group(2)
    return ' '.join(('hi', second_word))

print p.sub(r'hello world', s)  # 使用 'hello world' 替换 'hello 123' 和 'hello 456'
print p.sub(r'\2 \1', s)        # \id形式 引用分组
print p.sub(func, s)            # 用函数返回替换串
print p.sub(func, s, 1)         # 最多替换一次

#结果

hello world, hello world
123 hello, 456 hello
hi 123, hi 456
hi 123, hello 456

pattern.subn(func, string[, maxsubn])

返回一个元组：(替换后的字符串, 替换次数)

import re
 
p = re.compile(r'(\w+) (\w+)')
s = 'hello 123, hello 456'

def func(m):
    """Replacement callback: build 'hi' followed by a space and the second captured group."""
    pieces = ['hi', ' ', m.group(2)]
    return ''.join(pieces)

print p.subn(r'hello world', s)
print p.subn(r'\2 \1', s)
print p.subn(func, s)
print p.subn(func, s, 1)

#结果

('hello world, hello world', 2)
('123 hello, 456 hello', 2)
('hi 123, hi 456', 2)
('hi 123, hello 456', 1)

函数

re.match(pattern, string)

import re

m1 = re.match(r'\d+', 'One12twothree34four') #第一个字符不符合，匹配结束
if m1:
    print 'matching string:',m1.group()
else:
    print 'm1 is:',m1
    
m2 = re.match(r'\d+', '12twothree34four') #匹配到第一个符合的子串，匹配结束
if m2:
    print 'matching string:', m2.group()
else:
    print 'm2 is:',m2
#结果
m1 is: None
matching string: 12

re.search(pattern, string)

re.search(pattern, string[, flags])

#第一个字符匹配不成功时，可以继续向后匹配，直到找到第一个符合的子串；若始终没有匹配则返回 None

re.findall(pattern, string)

import re

print re.findall(r'\d+', 'hello 12345 789')#匹配所有符合的子串

# 输出
['12345', '789']

re.finditer(pattern, string)

#返回迭代器

re.split(pattern, string[, maxsplit])

re.sub(pattern, func, string[, count])

re.subn(pattern, func, string[, count])

正则表达式

实例
python.org的正则python\.org
010-258945的正则\d{3}-\d{6}
由数字、26个英文字母或下划线组成的字符串的正则^\w+$或^[0-9a-zA-Z_]+$
13、15、18开头手机号的正则^(13[0-9]|15[0-35-9]|18[0-9])\d{8}$
匹配金额，精确到小数点后2位的正则^[0-9]+(\.[0-9]{2})?$
匹配汉字的正则^[\u4e00-\u9fa5]{0,}$

参考

huxi
explore-python/Regular-expression

posted @ 2021-08-03 18:30 HUGBOY 阅读(150) 评论(0) 收藏举报

刷新页面返回顶部

HUGBOY

——a BOY who want give his girl a HUG.