二十七、正则表达式补充
import re
'''
正则表达式:
re.match:从头匹配
re.search:浏览全部字符串,匹配第一个符合规则的字符串
re.findall():将匹配到的所有内容都放置在一个列表中
re.finditer():与findall类似,但返回迭代器,逐个产出匹配对象
re.split():按匹配到的内容分割字符串
re.sub():替换字符串中的匹配项
'''
'''
1.match
'''
# Demo: re.match only matches at the very start of the string; this pattern
# contains no capture groups.
origin = "hello tom bcd tom lge tom acd 19"
# Raw string so the backslash in \w reaches the regex engine unchanged.
r = re.match(r"h\w+", origin)
print(r.group())      # the whole text that was matched
print(r.groups())     # tuple of captured groups (empty here: no groups)
print(r.groupdict())  # dict of named groups (empty here: no named groups)
print ("-------------------------match1----------------------------")
---------------------------------------------------------------------
hello
()
{}
-------------------------match1----------------------------
---------------------------------------------------------------------
# Demo: wrapping the whole pattern in one capture group makes groups()
# return a 1-tuple holding the same text as group().
# Raw string so the backslash in \w reaches the regex engine unchanged.
r = re.match(r"(h\w+)", origin)
print(r.group())      # the whole text that was matched
print(r.groups())     # tuple of captured groups
print(r.groupdict())  # dict of named groups (empty here: no named groups)
print ("--------------------------match2---------------------------")
---------------------------------------------------------------------
hello
('hello',)
{}
--------------------------match2---------------------------
---------------------------------------------------------------------
# Demo: two capture groups split the match into its first letter and the rest.
# Raw string so the backslash in \w reaches the regex engine unchanged.
r = re.match(r"(h)(\w+)", origin)
print(r.group())      # the whole text that was matched
print(r.groups())     # one tuple entry per capture group, left to right
print(r.groupdict())  # dict of named groups (empty here: no named groups)
print ("--------------------------match3---------------------------")
---------------------------------------------------------------------
hello
('h', 'ello')
{}
--------------------------match3---------------------------
---------------------------------------------------------------------
# Demo: (?P<name>...) names a group, so it shows up in groupdict() as well
# as in groups().
# Raw string so the backslash in \w reaches the regex engine unchanged.
r = re.match(r"(?P<n1>h)(?P<n2>\w+)", origin)
print(r.group())      # the whole text that was matched
print(r.groups())     # one tuple entry per capture group, left to right
print(r.groupdict())  # named groups keyed by their names (n1, n2)
print ("--------------------------match4---------------------------")
---------------------------------------------------------------------
hello
('h', 'ello')
{'n1': 'h', 'n2': 'ello'}
--------------------------match4---------------------------
---------------------------------------------------------------------
'''
2.search:浏览整个字符串,返回第一个匹配到的结果
'''
# Demo: re.search scans the string for the first place the pattern matches.
# The pattern mixes one positional group with one named group; "$" anchors
# the final digit to the end of the string.
origin = "hello tom bcd tom lge tom acd 19"
# Raw string so the backslashes in \w and \d reach the regex engine unchanged.
r = re.search(r"(t\w+).*(?P<name>\d)$", origin)
print(r.group())      # the whole text that was matched
print(r.groups())     # every capture group, named ones included
print(r.groupdict())  # only the named groups
print ("--------------------------search1---------------------------")
---------------------------------------------------------------------
tom bcd tom lge tom acd 19
('tom', '9')
{'name': '9'}
--------------------------search1---------------------------
---------------------------------------------------------------------
# Demo: the leading "t" sits outside the parentheses, so it is part of the
# overall match but not of the captured group.
origin = "hello tom bcd tom lge tom acd 19"
# Raw string so the backslash in \w reaches the regex engine unchanged.
r = re.search(r"t(\w+)", origin)
print(r.group())      # the whole text that was matched
print(r.groups())     # tuple of captured groups
print(r.groupdict())  # dict of named groups (empty here: no named groups)
print ("--------------------------search2---------------------------")
---------------------------------------------------------------------
tom
('om',)
{}
--------------------------search2---------------------------
---------------------------------------------------------------------
'''
3.findall:匹配到的字符串放到列表(分组和不分组)
分组提取:从左到右,从外到内,有几个括号就取几次
'''
# Demo: without capture groups, findall returns the list of whole matches.
# Matches do not overlap: once "2b3" is consumed the scan resumes at "c".
# Raw string so the backslashes in \d and \w reach the regex engine unchanged.
r = re.findall(r"\d+\w\d+", "a2b3c4d5")
print(r)
print ("--------------------------findall1---------------------------")
---------------------------------------------------------------------
['2b3', '4d5']
--------------------------findall1---------------------------
---------------------------------------------------------------------
# Demo: an empty pattern matches at every position, so the result holds one
# empty string for each of the 8 characters plus one for the end of the string.
r = re.compile("").findall("a2b3c4d5")
print(r)
print ("--------------------------findall2---------------------------")
---------------------------------------------------------------------
['', '', '', '', '', '', '', '', '']
--------------------------findall2---------------------------
---------------------------------------------------------------------
# Demo: with capture groups, findall returns one tuple per match, holding the
# same values that groups() would return for that match.
origin = "hello tomm bcd tomm lge tomm acd 19"
# Raw string so the backslash in \w reaches the regex engine unchanged.
r = re.findall(r"(t)(\w+)(m)", origin)
print(r)
print ("--------------------------findall3---------------------------")
---------------------------------------------------------------------
[('t', 'om', 'm'), ('t', 'om', 'm'), ('t', 'om', 'm')]
--------------------------findall3---------------------------
---------------------------------------------------------------------
# Demo: an extra outer group is numbered first (groups are numbered by their
# opening parenthesis, left to right), so each tuple starts with the full word.
origin = "hello tomm bcd tomm lge tomm acd 19"
# Raw string so the backslash in \w reaches the regex engine unchanged.
r = re.findall(r"((t)(\w+)(m))", origin)
print(r)
print ("--------------------------findall4---------------------------")
---------------------------------------------------------------------
[('tomm', 't', 'om', 'm'), ('tomm', 't', 'om', 'm'), ('tomm', 't', 'om', 'm')]
--------------------------findall4---------------------------
---------------------------------------------------------------------
# Demo: a group nested inside another group is reported right after its
# parent: (\w+(m)) yields "om", then the inner (m) yields "m".
origin = "hello tomn bcd tomn lge tomn acd 19"
# Raw string so the backslash in \w reaches the regex engine unchanged.
r = re.findall(r"(t)(\w+(m))(n)", origin)
print(r)
print ("--------------------------findall5---------------------------")
---------------------------------------------------------------------
[('t', 'om', 'm', 'n'), ('t', 'om', 'm', 'n'), ('t', 'om', 'm', 'n')]
--------------------------findall5---------------------------
---------------------------------------------------------------------
'''
4.finditer():返回迭代器
'''
# Demo: finditer returns an iterator that yields one match object per match,
# so group(), groups() and groupdict() are all available for each hit.
origin = "hello tomn bcd tomn lge tomn acd 19"
# Raw string so the backslash in \w reaches the regex engine unchanged.
r = re.finditer(r"(t)(\w+(m))(?P<name>n)", origin)
print(r)
for i in r:
    print(r)              # still the iterator object itself, not the match
    print(i.group())      # the whole text of this match
    print(i.groups())     # every capture group of this match
    print(i.groupdict())  # only the named groups of this match
print ("--------------------------finditer1---------------------------")
---------------------------------------------------------------------
<callable_iterator object at 0x00000000025CF940>
<callable_iterator object at 0x00000000025CF940>
tomn
('t', 'om', 'm', 'n')
{'name': 'n'}
<callable_iterator object at 0x00000000025CF940>
tomn
('t', 'om', 'm', 'n')
{'name': 'n'}
<callable_iterator object at 0x00000000025CF940>
tomn
('t', 'om', 'm', 'n')
{'name': 'n'}
--------------------------finditer1---------------------------
---------------------------------------------------------------------
'''
5.re.split():分割
split(pattern, string, maxsplit=0, flags=0):
pattern:正则
string:字符串
maxsplit:最大分割次数
flags:标志位,用于控制正则表达式的匹配方式,如:是否区分大小写,多行匹配等等
'''
# Demo: str.split cuts on a literal substring; re.split cuts on a pattern.
origin = "hello tomn bcd tomn lge tomn acd 19"
print(origin.split("t"))
# Raw string for the pattern; maxsplit is passed by keyword because passing
# it positionally is deprecated since Python 3.13.
r = re.split(r"t\w+", origin, maxsplit=1)
print(r)
print ("--------------------------split1---------------------------")
---------------------------------------------------------------------
['hello ', 'omn bcd ', 'omn lge ', 'omn acd 19']
['hello ', ' bcd tomn lge tomn acd 19']
--------------------------split1---------------------------
---------------------------------------------------------------------
# When the pattern contains a capture group, the text matched by the group
# (the separator itself) is kept in the result list.
# Raw string for the pattern; maxsplit is passed by keyword because passing
# it positionally is deprecated since Python 3.13.
r = re.split(r"(t\w+)", origin, maxsplit=1)
print(r)
print ("--------------------------split2---------------------------")
---------------------------------------------------------------------
['hello ', 'tomn', ' bcd tomn lge tomn acd 19']
--------------------------split2---------------------------
---------------------------------------------------------------------
# To drop the surrounding "t" and "n", put the parentheses after the "t" and
# before the "n": only the captured middle part is kept in the result.
# Raw string for the pattern; maxsplit is passed by keyword because passing
# it positionally is deprecated since Python 3.13.
r = re.split(r"t(\w+)n", origin, maxsplit=1)
print(r)
print ("--------------------------split3---------------------------")
---------------------------------------------------------------------
['hello ', 'om', ' bcd tomn lge tomn acd 19']
--------------------------split3---------------------------
---------------------------------------------------------------------
'''
计算器
'''
# Calculator skeleton: repeatedly replace one innermost parenthesised
# sub-expression with its "value" until no parentheses remain.
source="1-2*((6 -30+(-40.0/5)*(-9-2*5/3+7/3*99/4*2998+10*568/14))-(-4*3)/(16-3*2))"


def func(x):
    """Placeholder evaluator: ignore the expression text ``x`` and return 1."""
    return 1


# Matches one "( ... )" pair with no parentheses inside it. The capture group
# makes split() keep the inner text, so a successful split gives exactly
# [before, content, after]. Compiled once because it is reused on every pass.
innermost = re.compile(r"\(([^()]+)\)")

while True:
    print(source)
    result = innermost.split(source, maxsplit=1)
    if len(result) == 3:
        # Length is known to be 3 here, so the parts can be unpacked directly.
        before, content, after = result
        r = func(content)
        new_source = before + str(r) + after
        source = new_source
    else:
        # No parentheses left: evaluate what remains and stop.
        m = func(source)
        print(m)
        break
---------------------------------------------------------------------
1-2*((6 -30+(-40.0/5)*(-9-2*5/3+7/3*99/4*2998+10*568/14))-(-4*3)/(16-3*2))
1-2*((6 -30+1*(-9-2*5/3+7/3*99/4*2998+10*568/14))-(-4*3)/(16-3*2))
1-2*((6 -30+1*1)-(-4*3)/(16-3*2))
1-2*(1-(-4*3)/(16-3*2))
1-2*(1-1/(16-3*2))
1-2*(1-1/1)
1-2*1
1
---------------------------------------------------------------------
'''
6.re.sub():
sub(pattern, repl, string, count=0, flags=0):用于替换字符串中的匹配项
pattern:正则
repl:指定替换后的字符串
string:要替换的字符串
count:替换次数,默认所有
flags:标志位
subn(pattern, repl, string, count=0, flags=0):用于替换字符串中的匹配项,并返回替换次数
'''
# Demo: re.sub replaces matches; re.subn does the same and also reports how
# many replacements were made.
origin = "fsd2agds3gsd4gsdga5gas7g8a8sdf"
# Raw string for the pattern; count is passed by keyword because passing it
# positionally is deprecated since Python 3.13. Only the first two runs of
# digits are replaced.
r = re.sub(r"\d+", "OOO", origin, count=2)
print(r)
# No count given: every run of digits is replaced; r becomes (new_string, n).
r = re.subn(r"\d+", "OOO", origin)
print(r)
---------------------------------------------------------------------
fsdOOOagdsOOOgsd4gsdga5gas7g8a8sdf
('fsdOOOagdsOOOgsdOOOgsdgaOOOgasOOOgOOOaOOOsdf', 7)
---------------------------------------------------------------------
============================================================================================================
当有些人一出生就有的东西,我们要为之奋斗几十年才拥有。但有一样东西,你一辈子都不会有,那就是我们曾经一无所有。