"""Summary of Python regular expressions: a runnable tour of the ``re`` module.

Every example prints its result; the expected output is shown in the trailing
comment.  Patterns containing backslashes are written as raw strings so the
backslash reaches the regex engine unchanged.
"""
import re

s = 'hello world'
# Plain str methods, for comparison with the regex calls below:
#   s.find('llo')            -> 2   (an index)
#   s.replace('ll', 'xx')    -> 'hexxo world'
#   s.split('w')             -> ['hello ', 'orld']
#   re.findall(r'w\w{2}', s) -> ['wor']

# ---------------------------------------------------------------------------
# Metacharacters
# ---------------------------------------------------------------------------

# .  wildcard: stands for exactly one character ...
ret = re.findall('w..l', 'hello world')
print(ret)  # ['worl']

# ... but (without re.DOTALL) it does not match a newline.
ret = re.findall('w..l', 'hello w\nrld')
print(ret)  # []

# ^  anchors the match at the start of the string.
ret = re.findall('^h...o', 'hsdsdsdsdshkkhjkhhello')
print(ret)  # []
ret = re.findall('^h...o', 'hellosdsdsdshkkhjkhhello')
print(ret)  # ['hello']

# $  anchors the match at the end of the string.
ret = re.findall('liu..jj$', 'liuddjjddddlidfjj')
print(ret)  # []

# Subject string shared by the *, + and ? examples.
text = ('ssdkhjdkhjdkhjhjhjhaaajhjljjhjhjhjhjhjhjfjfjaljjfvcjbnmbaaaandxjvcbjbj'
        'aaavbjbvbjdvbjvbvc')

# *  repeats the preceding item 0 or more times.
ret = re.findall('ba*', text)
print(ret)  # ['b', 'baaaa', 'b', 'b', 'b', 'b', 'b', 'b', 'b']

# +  repeats the preceding item 1 or more times.
ret = re.findall('ba+', text)
print(ret)  # ['baaaa']

# + applies to the item directly before it (here the 'b').
ret = re.findall('b+a', text)
print(ret)  # ['ba']

# ?  repeats the preceding item 0 or 1 times.
ret = re.findall('b?a', text)
print(ret)  # ['a', 'a', 'a', 'a', 'ba', 'a', 'a', 'a', 'a', 'a', 'a']

# {m} / {m,n} / {m,}  explicit repeat counts; {1,} means "1 or more".
ret = re.findall(
    'b{2}',
    'ssdkhjdkhjdkhjhjhjhaaajhjljjhjhjhjhjhbbjhjfjfjaljjfvcjbbbnmbaaaanbbbbbdxjv'
    'cbjbjaaavbjbvbjdvbjvbvc',
)
print(ret)  # ['bb', 'bb', 'bb', 'bb']
ret = re.findall(
    'b{1,3}',
    'ssdkhjdkhjdkhjhjhjhaaajhjljjhjhjhjhjhbbjhjfjfjaljjfvcjbbbnmbaaaanbbbbbdxjv'
    'cbjbjaaavbjbvbjdvbjvbvc',
)
print(ret)  # ['bb', 'bbb', 'b', 'bbb', 'bb', 'b', 'b', 'b', 'b', 'b', 'b', 'b']

# Conclusion: * is {0,}, + is {1,}, ? is {0,1}; prefer the short forms.
# findall() returns every non-overlapping match.

# ---------------------------------------------------------------------------
# Character sets: [...]
# ---------------------------------------------------------------------------

# NOTE: a comma inside [...] is NOT a separator, it is a literal member of the
# set.  '[c,d]' therefore matches 'c', ',' or 'd'.
ret = re.findall('a[c,d]x', 'acxaxasssdsadxacdxdffdffadsdeddff')
print(ret)  # ['acx', 'adx']

# A set always matches ONE character, so this matches single letters (or a
# comma), not the words "com" / "cn".
ret = re.findall('[com,cn]', 'www.baidu.comwwww.guge.cn')
print(ret)  # ['c', 'o', 'm', 'c', 'n']

# A range matches any single character between its endpoints.
ret = re.findall('[a-z]', 'aasdsdfdffvvf')
print(ret)  # ['a', 'a', 's', 'd', 's', 'd', 'f', 'd', 'f', 'f', 'v', 'v', 'f']

# Inside a set, metacharacters lose their special meaning; the exceptions
# are \ ^ and -.
ret = re.findall('[x,*]', 'dddfdfxsddx*ddd*')
print(ret)  # ['x', 'x', '*', '*']
ret = re.findall('[x,.]', 'dfghsdxefffddsaw.')
print(ret)  # ['x', '.']
ret = re.findall('[x,,]', 'dfghs,dxefffd,dsaw.')
print(ret)  # [',', 'x', ',']

# ^ as the first character of a set negates it.
ret = re.findall('[^t]', 'dfghs,dxefffd,dsaw.')
print(ret)  # ['d', 'f', 'g', 'h', 's', ',', 'd', 'x', 'e', 'f', 'f', 'f', 'd', ',', 'd', 's', 'a', 'w', '.']

# Everything except '4', ',' and '5' (the comma is excluded as well).
ret = re.findall('[^4,5]', 'sdsdsq,dd46,775')
print(ret)  # ['s', 'd', 's', 'd', 's', 'q', 'd', 'd', '6', '7', '7']

# ---------------------------------------------------------------------------
# Backslash
# ---------------------------------------------------------------------------
# A backslash before a metacharacter removes its special meaning; before
# certain ordinary characters it introduces a special sequence:
#   \d  any decimal digit; for ASCII, [0-9]
#   \D  any non-digit; for ASCII, [^0-9]
#   \s  any whitespace; for ASCII, [ \t\n\r\f\v]
#   \S  any non-whitespace; for ASCII, [^ \t\n\r\f\v]
#   \w  any word character; for ASCII, [a-zA-Z0-9_] (underscore included)
#   \W  any non-word character; for ASCII, [^a-zA-Z0-9_]
#   \b  a word boundary: the empty position between a \w and a \W
#       character (or the start/end of the string)
print(re.findall(r'\d{11}', 'sddffgg15991710523dsdf159917105261dfdfffg'))  # ['15991710523', '15991710526']
print(re.findall(r'\sasd', 'asdddf asd'))  # [' asd']
print(re.findall(r'\wasd', 'asdddf aasd'))  # ['aasd']
print(re.findall(r'I\b', 'I am a LI$st'))  # ['I', 'I']

# ---------------------------------------------------------------------------
# search() and escaping
# ---------------------------------------------------------------------------
ret = re.search('wc', 'dfvdwcxxf')
print(ret)  # the Match object for the first occurrence
print(ret.group())  # wc

ret = re.search('a.', 'agj').group()
print(ret)  # ag

# An escaped dot matches only a literal '.'.
ret = re.search(r'a\.', 'a.gj').group()
print(ret)  # a.

# Matching a literal backslash: the regex needs \\ and a raw string keeps it
# from being halved by Python's own string-literal escaping.
ret = re.findall(r'A\\s', 'sddddA\\s')
print(ret)  # ['A\\s']
ret = re.findall(r'\\', 'aa\\vvwww')
print(ret)  # ['\\']

# ( )  groups the enclosed pattern into a single unit.
print(re.search('(sa)+', 'sasaddddsdwasasa').group())  # sasa

# |  alternation ("or").
print(re.search('3|(sa)', '3sa').group())  # 3

# ---------------------------------------------------------------------------
# Module-level functions
# ---------------------------------------------------------------------------
# 1. findall(): returns all matches in a list.
# 2. search():  returns a Match object for the first match anywhere in the
#               string; call .group() on it to get the matched text.
# 3. match():   like search(), but only matches at the start of the string.
ret = re.match('asd', 'asdass')
print(ret.group())  # asd

# 4. split(): splits the string at every match ('k', ',' or 'a' here).
ret = re.split('[k,a]', 'djkaffagg')
print(ret)  # ['dj', '', 'ff', 'gg']

# compile() builds a reusable pattern object.
obj = re.compile(r'\.com')
ret = obj.split('dshjhjkhd.com')
print(ret)  # ['dshjhjkhd', '']
ret = obj.findall('dshjhjkhd.com')
print(ret)  # ['.com']