# re 模块 (notes on Python's regular-expression module)


import re

# findall() returns every match in a list; search() stops at the first match
# and stores it in a match object.
# Note: *, + and ? are greedy (they consume as much as they can); appending a
# "?" to a quantifier makes it lazy.
#
# Each entry below is (pattern, subject); the trailing comment is the list
# that findall() prints for it.
_FINDALL_DEMOS = (
    # "." matches any single character; one dot stands for exactly one.
    ("h..lo", "workhellopython"),  # ['hello']
    ("h..lo", "workhellopythonhello"),  # ['hello', 'hello']
    # "^" anchors the match to the start of the string.
    ("^w..", "workhellopythonhello"),  # ['wor']
    # "$" anchors the match to the end of the string.
    ("thon$", "workhellopython"),  # ['thon']
    # "*" matches zero or more repetitions, greedily. Zero repetitions is a
    # valid (empty) match, so every position without an "l" yields ''.
    ("l*", "workhellopython"),  # 6 x '', then 'll', then 8 x ''
    # "+" matches one or more repetitions.
    ("hel+", "workhellopython"),  # ['hell']
    # "?" matches zero or one repetition.
    ("helle?", "workhellopython"),  # ['hell']
    # "{}" sets an explicit repetition count: {1} exactly once, {1,} once or
    # more, {0,1} zero or one time, {1,10} between one and ten times.
    ("hell{1,}", "workhelllopython"),  # ['helll']
    # A quantifier followed by "?" becomes lazy and takes the minimum it can;
    # for "*" that minimum is zero repetitions.
    ("hel*?", "workhelllopython"),  # ['he']
    # "[]" is a character set: [xy] matches x or y. Inside a set * ? + are
    # ordinary characters; only "-", "^" and "\" keep a special meaning.
    # [a-z] matches one character from a to z, [^a-z] one character that is
    # not in that range.
    ("z[xy,]", "zxuuuzyuuuz,uuu"),  # ['zx', 'zy', 'z,']
    # "|" is alternation: match "ka" or "b".
    (r"ka|b", "asdkabll"),  # ['ka', 'b']
    # "()" groups a sub-pattern; with a group present findall() reports the
    # group's text for each match.
    (r"(abc)+", "abc abc abc abc abc"),  # ['abc', 'abc', 'abc', 'abc', 'abc']
)

for _pattern, _subject in _FINDALL_DEMOS:
    print(re.findall(_pattern, _subject))

# The escape character "\"
#
# A backslash in front of a metacharacter removes its special meaning, e.g. \.
# A backslash in front of certain ordinary characters gives them one, e.g. \d
#
# \d  any decimal digit; equivalent to [0-9]
# \D  any non-digit character; equivalent to [^0-9]
# \s  any whitespace character; for ASCII text equivalent to [ \t\n\r\f\v]
# \S  any non-whitespace character; for ASCII text equivalent to [^ \t\n\r\f\v]
# \w  any alphanumeric character or underscore; for ASCII text [a-zA-Z0-9_]
# \W  any other character; for ASCII text [^a-zA-Z0-9_]
# \b  a word boundary, e.g. the position next to a space, "&" or "#"
#
# Patterns are written as raw strings so that the backslash reaches the re
# module untouched ("\d" in a normal literal is an invalid escape sequence).
print(re.findall(r"\d", '12+331+1231'))  # ['1', '2', '3', '3', '1', '1', '2', '3', '1']
print(re.findall(r"\d+", '12+331+1231'))  # ['12', '331', '1231']

# Why the r prefix matters for \b:
# 1. Python reads the literal before re ever sees it, and in a normal string
#    "\b" already means the backspace character: print(["a\b"]) -> ['a\x08'].
# 2. The pattern "I\b" would therefore reach re as "I" + backspace, not as
#    "I" + word boundary.
# 3. With r"I\b" Python leaves the two characters \ and b alone, so re
#    receives them intact and interprets \b as a word boundary.
print(re.findall(r"I\b", 'hello I LIST'))  # ['I']

# Alternative without a raw string: double the backslash. "\\" is Python's
# escape for one literal backslash, so re again receives \b.
print(re.findall("I\\b", 'hello I LIST'))  # ['I']

# Matching a literal backslash needs four backslashes in a normal literal:
# 1. Python turns "\\\\" into two backslash characters.
# 2. re reads those two characters as an escaped, i.e. literal, backslash.
# 3. The result prints as ['c\\s'] because the list shows the repr of the
#    string, which doubles the single backslash it contains.
print(re.findall("c\\\\s", 'hec\\so'))  # ['c\\s']

 

#### search: find the first match and return it as a match object ####
# (?P<name>...) is the fixed syntax for a named group: whatever the group
# matches can later be fetched with group("name").
ret = re.search(r"(?P<name>\w+)", "abcdf")
print(ret.group())  # abcdf

ret = re.search(r"(?P<name>[a-z]+)\d+", "2312asdsad31")
print(ret.group())  # asdsad31
print(ret.group("name"))  # asdsad

# \w also matches digits, so the greedy group first takes the whole string
# and then gives back a single digit for \d+; group("name") is "abcdf123213".
ret = re.search(r"(?P<name>\w+)\d+", "abcdf1232131")
print(ret.group())  # abcdf1232131


#### match: like search, but only matches at the start of the string ####
print(re.match(r"\d+", "aaasdas723123"))  # None: the string starts with "a", not a digit
print(re.match(r"\d+", "123aaasdas723123"))  # <re.Match object; span=(0, 3), match='123'>
# group() extracts the matched text from the match object.
print(re.match(r"\d+", "123aaasdas723123").group())  # 123


##### split: cut the string at every match of the pattern #####
# Inside [] the "|" is an ordinary character, so this splits on "e" or "|".
print(re.split("[e|]", "he kkk ccc|ddd"))  # ['h', ' kkk ccc', 'ddd']


##### sub: replace matches #####
print(re.sub(r"\d+", "A", "aaaad12321dd990312"))  # aaaadAddA  (each run of digits becomes one A)
print(re.sub(r"\d", "A", "aaaad12321dd990312"))  # aaaadAAAAAddAAAAAA  (each digit becomes an A)
# count limits the number of replacements; it is passed by keyword because
# passing it positionally is deprecated in recent Python versions.
print(re.sub(r"\d", "A", "aaaad12321dd990312", count=3))  # aaaadAAA21dd990312

##### subn: like sub, but also returns how many replacements were made #####
print(re.subn(r"\d", "A", "aaaad12321dd990312"))  # ('aaaadAAAAAddAAAAAA', 11)
print(re.subn(r"\d+", "A", "aaaad12321dd990312"))  # ('aaaadAddA', 2)


###### compile: turn a pattern into a reusable pattern object ######
regex = re.compile(r"\d+")
# The compiled object offers the same methods as the module-level functions,
# minus the pattern argument.
print(regex.findall("1a2b3c4d5f"))  # ['1', '2', '3', '4', '5']


##### finditer: hand the matches out one by one through an iterator #####
ret = re.finditer(r"\d", "123213dada990")
# Each next() yields the next match object; group() gives its text.
print(next(ret).group())  # 1  (further next() calls give 2, 3, ...)


### Pitfall: findall() and capturing groups ###
# The dots are escaped so they match a literal "." rather than any character.
ret = re.findall(r'www\.(baidu|oldboy)\.com', 'www.oldboy.com')
# When the pattern contains a capturing group, findall() returns the group's
# text instead of the whole match.
print(ret)  # ['oldboy']

# (?:...) makes the group non-capturing, so findall() returns the full match.
ret = re.findall(r'www\.(?:baidu|oldboy)\.com', 'www.oldboy.com')
print(ret)  # ['www.oldboy.com']
# posted on 2018-05-11 14:18 Python哥 阅读(238) 评论(0) 收藏举报