Python正则表达式re模块练习

'''
import re

# Regular expressions: fuzzy (pattern-based) matching on strings.
# str.find, by contrast, looks up one literal substring and returns its index.
s = "hello yuan"
exact_index = s.find("yuan")
print(exact_index)  # 6

s2 = "1,2,34,100,yuan,rain,alvin,45"
lowercase_run = re.compile("[a-z]+")
ret = lowercase_run.findall(s2)
print(ret)  # ['yuan', 'rain', 'alvin']

#################### Metacharacters ####################

# (1) Wildcard `.`: in the default mode it matches any character except a
#     newline.
any_two_between = re.compile("y..n")
ret = any_two_between.findall("hello yuan,hello rain,hello yabn")
print(ret)  # ['yuan', 'yabn']

# With the re.S flag the wildcard `.` matches every character, newline
# included.
any_two_between_dotall = re.compile("y..n", re.S)
ret = any_two_between_dotall.findall("hello yu\nn,hello rain,hello yabn")
print(ret)  # ['yu\nn', 'yabn']

# (2) Repetition: * [0, inf)   + [1, inf)   ? [0, 1]   {m,n} explicit range
# Key point: quantifiers are greedy by default.
# Appending ? to a quantifier switches it to non-greedy matching.
#
# All patterns are raw strings (r"...") so that the backslash in \d reaches
# the regex engine untouched instead of being parsed as an (invalid) Python
# string escape.

ret = re.findall(r"\d", "1,2,3,55,yuan,33")
print(ret)  # ['1', '2', '3', '5', '5', '3', '3']

ret = re.findall(r"hi \d{2}", "hi 1,hi 66,hi 188")
print(ret)  # ['hi 66', 'hi 18']

ret = re.findall(r"hi \d*", "hi 1,hi 66,hi 188")
print(ret)  # ['hi 1', 'hi 66', 'hi 188']

ret = re.findall(r"\d+", "66,188,2,12222,hello")
print(ret)  # ['66', '188', '2', '12222']

# The trailing ? makes + non-greedy: each match stops after a single digit.
ret = re.findall(r"\d+?", "66,188,2,12222,hello")
print(ret)  # ['6', '6', '1', '8', '8', '2', '1', '2', '2', '2', '2']

ret = re.findall(r"hi \d?", "hi 1,hi 66,hi 188,hi ")
print(ret)  # ['hi 1', 'hi 6', 'hi 1', 'hi ']

# {0,1} is the explicit-range spelling of ?
ret = re.findall(r"hi \d{0,1}", "hi 1,hi 66,hi 188,hi ")
print(ret)  # ['hi 1', 'hi 6', 'hi 1', 'hi ']

# {1,} is the explicit-range spelling of +
ret = re.findall(r"hi \d{1,}", "hi 1,hi 66,hi 188,hi ")
print(ret)  # ['hi 1', 'hi 66', 'hi 188']

# {0,} is the explicit-range spelling of *
ret = re.findall(r"hi \d{0,}", "hi 1,hi 66,hi 188,hi ")
print(ret)  # ['hi 1', 'hi 66', 'hi 188', 'hi ']

# Non-greedy range: takes the minimum (one digit) whenever that is enough.
ret = re.findall(r"hi \d{1,3}?", "hi 1,hi 66,hi 188,hi ")
print(ret)  # ['hi 1', 'hi 6', 'hi 1']


# (3) Anchors: ^ pins the match to the start of the string, $ to its end.

# "good" is not at the very beginning of the text, so nothing is found.
starts_with_good = re.compile("^good/.{4}/.{4}")
ret = starts_with_good.findall("hello/good/food/meat")
print(ret)  # []

# Anchored on both sides: the whole string has to fit the pattern.
whole_path = re.compile("^good/.{4}/meat$")
ret = whole_path.findall("good/aaaa/meat")
print(ret)  # ['good/aaaa/meat']


# (4) [] character set: matches any single character listed inside the
#     brackets. Two characters are special inside a set:
#     `-` denotes a range and a leading `^` negates the set.
charset_examples = (
    # (pattern, text)                                 expected findall result
    ("yu[ac]n", "yuan yubn yucn yuacn"),            # ['yuan', 'yucn']
    # A comma inside [] is a literal comma, not a separator.
    ("yu[a,c]n", "yuan yubn yucn yu,n"),            # ['yuan', 'yucn', 'yu,n']
    ("yu[0123456789]n", "yuan yu8n yucn yu2n"),     # ['yu8n', 'yu2n']
    # Range form of the digit set above.
    ("yu[0-9]n", "yuan yu8n yucn yu2n"),            # ['yu8n', 'yu2n']
    ("[a-z0-9A-Z]+", "yuan,22,alvin,45,rain"),      # ['yuan', '22', 'alvin', '45', 'rain']
    # Negated set: runs of anything that is not a digit.
    ("[^0-9]+", "yuan,22,alvin,45,rain"),           # ['yuan,', ',alvin,', ',rain']
)

for pattern, text in charset_examples:
    ret = re.findall(pattern, text)
    print(ret)

# (5) Grouping () and alternation |
# re.findall returns only the captured group when the pattern contains one;
# (?:...) makes the group non-capturing so the whole match is returned.
# The pattern is a raw string so that \. reaches the regex engine as an
# escaped literal dot rather than an (invalid) Python string escape.
ret = re.findall(
    r"https?://www\.[a-zA-Z0-9]+\.(?:com|cn)",
    "http://www.baidu.com,https://www.jd.com,http://www.python.cn,",
)
print(ret)  # ['http://www.baidu.com', 'https://www.jd.com', 'http://www.python.cn']

# (6) Escape character \
# - Gives some ordinary characters a special meaning: \d matches a decimal
#   digit (like [0-9]) and \w matches a word character (like [0-9a-zA-Z_];
#   for str patterns both also cover Unicode digits/letters unless re.ASCII
#   is given).
# - Removes the special meaning of a metacharacter, e.g. \. is a literal dot.
# Write patterns as raw strings so Python itself does not interpret the
# backslash before the regex engine sees it.

ret = re.findall(r"\d+", "123a45bcd678")
print(ret)  # ['123', '45', '678']

# (7) () grouping with re.findall / re.search / re.match
# All patterns are raw strings so that \. and \w reach the regex engine
# unchanged instead of being parsed as (invalid) Python string escapes.

# Sample text shared by the examples below.
urls = "http://www.baidu.com,https://www.jd.com,http://www.python.cn,"

# 1. re.findall(): returns every match; when the pattern has a capturing
#    group, the group's text is returned instead of the whole match.
ret1 = re.findall(r"https?://www\.\w+\.(?:com|cn)", urls)
print(ret1)  # ['http://www.baidu.com', 'https://www.jd.com', 'http://www.python.cn']

ret2 = re.findall(r"(https?)://www\.\w+\.(?:com|cn)", urls)
print(ret2)  # ['http', 'https', 'http']

ret3 = re.findall(r"https?://www\.(\w+)\.(?:com|cn)", urls)
print(ret3)  # ['baidu', 'jd', 'python']

# 2. re.search(): finds the first match only and returns a match object.
ret4 = re.search(r"https?://www\.\w+\.(?:com|cn)", urls)
print(ret4)  # <re.Match object; span=(0, 20), match='http://www.baidu.com'>
print(ret4.group())  # http://www.baidu.com

# (?P<name>...) creates a named group that is read back via group("name").
ret5 = re.search(r"https?://www\.(?P<mingzi>\w+)\.(?:com|cn)", urls)
print(ret5)  # <re.Match object; span=(0, 20), match='http://www.baidu.com'>
print(ret5.group("mingzi"))  # baidu

ret6 = re.search(r"(?P<yuming>https?)://www\.(?P<mingzi>\w+)\.(?:com|cn)", urls)
print(ret6.group("yuming"))  # http

ret7 = re.search(
    r"(?P<yuming>https?)://www\.(?P<mingzi>\w+)\.(?P<houzhui>com|cn)", urls
)
print(ret7.group("houzhui"))  # com

# 3. re.match(): matches only at the beginning of the string.
ret8 = re.match(r"https?://www\.\w+\.(?:com|cn)", urls)
print(ret8.group())  # http://www.baidu.com

# The text does not start with a URL, so re.match returns None. Print the
# result itself: calling .group() on None would raise AttributeError.
ret9 = re.match(r"https?://www\.\w+\.(?:com|cn)", "huchangxi," + urls)
print(ret9)  # None

'''


posted @ 2022-04-06 23:45  呼长喜  阅读(69)  评论(0)  编辑  收藏  举报