爬虫常用正则表达式
1.指定开头,指定结尾
str1 = "background-image: url(https://image2.pearvideo.com/cont/20200428/cont-1671582-12370181.png);"
# Pattern shape: \b<start text>.*?<end text>\b
# The lazy .*? stops at the first "png" that is followed by a word boundary.
res = re.search(r"\bhttp.*?png\b", str1)
# Alternative: the same idea without the word-boundary anchors; group(1)
# holds the text between "http" and "png".
res = re.search(r"http(.*?)png", str1)
print(res.group())
2.匹配所有整型数字
inventory_count = "库存5000"
# \d+ matches the first run of consecutive digits in the text (use
# re.findall with the same pattern to collect every run). The raw string
# keeps "\d" from being parsed as an invalid Python string escape.
res = re.search(r"\d+", inventory_count)
3.匹配所有浮点数
price = 565656.23223
# re.search only accepts str or bytes, so the float is converted to text
# first; passing the float directly raises TypeError.
# Pattern: optional sign, optional integer part, optional dot, then digits.
price = re.search(r"[-+]?[0-9]*\.?[0-9]+", str(price))
4.匹配无视空格和换行
str1 = "instrt into inswate"
text_data = "hjkshcsdk dsehfhk instrt into\ninswate "
# Build a pattern that finds the words of str1 in order, allowing any amount
# of whitespace (spaces, tabs, newlines -- or none) between them.
str1_list = str1.split()
# Each word is escaped so it is matched literally, and the words are joined
# with a capturing (\s*) group. Joining avoids having to trim a trailing
# separator: str.rstrip() removes a *set of characters*, not a suffix, so it
# could also eat the end of the last word.
# The inline flags (case-insensitive, dot-all) appear once at the very start;
# Python 3.11+ rejects global inline flags anywhere else in the pattern.
str3 = "(?is)" + r"(\s*)".join(re.escape(word) for word in str1_list)
response = re.search(str3, text_data)
5.匹配或者
(png|jpg|jpeg) 就是或的关系
# Alternation inside the group: the lazy match may end in png, jpg or jpeg,
# whichever comes first and is followed by a word boundary.
_image_url_re = re.compile(r"\bhttp.*?(png|jpg|jpeg)\b")
images = _image_url_re.search(image)
6.匹配所有中文
# Collect every CJK ideograph (U+4E00-U+9FFF) in text1, one per list item.
# A negated Latin-1 class such as [^\x00-\xff] is not used because it also
# matches kana, Cyrillic, "€", full-width punctuation and any other
# character above U+00FF.
# NOTE: full-width punctuation and the rarer CJK extension blocks fall
# outside this range and are not returned.
text1_list = re.findall(r"[\u4e00-\u9fff]", text1)
7.匹配小数或者整数
# The decimal alternative is tried first so "3.14" is returned whole rather
# than as "3" and "14"; plain integers fall through to the second branch.
# With a single capturing group, findall returns that group's text per match.
# The raw string keeps "\d" and "\." from being parsed as invalid Python
# string escapes.
count_list = re.findall(r"(\d+\.\d+|\d+)", desc1)