python_正则表达式随笔

webpage_regex = re.search(r'span_ed7[\s\S]*', dd)
[\s\S]*  匹配任意字符(包括换行符),因此可以跨多行匹配;其中 \s 和 \S 是转义字符类,分别表示空白字符和非空白字符

webpage_regex = re.compile('<a[^>]+href=["\'](.*?)["\']', re.IGNORECASE) 



# -*- encoding: utf-8 -*-
import os
import re
import urllib2

import requests
from bs4 import BeautifulSoup

# Download every image found between the 'span_ed7' and 'span_ed8' markers of
# a gallery page and save them as 1.jpg, 2.jpg, ... in a fixed directory.
#
# NOTE: this is Python 2 code (urllib2). print is called with a single
# parenthesised argument so each statement prints the same text as before.

# Gallery page to scrape. An earlier assignment of
# 'http://www.muu22.com/knstz_385866.html?ucmidtm=1538566880.7' was
# overwritten before it was ever used, so only the effective URL is kept.
url = 'http://www.muu22.com/knstz_718071.html'

# Fetch the raw page; close the connection even if the read fails.
response = urllib2.urlopen(url)
try:
    dd = response.read()
finally:
    response.close()

# Print the text between <title> and </title>. A page without a title is
# not fatal because the title is only reported, never used afterwards.
title_match = re.search(r'<title>(.*?)</title>', dd)
title = title_match.group(1) if title_match else ''
print(title)

# Isolate the part of the page between the two marker strings. The pattern is
# greedy, so it extends to the LAST 'span_ed8' on the page.
webpage_regex = re.search(r'span_ed7([\s\S]*)span_ed8', dd)
if webpage_regex is None:
    # Without the marked region there is nothing to download.
    raise SystemExit('span_ed7 ... span_ed8 region not found in ' + url)
print(webpage_regex.group())

# Collect the src attribute of every <img> tag inside that region.
img_src_regex = re.compile('<img[^>]+src=["\'](.*?)["\']', re.IGNORECASE)
image_urls = img_src_regex.findall(webpage_regex.group())

# Target directory for the downloaded pictures; create it on first use so
# open() below does not fail on a fresh checkout.
out_dir = u'[Ligui丽柜] 网络丽人 Model 王欣竹[53P]_nnuu22'
if not os.path.isdir(out_dir):
    os.makedirs(out_dir)

# NOTE(review): the original loop had lost its indentation; the body is
# assumed to run up to the counter increment, with 'end' printed once after.
for index, image_url in enumerate(image_urls, 1):
    print('get picture')
    # A timeout keeps one stalled download from hanging the whole run.
    pic = requests.get(image_url, timeout=30)
    if pic.status_code != 200:
        # Do not save an HTTP error page under a .jpg name.
        print('skip %s (HTTP %d)' % (image_url, pic.status_code))
        continue
    print('prepare path & name')
    path = os.path.join(out_dir, u'%d.jpg' % index)
    print('prepare write')
    # The with-block closes the file even if the write raises.
    with open(path, 'wb') as fp:
        fp.write(pic.content)
    print(image_url)
print('end')
posted @ 2019-04-27 23:00  第一v最寂寞  阅读(1543)  评论(0)  编辑  收藏  举报