python_正则表达式随笔

webpage_regex = re.search(r'span_ed7[\s\S]*', dd)

[\s\S]* 可以匹配包括换行符在内的任意字符，因此能够跨多行匹配；其中 \s（空白字符）和 \S（非空白字符）是正则表达式的转义序列。

webpage_regex = re.compile('<a[^>]+href=["\'](.*?)["\']', re.IGNORECASE)

# -*- encoding: utf-8 -*-
import os
import re
import urllib2

import requests
from bs4 import BeautifulSoup

# Page whose gallery images are downloaded.
PAGE_URL = 'http://www.muu22.com/knstz_718071.html'

# Directory the numbered .jpg files are written into (created when missing).
OUTPUT_DIR = u'[Ligui丽柜] 网络丽人 Model 王欣竹[53P]_nnuu22/'

# Seconds to wait on any single HTTP request before giving up.
REQUEST_TIMEOUT = 30

# DOTALL lets the title span several lines; IGNORECASE accepts <TITLE>.
TITLE_RE = re.compile(r'<title>(.*?)</title>', re.IGNORECASE | re.DOTALL)

# Greedy on purpose: runs from the first "span_ed7" to the LAST "span_ed8".
GALLERY_RE = re.compile(r'span_ed7([\s\S]*)span_ed8')

# Captures the quoted value of the src attribute of every <img> tag.
IMG_SRC_RE = re.compile('<img[^>]+src=["\'](.*?)["\']', re.IGNORECASE)


def extract_title(html):
    """Return the text inside the first <title> element of *html*.

    Surrounding whitespace is stripped. Returns None when the page has no
    <title> element, instead of failing on a missing match.
    """
    match = TITLE_RE.search(html)
    if match is None:
        return None
    return match.group(1).strip()


def extract_gallery(html):
    """Return the part of *html* delimited by the gallery markers.

    The returned text includes the "span_ed7" and "span_ed8" markers
    themselves. Returns None when the markers are not both present.
    """
    match = GALLERY_RE.search(html)
    if match is None:
        return None
    return match.group()


def extract_image_urls(fragment):
    """Return the src values of all <img> tags in *fragment*, in order."""
    return IMG_SRC_RE.findall(fragment)


def download_images(image_urls, directory):
    """Download every URL in *image_urls* into *directory*.

    Files are named "<position>.jpg", where position is the 1-based index
    of the URL in *image_urls*; a failed download therefore leaves a gap
    rather than shifting the numbering of later files.

    Returns the number of files written. A URL that cannot be fetched
    (network error, timeout, malformed URL or HTTP error status) is
    reported and skipped, so an error page is never saved as a picture.
    """
    if not os.path.isdir(directory):
        os.makedirs(directory)

    saved = 0
    for position, image_url in enumerate(image_urls, 1):
        print('get picture')
        try:
            picture = requests.get(image_url, timeout=REQUEST_TIMEOUT)
            picture.raise_for_status()
        except requests.RequestException as error:
            print('skip %s: %s' % (image_url, error))
            continue

        print('prepare path & name')
        path = os.path.join(directory, str(position) + '.jpg')
        print('prepare write')
        # "with" closes the file even when the write fails.
        with open(path, 'wb') as image_file:
            image_file.write(picture.content)
        print(image_url)
        saved += 1
    return saved


def main():
    """Fetch PAGE_URL, print its title and save its gallery images."""
    response = urllib2.urlopen(PAGE_URL, timeout=REQUEST_TIMEOUT)
    try:
        html = response.read()
    finally:
        # urllib2 responses are not context managers, so close explicitly.
        response.close()

    title = extract_title(html)
    if title is not None:
        print(title)

    gallery = extract_gallery(html)
    if gallery is None:
        print('gallery section not found')
    else:
        print(gallery)
        download_images(extract_image_urls(gallery), OUTPUT_DIR)
    print('end')


if __name__ == '__main__':
    main()

posted @ 2019-04-27 23:00 第一v最寂寞阅读(1554) 评论(0) 编辑收藏举报

刷新页面返回顶部

第一v最寂寞

python_正则表达式随笔

公告