#!/usr/bin/python
# coding: UTF-8
import requests
import re
import os
def GetLink(link):
    """Download the page at ``link`` and extract its album links.

    Returns a list of ``(href, title)`` tuples, one for every anchor in the
    page whose ``href`` starts with ``/pn.x`` and is followed by ``title``
    and ``target`` attributes. The list is empty when nothing matches.
    """
    response = requests.get(
        link,
        headers={'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'},
        timeout=20,
    )
    # Decode the body using the encoding requests detects from its content.
    response.encoding = response.apparent_encoding
    return re.findall(
        r'\<a\s+href\=\"(\/pn\.x\S+)\"\s+title\=\"(\S+)\"\s+target',
        response.text,
    )
def GetPicture(link):
    """Download the page at ``link`` and collect the JPEG URLs it mentions.

    Returns a list of strings, each a run of non-whitespace characters that
    starts with ``https:`` and ends with ``.jpg``. The list is empty when
    the page contains no such text.
    """
    response = requests.get(
        link,
        headers={'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'},
        timeout=20,
    )
    # Decode the body using the encoding requests detects from its content.
    response.encoding = response.apparent_encoding
    # Sample of the markup being matched:
    #   src="https://pic.ssz91.com/d7/3643/364391-1.jpg"
    return re.findall(r'(https\:\S+\.jpg)', response.text)
# Crawl listing pages 1..188 and save every album's images into a directory
# named after the album, created next to this script.
site_root = "http://ccw27.com"
list_url = "http://ccw27.com/piclist.x?classid=6&page="
# Resolve the script directory once. abspath/dirname cope with a relative
# __file__ (e.g. "script.py", for which splitting on "/" would yield the
# filesystem root) and with platform-specific path separators.
script_dir = os.path.dirname(os.path.abspath(__file__))

for i in range(1, 189):
    try:
        for each in GetLink(list_url + str(i)):
            new_link = site_root + each[0]
            new_title = each[1]
            print(new_link, new_title)
            # The title is scraped from remote HTML; neutralise path
            # separators so it always names one directory under script_dir.
            dir_name = new_title.replace("/", "_").replace("\\", "_")
            temp_path = os.path.join(script_dir, dir_name)
            if os.path.exists(temp_path):
                # Album directory already present from an earlier run: skip.
                continue
            # Create exactly the directory the images are written into, so
            # the crawl works regardless of the current working directory.
            os.mkdir(temp_path, mode=0o777)
            for j, each2 in enumerate(GetPicture(new_link), start=1):
                picture = requests.get(each2, timeout=20)
                pictureName = os.path.join(
                    temp_path, "picture" + str(j) + ".jpg"
                )
                with open(pictureName, 'wb') as f:
                    f.write(picture.content)
                print("爬完了第%d个(第%d页)" % (j, i))
    except Exception as exc:
        # Best-effort crawl: report the failure and move on to the next page.
        # Catching Exception rather than using a bare except keeps Ctrl+C
        # (KeyboardInterrupt) and SystemExit able to stop the run.
        print("执行出错", exc)