妹子图爬取

import requests
import pymysql
from lxml import etree
# Database wrapper
class MysqlHelper(object):
    """Small wrapper around one pymysql connection and one cursor.

    The connection is opened when the instance is created. Call ``close()``
    to release it explicitly; ``__del__`` closes it as a fallback.
    """

    def __init__(self, host='127.0.0.1', port=3306, user='root',
                 password='123456', database='py11', charset='utf8'):
        """Open the connection and create a cursor.

        Every argument is forwarded to ``pymysql.connect``. The defaults are
        the values that used to be hard-coded here, so ``MysqlHelper()``
        behaves as before.

        NOTE(review): the default password is stored in plain text in the
        source; pass real credentials in explicitly outside local testing.
        """
        # Define both attributes first so close()/__del__ stay safe even when
        # connect() raises and the instance is only partially initialised.
        self.db = None
        self.cursor = None
        self.db = pymysql.connect(host=host, port=port, user=user,
                                  password=password, database=database,
                                  charset=charset)
        self.cursor = self.db.cursor()

    def execute_modify_sql(self, sql, data):
        """Execute one data-modifying statement and commit it.

        Args:
            sql: Statement text, with placeholders for ``data``.
            data: Parameters handed to ``cursor.execute`` unchanged.

        Raises:
            Exception: Whatever ``execute`` or ``commit`` raised; the
                transaction is rolled back before the error propagates.
        """
        try:
            self.cursor.execute(sql, data)
            self.db.commit()
        except Exception:
            # Do not leave a half-applied transaction open on the connection.
            self.db.rollback()
            raise

    def close(self):
        """Close the cursor and the connection; safe to call more than once."""
        cursor = getattr(self, 'cursor', None)
        db = getattr(self, 'db', None)
        # Drop the references first so a repeated call becomes a no-op.
        self.cursor = None
        self.db = None
        try:
            if cursor is not None:
                cursor.close()
        finally:
            # Close the connection even if closing the cursor failed.
            if db is not None:
                db.close()

    def __del__(self):
        """Release the cursor and connection when the helper is collected."""
        self.close()
# Database setup: the parameterized INSERT used for every stored image URL,
# and the one connection helper shared by the whole run.
sql = 'insert into mzitu(src) values (%s)'
mc = MysqlHelper()

# Headers sent with every HTTP request (a desktop Chrome User-Agent).
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
}

# Seconds to wait on a single HTTP request, so one stalled connection cannot
# hang the whole crawl.
REQUEST_TIMEOUT = 10


def _fetch(url):
    """GET *url* with the shared headers and timeout and return the response."""
    return requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)


# Walk the first three listing pages and store every image URL found.
for i in range(1, 4):
    url = 'http://www.mzitu.com/page/{}/'.format(i)
    response = _fetch(url)

    # Dump the raw listing HTML to mzitu.html in the current directory. The
    # file is rewritten on every iteration, so it ends up holding the last
    # listing page fetched.
    with open('mzitu.html', 'wb') as f:
        f.write(response.content)

    # Every <li> under ul#pins is expected to link to one album.
    html_ele = etree.HTML(response.text)
    li_list = html_ele.xpath('//ul[@id="pins"]/li')

    for li_ele in li_list:
        hrefs = li_ele.xpath('./a/@href')
        if not hrefs:
            # List item without an album link: nothing to follow.
            continue
        li_href = hrefs[0]

        response_a = _fetch(li_href)
        html_ele_a = etree.HTML(response_a.text)

        # The second-to-last pager label is read as the album's page count.
        page_labels = html_ele_a.xpath('//div[@class="pagenavi"]/a/span/text()')
        if len(page_labels) < 2:
            # No usable pager on this page, so the page count is unknown.
            continue
        a_list = page_labels[-2]

        for i2 in range(1, int(a_list) + 1):
            # Build each page URL from this album's own link. The previous
            # code hard-coded album 146586 here, so every album re-downloaded
            # that one album's images.
            # Assumes album pages live at <album-url>/<n>/, the shape of the
            # old hard-coded URL -- TODO confirm against the site.
            url_a = '{}/{}/'.format(li_href.rstrip('/'), i2)
            response_b = _fetch(url_a)
            html_ele_b = etree.HTML(response_b.text)

            srcs = html_ele_b.xpath('//div[@class="main-image"]/p/a/img/@src')
            if not srcs:
                # Page without the main image element: skip it.
                continue
            a_list_a = srcs[0]

            # One-element tuple: `(value)` without the comma is just the
            # string itself, not a parameter sequence.
            data = (a_list_a,)
            mc.execute_modify_sql(sql, data)

posted on 2018-08-19 21:41  luwanhe  阅读(161)  评论(0)  编辑  收藏  举报

导航