爬虫-爬取美少女壁纸

1. 有时候想找些好看的壁纸,一个一个下载太慢了。作为一个菜鸡程序员,还是会点爬虫的;说到爬虫,当然还是 Python 香,说干就干。

 

import requests
import  re,os
from lxml import etree
import threading


def getUrl(i, timeout=10):
    """Collect the album links listed on one search-result page.

    Fetches page ``i`` of the "动漫" search on www.bizhizu.cn and inspects
    list items 1 through 18 under the element whose class is ``imgcont``.

    Args:
        i: Page number inserted into the search URL.
        timeout: Seconds handed to ``requests.get`` so a stalled connection
            cannot block the calling thread forever.

    Returns:
        A list of ``[album_href, album_name]`` pairs, one per list item that
        has both a first-link ``href`` and a second-link text. Items missing
        either one are skipped.

    Raises:
        Whatever ``requests.get`` raises (connection errors, timeouts) is
        propagated to the caller.
    """
    page_url = "https://www.bizhizu.cn/search/动漫/" + str(i) + ".html"
    page_res = requests.get(page_url, timeout=timeout).text
    html = etree.HTML(page_res)

    url_list = []
    # A distinct loop name keeps the page-number parameter `i` intact
    # (the original reused `i` as the loop variable).
    for position in range(1, 19):
        item_xpath = '//*[@class="imgcont"]/ul/li[' + str(position) + ']'
        hrefs = html.xpath(item_xpath + '/a[1]/@href')
        names = html.xpath(item_xpath + '/a[2]/text()')

        if hrefs and names:
            url_list.append([hrefs[0], names[0]])

    print(url_list)
    return url_list


def saveImg(url, path_name, save_root="/Volumes/HD2/downloadpic/", timeout=10):
    """Download the images referenced by one album page into its own folder.

    The album page's ``thumb`` list is scanned (items 1 through 9). For every
    thumbnail whose ``src`` ends in ``.jpg.220.146.jpg`` the function requests
    the same URL with ``.source.jpg`` in place of the size suffix and stores
    the response body as ``<save_root>/<album name>/<image name>.jpg``.

    Args:
        url: Address of the album page.
        path_name: Album title; used for the keyword filter and, with ``/``
            replaced by ``-``, as the folder name.
        save_root: Directory under which album folders are created.
        timeout: Seconds handed to every ``requests.get`` call.

    Returns:
        None.

    Raises:
        Exceptions from ``requests.get`` and from file-system operations are
        propagated to the caller.
    """
    res = requests.get(url, timeout=timeout).text
    html = etree.HTML(res)

    # NOTE(review): three keywords are empty strings in the published source
    # (apparently redacted by the author). An empty string is contained in
    # every string, so as written this filter accepts every album. Fill in
    # real keywords to make it selective.
    keywords = ("", "", "", "少女")
    if not any(word in path_name for word in keywords):
        return

    # "/" in a title would otherwise be read as a path separator.
    r_name = path_name.replace("/", "-")
    album_dir = os.path.join(save_root, r_name)
    # exist_ok avoids the check-then-create race between the worker threads
    # and also creates a missing parent directory.
    os.makedirs(album_dir, exist_ok=True)

    for index in range(1, 10):
        thumbs = html.xpath('//*[@id="thumb"]/li[' + str(index) + ']/a/img/@src')
        if not thumbs:
            continue

        img_url = re.findall(r"(https://.*?\.jpg)\.220\.146.jpg", thumbs[0])
        if not img_url:
            continue

        # Take the file name from the matched URL itself (last path segment
        # without its ".jpg" suffix) instead of a second, host-specific regex
        # whose failure to match used to raise IndexError.
        img_name = img_url[0].rsplit("/", 1)[-1][:-len(".jpg")]

        img = requests.get(img_url[0] + ".source.jpg", timeout=timeout)
        if not img.ok:
            # Do not store an HTTP error body under a .jpg name.
            print("下载失败:", img_url[0] + ".source.jpg")
            continue

        # 'wb' rather than 'ab': appending would duplicate the image bytes
        # inside the file every time the script is run again.
        with open(os.path.join(album_dir, img_name + ".jpg"), 'wb') as f:
            f.write(img.content)
        print("保存成功:", path_name + '/' + img_name + ".jpg")
def _crawl_pages(first_page, stop_page):
    """Download every album listed on search pages first_page..stop_page-1."""
    for page in range(first_page, stop_page):
        for album_url, album_name in getUrl(page):
            saveImg(album_url, album_name)


def demo1():
    """Crawl search pages 1 through 17; takes no arguments so it can be a thread target."""
    _crawl_pages(1, 18)


def demo2():
    """Crawl search pages 18 through 36; takes no arguments so it can be a thread target."""
    _crawl_pages(18, 37)

if __name__ == '__main__':
    # Start two threads so the two page ranges are downloaded concurrently.
    t1 = threading.Thread(target=demo1)
    t2 = threading.Thread(target=demo2)
    t1.start()
    t2.start()
    # Wait for both workers so the script ends only after all downloads finish.
    t1.join()
    t2.join()

 

posted @ 2020-05-28 19:52  黑曼巴后仰  阅读(294)  评论(0)  编辑  收藏  举报