python-web-下载所有xkcd漫画

下载所有xkcd漫画

 

# downloads every single xkcd comic

import requests,os,bs4
# Walk xkcd from the start page backwards through each page's "prev" link,
# saving every comic image found into ./xkcd.
url = 'http://xkcd.com'  # start url
os.makedirs('xkcd', exist_ok=True)  # store comics in ./xkcd

# The loop ends once the next url ends with '#'; presumably the oldest
# comic's prev link has href '#' -- confirm against the live site.
while not url.endswith('#'):
    # Download the page. A timeout keeps a stalled connection from hanging
    # the whole run indefinitely.
    print('downloading page %s...'%url)
    res = requests.get(url, timeout=30)
    res.raise_for_status()

    # Name the parser explicitly so bs4 does not guess one (which warns and
    # can give different results depending on which parsers are installed).
    soup = bs4.BeautifulSoup(res.text, 'html.parser')

    # Find the url of the comic image.
    comicElem = soup.select('#comic img')
    if not comicElem:
        print('could not find comic image')
    else:
        # Assumes the src attribute is protocol-relative ('//host/path'),
        # hence the 'http:' prefix.
        comicUrl = 'http:'+comicElem[0].get('src')

        # Download the image.
        print('downloading image %s .... '%(comicUrl))
        res = requests.get(comicUrl, timeout=30)
        res.raise_for_status()

        # Save the image to ./xkcd; the with-block closes the file even if
        # a chunk read or write fails part-way through.
        imagePath = os.path.join('xkcd', os.path.basename(comicUrl))
        with open(imagePath, 'wb') as imageFile:
            for chunk in res.iter_content(100000):
                imageFile.write(chunk)

    # Get the prev button's url; stop cleanly if the page has none instead
    # of failing with an IndexError.
    prevLinks = soup.select('a[rel="prev"]')
    if not prevLinks:
        print('could not find prev link, stopping')
        break
    prevLink = prevLinks[0]
    url = 'http://xkcd.com'+prevLink.get('href')
posted @ 2018-05-06 10:23  8亩田  阅读(351)  评论(0)  编辑  收藏  举报