欢迎来到RankFan的Blogs

扩大
缩小

Python下载课件

from urllib.request import urlretrieve  # #下载网络文件到本地
import os
# Switch to the course folder so that every download is saved there.
os.chdir("C:/Users/RankFan/Desktop/空间计量经济学")

# Fetch the slides for chapters 4 and 5; each PDF keeps its remote file name.
for chapter in (4, 5):
    slide_url = f'http://www.mysmu.edu/faculty/zlyang/ECON6002_21-Web/Chap{chapter}-2021T2.pdf'
    print(slide_url)
    # The last path segment of the URL doubles as the local file name.
    local_name = slide_url.rsplit('/', 1)[-1]
    print(local_name)
    urlretrieve(slide_url, filename=local_name)

下载PDF

import re
import urllib.request


# Browser-like User-Agent sent with every request. It is assembled from
# adjacent string literals so that no source indentation leaks into the
# header value (a backslash continuation inside a literal would embed it).
USER_AGENT = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
    'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36'
)


def open_url(url: str) -> bytes:
    """Fetch *url* and return the raw response body.

    Args:
        url: Address of the resource to download.

    Returns:
        The undecoded response body as bytes.

    Raises:
        urllib.error.URLError: If the request cannot be completed
            (``HTTPError`` is a subclass raised for HTTP error statuses).
    """
    request = urllib.request.Request(url)
    request.add_header('User-Agent', USER_AGENT)
    # The context manager closes the response as soon as the body has been
    # read instead of leaving the connection open until garbage collection.
    with urllib.request.urlopen(request) as page:
        return page.read()

# Download every PDF linked from an index page (the name is historical).
def get_img(html, base_url='https://www.math.pku.edu.cn/teachers/lidf/docs/textrick/'):
    """Save each PDF referenced by *html* into the current working directory.

    Args:
        html: Raw bytes of the index page; decoded as GB2312.
        base_url: URL that the page's links are resolved against. Defaults
            to the directory of the index page this script targets.

    Returns:
        None. Each PDF is written to a file named after the last path
        segment of its link.

    Raises:
        urllib.error.URLError: If a linked PDF cannot be downloaded.
        OSError: If a downloaded file cannot be written.
    """
    from urllib.parse import urljoin

    pattern = r'<A HREF="([^"]+\.pdf)"'
    # A stray undecodable byte elsewhere on the page must not abort the whole
    # run, so invalid sequences are replaced instead of raising.
    text = html.decode('gb2312', errors='replace')

    for href in re.findall(pattern, text):
        # urljoin gives the same result as concatenation for plain relative
        # links and additionally handles absolute or parent-relative hrefs.
        item = urljoin(base_url, href)
        # The href comes from remote HTML: keep only its final segment so the
        # file always lands in the working directory and never needs (or
        # escapes into) another directory.
        filename = href.replace('\\', '/').rsplit('/', 1)[-1]
        content = open_url(item)
        with open(filename, 'wb') as f:
            f.write(content)

if __name__ == "__main__":
    # Download the index page, then save every PDF it links to.
    index_html = open_url('https://www.math.pku.edu.cn/teachers/lidf/docs/textrick/index.htm')
    get_img(index_html)

posted on 2021-08-21 17:12  RankFan  阅读(56)  评论(0)  编辑  收藏  举报

导航