爬取网页的通用代码框架.py(亲测有效)

import requests

def getHTMLText(url):
    try:
        kv = {'user-agent':'Mozilla/5.0'}
        r = requests.get(url,headers = kv)
        r.raise_for_status() #如果状态不是200,引发HTTPError异常
        r.encoding = r.apparent_encoding
        return r.text[1000:2000]
    except:
        return "产生异常"

if __name__ == "__main__":
    url = "https://www.amazon.cn/dp/B08M5BZS7G/ref=s9_acsd_hps_bw_c2_x_1_i?pf_rd_m=A1U5RCOVU0NYF2&pf_rd_s=merchandised-search-14&pf_rd_r=SVTY7ZZ7H7B96BGWWD39&pf_rd_t=101&pf_rd_p=06ed40c2-7f57-4088-aa29-c2103889f7f3&pf_rd_i=116169071"
    print(getHTMLText(url))

 

posted on 2020-12-29 22:02  打篮球的Curry  阅读(242)  评论(0编辑  收藏  举报