# 爬取网页的通用代码框架.py(亲测有效)
import requests


def getHTMLText(url, timeout=30):
    """Fetch ``url`` and return a fixed excerpt of the decoded page text.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the call indefinitely.

    Returns:
        The slice ``[1000:2000]`` of the response text (shorter, possibly
        empty, when the page itself is shorter), or the string "产生异常"
        when the request fails (connection error, timeout, invalid URL, or
        an HTTP error status).
    """
    # Send a browser-like User-Agent header instead of the library default.
    headers = {'user-agent': 'Mozilla/5.0'}
    try:
        r = requests.get(url, headers=headers, timeout=timeout)
        # Raise requests.HTTPError for 4xx/5xx responses.
        r.raise_for_status()
        # Decode using the encoding detected from the body content.
        r.encoding = r.apparent_encoding
        return r.text[1000:2000]
    except requests.RequestException:
        # Only request-related failures are turned into the sentinel string;
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        return "产生异常"


if __name__ == "__main__":
    url = "https://www.amazon.cn/dp/B08M5BZS7G/ref=s9_acsd_hps_bw_c2_x_1_i?pf_rd_m=A1U5RCOVU0NYF2&pf_rd_s=merchandised-search-14&pf_rd_r=SVTY7ZZ7H7B96BGWWD39&pf_rd_t=101&pf_rd_p=06ed40c2-7f57-4088-aa29-c2103889f7f3&pf_rd_i=116169071"
    print(getHTMLText(url))