requests高级之防盗链

import requests,os
from lxml import etree

# Output directory for the downloaded images; created if it does not exist yet.
dirName = '三里屯'
if not os.path.exists(dirName):
    os.mkdir(dirName)

# Only a browser-like User-Agent is sent; no Referer header is included here.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36',
}

url = 'http://blog.sina.com.cn/s/blog_01ebcb8a0102zi2o.html?tj=1'

# Download the blog page and parse its HTML into an element tree.
page_text = requests.get(
    url=url,
    headers=headers,
).text
tree = etree.HTML(page_text)

# Collect the `real_src` attribute of each <img> found under
# the element with id "sina_keyword_ad_area2" (div > a > img).
img_list = tree.xpath('//*[@id="sina_keyword_ad_area2"]/div/a/img/@real_src')
for img in img_list:
    # File name = last path segment of the image URL with ".jpg" appended.
    img_name = img.rsplit('/', 1)[-1] + '.jpg'
    img_path = f'{dirName}/{img_name}'

    # Fetch the raw bytes with the same headers and write them to disk.
    img_data = requests.get(url=img, headers=headers).content
    with open(img_path, 'wb') as fp:
        fp.write(img_data)
  • 3.此时,我们发现直接请求real_src请求到的图片不显示
    • 加上Referer请求头即可
    • 如何查找到Referer头信息呢
      • 抓包工具中,Network--->找到img--->找到界面上任意一张图片的数据包,在request headers中找到Referer信息
      • 将Referer信息添加到headers中即可
import requests,os
from lxml import etree

# Output directory for the downloaded images.
dirName = '三里屯'
# makedirs(exist_ok=True) replaces the exists()/mkdir() pair, which could fail
# if the directory appeared between the check and the creation.
os.makedirs(dirName, exist_ok=True)

headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36',
    # Referer header: sent with every request below, including the image
    # requests, so they look like they originate from the blog site.
    'Referer': 'http://blog.sina.com.cn/',
}

url = 'http://blog.sina.com.cn/s/blog_01ebcb8a0102zi2o.html?tj=1'

# Seconds to wait on each request; requests applies no timeout by default,
# so without this a stalled server would hang the script forever.
REQUEST_TIMEOUT = 10

# Download the blog page; abort on an HTTP error rather than parsing an error page.
page_response = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
page_response.raise_for_status()
page_text = page_response.text
tree = etree.HTML(page_text)

# Collect the `real_src` attribute of each <img> found under
# the element with id "sina_keyword_ad_area2" (div > a > img).
img_list = tree.xpath('//*[@id="sina_keyword_ad_area2"]/div/a/img/@real_src')
for img in img_list:
    # File name = last path segment of the image URL with ".jpg" appended.
    img_name = img.split('/')[-1] + '.jpg'

    img_response = requests.get(url=img, headers=headers, timeout=REQUEST_TIMEOUT)
    if not img_response.ok:
        # Do not store an error body under a .jpg name; report it and continue
        # with the remaining images.
        print(f'skipped {img}: HTTP {img_response.status_code}')
        continue

    img_path = os.path.join(dirName, img_name)
    with open(img_path, 'wb') as fp:
        fp.write(img_response.content)
  • 4.再次打开下载的图片,正常显示
posted @ 2022-03-23 00:00  Tony_xiao  阅读(83)  评论(0)  编辑  收藏  举报