模拟百度进行图片搜索,有问题可以留言

模拟百度进行图片搜索

import requests
import re
#分析页面我们先在百度图片搜索里面随笔输入一个'背景图片'获得链接如下
#https://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=index&fr=&hs=0&xthttps=111111&sf=1&fmq=&pv=&ic=0&nc=1&z=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&word=背景图片
#word为我们查找的内容


# data = input('请输入你要搜索的内容')
# rp = requests.get(f'https://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=index&fr=&hs=0&xthttps=111111&sf=1&fmq=&pv=&ic=0&nc=1&z=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&word={data}')
# rp.encoding = rp.apparent_encoding
# print(rp.text)
#试了一下很明显内容是错误的,主体很多内容没有


#这时候我们加headers
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.90 Safari/537.36'
}
#User-Agent是对于我们来源的进行伪装

data = input('请输入你要搜索的内容')
rp = requests.get(f'https://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=index&fr=&hs=0&xthttps=111111&sf=1&fmq=&pv=&ic=0&nc=1&z=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&word={data}',headers=headers)
rp.encoding = rp.apparent_encoding
data = '"middleURL":"(.*?)",'
# print(rp.text)
#对于url查找我们一定不要看原网页上的,要看访问请求下来的text,他可能会相比于原来网页会少了点前缀之类的,但是肯定会有一部分内容一样,绝对能发现的.
url = re.findall(data,rp.text,re.S)
print(url)


#下一步对就是对爬取的url再进行访问可以将文件保存至本地,具体可以参考博客
https://www.cnblogs.com/pythonywy/p/10856508.html

posted @ 2019-08-06 21:35 小小咸鱼YwY 阅读(938) 评论(0) 收藏举报

刷新页面返回顶部

加载时间中.....

Python 前端 爬虫 数据库 Django Flask 微信小程序 Linux Go

模拟百度进行图片搜索,有问题可以留言

公告

加载时间中.....

Python 前端 爬虫 数据库 Django Flask 微信小程序 Linux Go

模拟百度进行图片搜索,有问题可以留言

公告

Python 前端爬虫数据库 Django Flask 微信小程序 Linux Go