先使用PhantomJS(Selenium以后将不再支持PhantomJS,可改用无头Chrome或火狐插件)模拟用户登录,拿到登录后页面的cookie,再用cookie访问本网站,拿到想要的搜索结果

# Obtain the site's cookies by logging in through a scripted browser.
def obCookie():
    """Log in to juming.com with PhantomJS and return the session cookies.

    The login form is filled with the placeholder credentials "账号" /
    "密码"; replace them with a real account and password before use.

    Returns:
        dict: cookie name -> cookie value, as reported by the driver after
        the login form has been submitted.
    """
    url = "http://www.juming.com/"
    service_args = [
        '--load-images=no',  # do not load images
        '--ignore-ssl-errors=true',  # ignore HTTPS errors
    ]
    # The options must actually be handed to the driver; building the list
    # without passing it leaves PhantomJS running with its defaults.
    driver = webdriver.PhantomJS(
        executable_path="phantomjs.exe",  # Windows binary name
        service_args=service_args,
    )
    # On Linux: webdriver.PhantomJS("phantomjs", service_args=service_args)
    try:
        print("获取网站cookie。。")
        driver.get(url)
        # Fixed wait, presumably to let the page finish loading.
        time.sleep(5)
        driver.find_element_by_xpath('//*[@id="UserName"]').send_keys("账号")
        driver.find_element_by_xpath('//*[@id="Password"]').send_keys("密码")
        driver.find_element_by_xpath('//*[@id="loginBox"]/button').click()

        # Fixed wait, presumably to let the login request complete.
        time.sleep(2)
        cookie_list = driver.get_cookies()
    finally:
        # quit() (not close()) also terminates the PhantomJS process, and the
        # finally clause guarantees that happens even if the login steps fail.
        driver.quit()
    return {cookie["name"]: cookie["value"] for cookie in cookie_list}
# Log in once when the script runs and keep the cookies for later requests.
cookie_dict = obCookie()
# Read the number of result pages from the search page.
def obtainPage():
    """Query the search page and derive how many result pages exist.

    POSTs the module-level ``payload`` with ``headers`` and ``cookie_dict``,
    takes the first element matching the CSS class ``reg``, extracts the text
    preceding ``</span>`` and converts it to an integer total.  The total is
    divided by 50 and rounded up (presumably 50 results per page -- confirm
    against the site).

    Returns:
        int | None: the page count, or ``None`` when no ``.reg`` element or
        no number is found, or when the computed count is 0.
    """
    print("-----------------------")
    print("获取网站页数方法")
    url = "http://www.juming.com/newcha/index.htm?cha=1"
    response = requests.post(
        url, headers=headers, data=payload, cookies=cookie_dict
    )
    response.encoding = 'utf-8'

    reg_nodes = BeautifulSoup(response.text, 'html.parser').select('.reg')
    if not reg_nodes:
        return None

    captured = re.findall('.*">(.*)</span>.*', str(reg_nodes[0]))
    if not captured:
        return None

    # Ceiling division: a partially filled last page still counts as a page.
    full_pages, leftover = divmod(int(captured[0]), 50)
    page_count = full_pages + 1 if leftover else full_pages
    return page_count if page_count != 0 else None
# Number of result pages (None when obtainPage() finds no usable count).
# NOTE(review): obtainPage() reads the module-level `headers` and `payload`,
# which appear later in this listing; they must be defined before this call runs.
page = obtainPage()

先模拟登录获取cookie,再拿着cookie访问网站,获取想要的搜索结果

# Date two days after the current local date.
def obTime():
    """Return the date two days from now as a ``YYYY-MM-DD`` string.

    NOTE(review): the offset is two days even though the original local
    variable was called ``tomorrow`` -- confirm which one is intended.
    """
    target = datetime.datetime.today() + datetime.timedelta(days=2)
    return target.date().isoformat()


# Date string (YYYY-MM-DD) produced by obTime(); sent as the "scsj" form value.
data = obTime()
# Form fields sent as the POST body by obtainPage().
# NOTE(review): the field names look like site-defined abbreviations; their
# meanings are not visible here -- confirm against the site's search form.
payload = {
    "ymhz":"com,cn",
    "sfba_1":"1",
    "sclx":"2",
    "scsj":data,
}

# Request headers: only a desktop Chrome User-Agent string is set.
headers = {
    "User-Agent":"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36",
}

 

posted @ 2018-05-02 15:19  自说自话唉  阅读(801)  评论(0)  编辑  收藏  举报