res_d_l =[{'contents':d.contents,'href':d.attrs['href']} for d in rd] 泛型

 

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
# from selenium.webdriver.firefox.options import Options
import time
from time import sleep
import math
import random
import sys
import threading
from random import choice
# import urllib.parse
from bs4 import BeautifulSoup

ua_list = []
with open('mobile_ua.txt', 'r', encoding='utf-8') as uafile:
for i in uafile:
if i.find('Mozilla') > -1:
ua_list.append(i.replace('\n', '').strip())

ua_list_len_ = len(ua_list) - 1


def close_alert(browser, attitude='accept'):
# js='alert(window.alert=function(str){return;}'
# browser.execute_script(js)

# js= 'window.alert = function(str){return ;}'
# browser.execute_script(js)
return


# mobile_emulation = {
# "deviceMetrics": {"width": 360, "height": 640, "pixelRatio": 3.0},
# "userAgent": "Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 5 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19"}
ua_list_index = random.randint(0, ua_list_len_)
# mobile_emulation = {
# "deviceMetrics": {"width": 360, "height": 640, "pixelRatio": 3.0}}
#
# mobile_emulation['userAgent'] = choice(ua_list)
# chrome_options = Options()
# chrome_options.add_experimental_option("mobileEmulation", mobile_emulation)
# browser = webdriver.Chrome(chrome_options=chrome_options)
browser = webdriver.Chrome()
s_wd = '长尾词'
url_seed = 'https://m.baidu.com/s?word=s_wd'

url_seed = url_seed.replace('s_wd', s_wd)
print(url_seed)
browser.get(url_seed)

rd = BeautifulSoup(browser.page_source, 'html.parser').find_all('a', class_='rw-item')
res_d_l = [{'contents': d.contents, 'href': d.attrs['href']} for d in rd]
browser.quit()
d = 3















posted @ 2017-12-19 23:43  papering  阅读(302)  评论(0编辑  收藏  举报