# Simplify window control and reduce the complexity of exception handling.
# Instantiate a new browser and open a given URL.
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import time, random
from selenium.webdriver.common.keys import Keys
import logging
import os
# Script start-up: record the start time, work out where this file lives, and
# send INFO-level logging to "<this file's name>.log" in append mode.

# Local wall-clock time at start-up, formatted as YYYYmmdd_HHMMSS.
start_time = time.strftime('%Y%m%d_%H%M%S')
os_sep = os.sep

# Resolve the absolute path once, then split it into directory and file name.
_this_file = os.path.abspath(__file__)
this_file_abspath = os.path.dirname(_this_file)
this_file_name = os.path.basename(_this_file)

# The log file name is relative, so it is created in the current working
# directory rather than next to this file.
logf = this_file_name + '.log'

try:
    logging.basicConfig(
        filename=logf,
        filemode='a',
        level=logging.INFO,
        format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s[thread:%(thread)d][process:%(process)d]',
        datefmt='%a, %d %b %Y %H:%M:%S',
    )
except Exception as e:
    # Logging could not be configured: write one line straight to the log
    # file, echo it to stdout, and terminate the process immediately.
    failure_msg = (
        'logging.basicConfig EXCEPTION '
        + time.strftime('%Y%m%d_%H%M%S')
        + str(e)
    )
    with open(logf, 'a') as log_file:
        log_file.write(failure_msg)
    print(failure_msg)
    os._exit(4002)

logging.info('START')
import string
def gen_a_broswer():
    """Create and return a new Chrome WebDriver using mobile emulation.

    The driver is started with the ``mobileEmulation`` experimental option:
    a 360x640 viewport at pixel ratio 3.0 and an Android / Nexus 5
    user-agent string.  The caller is responsible for calling ``quit()`` on
    the returned driver.

    Returns:
        The ``webdriver.Chrome`` instance that was created.
    """
    # NOTE(review): newer Selenium releases prefer the ``options=`` keyword
    # over ``chrome_options=`` -- confirm the installed version before
    # changing it.
    device_metrics = {"width": 360, "height": 640, "pixelRatio": 3.0}
    user_agent = "Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 5 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19"

    opts = Options()
    opts.add_experimental_option(
        "mobileEmulation",
        {"deviceMetrics": device_metrics, "userAgent": user_agent},
    )
    return webdriver.Chrome(chrome_options=opts)
# Main loop: run browser sessions forever.  Each session opens the seed
# results page, clicks the link whose text is '百度一下,你就知道', types some
# throw-away text into the search box, then (unless the session ended early)
# searches one keyword from ``target`` and walks up to 12 result pages,
# opening every link whose visible text is one of ``url_l``.
#
# NOTE(review): the original indentation of this script was lost; the nesting
# below is a reconstruction of the most plausible structure -- confirm it
# against the intended flow.

# Seed page from which the search-engine link is clicked.  The original code
# first assigned a Sogou URL
# ('http://m.sogou.com/web/searchList.jsp?pid=sogou-mobb-123asd-0007&keyword=百度地图')
# and immediately overwrote it with this one, so only this value was ever used.
url_seed = 'https://www.bing.com/search?q=%E7%99%BE%E5%BA%A6%E4%B8%80%E4%B8%8B&qs=AS&pq=%E7%99%BE%E5%BA%A6&sk=AS1&sc=5-2&cvid=C784E8909000DA&FORM=CHRDEF&sp=2'

# Keywords for the real search; one is picked at random per session.
# Repeated entries are kept because they weight the random choice.
target = ['icarei', 'urcare', 'ABC招商', 'xyz招商', 'ABC合伙人招商', '招商', '密龄招商', 'ABC招商', 'ABC招商', '招商',
          'xyz招商']

# Extra fragments mixed with random ASCII letters when typing throw-away text.
zh_l = ['高招', '幼儿园', '苹果', '香蕉', '井冈山', '新西兰', '坦克']

# Result links are opened when their visible text equals one of these.
url_l = ['www.icarei.cn', 'www.urcare.cn']

# XPath of the search input box and of the search button.
input_area = '//*[@id="index-kw"]'
xp_np = '//*[@id="index-bn"]'


def _search_box(browser):
    """Look up the search input element (re-located on every use)."""
    return browser.find_element_by_xpath(input_area)


def _type_decoy_text(browser):
    """Type random throw-away fragments into the search box.

    Runs 3 to 7 rounds.  Each round sends one fragment (picked from 28 random
    ASCII letters plus ``zh_l``), pauses, then sends a space followed by
    Ctrl+A, Ctrl+X and Ctrl+V.  After each round, if the current epoch second
    is divisible by a random integer in [3, 17], the search button is clicked
    and the session ends early.

    Args:
        browser: the WebDriver showing the search page.

    Returns:
        bool: ``True`` if the caller should continue with the real search,
        ``False`` if the throw-away text was submitted instead.
    """
    for _round in range(random.randint(3, 7)):
        pool = [random.choice(string.ascii_letters) for _n in range(28)] + zh_l
        _search_box(browser).send_keys(random.choice(pool))
        time.sleep(random.randint(1, 3))
        for combo in ((Keys.SPACE,), (Keys.CONTROL, 'a'), (Keys.CONTROL, 'x'), (Keys.CONTROL, 'v')):
            _search_box(browser).send_keys(*combo)
        if int(time.time()) % random.randint(3, 17) == 0:
            browser.find_element_by_xpath(xp_np).click()
            time.sleep(random.randint(1, 10))
            return False
    return True


def _visit_matching_links(browser):
    """Open each link whose text is in ``url_l`` in a separate browser.

    Failures while handling one link are logged and do not stop the scan.
    """
    for link in browser.find_elements_by_tag_name('a'):
        if link.text not in url_l:
            continue
        try:
            logging.info(link)
            print(link)
            href = link.get_attribute('href')
            visitor = gen_a_broswer()
            try:
                visitor.get(href)
                time.sleep(3)
            finally:
                # Close the extra browser even when loading the page fails,
                # so a failed visit does not leave a browser running.
                visitor.quit()
        except Exception as e:
            logging.info('{}{}'.format('ii.text-', e))


def _go_to_next_page(browser):
    """Click the 'new-nextpage' element, falling back to 'new-nextpage-only'.

    Both failures are logged and printed; neither is re-raised.
    """
    try:
        browser.find_element_by_class_name('new-nextpage').click()
    except Exception as first_err:
        lgs = '{}{}'.format('new-nextpage-', first_err)
        logging.info(lgs)
        print(lgs)
        try:
            browser.find_element_by_class_name('new-nextpage-only').click()
        except Exception as second_err:
            logging.info('{}{}'.format('new-nextpage-only-', second_err))
            print(second_err)


def _search_and_browse(browser, keyword):
    """Search ``keyword`` and walk through up to 12 result pages.

    On every page: pause, scroll to the bottom three times, open matching
    links, try to move to the next page, and log the page index.
    """
    # Clear whatever throw-away text is still in the box before typing.
    browser.execute_script('document.getElementById("index-kw").value=""')
    time.sleep(random.randint(1, 10))
    _search_box(browser).send_keys(keyword)
    time.sleep(random.randint(1, 10))
    browser.find_element_by_xpath(xp_np).click()
    time.sleep(random.randint(1, 10))
    for page in range(12):
        time.sleep(random.randint(1, 10))
        for _scroll in range(3):
            time.sleep(random.randint(1, 10))
            browser.execute_script('window.scrollTo(0,document.body.scrollHeight)')
        _visit_matching_links(browser)
        _go_to_next_page(browser)
        logging.info('{}{}'.format('page-', page))


while True:
    # Use the shared factory instead of repeating its option setup inline.
    browser = gen_a_broswer()
    try:
        browser.get(url_seed)
        time.sleep(random.randint(1, 10))
        browser.find_element_by_link_text('百度一下,你就知道').click()
        mytxt = random.choice(target)
        if _type_decoy_text(browser):
            _search_and_browse(browser, mytxt)
    finally:
        # Always close the session's browser, including when a step above
        # raised; an error from quit() itself is only logged.
        try:
            browser.quit()
        except Exception as e:
            logging.info('{}{}'.format('browser.quit-', e))
    # Pause between sessions.
    time.sleep(random.randint(1, 10))
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import time, random
from selenium.webdriver.common.keys import Keys
import logging
import os
# Script start-up: record the start time, work out where this file lives, and
# send INFO-level logging to "<this file's name>.log" in append mode.
#
# NOTE(review): this section repeats the start-up statements near the top of
# the file.  logging.basicConfig() does nothing once the root logger already
# has handlers, so if the earlier copy ran this one only recomputes the
# variables -- confirm whether the duplicate is intentional.

# Local wall-clock time at this point, formatted as YYYYmmdd_HHMMSS.
start_time = time.strftime('%Y%m%d_%H%M%S')
os_sep = os.sep

# Resolve the absolute path once, then split it into directory and file name.
_this_file = os.path.abspath(__file__)
this_file_abspath = os.path.dirname(_this_file)
this_file_name = os.path.basename(_this_file)

# The log file name is relative, so it is created in the current working
# directory rather than next to this file.
logf = this_file_name + '.log'

try:
    logging.basicConfig(
        filename=logf,
        filemode='a',
        level=logging.INFO,
        format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s[thread:%(thread)d][process:%(process)d]',
        datefmt='%a, %d %b %Y %H:%M:%S',
    )
except Exception as e:
    # Logging could not be configured: write one line straight to the log
    # file, echo it to stdout, and terminate the process immediately.
    failure_msg = (
        'logging.basicConfig EXCEPTION '
        + time.strftime('%Y%m%d_%H%M%S')
        + str(e)
    )
    with open(logf, 'a') as log_file:
        log_file.write(failure_msg)
    print(failure_msg)
    os._exit(4002)

logging.info('START')
import string
def gen_a_broswer():
    """Create and return a new Chrome WebDriver using mobile emulation.

    The driver is started with the ``mobileEmulation`` experimental option:
    a 360x640 viewport at pixel ratio 3.0 and an Android / Nexus 5
    user-agent string.  The caller is responsible for calling ``quit()`` on
    the returned driver.

    Returns:
        The ``webdriver.Chrome`` instance that was created.
    """
    # NOTE(review): a function with this name and the same statements is
    # already defined earlier in the file; this definition replaces it --
    # confirm whether the duplicate is intentional.
    # NOTE(review): newer Selenium releases prefer the ``options=`` keyword
    # over ``chrome_options=`` -- confirm the installed version before
    # changing it.
    device_metrics = {"width": 360, "height": 640, "pixelRatio": 3.0}
    user_agent = "Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 5 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19"

    opts = Options()
    opts.add_experimental_option(
        "mobileEmulation",
        {"deviceMetrics": device_metrics, "userAgent": user_agent},
    )
    return webdriver.Chrome(chrome_options=opts)
# Main loop: run browser sessions forever.  Each session opens the seed
# results page, clicks the link whose text is '百度一下,你就知道', types some
# throw-away text into the search box, then (unless the session ended early)
# searches one keyword from ``target`` and walks up to 12 result pages,
# opening every link whose visible text is one of ``url_l``.
#
# NOTE(review): the original indentation of this script was lost; the nesting
# below is a reconstruction of the most plausible structure -- confirm it
# against the intended flow.
# NOTE(review): this loop repeats the `while True:` loop earlier in the file.
# That earlier loop appears to have no exit at its own level, so this copy
# looks unreachable -- confirm and remove one of the two.

# Seed page from which the search-engine link is clicked.  The original code
# first assigned a Sogou URL
# ('http://m.sogou.com/web/searchList.jsp?pid=sogou-mobb-123asd-0007&keyword=百度地图')
# and immediately overwrote it with this one, so only this value was ever used.
url_seed = 'https://www.bing.com/search?q=%E7%99%BE%E5%BA%A6%E4%B8%80%E4%B8%8B&qs=AS&pq=%E7%99%BE%E5%BA%A6&sk=AS1&sc=5-2&cvid=C784E8909000DA&FORM=CHRDEF&sp=2'

# Keywords for the real search; one is picked at random per session.
# Repeated entries are kept because they weight the random choice.
target = ['icarei', 'urcare', 'ABC招商', 'xyz招商', 'ABC合伙人招商', '招商', '密龄招商', 'ABC招商', 'ABC招商', '招商',
          'xyz招商']

# Extra fragments mixed with random ASCII letters when typing throw-away text.
zh_l = ['高招', '幼儿园', '苹果', '香蕉', '井冈山', '新西兰', '坦克']

# Result links are opened when their visible text equals one of these.
url_l = ['www.icarei.cn', 'www.urcare.cn']

# XPath of the search input box and of the search button.
input_area = '//*[@id="index-kw"]'
xp_np = '//*[@id="index-bn"]'


def _search_box(browser):
    """Look up the search input element (re-located on every use)."""
    return browser.find_element_by_xpath(input_area)


def _type_decoy_text(browser):
    """Type random throw-away fragments into the search box.

    Runs 3 to 7 rounds.  Each round sends one fragment (picked from 28 random
    ASCII letters plus ``zh_l``), pauses, then sends a space followed by
    Ctrl+A, Ctrl+X and Ctrl+V.  After each round, if the current epoch second
    is divisible by a random integer in [3, 17], the search button is clicked
    and the session ends early.

    Args:
        browser: the WebDriver showing the search page.

    Returns:
        bool: ``True`` if the caller should continue with the real search,
        ``False`` if the throw-away text was submitted instead.
    """
    for _round in range(random.randint(3, 7)):
        pool = [random.choice(string.ascii_letters) for _n in range(28)] + zh_l
        _search_box(browser).send_keys(random.choice(pool))
        time.sleep(random.randint(1, 3))
        for combo in ((Keys.SPACE,), (Keys.CONTROL, 'a'), (Keys.CONTROL, 'x'), (Keys.CONTROL, 'v')):
            _search_box(browser).send_keys(*combo)
        if int(time.time()) % random.randint(3, 17) == 0:
            browser.find_element_by_xpath(xp_np).click()
            time.sleep(random.randint(1, 10))
            return False
    return True


def _visit_matching_links(browser):
    """Open each link whose text is in ``url_l`` in a separate browser.

    Failures while handling one link are logged and do not stop the scan.
    """
    for link in browser.find_elements_by_tag_name('a'):
        if link.text not in url_l:
            continue
        try:
            logging.info(link)
            print(link)
            href = link.get_attribute('href')
            visitor = gen_a_broswer()
            try:
                visitor.get(href)
                time.sleep(3)
            finally:
                # Close the extra browser even when loading the page fails,
                # so a failed visit does not leave a browser running.
                visitor.quit()
        except Exception as e:
            logging.info('{}{}'.format('ii.text-', e))


def _go_to_next_page(browser):
    """Click the 'new-nextpage' element, falling back to 'new-nextpage-only'.

    Both failures are logged and printed; neither is re-raised.
    """
    try:
        browser.find_element_by_class_name('new-nextpage').click()
    except Exception as first_err:
        lgs = '{}{}'.format('new-nextpage-', first_err)
        logging.info(lgs)
        print(lgs)
        try:
            browser.find_element_by_class_name('new-nextpage-only').click()
        except Exception as second_err:
            logging.info('{}{}'.format('new-nextpage-only-', second_err))
            print(second_err)


def _search_and_browse(browser, keyword):
    """Search ``keyword`` and walk through up to 12 result pages.

    On every page: pause, scroll to the bottom three times, open matching
    links, try to move to the next page, and log the page index.
    """
    # Clear whatever throw-away text is still in the box before typing.
    browser.execute_script('document.getElementById("index-kw").value=""')
    time.sleep(random.randint(1, 10))
    _search_box(browser).send_keys(keyword)
    time.sleep(random.randint(1, 10))
    browser.find_element_by_xpath(xp_np).click()
    time.sleep(random.randint(1, 10))
    for page in range(12):
        time.sleep(random.randint(1, 10))
        for _scroll in range(3):
            time.sleep(random.randint(1, 10))
            browser.execute_script('window.scrollTo(0,document.body.scrollHeight)')
        _visit_matching_links(browser)
        _go_to_next_page(browser)
        logging.info('{}{}'.format('page-', page))


while True:
    # Use the shared factory instead of repeating its option setup inline.
    browser = gen_a_broswer()
    try:
        browser.get(url_seed)
        time.sleep(random.randint(1, 10))
        browser.find_element_by_link_text('百度一下,你就知道').click()
        mytxt = random.choice(target)
        if _type_decoy_text(browser):
            _search_and_browse(browser, mytxt)
    finally:
        # Always close the session's browser, including when a step above
        # raised; an error from quit() itself is only logged.
        try:
            browser.quit()
        except Exception as e:
            logging.info('{}{}'.format('browser.quit-', e))
    # Pause between sessions.
    time.sleep(random.randint(1, 10))