Selenium 模拟点击爬取数据
# Scrape a Tencent Docs spreadsheet by simulating mouse clicks in Chrome:
# open the sheet page, click through to the table, select everything,
# copy it to the system clipboard with Ctrl+C, load the clipboard into a
# pandas DataFrame and save the non-empty rows/columns as an Excel file.
#
# All pointer coordinates below are hard-coded pixel offsets that were
# chosen for the fixed 930x1000 window set further down.
from time import sleep

from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.keys import Keys
import clipboard
import pandas as pd

# Ignore SSL/certificate errors (works around a corporate-network issue).
options = webdriver.ChromeOptions()
options.add_argument('-ignore-certificate-errors')
# The original passed '-ignore -ssl-errors' (stray space), which can never
# be parsed as the intended switch.
# NOTE(review): 'ignore-ssl-errors' may not be a switch Chrome recognises;
# the certificate switch above is likely the one doing the work - confirm.
options.add_argument('-ignore-ssl-errors')
# Exclude the 'enable-logging' switch. The original note only said
# "set up Bluetooth" - presumably this silences Bluetooth-related console
# noise from Chrome; verify if the log output matters.
options.add_experimental_option('excludeSwitches', ['enable-logging'])

path = Service(executable_path=r'E:\\chromedriver.exe')
driver = webdriver.Chrome(service=path, options=options)

try:
    # Open the page.
    driver.get('http://doc.weixin.qq.com/sheet/')
    # Fix the window size so the pixel offsets below land on the same spots.
    driver.set_window_size(930, 1000)

    # Give the page time to load, then left-click at (460, 568).
    sleep(5)
    ActionChains(driver).move_by_offset(460, 568).click().perform()

    # move_by_offset is relative to the current pointer position, so this
    # returns the pointer to the origin and clicks there to select the
    # whole table.
    ActionChains(driver).move_by_offset(-460, -568).click().perform()
    sleep(10)

    # Click at (20, 154) to focus the sheet before copying.
    ActionChains(driver).move_by_offset(20, 154).click().perform()

    # Ctrl+C. The original built a "reset" move of (-20, -154) without
    # calling perform(), so it never ran and the following (20, 154) move
    # drifted the pointer to (40, 308). The intended net position is the
    # one we are already at, so no further move is needed. Ctrl is now
    # released explicitly instead of being left held down.
    (
        ActionChains(driver)
        .key_down(Keys.CONTROL)
        .send_keys('c')
        .key_up(Keys.CONTROL)
        .perform()
    )

    # Read the copied table from the system clipboard while the browser
    # is still alive.
    df = pd.read_clipboard()
finally:
    # Always shut down the browser and the chromedriver process, even if
    # one of the steps above raised.
    driver.quit()

# Drop rows and then columns that consist entirely of NaN.
data = df.dropna(axis=0, how='all').dropna(axis=1, how='all')

# sheet_name is passed by keyword: positional use is deprecated in pandas.
data.to_excel(r'E:\\邮箱.xlsx', sheet_name='sheet')