Python爬虫:Selenium常用操作,下载YouTube视频实例
Selenium常用操作:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Common Selenium operations.
# NOTE: `driver` (a WebDriver instance), `url` and `arg` are assumed to be
# defined before this snippet runs; they are not created here.
driver.get(url)

# Type text into the input field with id "sf_url".
driver.find_element(By.ID, "sf_url").send_keys(arg)

# Click the submit button. The `find_element_by_*` helpers are gone in
# Selenium 4, and `find_element_by_class` never existed, so use the generic
# `find_element(By..., ...)` form.
driver.find_element(By.ID, "sf_submit").click()

# Wait up to 20 seconds for a lazily loaded element to appear in the DOM.
element = WebDriverWait(driver, 20).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, "[class='row title']"))
)
print(element)
print(element.get_attribute("title"))
print("text:", element.text)
常用API:
https://selenium-python-zh.readthedocs.io/en/latest/api.html#locate-elements-by
下面的例子是下载 YouTube 视频(利用 https://zh.savefrom.net/ 网站得到下载地址):
if __name__ == "__main__":
    # Submit a YouTube URL to savefrom.net, list the video-format links the
    # page offers, let the user pick one, and print a wget command for it.
    arg = "https://www.youtube.com/watch?v=***"
    driver = get_selenium_driver()
    try:
        url = "https://zh.savefrom.net/"
        driver.get(url)
        driver.find_element(By.ID, "sf_url").send_keys(arg)
        driver.find_element(By.ID, "sf_submit").click()

        # The result box is rendered asynchronously; wait until the title
        # row is actually visible (at most 20 seconds).
        element = WebDriverWait(driver, 20).until(
            EC.visibility_of_element_located(
                (By.CSS_SELECTOR, "[class='row title']")
            )
        )
        title = element.text.replace(" ", "_")
        print(title)

        # Climb to the grandparent of the title row.
        # NOTE(review): assumes the download links live under this node.
        pele = element.find_element(By.XPATH, "./../..")

        # Collect every <a> under `pele` whose title contains '视频格式'.
        # The leading "." matters: a bare "//a" would search the whole
        # document even when called on an element.
        tag_as = pele.find_elements(
            By.XPATH, ".//a[contains(@title,'视频格式')]"
        )

        # Map link title -> (href, data-type); a later duplicate title
        # overwrites an earlier one.
        m = {}
        for tag_a in tag_as:
            dt = tag_a.get_attribute("data-type")
            k = tag_a.get_attribute("title")
            href = tag_a.get_attribute("href")
            m[k] = (href, dt)

        if not m:
            # Nothing to choose from; fail clearly instead of raising an
            # IndexError after the prompt.
            raise SystemExit("no download links found for %s" % arg)

        li = list(m)
        for idx, label in enumerate(li):
            print("%s: %s" % (idx, label))
        i = int(input("intpu:"))
        (href, dt) = m[li[i]]
    finally:
        # Always close the browser, even on a timeout or invalid input.
        driver.quit()

    print("""\nwget -O "%s.%s" "%s" """ % (title, dt, href))