Python爬虫:Selenium常用操作,下载youtube视频实例

selenium常用操作:

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Illustrative fragment: `driver`, `url` and `arg` are not defined here and
# are expected to be supplied by the surrounding code.
driver.get(url)
# Type text into an input field.
driver.find_element(By.ID, "sf_url").send_keys(arg)
# Click an element with the mouse. WebDriver has no `find_element_by_class`
# method; the submit button is located by its id, as in the full example.
driver.find_element(By.ID, "sf_submit").click()
# Wait up to 20 seconds for a lazily loaded element to be present in the DOM.
element = WebDriverWait(driver, 20).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, "[class='row title']"))
)
print(element)
print(element.get_attribute("title"))
print("text:", element.text)

常用API:
https://selenium-python-zh.readthedocs.io/en/latest/api.html#locate-elements-by


下面的例子是下载youtube视频(利用https://zh.savefrom.net/网站得到下载地址):

if __name__ == "__main__":
    # Submit a YouTube URL to savefrom.net, list the offered video formats,
    # let the user pick one, and print a wget command for the chosen link.
    arg = "https://www.youtube.com/watch?v=***"
    url = "https://zh.savefrom.net/"

    driver = get_selenium_driver()
    try:
        driver.get(url)
        driver.find_element(By.ID, "sf_url").send_keys(arg)
        driver.find_element(By.ID, "sf_submit").click()

        # The result title is rendered asynchronously; wait up to 20 seconds
        # for it to become visible.
        element = WebDriverWait(driver, 20).until(
            EC.visibility_of_element_located((By.CSS_SELECTOR, "[class='row title']"))
        )
        title = element.text.replace(" ", "_")
        print(title)

        # Grandparent of the title element (parent of its parent).
        pele = element.find_element(By.XPATH, "./../..")

        # NOTE: the element with class "def-btn-name" under `pele` could be
        # clicked instead to trigger the site's default download button.

        # All <a> descendants of `pele` whose title contains '视频格式'.
        # The leading "." matters: a bare "//a" would search the whole
        # document even when called on an element.
        tag_as = pele.find_elements(By.XPATH, ".//a[contains(@title,'视频格式')]")

        # Map each link title to (href, data-type). Values are copied out as
        # plain strings so they remain usable after the browser is closed.
        m = {}
        for tag_a in tag_as:
            dt = tag_a.get_attribute("data-type")
            k = tag_a.get_attribute("title")
            href = tag_a.get_attribute("href")
            m[k] = (href, dt)
    finally:
        # Always shut the browser down, even if the wait or a lookup failed.
        driver.quit()

    li = list(m)
    for idx, k in enumerate(li):
        print("%s: %s" % (idx, k))

    i = int(input("intpu:"))
    (href, dt) = m[li[i]]

    print("""\nwget -O "%s.%s" "%s" """ % (title, dt, href))
posted @ 2018-12-28 09:05  xuejianbest  阅读(1520)  评论(0)  编辑  收藏  举报