selenium练习

爬取豆瓣top250电影:https://movie.douban.com/top250

from selenium import webdriver
import time

# Scrape the first page of the Douban Top 250 list and print, for every movie,
# its title, poster URL, description, number of ratings and one-line quote.
bro = webdriver.Chrome(executable_path='./chromedriver.exe')
# Implicit wait: element lookups keep polling for up to 10 seconds before failing.
bro.implicitly_wait(10)

try:
    bro.get('https://movie.douban.com/top250')
    # Every movie entry is an <li> inside the .grid_view list.
    li_list = bro.find_elements_by_css_selector('.grid_view li')
    for li in li_list:
        # Movie title (text of the link in the entry header).
        v_info = li.find_element_by_css_selector('.info .hd a').text
        # Poster image URL.
        photo_url = li.find_element_by_css_selector('.pic img').get_attribute('src')
        # Description paragraph.
        v_a_info = li.find_element_by_css_selector('.bd>p').text
        # Number of ratings. The leading "." keeps the XPath relative to this
        # <li>; a bare "//" searches from the document root and would return
        # the first movie's value on every iteration.
        v_comment_count = li.find_element_by_xpath(".//div[@class='star']//span[4]").text
        # One-line quote. Look it up with find_elements so that an entry
        # without a .inq element yields an empty string instead of raising
        # and aborting the whole loop. (A missing element still costs the
        # implicit-wait timeout before the empty list is returned.)
        inq_list = li.find_elements_by_css_selector('.inq')
        v_mark = inq_list[0].text if inq_list else ''
        print('''
        电影标题:%s
        电影图片url:%s
        电影描述信息:%s
        电影的评价人数:%s
        电影标签:%s
        ''' % (v_info, photo_url, v_a_info, v_comment_count, v_mark))
        # Pause between entries to keep the output readable / pace the scraping.
        time.sleep(2)
except Exception as e:
    # Top-level boundary of the script: report the failure and fall through
    # to the cleanup below.
    print(e)
finally:
    # quit() ends the whole WebDriver session and stops the chromedriver
    # process; close() would only close the current window.
    bro.quit()

 

posted @ 2020-08-05 00:40  耗油炒白菜  阅读(107)  评论(0)  编辑  收藏  举报