selenium爬12306

Posted on 2019-09-26 15:29  大白不白  阅读(273)  评论(0)  编辑  收藏  举报
from selenium import webdriver
from pyquery import PyQuery as pq
import time
# Ask for the trip to look up; the answers are substituted into the query
# URL exactly as typed.
fs = input("出发站:")
ts = input("达到站:")
date = input("时间:")

browser = webdriver.Chrome()

# NOTE(review): the station codes HGH and TLU are fixed in the URL template,
# whatever station names were entered above -- confirm this is intended.
# Example of a filled-in URL:
#   https://kyfw.12306.cn/otn/leftTicket/init?linktypeid=dc&fs=杭州东,HGH&ts=桐庐,TLU&date=2019-09-30&flag=N,N,Y
query_url = (
    "https://kyfw.12306.cn/otn/leftTicket/init?linktypeid=dc&fs=%s,HGH&ts=%s,TLU&date=%s&flag=N,N,Y"
    % (fs, ts, date)
)
browser.get(query_url)
# Fixed pause before scrolling -- presumably to let the page finish loading.
time.sleep(2)

# Scroll down in three steps, pausing one second after each step.
a = "window.scrollTo(0,800);"
b = "window.scrollTo(0,1600);"
c = "window.scrollTo(0,3200);"
for scroll_script in (a, b, c):
    browser.execute_script(scroll_script)
    time.sleep(1)


def start():
    """Announce the run, check that the result table exists, then print trains.

    Prints "开始", looks up the element whose id is ``queryLeftTable`` on the
    module-level ``browser``, and then calls ``aaa()``.

    Raises:
        Whatever the driver raises when the element is not found
        (presumably ``NoSuchElementException`` -- confirm against the
        installed Selenium version).
    """
    # Function-scope import keeps this block self-contained; ``By`` ships with
    # the selenium package the file already imports.
    from selenium.webdriver.common.by import By

    print("开始")
    # The element itself is not used; the lookup is kept because it fails
    # early when the result table is missing. ``find_element(By.XPATH, ...)``
    # replaces ``find_element_by_xpath``, which newer Selenium releases removed.
    browser.find_element(By.XPATH, '//*[@id="queryLeftTable"]')

    aaa()


def aaa():
    """Print one dict of ticket details per train on the loaded result page.

    Two views of the module-level ``browser`` page are combined:

    * the page source, parsed with PyQuery, supplies the train number,
      stations, times and duration from each ``.t-list tbody tr`` row whose
      ``.train`` element has text;
    * the text of every non-empty ``<td>`` found through the live driver,
      flattened in document order, supplies the ten seat-class columns.

    The seat values for the first train are read from flattened positions
    1-10, and each following train is read 13 positions further on.
    NOTE(review): that layout (13 non-empty cells per train, seat columns
    first at index 1) is an assumption carried by this code -- confirm it
    against the live page.

    If the flattened cells run out before all trains are printed, the
    function stops instead of raising ``IndexError``.

    Returns:
        None. Each train's dict is written to stdout with ``print``.
    """
    # Function-scope import keeps this block self-contained; ``By`` ships with
    # the selenium package the file already imports.
    from selenium.webdriver.common.by import By

    html = browser.page_source

    # ``find_elements(By.TAG_NAME, ...)`` replaces ``find_elements_by_tag_name``,
    # which newer Selenium releases removed.
    raw_texts = (
        cell.text
        for row in browser.find_elements(By.TAG_NAME, 'tr')
        for cell in row.find_elements(By.TAG_NAME, 'td')
    )
    cells = [text for text in raw_texts if text != ""]

    seat_names = (
        "商务座",
        "一等座",
        "二等座",
        "高级软卧",
        "软卧",
        "动卧",
        "硬卧",
        "软座",
        "硬座",
        "无座",
    )
    stride = 13  # flattened cells consumed per train
    offset = 1   # position of the first seat column for the current train

    # ``.items()`` yields each matched row once, so a single pass is enough.
    for train_row in pq(html)('.t-list tbody tr').items():
        train_text = train_row.find('.train').text()
        if train_text == "":
            continue

        seats = cells[offset:offset + len(seat_names)]
        if len(seats) < len(seat_names):
            # Not enough cell texts left for a complete record; later
            # offsets would be further out of range, so stop here.
            break

        qq_data = {
            "车次": train_text.split("\n")[0],
            "出发站到达站": train_row.find('.cdz').text(),
            "出发时间到达时间": train_row.find('.cds').text(),
            "历时": train_row.find('.ls').text(),
        }
        qq_data.update(zip(seat_names, seats))

        print(qq_data)
        offset += stride

def main():
    """Script entry point: hand control to ``start()``."""
    start()


# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    main()

 

Copyright © 2024 大白不白
Powered by .NET 8.0 on Kubernetes