关于Python selenium实现类似比价软件的功能
偶然间想实现比价的功能,用常规的 requests 方式比较难实现,于是想到可以用 selenium 简单地实现,下面是代码。
import requests from selenium import webdriver import json import time import threading import matplotlib.pyplot as plt import numpy as np s = str(raw_input('请输入要比价内容:')) print s list = []
#京东价格,主要是selenium模拟,定位搜索框然后输入比价内容。其中还有error的处理。 def jd(s): driver = webdriver.Chrome() driver.get('https://www.jd.com/') driver.find_element_by_id('key').send_keys(s.decode('gbk')) driver.find_element_by_xpath('//*[@id="search"]/div/div[2]/button').click() time.sleep(2) try: print "京东价格:",driver.find_element_by_xpath('//*[@id="J_goodsList"]/ul/li[1]/div/div[3]/strong/i').text print "京东价格:",driver.find_element_by_xpath('//*[@id="J_goodsList"]/ul/li[1]/div/div[7]/span/a').text list.append(driver.find_element_by_xpath('//*[@id="J_goodsList"]/ul/li[1]/div/div[3]/strong/i').text) except: print "京东价格:",driver.find_element_by_xpath('//*[@id="J_goodsList"]/ul/li[1]/div/div[2]/strong/i').text print "京东价格:",driver.find_element_by_xpath('//*[@id="J_goodsList"]/ul/li[1]/div/div[5]/span/a').text list.append(driver.find_element_by_xpath('//*[@id="J_goodsList"]/ul/li[1]/div/div[2]/strong/i').text) finally: driver.quit()
#淘宝价格,跟京东类似 def taobao(s): driver = webdriver.Chrome() driver.get('https://www.taobao.com/') driver.find_element_by_id('q').send_keys(s.decode('gbk')) driver.find_element_by_xpath('//*[@id="J_TSearchForm"]/div[1]/button').click() time.sleep(2) try: print "淘宝价格:",driver.find_element_by_xpath('//*[@id="mainsrp-itemlist"]/div/div/div[1]/div[1]/div[2]/div[1]/div[1]/strong').text list.append(driver.find_element_by_xpath('//*[@id="mainsrp-itemlist"]/div/div/div[1]/div[1]/div[2]/div[1]/div[1]/strong').text) except: print "淘宝价格:",driver.find_element_by_xpath('//*[@id="J_itemlistListItem0"]/div[2]/p/a').text print "淘宝价格:",driver.find_element_by_xpath('//*[@id="J_itemlistListItem0"]/div[3]/div[1]/span/strong').text list.append(driver.find_element_by_xpath('//*[@id="J_itemlistListItem0"]/div[3]/div[1]/span/strong').text) finally: driver.quit() #亚马逊价格 def amz(s): driver = webdriver.Chrome() driver.get('https://www.amazon.cn/') driver.find_element_by_id('twotabsearchtextbox').send_keys(s.decode('gbk')) driver.find_element_by_xpath('//*[@id="nav-search"]/form/div[2]/div/input').click() time.sleep(2) print "亚马逊价格:",driver.find_element_by_xpath('//*[@id="result_0"]/div/div[4]/div[1]/a/span[2]').text list.append(driver.find_element_by_xpath('//*[@id="result_0"]/div/div[4]/div[1]/a/span[2]').text.replace(',','')[1:]) driver.quit()
#用到多线程处理。 threads = [] t1 = threading.Thread(target=jd,args=(s,)) threads.append(t1) t2 = threading.Thread(target=taobao,args=(s,)) threads.append(t2) t3 = threading.Thread(target=amz,args=(s,)) threads.append(t3) for i in range(len(threads)): threads[i].start() for i in range(len(threads)): threads[i].join() print list
# Chart display: one bar per collected price.
# Keep only the integer part of each price string (the text before the '.').
list = [int(i.split('.')[0]) for i in list]

# Size the x positions from the prices actually collected instead of a fixed
# 3; with fewer results a fixed range either broadcasts a lone value across
# every bar or raises a shape-mismatch error inside plt.bar.
index = np.arange(len(list))

# Pass x and height positionally: the ``left`` keyword was renamed to ``x`` in
# later Matplotlib releases, while the positional form works on old and new.
plt.bar(index, list, width=0.5)

# NOTE(review): prices are appended in thread completion order, so these fixed
# labels are not guaranteed to line up with the right site -- confirm, or have
# the scrapers record which site each price came from.
labels = ('Jd', 'Taobao', 'Amazon')
plt.xticks(index, labels[:len(list)])
plt.show()
效果图:
简单的功能实现了,不足之处(有待改善):
1、等待时间太长,体验不佳,即使多线程也要18s左右才能返回结果,太慢。
2、开3个webdriver,占用资源有点多,也导致比较慢。
3、错误处理未完善。