使用python selenium webdriver模拟浏览器
selenium是进行web自动化测试的一个工具,支持C,C++,Python,Java等语言,它能够实现模拟手工操作浏览器,进行自动化,通过
webdriver驱动浏览器操作,我使用的是chrome浏览器,下载chrome webdriver 放到python的安装目录。
参考链接:
https://pypi.python.org/pypi/selenium
http://selenium-python.readthedocs.io/api.html
http://www.cnblogs.com/fnng/p/3160606.html
"""Search Amazon Seller Central orders for a customer profile id.

The script logs in to Seller Central with Selenium, searches the orders of one
ASIN in date windows going back to January of the current year, and scans every
result page for the given customer profile id.
"""
import calendar
import datetime
import string
import time

# Locator strategy names accepted by ``find_element`` / ``find_elements``.
# They are the literal values of selenium's ``By.ID`` / ``By.NAME`` /
# ``By.XPATH``; passing them directly keeps the helpers below importable
# without selenium and avoids the ``find_element_by_*`` shortcuts, which were
# removed in Selenium 4.3.
_BY_ID = "id"
_BY_NAME = "name"
_BY_XPATH = "xpath"

# Number of orders shown per result page once ``setpage50`` has been applied.
_PAGE_SIZE = 50


def usage():
    """Print the start-up banner."""
    print("*********************************************************************")
    print("欢迎使用Amazone差评神器,Enover保留版权,作者:Anker 日期:2016-12-18")
    print("*********************************************************************")


def genSearchDate(now=None):
    """Build the list of date windows to search, newest first.

    Each month from the current one back to January is split at the 10th and
    the 20th. The current month is cut off at today's day of month, except
    that a day of 10 or earlier still produces the single window 1..10.

    Args:
        now: Reference ``datetime``; defaults to ``datetime.datetime.now()``.

    Returns:
        A list of ``[begdate, enddate]`` pairs. Both dates are strings of the
        form ``month/day/year`` where ``year`` is ``now.year % 2000`` and no
        zero padding is applied.
    """
    if now is None:
        now = datetime.datetime.now()
    year = now.year % 2000
    print("当前的日期是:%s/%s/%s" % (now.day, now.month, year))

    # Window boundaries (descending) for the current, partially elapsed month.
    if now.day <= 10:
        current = [10, 1]
    elif now.day <= 20:
        current = [now.day, 10, 1]
    else:
        current = [now.day, 20, 10, 1]

    searchDate = []
    for mon in range(now.month, 0, -1):
        if mon == now.month:
            bounds = current
        else:
            # monthrange handles month lengths and leap years for us.
            last_day = calendar.monthrange(now.year, mon)[1]
            bounds = [last_day, 20, 10, 1]
        for d in range(len(bounds) - 1):
            # The newest window ends on its boundary day; older windows end
            # one day before the boundary so that windows do not overlap.
            end_day = bounds[d] if d == 0 else bounds[d] - 1
            begdate = '%s/%s/%s' % (mon, bounds[d + 1], year)
            enddate = '%s/%s/%s' % (mon, end_day, year)
            searchDate.append([begdate, enddate])
    return searchDate


def loginAmazone(driver, email='xxxxx', password='xxxxx'):
    """Open Seller Central and submit the sign-in form.

    Args:
        driver: Selenium WebDriver instance.
        email: Value typed into the ``ap_email`` field. The default is the
            placeholder from the original script and must be replaced.
        password: Value typed into the ``ap_password`` field (placeholder
            default, see ``email``).
    """
    driver.get("https://sellercentral.amazon.com")
    driver.find_element(_BY_ID, 'ap_email').send_keys(email)
    driver.find_element(_BY_ID, 'ap_password').send_keys(password)
    driver.find_element(_BY_NAME, 'signIn').submit()


def searchProcess(driver, asin, begdate, enddate):
    """Run an order search for one ASIN within an exact date range.

    Args:
        driver: Selenium WebDriver instance, already logged in.
        asin: ASIN typed into the search keyword field.
        begdate: Start date string typed into ``exactDateBegin``.
        enddate: End date string typed into ``exactDateEnd``.
    """
    driver.get("https://sellercentral.amazon.com/gp/orders-v2/search/ref=ag_myosearch_apsearch_myo")
    driver.find_element(_BY_ID, '_myoSO_searchTypeSelect').send_keys('ASIN')
    driver.find_element(_BY_ID, '_myoSO_searchKeyword').send_keys(asin)
    driver.find_element(_BY_ID, '_myoSO_SearchOption_exactDates').click()
    begin_field = driver.find_element(_BY_ID, 'exactDateBegin')
    begin_field.clear()
    begin_field.send_keys(begdate)
    end_field = driver.find_element(_BY_ID, 'exactDateEnd')
    end_field.clear()
    end_field.send_keys(enddate)
    driver.find_element(_BY_ID, '_myoSO_SearchButton').click()
    # Fixed pause to let the result page load.
    time.sleep(2)


def setpage50(driver):
    """Switch the result list to 50 orders per page and tick the save-default box.

    Args:
        driver: Selenium WebDriver instance showing a search result page.
    """
    driver.find_element(_BY_XPATH, '//option [@value="50"]').click()
    driver.find_element(
        _BY_XPATH,
        '//form [@onsubmit="return MYO.LO.DoAjaxSearchCall( this );"]',
    ).submit()
    time.sleep(2)
    driver.find_element(_BY_ID, '_myoLO_saveDefaultSearchCheckBox').click()


def countPage(source):
    """Return the number of result pages announced in the page source.

    The total order count is read from the text between
    ``</strong> of <strong>`` and the next ``</strong>``.

    Args:
        source: HTML source of a search result page.

    Returns:
        The page count as an ``int`` (total divided by 50, rounded up);
        0 when the total is 0.

    Raises:
        ValueError: If the counter marker is missing or the total is not a
            number.
    """
    pattern = '</strong> of <strong>'
    pos = source.find(pattern)
    if pos == -1:
        raise ValueError("order total marker %r not found in page source" % pattern)
    beg = pos + len(pattern)
    end = source.find('</strong>', beg)
    if end == -1:
        raise ValueError("unterminated order total in page source")
    # Drop thousands separators such as "1,234" before converting.
    total = int(source[beg:end].replace(',', ''))
    # Integer ceiling division: always an int, so it is safe to pass to range().
    page = (total + _PAGE_SIZE - 1) // _PAGE_SIZE
    print("订单总数为:%s,共计%s页" % (total, page))
    return page


def jumppage(driver, page, custid):
    """Visit result pages 1..page and look for ``custid`` in each page source.

    Args:
        driver: Selenium WebDriver instance showing a search result page.
        page: Number of result pages (inclusive upper bound).
        custid: Customer profile id to look for.

    Returns:
        True as soon as ``custid`` is found on a page, otherwise False.
    """
    # The upper bound is inclusive so that the last page (and a single-page
    # result) is searched as well.
    for index in range(1, int(page) + 1):
        print("正在查找第%s页" % index)
        elements = driver.find_elements(_BY_XPATH, '//input [@maxlength="7"]')
        elements[1].find_element(
            _BY_XPATH, '//input [@name="currentPage"]'
        ).send_keys(str(index))
        driver.find_element(_BY_ID, '_myoSO_GoToPageForm_1').submit()
        # Fixed pause to let the requested page load.
        time.sleep(4)
        source = driver.page_source
        pos = source.find(custid)
        if pos != -1:
            print('终于找到了,查找记录如下:')
            # Clamp the start so that a match near the beginning of the page
            # does not turn into a negative (wrap-around) slice index.
            print(source[max(pos - 270, 0):pos + 24])
            return True
    return False


def searchBadReview(driver, asin, custid, searchDate):
    """Search every date window until the customer id is found.

    Args:
        driver: Selenium WebDriver instance, already logged in.
        asin: ASIN to search orders for.
        custid: Customer profile id to look for.
        searchDate: List of ``[begdate, enddate]`` pairs, e.g. from
            ``genSearchDate``.

    Returns:
        True if ``custid`` was found in any window, otherwise False.
    """
    for begdate, enddate in searchDate:
        print('==============================================')
        print("开始找%s到%s的订单" % (begdate, enddate))
        searchProcess(driver, asin, begdate, enddate)
        setpage50(driver)
        page = countPage(driver.page_source)
        if jumppage(driver, page, custid):
            return True
    return False


def main():
    """Prompt for the ASIN and customer id, then run the search in Chrome."""
    # Imported here so that the pure helpers above can be imported (and
    # tested) on machines without selenium installed.
    from selenium import webdriver

    usage()
    asin = input("请输入ASIN:")
    print("你输入的ASIN是: ", asin)
    custid = input("请输入Customer profile id:")
    print("你输入的内容是: ", custid)
    searchDate = genSearchDate()
    print('==============================================')
    print("开始打开浏览器,并登陆Amazone seller center")
    driver = webdriver.Chrome()
    try:
        loginAmazone(driver)
        time.sleep(1)
        searchBadReview(driver, asin, custid, searchDate)
    finally:
        # Always close the browser, even when a step above raises.
        driver.quit()
    time.sleep(60)


if __name__ == "__main__":
    main()
冷静思考,勇敢面对,把握未来!
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 从 HTTP 原因短语缺失研究 HTTP/2 和 HTTP/3 的设计差异
· AI与.NET技术实操系列:向量存储与相似性搜索在 .NET 中的实现
· 基于Microsoft.Extensions.AI核心库实现RAG应用
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
· TypeScript + Deepseek 打造卜卦网站:技术与玄学的结合
· Manus的开源复刻OpenManus初探
· AI 智能体引爆开源社区「GitHub 热点速览」
· 从HTTP原因短语缺失研究HTTP/2和HTTP/3的设计差异
· 三行代码完成国际化适配,妙~啊~
2012-12-18 《APUE》读书笔记—第十一章线程
2012-12-18 《APUE》读书笔记—第十章信号(下)