Egの天猫的任一商品详情页为入口,看到的商品促销价或淘宝价:
from requestium import Session,Keys
#from selenium.webdriver.common.action_chains import ActionChains
# If the slider captcha keeps showing an error / "refresh" prompt, switch to a
# portable (no-install) Chrome build or to Firefox.
#
# Browser launch options; the script passes this mapping as the fourth
# positional argument of requestium's Session(...).
options = dict(
    arguments=['disable-infobars'],
    experimental_options={
        'prefs': {
            # NOTE(review): value 2 presumably tells Chrome to block image
            # loading - confirm against the Chrome preference docs.
            'profile.managed_default_content_settings.images': 2,
        },
    },
    # Path of the Chrome executable to launch (kept exactly as written,
    # including the doubled backslash before chrome.exe).
    binary_location=r'D:\Program Files\Browser\ChromeQuic\\chrome.exe',
)
def loginByBrowser():
    """Log in to Tmall by typing the credentials into the login page.

    Relies on the module-level globals ``driver`` (the webdriver), ``css``
    (element lookup by CSS selector), ``username`` and ``pwd``.

    After submitting the form the function looks for ``#J_Logout``. If that
    lookup fails, the user is asked to solve the slider captcha by hand in
    the browser window, and the password is then typed and submitted again.
    """
    driver.get('https://login.tmall.com/')
    loginFrame = css('#J_loginIframe')
    # NOTE(review): the three TABs + ENTER presumably switch the iframe to the
    # password-login form before the credentials are typed - confirm.
    loginFrame.send_keys(Keys.TAB, Keys.TAB, Keys.TAB, Keys.ENTER,
                         username, Keys.TAB, pwd, Keys.ENTER)
    try:
        # NOTE(review): presumably this element only exists once logged in.
        css('#J_Logout')
    except Exception:
        # Narrowed from a bare ``except:`` so Ctrl-C / SystemExit are not
        # mistaken for "login did not go through".
        input('浏览器端成功处理滑块验证码后,在本句句尾任敲一字母:')
        # Disabled attempt at dragging the slider automatically, kept for
        # reference:
        # source=css('#nc_1_n1z')
        # action=ActionChains(driver)
        # action.click_and_hold(source).perform()
        # for x in range(10):
        #     action.move_by_offset(5,0).perform()
        # action.release()
        # NOTE(review): SHIFT+TAB presumably moves focus back to the password
        # field so it can be re-entered - confirm.
        loginFrame.send_keys(Keys.SHIFT + Keys.TAB)
        loginFrame.send_keys(pwd, Keys.ENTER)
def getGoodsDetail(id='563211819596'):
    """Open one Tmall item page and print four fields joined by ';'.

    ``id`` is the item id substituted into the detail-page URL. Uses the
    module-level globals ``driver`` and ``css``. The printed fields are, in
    order: promotional price, monthly sales, cumulative review count, stock.
    """
    driver.get(f'https://detail.tmall.com/item.htm?id={id}')
    # Selectors are looked up in this exact order, one lookup per field.
    selectors = (
        '#J_PromoPrice .tm-price',       # promotional price
        '.tm-ind-sellCount .tm-count',   # monthly sales
        '#J_ItemRates .tm-count',        # cumulative review count
        '#J_EmStock',                    # stock
    )
    fields = [css(selector).text for selector in selectors]
    print(*fields, sep=';')
if __name__ == '__main__':
    # Placeholder credentials read by loginByBrowser().
    username = '用户名'
    pwd = '密码'
    # NOTE(review): positional arguments are presumably (chromedriver path,
    # browser name, default timeout in seconds, webdriver options) - confirm
    # against the requestium Session signature.
    driver = Session('C:/Program Files/Python36/chromedriver', 'chrome', 5, options).driver
    try:
        css = driver.ensure_element_by_css_selector
        loginByBrowser()
        while True:
            # The prompt only pauses the loop; its return value is not used.
            # The ids actually queried are the hard-coded list just below.
            input('换掉下行待查询的各商品id的列表后回车:')
            goodsIDs = ['563211819596',]
            try:
                for goods_id in goodsIDs:
                    getGoodsDetail(goods_id)
            except Exception as exc:
                # Report the failure instead of swallowing it, then leave the
                # loop exactly as the original did.
                print(f'查询失败,结束循环:{exc!r}')
                break
    finally:
        # Always close the browser, even when login or a lookup raises.
        driver.quit()
****************************************分割线****************************************
Egの粤偗偗考各考区的职位报名统计:
import os,requests,json,time,pandas as pd
from urllib.parse import parse_qs
def spider(region, timeout=10):
    """Fetch the per-position registration rows for one exam region.

    Args:
        region: key into the module-level ``regions`` table, whose values
            have the form ``'<region id>;<row count>'``.
        timeout: seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script forever.

    Returns:
        The ``'rows'`` entry of the JSON response.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if no answer arrives within ``timeout`` seconds.
    """
    regionID, num = regions[region].split(';')
    # Form fields are built directly as a dict; this is equivalent to parsing
    # the former 'bfa001=201801&bab301=...' query string for these values.
    data = {
        'bfa001': '201801',
        'bab301': regionID,
        'page': '1',
        'rows': num,  # ask for every row of the region in a single page
        'sort': 'aab119',
        'order': 'asc',
    }
    url = 'http://ggfw.gdhrss.gov.cn/gwyks/exam/details/spQuery.do'
    headers = {
        'User-Agent': 'Mozilla/5.0 Baiduspider+ Chrome/64',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        # NOTE(review): hard-coded session cookie; presumably it expires and
        # has to be refreshed by hand - confirm.
        'Cookie': 'JSESSIONID=vE9CvsoJTgJx4mx6w2FPaBKzJbovacHcWPWvQU8TvXldq4kc5ted!-447929663',
    }
    response = requests.post(url, headers=headers, data=data, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to JSON-decode an error page.
    response.raise_for_status()
    response.encoding = response.apparent_encoding
    return response.json()['rows']
def parse(rows, path='D:/公招/报名统计.csv'):
    """Append the registration statistics in ``rows`` to a CSV file.

    Args:
        rows: list of dicts as returned by the query endpoint. Each dict is
            read for the keys ``aab004`` (recruiting unit), ``bfe3a4``
            (position), ``bfe301`` (position code) and ``aab119`` (number of
            paid applicants); the first dict is also read for ``aae036``.
        path: CSV file to append to. Defaults to the original hard-coded
            location.

    The header row is written only when the file does not exist yet or is
    empty, so repeated calls (one per region) no longer scatter header lines
    through the data. An empty ``rows`` list is a no-op instead of raising
    ``IndexError``.
    """
    if not rows:
        return
    jobs = [
        (job['aab004'], job['bfe3a4'], job['bfe301'], job['aab119'])
        for job in rows
    ]
    # NOTE(review): 'aae036' is divided by 1000 before being handed to
    # time.localtime, so it is presumably an epoch timestamp in milliseconds.
    tjsj = time.strftime('%Y-%m-%d %H:%M',
                         time.localtime(int(rows[0]['aae036'] / 1000)))
    df = pd.DataFrame(jobs, columns=['招考单位', '招考职位', '职位代码', '缴费人数'])
    df['统计时间'] = tjsj
    # Emit the header once, for a new or empty file only.
    write_header = not (os.path.isfile(path) and os.path.getsize(path) > 0)
    df.to_csv(path, index=False, mode='a', header=write_header,
              encoding='utf-8-sig')
def main(region):
    """Download the rows for ``region`` and hand them to ``parse``."""
    parse(spider(region))
if __name__ == '__main__':
    # Region name -> '<region id>;<row count>'. spider() splits each value on
    # ';' and sends the two parts as the 'bab301' and 'rows' form fields.
    regions = {
        '广州': '01;294',
        '深圳': '02;133',
        '珠海': '03;120',
        '汕头': '04;519',
        '佛山': '05;246',
        '韶关': '06;488',
        '河源': '07;493',
        '梅州': '08;500',
        '惠州': '09;355',
        '汕尾': '10;309',
        '东莞': '11;177',
        '中山': '12;178',
        '江门': '13;659',
        '阳江': '14;219',
        '湛江': '15;590',
        '茂名': '16;281',
        '肇庆': '17;585',
        '清远': '18;366',
        '潮州': '19;274',
        '揭阳': '20;354',
        '云浮': '21;302',
        '省直': '99;1033',
    }
    # Process every region in table order, pausing two seconds after each one.
    for region in regions:
        main(region)
        time.sleep(2)