史高治

新随笔 联系 管理
Egの天猫的任一商品详情页为入口,看到的商品促销价或淘宝价:
 
from requestium import Session,Keys
#from selenium.webdriver.common.action_chains import ActionChains
# If the slider captcha keeps reporting an error and asking for a refresh,
# switch to a portable (no-install) build of Chrome or Firefox.
# Browser settings that the __main__ section passes to requestium's Session.
options={
    'arguments':['disable-infobars'],
    'experimental_options':{
        # Presumably value 2 tells Chrome not to load images -- confirm
        # against the Chrome preference documentation.
        'prefs':{'profile.managed_default_content_settings.images':2},
    },
    # Path of the Chrome executable to launch.
    'binary_location':r'D:\Program Files\Browser\ChromeQuic\\chrome.exe',
}
 
def loginByBrowser():
    """Log in to Tmall through the browser controlled by ``driver``.

    Uses the module-level names ``driver``, ``css``, ``username`` and ``pwd``
    created in the ``__main__`` section. The credentials are typed into the
    login iframe; if the logout element cannot be found afterwards, the user
    is asked to solve the slider captcha by hand and the password is
    submitted a second time.
    """
    driver.get('https://login.tmall.com/')
    loginFrame=css('#J_loginIframe')
    # Three TABs and ENTER come first (presumably to reach the password login
    # form -- verify against the live page), then user name, TAB, password,
    # ENTER.
    loginFrame.send_keys(Keys.TAB,Keys.TAB,Keys.TAB,Keys.ENTER,
        username,Keys.TAB,pwd,Keys.ENTER)
    try:
        # Finding the logout element is used as the "logged in" signal.
        css('#J_Logout')
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that Ctrl+C and
        # SystemExit are not mistaken for a failed login.
        # The captcha is solved manually; an automated ActionChains drag of
        # '#nc_1_n1z' is intentionally not used here.
        input('浏览器端成功处理滑块验证码后,在本句句尾任敲一字母:')
        # SHIFT+TAB moves the focus back one field before the password is
        # typed and submitted again.
        loginFrame.send_keys(Keys.SHIFT+Keys.TAB)
        loginFrame.send_keys(pwd,Keys.ENTER)
 
def getGoodsDetail(id='563211819596'):
    """Open a Tmall item page and print four of its fields on one line.

    The text of the promo-price, monthly-sales, total-reviews and stock
    elements is read in that order through the module-level ``css`` helper
    and printed separated by ';'.

    id: item id placed in the detail-page URL.
    """
    driver.get(f'https://detail.tmall.com/item.htm?id={id}')
    selectors=(
        '#J_PromoPrice .tm-price',        # promo price
        '.tm-ind-sellCount .tm-count',    # monthly sales
        '#J_ItemRates .tm-count',         # total reviews
        '#J_EmStock',                     # stock
    )
    # Every element is looked up before anything is printed, so a missing
    # element raises without partial output.
    fields=[css(selector).text for selector in selectors]
    print(*fields,sep=';')
 
if __name__ == '__main__':
    # Placeholders: replace with the real account name and password.
    username='用户名'
    pwd='密码'
    driver=Session('C:/Program Files/Python36/chromedriver','chrome',5,options).driver
    css=driver.ensure_element_by_css_selector

    try:
        loginByBrowser()

        while True:
            # The prompt only pauses the script: its answer is not used and
            # the id list on the next line is what gets queried. The loop
            # therefore ends only when a query fails.
            input('换掉下行待查询的各商品id的列表后回车:')
            goodsIDs=['563211819596',]
            try:
                for goodsID in goodsIDs:
                    getGoodsDetail(goodsID)
            except Exception as err:
                # Report the failure instead of stopping silently.
                print(f'查询中断:{err!r}')
                break
    finally:
        # Always close the browser, including when login fails or the user
        # presses Ctrl+C at the prompt.
        driver.quit()
****************************************分割线****************************************
Egの广东省公务员考试各考区的职位报名统计:
 
import os,requests,json,time,pandas as pd
from urllib.parse import parse_qs
 
def spider(region,timeout=10):
    """Fetch the sign-up statistics rows of one exam region.

    region: key of the module-level ``regions`` table, whose value has the
        form 'regionID;rowCount'.
    timeout: seconds to wait for the server before ``requests`` gives up, so
        a stalled connection cannot hang the script.

    Returns the value stored under 'rows' in the JSON reply.
    Raises ``requests.HTTPError`` on a non-2xx reply, ``KeyError`` for an
    unknown region or a reply without 'rows'.
    """
    regionID,num=regions[region].split(';')
    # Form fields in the order the endpoint was originally called with; a
    # single page is requested with ``num`` rows.
    data={'bfa001':'201801','bab301':regionID,'page':'1','rows':num,
        'sort':'aab119','order':'asc'}
    url='http://ggfw.gdhrss.gov.cn/gwyks/exam/details/spQuery.do'
    # NOTE(review): the session cookie is hard-coded and has presumably
    # expired -- replace it with a current one before running.
    headers={'User-Agent':'Mozilla/5.0 Baiduspider+ Chrome/64',
        'Content-Type':'application/x-www-form-urlencoded; charset=UTF-8',
        'Cookie':'JSESSIONID=vE9CvsoJTgJx4mx6w2FPaBKzJbovacHcWPWvQU8TvXldq4kc5ted!-447929663'}
    response=requests.post(url,headers=headers,data=data,timeout=timeout)
    # Fail on an HTTP error page rather than on the JSON decoding below.
    response.raise_for_status()
    response.encoding=response.apparent_encoding
    return response.json()['rows']
 
def parse(rows,path='D:/公招/报名统计.csv'):
    """Append job sign-up rows to a CSV file.

    rows: list of dicts carrying the keys 'aab004', 'bfe3a4', 'bfe301' and
        'aab119'; the first dict must also carry 'aae036', which is divided
        by 1000 and used as a Unix timestamp (i.e. treated as milliseconds).
    path: CSV file to append to; its folder is created when missing.

    The header row is written only when the file is new or empty, so calling
    this once per region yields a single header. An empty ``rows`` writes
    nothing.
    """
    if not rows:
        return
    jobs=[(job['aab004'],job['bfe3a4'],job['bfe301'],job['aab119']) for job in rows]
    # One statistics time, taken from the first row, is stamped on every row.
    tjsj=time.strftime('%Y-%m-%d %H:%M',time.localtime(int(rows[0]['aae036']/1000)))
    df=pd.DataFrame(jobs,columns=['招考单位','招考职位','职位代码','缴费人数'])
    df['统计时间']=tjsj
    folder=os.path.dirname(path)
    if folder:
        os.makedirs(folder,exist_ok=True)
    needHeader=not os.path.exists(path) or os.path.getsize(path)==0
    # utf-8-sig keeps the Chinese column names readable when opened in Excel.
    df.to_csv(path,index=False,mode='a',header=needHeader,encoding='utf-8-sig')
 
def main(region):
    """Download one region's rows with ``spider`` and hand them to ``parse``."""
    parse(spider(region))
 
if __name__ == '__main__':
    # Region name -> 'regionID;rowCount'; spider() splits the value on ';'.
    regions={
        '广州':'01;294','深圳':'02;133','珠海':'03;120','汕头':'04;519',
        '佛山':'05;246','韶关':'06;488','河源':'07;493','梅州':'08;500',
        '惠州':'09;355','汕尾':'10;309','东莞':'11;177','中山':'12;178',
        '江门':'13;659','阳江':'14;219','湛江':'15;590','茂名':'16;281',
        '肇庆':'17;585','清远':'18;366','潮州':'19;274','揭阳':'20;354',
        '云浮':'21;302','省直':'99;1033',
    }
    # Regions are processed in table order, pausing two seconds after each.
    for region in regions:
        main(region)
        time.sleep(2)
    # To fetch a single region instead, call e.g. main('佛山').
posted on 2018-03-15 16:41  史高治  阅读(273)  评论(0)  编辑  收藏  举报