欢迎来到RankFan的Blogs

扩大
缩小

Python爬虫--淘宝“泸州老窖”

爬虫淘宝--“泸州老窖”

爬取淘宝“泸州老窖”相关信息:

import requests
import re
import json
import pandas as pd


# Raw cookie string sent with every Taobao search request; open_url() passes
# it through cookie_get() to turn it into a dict.
# NOTE(review): the value below looks like a truncated placeholder -- replace
# it with a real "name=value; name=value" cookie string before running.
usercookie = 'miid.......'

def cookie_get(usercookie):
    """Convert a raw cookie string into a dictionary.

    The string is split on ``;`` and every piece is split on its first
    ``=`` only, so values that themselves contain ``=`` stay intact.
    Pieces without an ``=`` (an empty piece left by a trailing ``;``, an
    empty input string, or a bare token) are skipped instead of raising
    ``ValueError``.

    :param usercookie: cookie string such as ``"a=1; b=2"``.
    :return: dict mapping each cookie name to its value.
    """
    cookies = {}
    for piece in usercookie.split(";"):
        name, sep, value = piece.strip().partition("=")
        if not sep:
            # No "name=value" pair in this piece; nothing to store.
            continue
        cookies[name] = value
    return cookies


def open_url(keyword, page, timeout=10):
    """Request one page of Taobao search results, sorted by sales.

    :param keyword: search term, sent as the ``q`` query parameter.
    :param page: 1-based page number; it is converted to the ``s`` offset
        using 44 items per page.
    :param timeout: seconds to wait for the server before ``requests``
        gives up and raises a timeout exception.
    :return: the ``requests.Response`` returned by the search request.
    """
    params = {
        'q': keyword,
        's': str((page - 1) * 44),
        'sort': 'sale-desc',
    }
    url = "https://s.taobao.com/search"
    cookies = cookie_get(usercookie)

    # Without an explicit timeout, requests waits indefinitely on a stalled
    # connection and the whole scraper hangs.
    return requests.get(url, params=params, cookies=cookies, timeout=timeout)

def get_page_items(path="items.txt"):
    """Extract the list of auction items from a saved search-result page.

    The page is expected to contain a line of the form
    ``g_page_config = {...};`` whose JSON payload holds the items under
    ``mods -> itemlist -> data -> auctions``.

    :param path: file holding the saved page text (UTF-8).
    :return: the ``auctions`` value from the embedded JSON.
    :raises ValueError: if the page contains no ``g_page_config`` line,
        which would otherwise surface as an opaque ``AttributeError``.
    """
    with open(path, "r", encoding="utf-8") as page_file:
        page_text = page_file.read()

    match = re.search(r"g_page_config = (.*?);\n", page_text)
    if match is None:
        raise ValueError(
            "g_page_config not found in %r; the saved page is not a "
            "search-result page" % (path,)
        )
    page_config = json.loads(match.group(1))
    return page_config['mods']['itemlist']['data']['auctions']

def get_reslut(page_items, results=None):
    """Append one trimmed record per item to ``results`` and return it.

    Only the fields ``nid``, ``title``, ``detail_url``, ``view_price``,
    ``view_sales`` and ``nick`` are kept.  A field that an item does not
    provide is stored as ``None`` rather than raising ``KeyError``.

    :param page_items: iterable of item dicts.
    :param results: list to extend in place; a new list is created when
        omitted.
    :return: the same ``results`` list, with the new records appended.
    """
    if results is None:
        results = []
    fields = ('nid', 'title', 'detail_url', 'view_price', 'view_sales', 'nick')
    for item in page_items:
        results.append({field: item.get(field) for field in fields})
    return results

def results_to_excel(results, file_name='泸州老窖.xlsx'):
    """Write the collected records to an Excel workbook.

    The sheet has the columns ``nid``, ``title``, ``detail_url``,
    ``view_price``, ``view_sales`` and ``nick`` in that order, without the
    DataFrame index.  Missing values are written as a single space.

    :param results: iterable of record dicts.
    :param file_name: path of the workbook to create.
    """
    order = ['nid', 'title', 'detail_url', 'view_price', 'view_sales', 'nick']
    # Passing ``columns`` fixes the column order and also keeps the call
    # valid when ``results`` is empty (indexing by ``order`` would raise).
    pf = pd.DataFrame(list(results), columns=order)
    pf = pf.fillna(' ')  # replace missing values with a single space
    # ``ExcelWriter.save()`` and the ``encoding`` argument of ``to_excel``
    # no longer exist in pandas 2.x; writing straight to the path works on
    # old and new versions alike.
    pf.to_excel(file_name, index=False)

def main(keyword='泸州老窖', pages=3):
    """Scrape the first ``pages`` search-result pages and export them.

    Each page is fetched with ``open_url``, saved to ``items.txt``, parsed
    with ``get_page_items`` and reduced with ``get_reslut``; the combined
    records are then written out by ``results_to_excel``.

    :param keyword: search term to look up.
    :param pages: number of result pages to fetch, starting at page 1
        (44 items per page).
    """
    results = []
    for page in range(1, pages + 1):
        res = open_url(keyword, page)
        # get_page_items() reads the page back from items.txt, so the
        # response text has to be saved there first.
        with open("items.txt", 'w', encoding="utf-8") as file:
            file.write(res.text)
        page_items = get_page_items()
        results = get_reslut(page_items, results)

    results_to_excel(results)

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

posted on 2021-04-18 21:56  RankFan  阅读(62)  评论(0)  编辑  收藏  举报

导航