Scrapy Demo

Weather alert information and a car hot-list ranking: the first spider pulls national weather alerts from weather.com.cn, the second pulls the car popularity ranking from dongchedi.com.
import scrapy
import time
import json
import copy


class WeatheralarmSpider(scrapy.Spider):
    name = 'WeatherAlarm'
    allowed_domains = ['weather.com.cn']
    # The alarm feed is a JSONP-style endpoint; the millisecond timestamp acts as a cache buster.
    url = 'http://product.weather.com.cn/alarm/grepalarm_cn.php?_=' + str(int(time.time() * 1000))
    start_urls = [url]

    table_name = 'weather_alarm'

    def start_requests(self):
        # Code tables passed to parse() via meta: severity codes -> colour names,
        # hazard-type codes -> hazard names.
        grade_kind = {"gradeObj": {"01": "蓝色", "02": "黄色", "03": "橙色", "04": "红色", "05": "白色"},
                      "kindObj": {"01": "台风", "02": "暴雨", "03": "暴雪", "04": "寒潮", "05": "大风", "06": "沙尘暴",
                                  "07": "高温", "08": "干旱", "09": "雷电", "10": "冰雹", "11": "霜冻", "12": "大雾",
                                  "13": "霾",
                                  "14": "道路结冰", "91": "寒冷",
                                  "92": "灰霾", "93": "雷雨大风", "94": "森林火险", "95": "降温", "96": "道路冰雪",
                                  "97": "干热风", "98": "空气重污染",
                                  "99": "低温", "51": "海上大雾", "52": "雷暴大风", "53": "持续低温", "54": "浓浮尘",
                                  "55": "龙卷风", "56": "低温冻害",
                                  "57": "海上大风", "58": "低温雨雪冰冻", "59": "强对流", "60": "臭氧", "61": "大雪",
                                  "62": "强降雨", "63": "强降温",
                                  "64": "雪灾", "65": "森林(草原)火险", "66": "雷暴", "67": "严寒", "68": "沙尘",
                                  "69": "海上雷雨大风", "70": "海上雷电",
                                  "71": "海上台风", "72": "低温"}}

        for url in self.start_urls:
            yield scrapy.Request(url,
                                 dont_filter=True, meta={"grade_kind": copy.deepcopy(grade_kind)})

    def parse(self, response, **kwargs):
        grade_kind = response.request.meta["grade_kind"]
        grade_obj = grade_kind["gradeObj"]
        kind_obj = grade_kind["kindObj"]
        # The endpoint returns a JSONP-style payload; slice off the wrapper to get plain JSON.
        result = json.loads(response.text[14:len(response.text) - 1])
        item = {}
        for weather in result['data']:
            item['site'] = weather[0]
            # Two-digit hazard-type and severity codes are embedded near the end of the alarm file name.
            weather_type = weather[1][-9:-7]
            weather_grade = weather[1][-7:-5]
            item['weather_type'] = kind_obj[weather_type]
            item['weather_grade'] = grade_obj[weather_grade]
            publish_date = weather[4].split('_')[1]
            item['publish_date'] = time.strftime("%Y-%m-%d %H:%M:%S", time.strptime(publish_date, "%Y%m%d%H%M%S"))
            # The item dict is reused across iterations, so yield a copy.
            yield copy.deepcopy(item)
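
Both spiders set a table_name class attribute, which implies the items are persisted by a shared database pipeline that the post does not show. The snippet below is a minimal sketch of such a pipeline, assuming SQLite, a hypothetical scrapy_demo.db file, and pre-existing tables whose columns match the item keys; it is not the original implementation.

import sqlite3


class TableNamePipeline:
    """Insert each item into the table named by the spider's table_name attribute."""

    def open_spider(self, spider):
        self.conn = sqlite3.connect('scrapy_demo.db')  # hypothetical database file

    def close_spider(self, spider):
        self.conn.commit()
        self.conn.close()

    def process_item(self, item, spider):
        # Build the INSERT statement from the item's keys; assumes the table columns match.
        columns = ', '.join(item.keys())
        placeholders = ', '.join('?' for _ in item)
        sql = f'INSERT INTO {spider.table_name} ({columns}) VALUES ({placeholders})'
        self.conn.execute(sql, tuple(item.values()))
        return item

It would be enabled through the ITEM_PIPELINES setting, e.g. {"myproject.pipelines.TableNamePipeline": 300}, where the module path is hypothetical.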



import copy
import json
from urllib.parse import urlencode

import arrow
import scrapy


class DongchediHotRankSpider(scrapy.Spider):
    name = "DongchediHotRank"
    allowed_domains = ["dongchedi.com"]
    start_urls = ["https://www.dongchedi.com/motor/pc/car/rank_data?"]

    table_name = 'hot_rank'

    # Each dict maps outter_detail_type codes to a vehicle segment name; one request is issued per group.
    car_type_list = [
        {"0": '微型车', "1": '小型车', "2": '紧凑型车', "3": '中型车', "4": '中大型车', "5": '大型车'},
        {"10": '小型SUV', "11": '紧凑型SUV', "12": '中型SUV', "13": '中大型SUV', "14": '大型SUV'},
        {"20": '小型MPV', "21": '紧凑型MPV', "22": '中型MPV', "23": '中大型MPV', "24": '大型MPV'},
    ]

    def start_requests(self):
        for car_type in self.car_type_list:
            params = {
                "aid": "1839",
                "app_name": "auto_web_pc",
                "city_name": "全国",
                "count": 100,
                "offset": 0,
                "rank_data_type": 1,
                "outter_detail_type": ",".join(car_type.keys()),
                "nation": 1
            }
            for url in self.start_urls:
                url = url + urlencode(params)
                yield scrapy.Request(url,
                                     dont_filter=True, meta={"car_type": copy.deepcopy(car_type)})

    def parse(self, response, **kwargs):
        result = json.loads(response.text)
        car_type = response.request.meta["car_type"]
        item = {'date': arrow.now().format("YYYY-MM-DD")}
        for car in result['data']['list']:
            item['series_id'] = car['series_id']
            item['series_name'] = car['series_name']
            item['min_price'] = car['min_price']
            item['max_price'] = car['max_price']
            item['hot_count'] = car['count']
            item['car_type'] = car['outter_detail_type']
            item['car_type_name'] = car_type.get(str(item['car_type']))
            item['brand_name'] = car['brand_name']
            item['sub_brand_name'] = car['sub_brand_name']
            item['price'] = car['price']
            yield copy.deepcopy(item)
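
Inside a standard Scrapy project the spiders would normally be run with the CLI (scrapy crawl WeatherAlarm and scrapy crawl DongchediHotRank). The script below is a minimal sketch of running both from one file where the two classes are in scope; the items.jl feed path is an assumption, and the pipeline sketched earlier could be wired in via ITEM_PIPELINES instead.

from scrapy.crawler import CrawlerProcess

process = CrawlerProcess(settings={
    # Write every scraped item to one JSON Lines file.
    "FEEDS": {"items.jl": {"format": "jsonlines"}},
})
process.crawl(WeatheralarmSpider)
process.crawl(DongchediHotRankSpider)
process.start()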


