# 天气预警信息与某地热榜车排名 (weather alarm info & Dongchedi car hot-rank spiders)
import scrapy
import time
import json
import copy
class WeatheralarmSpider(scrapy.Spider):
    """Scrape the national weather-alarm JSONP feed from weather.com.cn.

    Yields one dict per active alarm with keys:
    ``site``, ``weather_type``, ``weather_grade``, ``publish_date``.
    """

    name = 'WeatherAlarm'
    allowed_domains = ['weather.com.cn']
    # NOTE: kept for backward compatibility, but this timestamp is frozen at
    # import time; start_requests() rebuilds a fresh cache-buster per crawl.
    url = 'http://product.weather.com.cn/alarm/grepalarm_cn.php?_=' + str(int(time.time() * 1000))
    start_urls = [url]
    table_name = 'weather_alarm'

    def start_requests(self):
        # Lookup tables mapping the two-digit codes embedded in each alarm's
        # image filename to human-readable grade / kind names (Chinese).
        grade_kind = {"gradeObj": {"01": "蓝色", "02": "黄色", "03": "橙色", "04": "红色", "05": "白色"},
                      "kindObj": {"01": "台风", "02": "暴雨", "03": "暴雪", "04": "寒潮", "05": "大风", "06": "沙尘暴",
                                  "07": "高温", "08": "干旱", "09": "雷电", "10": "冰雹", "11": "霜冻", "12": "大雾",
                                  "13": "霾",
                                  "14": "道路结冰", "91": "寒冷",
                                  "92": "灰霾", "93": "雷雨大风", "94": "森林火险", "95": "降温", "96": "道路冰雪",
                                  "97": "干热风", "98": "空气重污染",
                                  "99": "低温", "51": "海上大雾", "52": "雷暴大风", "53": "持续低温", "54": "浓浮尘",
                                  "55": "龙卷风", "56": "低温冻害",
                                  "57": "海上大风", "58": "低温雨雪冰冻", "59": "强对流", "60": "臭氧", "61": "大雪",
                                  "62": "强降雨", "63": "强降温",
                                  "64": "雪灾", "65": "森林(草原)火险", "66": "雷暴", "67": "严寒", "68": "沙尘",
                                  "69": "海上雷雨大风", "70": "海上雷电",
                                  "71": "海上台风", "72": "低温"}}
        # Rebuild the cache-busting timestamp at crawl time; the class-level
        # `url` was computed once at import and goes stale on long-lived
        # processes / scheduled crawls.
        fresh_url = ('http://product.weather.com.cn/alarm/grepalarm_cn.php?_='
                     + str(int(time.time() * 1000)))
        # The tables are read-only in parse(), so a deepcopy per request is
        # unnecessary; one request is issued per crawl.
        yield scrapy.Request(fresh_url, dont_filter=True,
                             meta={"grade_kind": grade_kind})

    def parse(self, response):
        """Decode the JSONP payload and yield one item per alarm entry."""
        grade_kind = response.request.meta["grade_kind"]
        grade_obj = grade_kind["gradeObj"]
        kind_obj = grade_kind["kindObj"]
        text = response.text
        # The body is a JSONP wrapper: callback({...}).  Strip it by locating
        # the first '(' and last ')' instead of the brittle hard-coded
        # text[14:-1] slice, which breaks if the callback name ever changes.
        result = json.loads(text[text.find('(') + 1:text.rfind(')')])
        for weather in result['data']:
            # weather[1] is the alarm image filename; characters [-9:-7]
            # encode the alarm kind and [-7:-5] its grade.
            weather_type = weather[1][-9:-7]
            weather_grade = weather[1][-7:-5]
            # weather[4] looks like "<something>_YYYYMMDDHHMMSS..." — the
            # publish timestamp follows the first underscore.
            publish_date = weather[4].split('_')[1]
            # A fresh dict per entry removes the need for copy.deepcopy on a
            # shared, repeatedly mutated item.  .get(code, code) keeps the
            # spider alive on codes missing from the lookup tables instead of
            # raising KeyError (the raw code is emitted as a fallback).
            yield {
                'site': weather[0],
                'weather_type': kind_obj.get(weather_type, weather_type),
                'weather_grade': grade_obj.get(weather_grade, weather_grade),
                'publish_date': time.strftime(
                    "%Y-%m-%d %H:%M:%S",
                    time.strptime(publish_date, "%Y%m%d%H%M%S")),
            }
import copy
import json
import scrapy
from urllib.parse import urlencode
import arrow
class DongchediHotRankSpider(scrapy.Spider):
    """Scrape Dongchedi's national car popularity ranking, one request per
    vehicle size class (sedans, SUVs, MPVs).

    Yields one dict per ranked car series with pricing, popularity count,
    brand info, and the human-readable size-class name.
    """

    name = "DongchediHotRank"
    allowed_domains = ["dongchedi.com"]
    start_urls = ["https://www.dongchedi.com/motor/pc/car/rank_data?"]
    table_name = 'hot_rank'
    # Each dict maps the API's `outter_detail_type` code to a human-readable
    # class name; one request is issued per dict, with the codes joined into
    # the request's `outter_detail_type` filter.
    car_type_list = [
        {"0": '微型车', "1": '小型车', "2": '紧凑型车', "3": '中型车', "4": '中大型车', "5": '大型车'},
        {"10": '小型SUV', "11": '紧凑型SUV', "12": '中型SUV', "13": '中大型SUV', "14": '大型SUV'},
        {"20": '小型MPV', "21": '紧凑型MPV', "22": '中型MPV', "23": '中大型MPV', "24": '大型MPV'},
    ]

    def start_requests(self):
        for car_type in self.car_type_list:
            params = {
                "aid": "1839",
                "app_name": "auto_web_pc",
                "city_name": "全国",
                "count": 100,
                "offset": 0,
                "rank_data_type": 1,
                "outter_detail_type": ",".join(car_type.keys()),
                "nation": 1,
            }
            for base in self.start_urls:
                # car_type is a flat {str: str} dict, so a shallow dict()
                # snapshot is sufficient — deepcopy was overkill.
                yield scrapy.Request(base + urlencode(params),
                                     dont_filter=True,
                                     meta={"car_type": dict(car_type)})

    def parse(self, response, **kwargs):
        """Yield one item per ranked car series in the API response."""
        result = json.loads(response.text)
        car_type = response.request.meta["car_type"]
        # Hoist the date out of the loop: one crawl, one date.
        today = arrow.now().format("YYYY-MM-DD")
        for car in result['data']['list']:
            detail_type = car['outter_detail_type']
            # Build a fresh dict per car instead of mutating one shared item
            # and deep-copying it on every yield.
            yield {
                'date': today,
                'series_id': car['series_id'],
                'series_name': car['series_name'],
                'min_price': car['min_price'],
                'max_price': car['max_price'],
                'hot_count': car['count'],
                'car_type': detail_type,
                # Codes arrive as ints; the lookup table keys are strings.
                'car_type_name': car_type.get(str(detail_type)),
                'brand_name': car['brand_name'],
                'sub_brand_name': car['sub_brand_name'],
                'price': car['price'],
            }