python - 获取法定节假日日历

先找一个提供日历数据的网站(这里使用必应的日历接口 cn.bing.com/richcalendar/fetch),然后使用 requests 获取响应内容并用 BeautifulSoup 解析,最后使用 pandas 导出为 Excel 文件。
脚本如下:

import pandas as pd
import requests
import json
from tqdm import trange
from bs4 import BeautifulSoup

def get_Calendar(year):
    """Scrape one year of Bing's rich calendar and save it as ``{year}.xlsx``.

    One request is sent per month (1-12) to
    ``https://cn.bing.com/richcalendar/fetch``. Every ``<div>`` in the
    response that carries a ``data-date`` attribute is treated as one day
    cell; its ``data-*`` attributes are copied verbatim into the output.
    The text of a nested ``rcld_celllabel`` div, when present, is stored in
    the ``jia`` column (presumably the holiday / make-up-workday marker --
    confirm against the live page).

    Args:
        year: Calendar year to export (int or str). It is sent as the
            ``year`` query parameter and used as the output file name.

    Raises:
        requests.RequestException: If a request times out, fails, or the
            server answers with an HTTP error status.
        KeyError: If a day cell lacks one of the expected ``data-*``
            attributes.

    Returns:
        None. The result is written to ``{year}.xlsx`` in the current
        working directory.
    """
    url = 'https://cn.bing.com/richcalendar/fetch'
    # Constant across requests, so build it once instead of once per month.
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.41',
    }
    # Columns that map one-to-one onto a ``data-<name>`` attribute of a cell.
    attr_columns = ['date', 'datetagsstr', 'dayofweek', 'festivals', 'rileft', 'riright']
    columns = attr_columns + ['jia']
    year_prefix = str(year)

    rows = []
    for month in trange(1, 13):
        params = {
            'year': year_prefix,
            'month': str(month),
            'day': '1',
        }
        # Without a timeout a stalled connection would hang the script forever.
        res = requests.get(url, params=params, headers=headers, timeout=30)
        # Fail loudly on an HTTP error instead of parsing an error page into
        # an empty or partial spreadsheet.
        res.raise_for_status()
        soup = BeautifulSoup(res.text, 'lxml')
        for cell in soup.find_all('div', {'data-date': True}):
            # Skip cells whose date does not start with the requested year
            # (presumably spill-over days of the neighbouring year).
            if cell['data-date'][0:4] != year_prefix:
                continue
            row = {name: cell[f'data-{name}'] for name in attr_columns}
            label = cell.find('div', {'class': 'rcld_celllabel'})
            row['jia'] = '' if label is None else label.text
            rows.append(row)

    # Passing ``columns`` keeps the header row even when nothing was scraped.
    tmp = pd.DataFrame(rows, columns=columns)
    # The same date can be returned by more than one monthly request.
    # De-duplicate before sorting so the first-fetched copy is the one kept,
    # then sort and renumber so the final index has no gaps.
    tmp = tmp.drop_duplicates(['date'], keep='first')
    tmp = tmp.sort_values(by=['date']).reset_index(drop=True)
    tmp.to_excel(f'{year}.xlsx', index=False)

# Script entry point: fetch the 2023 calendar and write it to 2023.xlsx.
get_Calendar(2023)

image

posted @ 2023-03-16 20:58  wstong  阅读(246)  评论(0)  编辑  收藏  举报