python - 获取法定节假日日历
先找一个提供日历数据的网站(这里用的是必应的 richcalendar 接口),然后使用 requests 获取返回的 HTML,并用 BeautifulSoup 解析,最后使用 pandas 导出为 Excel 文件。
脚本如下
import pandas as pd
import requests
import json
from tqdm import trange
from bs4 import BeautifulSoup
def get_Calendar(year):
    """Download the Bing rich calendar for ``year`` and save it as ``{year}.xlsx``.

    One request is sent per month (1-12) to
    ``https://cn.bing.com/richcalendar/fetch``.  Every ``<div>`` carrying a
    ``data-date`` attribute is treated as one day cell; its ``data-*``
    attributes and the text of its optional ``rcld_celllabel`` child are
    collected into one row.  Rows are sorted by date, de-duplicated on the
    date, and written to an Excel file named after the year (relative path).

    Args:
        year: The calendar year to fetch.  It is converted with ``str()``
            both for the request parameters and for the year-prefix filter.

    Raises:
        requests.RequestException: If a request times out, fails, or returns
            an HTTP error status.
        KeyError: If a day cell lacks one of the expected ``data-*``
            attributes.
    """
    url = 'https://cn.bing.com/richcalendar/fetch'
    # Loop-invariant, so it is built once instead of once per month.
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.41',
    }
    # Output columns that are copied verbatim from the ``data-<name>``
    # attribute of each day cell.
    attribute_columns = ['datetagsstr', 'dayofweek', 'festivals', 'rileft', 'riright']
    columns = ['date'] + attribute_columns + ['jia']
    year_prefix = str(year)

    rows = []
    for month in trange(1, 13):
        params = {
            'year': year_prefix,
            'month': str(month),
            'day': '1',
        }
        # A timeout keeps a stalled connection from hanging the script forever.
        res = requests.get(url, params=params, headers=headers, timeout=30)
        # Fail loudly on an HTTP error instead of parsing an error page.
        res.raise_for_status()
        soup = BeautifulSoup(res.text, 'lxml')
        for cell in soup.find_all('div', {'data-date': True}):
            # Skip cells whose date does not start with the requested year.
            if cell['data-date'][0:4] != year_prefix:
                continue
            row = {'date': cell['data-date']}
            for name in attribute_columns:
                row[name] = cell['data-' + name]
            # The label element is optional; an empty string marks its absence.
            label = cell.find('div', {'class': 'rcld_celllabel'})
            row['jia'] = '' if label is None else label.text
            rows.append(row)

    tmp = pd.DataFrame(rows, columns=columns)
    # A stable sort keeps rows with the same date in fetch order, so
    # keep='first' below deterministically retains the first fetched copy
    # (presumably a month page can repeat days of adjacent months -- the
    # de-duplication exists for that case).
    tmp = tmp.sort_values(by=['date'], kind='mergesort')
    tmp = tmp.drop_duplicates(['date'], keep='first')
    tmp = tmp.reset_index(drop=True)
    tmp.to_excel(f'{year}.xlsx', index=False)
# Example run: fetch the calendar for 2023; the result is written to
# "2023.xlsx" (a relative path, so it lands in the current working directory).
get_Calendar(2023)