import datetime
import re
import time

import itchat
import requests
from bs4 import BeautifulSoup

# Pages scraped for the morning digest.
WEATHER_URL = 'http://www.weather.com.cn/weather1d/101110102.shtml'
NEWS_URL = 'https://tech.163.com/special/00097UHL/tech_datalist.js?callback=data_callback'
CLOTHING_URL = 'http://www.weather.com.cn/forecast/ct.shtml?areaid=101110102'

# Browser-like headers sent with the NetEase news request.
# NOTE(review): 'Accept-Encoding' advertises 'br'; requests only decodes Brotli
# when a brotli package is installed - confirm the response is readable.
NEWS_HEADERS = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate, sdch, br',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    'Cookie': '_ga=GA1.2.1295241282.1555764994; _ntes_nuid=ba6c01094ad014ee67291cbd93fbf9ca; _ntes_nnid=cdb6628bbe04b0309ff818ea7c47119e,1561017743195; NNSSPID=e2f760b6a8494ad4ba99f7987b3cab06; NTES_hp_textlink1=old; ne_analysis_trace_id=1561018193650; Province=0; City=0; vinfo_n_f_l_n3=bd4ac504432134d2.1.1.1561017743224.1561019652855.1561019887629; s_n_f_l_n3=bd4ac504432134d21561019791428',
    'Host': 'tech.163.com',
    'If-Modified-Since': 'Thu, 20 Jun 2019 08:08:01 GMT',
    'Referer': 'http://tech.163.com/',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 SE 2.X MetaSr 1.0',
}

REQUEST_TIMEOUT = 10     # seconds; without a timeout a stalled server blocks forever
MAX_NEWS_ITEMS = 18      # upper bound on headlines included in the digest
MORNING_TIME = '08:05'   # HH:MM at which the morning digest is sent
NIGHT_TIME = '22:00'     # HH:MM at which the good-night message is sent


def wenhou():
    """Scrape today's weather and tech headlines and build the morning digest.

    Returns:
        A one-element list whose only item is the complete message text
        (greeting, weather summary, clothing advice, up to MAX_NEWS_ITEMS
        headline/URL pairs and a fixed closing line).

    Raises:
        requests.RequestException: if either HTTP request fails or times out.
        AttributeError, TypeError: if the expected weather-page tags are not
            found (BeautifulSoup's find() then yields None).
    """
    res = requests.get(WEATHER_URL, timeout=REQUEST_TIMEOUT)  # today's weather page
    turl = requests.get(NEWS_URL, headers=NEWS_HEADERS,
                        timeout=REQUEST_TIMEOUT)  # latest tech news (NetEase)
    turl.encoding = turl.apparent_encoding
    res.encoding = 'utf-8'

    soup1 = BeautifulSoup(res.text, 'html.parser')
    # Slice off the leading 50 and trailing 2 characters of the response to
    # keep only the news payload, as the original code did.
    soup2 = turl.text[50:-2]

    why = '来自大学生新技能的早安日报:'   # greeting line
    new = '以下是今日最新的科技头条:'     # headline-section intro
    # Anchor tag that holds the clothing advice.
    cytj = soup1.find('a', {'target': '_blank', 'href': CLOTHING_URL})
    # Weather summary stored in a hidden <input>.
    tianqi = soup1.find('input', id='hidden_title')['value']

    str1 = re.findall(r'title.*', soup2)    # one match per headline line
    str2 = re.findall(r'docurl.*', soup2)   # one match per article-URL line

    parts = [why, '\n', tianqi, '\n', cytj.p.text, '\n' * 2, new, '\n']
    # zip over slices instead of range(18): a feed with fewer items no longer
    # raises IndexError, it just yields a shorter digest.
    for title, url in zip(str1[:MAX_NEWS_ITEMS], str2[:MAX_NEWS_ITEMS]):
        parts.append(title[8:-2] + ':' + '\n')   # drop the key prefix and trailing 2 chars
        parts.append(url[9:-2] + '\n')
    parts.append('\n' * 2 + '今天也要主动一些..@_@|||||..')   # fixed closing line

    top10 = ''.join(parts)
    wenhou1 = [top10]   # wrapped in a list for sending via WeChat
    return wenhou1


def _broadcast(friends, text):
    """Send text to every entry in friends, reporting (not hiding) failures."""
    for friend in friends:
        try:
            itchat.send(text, toUserName=friend['UserName'])
        except Exception as exc:
            # Best effort: one failed recipient must not stop the others, but
            # Ctrl-C / SystemExit are no longer swallowed as with bare except.
            print('send failed: {!r}'.format(exc))


# --- WeChat batch messaging ---
# hotReload=True caches the login state; per the original author's note it
# expires after roughly 5 minutes without activity.
itchat.auto_login(hotReload=True)
# NOTE(review): itchat appears to return the logged-in account itself as the
# first entry - confirm whether it should be excluded.
friends = itchat.get_friends(update=True)[0:]

# Date on which each scheduled message was last sent, so that each goes out at
# most once per day even though the whole minute is now matched.
last_sent = {}

while True:   # keep the bot running
    now = datetime.datetime.now()
    now_str = now.strftime('%H:%M:%S')
    print('\r{}'.format(now_str))   # show the current time

    today = now.date()
    minute = now_str[:5]
    # Match on the minute rather than the exact second: with a 1 s sleep plus
    # loop overhead the precise target second can be skipped entirely.
    if minute == MORNING_TIME and last_sent.get('morning') != today:
        last_sent['morning'] = today
        try:
            wenhou2 = wenhou()   # scrape and build the digest
        except Exception as exc:
            # A failed scrape skips today's digest instead of killing the bot.
            print('building the morning digest failed: {!r}'.format(exc))
        else:
            _broadcast(friends, wenhou2[0])
    if minute == NIGHT_TIME and last_sent.get('night') != today:
        last_sent['night'] = today
        _broadcast(friends, '晚安~')
    time.sleep(1)   # poll once per second
其中有两点需要记录:
一:
需要从 JSON 文本中检索多个数据时,使用 re 模块更为方便:利用 re.findall 即可得到所有匹配结果的列表,再遍历列表并切片截取所需内容;
二:
微信消息发送过多时,需要提前用 list 容器封装消息,例如 a = [b],其中 b 为全部消息内容;具体用法参考上方代码中 top10 的封装。