爬虫-给女朋友的每日天气预报

前序

好感人的程序员礼物,甩饼的女朋友都感动哭惹

数据准备

1.爬虫下载每日告白图片
2.爬取解析每日天气数据
3.SMTP发送给自己女朋友
4.把python代码做成脚本,发布至服务器,设置每日定时发送

1.爬虫下载告白图片

这里使用豆瓣相册(https://www.douban.com/photos/album/157693223/?m_start=0)提供的表白图片

代码:

import requests
from lxml import etree
import os
import time

def write_name(name):
    """Append one downloaded image file name to the name index.

    The index is the text file ``picLibs/name.txt``; each call adds one line.

    Args:
        name: File name of the image to record.
    """
    # Pin UTF-8 so the file does not depend on the platform default encoding;
    # a reader that decodes it as UTF-8 then always gets the same text back.
    with open('picLibs/name.txt', 'a', encoding='utf-8') as f:
        f.write(name + '\n')
    print(name+' 输入到txt成功!')

if __name__ == '__main__':
    # Create the image folder; exist_ok makes repeated runs safe.
    os.makedirs('./picLibs', exist_ok=True)
    # Album URL; the page offset is appended to m_start.
    url = 'https://www.douban.com/photos/album/157693223/?m_start='
    # Browser-like headers so the requests are not rejected as a robot.
    headers = {
        'Connection': 'Keep-Alive',
        'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
    }
    # Seconds to wait for any single response before giving up, so a stalled
    # server cannot hang the script forever.
    timeout = 10
    for i in range(18):
        real_url = url + str(i*17)
        page_text = requests.get(url=real_url, headers=headers, timeout=timeout).text
        tree = etree.HTML(page_text)
        url_list = tree.xpath('//*[@id="content"]/div[3]/div[1]/div[2]/div/a/img/@src')
        for image in url_list:
            img_name = image.split('/')[-1]          # last URL segment is the file name
            response = requests.get(url=image, headers=headers, timeout=timeout)
            if response.status_code != 200:
                # Do not store an error page as if it were an image.
                print(img_name, '下载失败,状态码:', response.status_code)
                continue
            img_path = 'picLibs/' + img_name
            with open(img_path, 'wb') as fp:         # raw bytes, so binary mode
                fp.write(response.content)
            print(img_name, '下载成功!!!')
            # Index the name only after the file is on disk, so the index
            # never points at an image that was not saved.
            write_name(img_name)
        time.sleep(1)                                # pause between pages to avoid detection
    print('全部下载完成!')

爬取下来就是这样

2.爬取解析每日天气数据

因为甩饼女票在湘潭,所以拿湘潭举例,换城市只需要换URL的城市编码就好
爬取的数据来源于:中国天气网 http://www.weather.com.cn/weather1d/101250201.shtml


可以看到中国天气网,温度模块、生活指数模块的数据,对我们比较有用
经过抓包分析后,还能发现这两个模块的数据是静态加载出来的,这就很nice了,直接用xpath解析就好

3.SMTP发送给自己女朋友

4.把python代码做成脚本,发布至服务器,设置每日定时发送

直接上代码吧

import requests
from lxml import etree
from email.mime.image import MIMEImage
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
import smtplib
import random
import linecache
from datetime import datetime

# HTTP request headers passed to requests.get() by getHTMLText().
headers = {
    'Connection': 'Keep-Alive',
    'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
}

def get_imageName():
    """Pick one image file name at random from ``picLibs/name.txt``.

    The index file holds one file name per line.

    Returns:
        A file name from the index, without its line ending.

    Raises:
        ValueError: If the index contains no file names.
    """
    with open('picLibs/name.txt', encoding='utf-8') as f:
        # Drop line endings and skip blank lines so only real names remain.
        names = [line.rstrip('\r\n') for line in f]
    names = [entry for entry in names if entry]
    if not names:
        raise ValueError('picLibs/name.txt contains no image names')
    # Choose among the actual entries; drawing a line number up to
    # "newline count + 1" can land past the last line and yield ''.
    return random.choice(names)

def getHTMLText(url):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded page text, or ``''`` if the request fails.
    """
    try:
        # The timeout keeps a stalled server from hanging the script forever.
        response = requests.get(url=url, headers=headers, timeout=10)
    except requests.RequestException:
        # Connection errors, timeouts and invalid URLs all end up here.
        return ''
    # Decode using the encoding detected from the body content.
    response.encoding = response.apparent_encoding
    return response.text


def parserHTMLWeather(html):
    """Extract today's weather fields from the weather page HTML.

    Args:
        html: HTML text of the page to parse.

    Returns:
        A dict with the keys ``day_t``, ``day_weather``, ``night_t``,
        ``night_weather``, ``cloth`` and ``sunny``, each holding the first
        text node matched by its XPath. ``None`` if the HTML cannot be parsed
        or any of the fields is missing.
    """
    # (result key, XPath of the text node) for every field, in output order.
    field_xpaths = (
        ('day_t', '//*[@id="today"]/div[2]/ul/li[1]/p[2]/span/text()'),
        ('day_weather', '//*[@id="today"]/div[2]/ul/li[1]/p[1]/text()'),
        ('night_t', '//*[@id="today"]/div[2]/ul/li[2]/p[2]/span/text()'),
        ('night_weather', '//*[@id="today"]/div[2]/ul/li[2]/p[1]/text()'),
        ('cloth', '//*[@id="chuanyi"]/a/p/text()'),
        ('sunny', '/html/body/div[5]/div[1]/div[4]/div/ul/li[6]/p/text()'),
    )
    try:
        tree = etree.HTML(html)
        # [0] raises IndexError when a node is absent, which is treated as a
        # parse failure for the whole page.
        return {key: tree.xpath(path)[0] for key, path in field_xpaths}
    except Exception:
        # Any parsing problem means "no usable data". Unlike a bare except,
        # this still lets KeyboardInterrupt and SystemExit propagate.
        return None

def makeMessage(weather):
    """Build the HTML weather text for the mail body.

    Args:
        weather: Mapping with the string values ``night_t``, ``day_t``,
            ``day_weather``, ``night_weather``, ``cloth`` and ``sunny``.

    Returns:
        The greeting and weather lines joined with ``<br/>`` line breaks.
    """
    lines = [
        '罗憨憨!~',
        '今日温度:' + weather['night_t'] + '℃~' + weather['day_t'] + '℃',
        '天气:白天:' + weather['day_weather'] + ' 转 ' + '夜晚:' + weather['night_weather'],
        '穿衣指南:' + weather['cloth'],
        '防晒指南:' + weather['sunny'],
    ]
    # "<br/>" is the valid line-break tag; "</br>" is a stray end tag that
    # only renders as a break through HTML error recovery.
    return '<br/>'.join(lines)

def sendEmail(Mes):
    """Send the daily mail: weather text, an inline picture and an exam countdown.

    Builds a ``multipart/related`` HTML message containing *Mes*, a picture
    chosen by ``get_imageName()`` and a countdown to 2020-12-26, then sends it
    through ``smtp.qq.com`` over SSL. SMTP and connection errors are printed
    rather than raised.

    Args:
        Mes: HTML fragment placed at the top of the mail body.
    """
    import os  # local import: this script's import block does not include os

    # Exam countdown: whole days from today's date to the exam date.
    today = datetime.now().date()
    now_str = today.strftime('%Y-%m-%d')
    days = (datetime(2020, 12, 26).date() - today).days
    exam_message = '今天:' + now_str + '<br/>考研时间:2020-12-26至2020-12-27 '
    exam_message += '<br/>今天距离考研(2020-12-26),还有 ' + str(days) + ' 天!'
    exam_message += '<br/>小罗要加油嗷~!'

    sender = 'xxxxxxx@qq.com'        # sender mailbox
    receivers = 'xxxxx@qq.com'       # recipient mailbox

    message = MIMEMultipart('related')
    message['Subject'] = '甩饼天气提醒'
    message['From'] = sender
    message['To'] = receivers

    # The bold sections need a real opening <b>; two closing tags leave the
    # text unstyled.
    html_content = (
        '<html><body>'
        + '<p>' + Mes + '</p>'
        + '<img src="cid:imageid" alt="imageid">'
        + '<p style="color:red" align="center"><b> - - - - - - - - 倒  计  时  ' + str(days) + '  天 - - - - - - - - </b></p>'
        + '<p style="color:red" align="center"><b>' + exam_message + '</b></p>'
        + '</body></html>'
    )
    message.attach(MIMEText(html_content, 'html', 'utf-8'))

    # Inline picture referenced from the HTML through its Content-ID.
    with open('picLibs/' + get_imageName(), 'rb') as image_file:
        img = MIMEImage(image_file.read())
    # The header value is the id wrapped in angle brackets; the cid: URL in
    # the HTML uses the bare id.
    img.add_header('Content-ID', '<imageid>')
    message.attach(img)

    # NOTE(review): a credential committed to source should be rotated and
    # removed. QQ_SMTP_AUTH_CODE overrides it; the literal is kept only as a
    # fallback so existing deployments keep working.
    auth_code = os.environ.get('QQ_SMTP_AUTH_CODE', 'ymwvkyryjzzxecgh')

    try:
        # The context manager closes the connection even if login or sending
        # fails; the timeout keeps an unattended run from hanging.
        with smtplib.SMTP_SSL("smtp.qq.com", 465, timeout=30) as server:
            server.login(sender, auth_code)
            server.sendmail(sender, receivers, message.as_string())
        print("邮件发送成功")
    except (smtplib.SMTPException, OSError) as e:
        # OSError covers connection-level failures (DNS, refused, timeout).
        print(e)

def getMessage():
    """Fetch today's weather, build the mail text, send it and print it.

    If the weather page cannot be parsed, nothing is sent and a notice is
    printed instead.
    """
    url = 'http://www.weather.com.cn/weather1d/101250201.shtml'   # weather page to scrape
    html = getHTMLText(url)
    weather = parserHTMLWeather(html)
    if weather is None:
        # parserHTMLWeather returns None on failure; makeMessage indexes its
        # argument and would raise TypeError on None.
        print('天气数据解析失败,今日邮件未发送')
        return
    message = makeMessage(weather)
    sendEmail(message)
    print(message)

# Script entry point: run the fetch-and-send routine once per invocation.
if __name__ == '__main__':
    getMessage()

实现效果图:
图片名称

最后在自己服务器,把py做成一个脚本
用crontab定期执行就好了,建议每天早上七点半发送

posted @ 2020-12-23 23:29  饼先生  阅读(570)  评论(0)  编辑  收藏  举报