Python 使用 MySQL
连接 MySQL 做了个小练习:爬取 http://wufazhuce.com 上的问题、描述和答案,存到本地的数据库里。
数据表结构:
-- Storage for the scraped question pages; every column is a nullable string.
CREATE TABLE `questions` (
  `title`       VARCHAR(2000) DEFAULT NULL,  -- question title
  `description` VARCHAR(200)  DEFAULT NULL,  -- question description
  `answers`     VARCHAR(2000) DEFAULT NULL,  -- answer text
  `url`         VARCHAR(2000) DEFAULT NULL,  -- page the row was scraped from
  `daynum`      VARCHAR(20)   DEFAULT NULL   -- page number used in the URL
) ENGINE=InnoDB DEFAULT CHARSET=utf8
代码:
# author:
"""Crawl wufazhuce.com question pages and store them in the `questions` table.

For every page number the script downloads the page, extracts the title,
the description and each answer, and inserts one row per answer.  Pages
that do not contain a question are recorded with '404' placeholders.
"""
import pymysql.cursors
import requests
from bs4 import BeautifulSoup

BASE_URL = 'http://wufazhuce.com/question/%s'
FIRST_PAGE = 1
LAST_PAGE = 1871  # inclusive

# Placeholders are filled in by the driver, never by string formatting, so
# quotes in scraped text cannot break (or inject into) the statement.
INSERT_SQL = (
    'insert into questions (title,description,answers,url,daynum) '
    'values (%s,%s,%s,%s,%s)'
)

MAX_DESC_LEN = 200     # `description` column is varchar(200)
MAX_ANSWER_LEN = 1800  # keeps answers inside the varchar(2000) column
REQUEST_TIMEOUT = 10   # seconds; without it a stalled server hangs the crawl


def _build_rows(html_text, url, p_num):
    """Turn one downloaded page into a list of INSERT parameter tuples.

    :param html_text: decoded HTML of the page.
    :param url: address the page was fetched from.
    :param p_num: page number, stored in the `daynum` column.
    :return: list of (title, description, answer, url, daynum) tuples; a
        single '404' placeholder row when the page holds no question, and
        an empty list when the question has no answer block.
    """
    soup = BeautifulSoup(html_text, features="html.parser")
    daynum = str(p_num)

    tar = soup.find('div', class_='one-cuestion')
    if not tar:
        print('not tar')
        return [('404', '404', '404', url, daynum)]

    heading = tar.find('h4')
    title = heading.text.strip() if heading else ''

    # The first content block is the description, the rest are answers.
    blocks = soup.find_all('div', class_='cuestion-contenido')
    if not blocks:
        return []
    desc = blocks[0].text.strip()[:MAX_DESC_LEN]

    return [
        (title, desc, block.text.strip()[:MAX_ANSWER_LEN], url, daynum)
        for block in blocks[1:]
    ]


def main():
    """Crawl every page in [FIRST_PAGE, LAST_PAGE] and insert the results."""
    con = pymysql.connect(host='192.168.86.130', user='root',
                          password='letmein', db='0603simon',
                          port=3306, charset='utf8')
    try:
        with con.cursor() as cur:
            for p_num in range(FIRST_PAGE, LAST_PAGE + 1):
                url = BASE_URL % p_num
                try:
                    response = requests.get(url=url, timeout=REQUEST_TIMEOUT)
                except requests.RequestException as exc:
                    # One unreachable page must not abort the whole crawl.
                    print('request failed: %s (%s)' % (url, exc))
                    continue
                response.encoding = response.apparent_encoding

                for row in _build_rows(response.text, url, p_num):
                    # mogrify() shows the statement exactly as it is sent.
                    print(cur.mogrify(INSERT_SQL, row))
                    result = cur.execute(INSERT_SQL, row)
                    con.commit()
                    print('执行结果:' + str(result))
    finally:
        # Always release the connection, even when the crawl fails midway.
        con.close()


if __name__ == '__main__':
    main()
邮箱的使用
# coding:utf-8
"""Mail the wufazhuce.com question of the day once every 24 hours."""
import time
from threading import Timer


def get_question():
    """Download today's question from wufazhuce.com.

    The page number is 2593 (the page for 2019-09-08) plus the number of
    days elapsed since that date.

    :return: dict with keys 'title', 'desc' and 'answer'.  When the page
        cannot be fetched or does not contain a question yet, the dict
        only holds 'title' set to ''.
    """
    import datetime

    import requests
    from bs4 import BeautifulSoup

    obj = {}
    first_num = 2593
    first_date = datetime.date(2019, 9, 8)
    # Calendar-date arithmetic avoids the off-by-one that dividing a
    # timestamp difference by 86400 can produce around DST changes.
    cur_num = first_num + (datetime.date.today() - first_date).days
    print(cur_num)

    url = 'http://wufazhuce.com/question/%s' % cur_num
    try:
        response = requests.get(url=url, timeout=10)
    except requests.RequestException as exc:
        # Report "no question" so the caller retries instead of crashing.
        print(exc)
        obj['title'] = ''
        return obj
    response.encoding = response.apparent_encoding
    soup = BeautifulSoup(response.text, features="html.parser")

    tar = soup.find('div', class_='one-cuestion')
    if not tar:
        print('not tar')
        obj['title'] = ''
        return obj

    heading = tar.find('h4')
    # The first content block is the description; of the remaining blocks
    # the last one is used as the answer.
    blocks = soup.find_all('div', class_='cuestion-contenido')

    obj['title'] = heading.text.strip() if heading else ''
    obj['desc'] = blocks[0].text.strip() if blocks else ''
    obj['answer'] = blocks[-1].text.strip() if len(blocks) > 1 else ''
    return obj


def send_email(title, desc, content):
    """Send the question as an HTML mail.

    :param title: mail subject.
    :param desc: question description (plain text).
    :param content: answer (plain text).
    :return: True when the mail was handed to the SMTP server, else False.
        Errors are printed, never raised.
    """
    import smtplib
    from email.mime.multipart import MIMEMultipart
    from email.mime.text import MIMEText
    from email.utils import formataddr
    from html import escape

    sender = 'xxxxx@163.com'   # sender mailbox account
    receive = 'xxxxx@qq.com'   # recipient mailbox account
    passwd = 'xxxxx'
    mailserver = 'smtp.163.com'
    port = 25                  # SMTP port of the sender's mail server

    try:
        msg = MIMEMultipart('related')
        msg['From'] = formataddr(["sender", sender])      # display name, address
        msg['To'] = formataddr(["receiver", receive])     # display name, address
        msg['Subject'] = title

        # The scraped text is plain text, so it is escaped before being
        # embedded in HTML; a real <br/> tag separates the two parts.
        body = '【描述】:%s<br/>\n【回答】:%s' % (escape(desc), escape(content))
        msg.attach(MIMEText(body, 'html', 'utf-8'))

        # The context manager issues QUIT and closes the socket even when
        # login or sending fails.
        with smtplib.SMTP(mailserver, port) as server:
            server.login(sender, passwd)
            server.sendmail(sender, receive, msg.as_string())
        print('success')
        return True
    except Exception as e:
        # Best effort: the scheduler loop must survive a failed delivery.
        print(e)
        return False


def main_to():
    """Fetch today's question and mail it.

    :return: 1 when a question was found and the mail was sent, else 0.
    """
    obj = get_question()
    if obj['title'] and send_email(obj['title'], obj['desc'], obj['answer']):
        return 1
    return 0


timer_interval = 1


def delayrun():
    """Print a message, sleep for three hours, then print another message.

    NOTE(review): this runs in its own Timer thread and does not delay the
    main loop below - confirm whether it is still needed.
    """
    print('running')
    first_time = 3 * 60 * 60
    time.sleep(first_time)
    print('先歇一下')


def _run_forever():
    """Poll once a minute until a mail is sent, then wait a full day."""
    while True:
        print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time())))
        wait_time = 60 * 60 * 24 if main_to() else 60
        time.sleep(wait_time)


if __name__ == '__main__':
    t = Timer(timer_interval, delayrun)
    t.start()
    _run_forever()