# Python爬取微博热搜榜,将数据存入数据库
# -*- coding: utf-8 -*-
"""Scrape the Weibo hot-search ranking and store it in a MySQL table.

Run as a script: the summary page is downloaded, every hot-search keyword
is extracted with a regular expression, each entry is printed, and one row
per entry is inserted into the ``hotsearch`` table.
"""
import re
import urllib.parse

# Connection settings, passed unchanged to pymysql.connect().
config = {
    'host': '127.0.0.1',
    'port': 3306,
    'user': 'root',
    'password': '******',
    'db': 'weibo',
    'charset': 'utf8',
}

HOT_SEARCH_URL = 'http://s.weibo.com/top/summary'
SEARCH_URL_PREFIX = 'http://s.weibo.com/weibo/'

# The pattern matches markup in which quotes and slashes are preceded by a
# literal backslash (\" and \/).  Presumably the page ships its HTML inside
# an escaped script string -- confirm against the live page.
HOT_KEY_PATTERN = re.compile(
    r'td class=\\"td_05\\"><a href=\\"\\/weibo\\/(.*?)&Refer=top\\"'
)

# `rank` is backtick-quoted because RANK is a reserved word in MySQL 8.0.2+.
INSERT_SQL = (
    'insert into hotsearch (`rank`, daydate, mindate, title, url) '
    'values (%s, curdate(), curtime(), %s, %s)'
)


def normalize_keyword(raw: str) -> str:
    """Collapse the doubled percent-encoding of a captured keyword.

    The captured text carries ``%25`` where a single ``%`` is expected.
    Only that exact marker is rewritten, so digits that genuinely belong
    to the keyword (for example "2025") are left untouched.
    """
    return raw.replace('%25', '%')


def extract_hot_searches(html: str) -> list:
    """Return ``(rank, title, url)`` tuples for every keyword found in *html*.

    ``rank`` starts at 1 and follows the order of appearance, ``title`` is
    the percent-decoded keyword and ``url`` is the search URL built from the
    still-encoded keyword.  An input without matches yields an empty list.
    """
    entries = []
    for rank, raw in enumerate(HOT_KEY_PATTERN.findall(html), start=1):
        keyword = normalize_keyword(raw)
        title = urllib.parse.unquote(keyword)
        entries.append((rank, title, SEARCH_URL_PREFIX + keyword))
    return entries


def fetch_hot_search_html(timeout: float = 10) -> str:
    """Download the hot-search summary page and return its text.

    Raises the ``requests`` exception for a timeout, a connection failure
    or a non-success HTTP status instead of handing an error page to the
    parser.
    """
    # Imported here so the parsing helpers stay importable (and testable)
    # on machines without the HTTP driver installed.
    import requests

    response = requests.get(HOT_SEARCH_URL, timeout=timeout)
    response.raise_for_status()
    return response.text


def store_hot_searches(entries) -> None:
    """Insert the given ``(rank, title, url)`` tuples into ``hotsearch``.

    All rows are committed together after the last insert; the cursor and
    the connection are closed even when an insert fails.  Nothing is done
    (no connection is opened) when *entries* is empty.
    """
    if not entries:
        return

    # Imported here for the same reason as ``requests`` above.
    import pymysql

    conn = pymysql.connect(**config)
    try:
        with conn.cursor() as cursor:
            for rank, title, url in entries:
                cursor.execute(INSERT_SQL, (rank, title, url))
        conn.commit()
    finally:
        conn.close()


def main() -> None:
    """Fetch the ranking, print every entry, then persist all of them."""
    # The page is fetched and parsed before any database connection is
    # opened, so a network failure never leaves a connection dangling.
    entries = extract_hot_searches(fetch_hot_search_html())
    for rank, title, url in entries:
        print('{} {}  {}\n'.format(rank, title, url))
    store_hot_searches(entries)


if __name__ == '__main__':
    main()