寒假第四天
今天想的是怎么直接把爬取的数据保存在数据库中:
经过参考同学的代码,学会了爬取猫眼电影名称和上映时间并保存在mysql数据库中
发现最主要的就是设置mysql连接,而且mysql的语句我也是修改了好多次,照搬同学的语句就是不行,然后在网上也看到很多不一样的语句表达形式,最后碰巧遇到这种表达方式就可以了,代码如下:
"""Scrape movie names and release dates from the Maoyan TOP-100 board
and insert each record into a MySQL table via pymysql."""
import re

import pymysql
import requests

# Board page, paginated by `offset` in steps of 10.
BASE_URL = 'https://maoyan.com/board/4?offset={}'
HEADERS = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36"
}
# Compiled once instead of re-parsing the pattern on every page.
# re.S lets '.' span newlines, since each movie entry covers several lines.
MOVIE_RE = re.compile(
    '.*?class="name"><.*?>(.*?)</a></p>.*?<p class="releasetime">(.*?)</p>',
    re.S,
)


def get_page(url):
    """Fetch *url* and return its HTML text, or None on any failure.

    Only request-related exceptions are caught, so programming errors
    are not silently swallowed.
    """
    try:
        response = requests.get(url, headers=HEADERS)
        if response.status_code == 200:
            return response.text
        print('获取网页失败')
    except requests.RequestException as e:
        print(e)
    return None


def get_info(page):
    """Yield {'name': ..., 'time': ...} dicts parsed from a board page."""
    for name, release_time in MOVIE_RE.findall(page):
        yield {'name': name, 'time': release_time}


def save_mysql(a, b):
    """Insert one record (movie name *a*, release time *b*) into dbtest.

    Uses a parameterized query (no SQL injection), rolls back on
    failure, and always closes the cursor and the connection — the
    original leaked both and hid every error behind a bare ``except``.
    """
    connect = pymysql.connect(host='localhost', user='root', passwd='mm123456',
                              port=3306, db='python', charset='utf8')
    try:
        sql = 'insert into dbtest(name,time) VALUES(%s,%s)'
        try:
            with connect.cursor() as cursor:
                cursor.execute(sql, (a, b))
            connect.commit()
            print('数据插入成功')
        except pymysql.MySQLError as e:
            connect.rollback()  # keep the table consistent on failure
            print('数据插入失败')
            print(e)
    finally:
        connect.close()


def main():
    """Crawl every board page, then persist the collected records."""
    urls = [BASE_URL.format(i * 10) for i in range(1)]
    all_movies = []
    for url in urls:
        print(url)
        page = get_page(url)
        if page is None:
            # Skip failed fetches; the original passed None into
            # re.findall and crashed with a TypeError.
            continue
        all_movies.extend(get_info(page))

    for movie in all_movies:
        print(movie['name'])
        print(movie['time'])
        save_mysql(movie['name'], movie['time'])


if __name__ == '__main__':
    # Guarding the entry point lets the module be imported (e.g. for
    # testing) without firing HTTP requests or DB writes.
    main()