Scrapy 数据存储到 MySQL 和 MongoDB
pipelines.py
将数据存储到mysql数据库中
import pymysql


class HuVPipeline(object):
    """Scrapy item pipeline that stores each item as one row of the ``hux`` table."""

    def process_item(self, item, spider):
        """Insert the item's four fields into MySQL and pass the item on.

        Args:
            item: Mapping that provides the ``title``, ``username``,
                ``yijuhua`` and ``otherStyleTime`` keys.
            spider: The spider that produced the item (unused here).

        Returns:
            The same ``item``, unmodified.

        Raises:
            Whatever ``item[...]`` raises for a missing key (``KeyError``
            for a dict), or any error raised by the database driver.
        """
        # Read the fields first so an incomplete item fails before a
        # connection is opened.
        row = (
            item['title'],
            item['username'],
            item['yijuhua'],
            item['otherStyleTime'],
        )

        # Connect to the database.
        conn = pymysql.connect(host='localhost', user='root', password='******', port=3306, db='mysql')
        try:
            # The cursor is closed automatically when the block exits.
            with conn.cursor() as cursor:
                # Values are passed separately as a tuple (str, str, str, str)
                # so the driver handles quoting.
                cursor.execute("""INSERT INTO hux VALUES (%s, %s, %s, %s)""", row)
            conn.commit()
        finally:
            # Close the connection even when the insert fails, so a bad
            # item does not leak a connection.
            conn.close()
        return item
更简单便捷的方式:在 SQL 语句中使用命名占位符,直接用 item 的字段填充
import pymysql


class YiqingPipeline(object):
    """Scrapy item pipeline that upserts each item into ``umetrip_risk``."""

    def process_item(self, item, spider):
        """Insert the item, or update the existing row on a duplicate key.

        Args:
            item: Mapping that provides the ``province``, ``city``, ``area``,
                ``street``, ``risk``, ``updatetime`` and ``parsetime`` keys.
            spider: The spider that produced the item (unused here).

        Returns:
            The same ``item``, unmodified.

        Raises:
            KeyError: If ``item`` lacks one of the named placeholders.
            Any error raised by the database driver.
        """
        # Named placeholders are left unquoted: the driver escapes and quotes
        # each value itself. Building the statement with ``sql % item`` and
        # hand-written quotes allowed SQL injection and broke on any scraped
        # value containing a double quote.
        # NOTE: a ``None`` value is now sent as SQL NULL instead of the
        # literal text "None".
        sql = """
            INSERT INTO umetrip_risk
                (province, city, area, street, risk, updatetime, parsetime)
            VALUES
                (%(province)s, %(city)s, %(area)s, %(street)s,
                 %(risk)s, %(updatetime)s, %(parsetime)s)
            ON DUPLICATE KEY UPDATE
                street = %(street)s,
                risk = %(risk)s,
                updatetime = %(updatetime)s,
                parsetime = %(parsetime)s
        """

        # Connect to the database.
        conn = pymysql.connect(host='localhost', user='root', password='******', port=3306, db='db55')
        try:
            # The cursor is closed automatically when the block exits.
            with conn.cursor() as cursor:
                # The driver only accepts a real dict for named parameters,
                # so convert the item explicitly.
                cursor.execute(sql, dict(item))
            conn.commit()
        finally:
            # Close the connection even when the statement fails.
            conn.close()
        return item
进入 D:\mongo\bin 目录,输入以下命令启动 MongoDB 服务:
mongod --dbpath D:\mongo\data\db
将数据存放到mongodb数据库中
无密码连接
import pymongo


class HuVPipeline(object):
    """Scrapy item pipeline that stores each item in the ``hu_1`` collection."""

    def __init__(self):
        # Connect to MongoDB; no address or credentials are given, so the
        # driver's defaults are used.
        self.client = pymongo.MongoClient()
        # Select the ``huxiuv3`` database.
        self.db = self.client['huxiuv3']

    def process_item(self, item, spider):
        """Insert a copy of the item into ``hu_1`` and pass the item on.

        Args:
            item: The scraped item; it must be convertible with ``dict()``.
            spider: The spider that produced the item (unused here).

        Returns:
            The same ``item``, unmodified.
        """
        # ``Collection.insert`` was deprecated in PyMongo 3 and removed in
        # PyMongo 4; ``insert_one`` is the supported single-document call.
        # A dict copy is inserted so the item itself is left untouched.
        self.db['hu_1'].insert_one(dict(item))
        return item

    def close_spider(self, spider):
        """Close the MongoDB client; Scrapy calls this when the spider closes."""
        self.client.close()
import pymysql
class HuVPipeline(object):
    """Scrapy item pipeline that stores each item as one row of the ``hux`` table."""

    def process_item(self, item, spider):
        """Insert the item's four fields into MySQL and pass the item on.

        Args:
            item: Mapping that provides the ``title``, ``username``,
                ``yijuhua`` and ``otherStyleTime`` keys.
            spider: The spider that produced the item (unused here).

        Returns:
            The same ``item``, unmodified.

        Raises:
            Whatever ``item[...]`` raises for a missing key (``KeyError``
            for a dict), or any error raised by the database driver.
        """
        # Read the fields first so an incomplete item fails before a
        # connection is opened.
        row = (
            item['title'],
            item['username'],
            item['yijuhua'],
            item['otherStyleTime'],
        )

        # Connect to the database.
        # NOTE(review): credentials are hard-coded here; consider reading
        # them from the project settings instead.
        conn = pymysql.connect(host='localhost', user='root', password='123456', port=3306, db='mysql')
        try:
            # The cursor is closed automatically when the block exits.
            with conn.cursor() as cursor:
                # Values are passed separately as a tuple (str, str, str, str)
                # so the driver handles quoting.
                cursor.execute("""INSERT INTO hux VALUES (%s, %s, %s, %s)""", row)
            conn.commit()
        finally:
            # Close the connection even when the insert fails, so a bad
            # item does not leak a connection.
            conn.close()
        return item