scrapy异步向mysql插入数据
我们采用twisted给我们提供的adbapi下的connectionpool连接池,把插入数据的动作丢进连接池里,这样往数据库插入数据的效率会相对高一点
import pymysql
from twisted.enterprise import adbapi
from pymysql import cursors
class JianshuTwistedPipeline(object):
    """Scrapy pipeline that inserts items into MySQL asynchronously.

    Inserts are handed to Twisted's ``adbapi.ConnectionPool`` so they run
    on a thread-pool connection instead of blocking the reactor, which
    makes writes noticeably faster under load.
    """

    def __init__(self):
        # Connection parameters forwarded to the pymysql driver.
        dbparams = {
            'host': '127.0.0.1',
            'port': 3306,
            'user': 'root',
            'password': 'root',
            'database': 'jianshu2',
            'charset': 'utf8',
            'cursorclass': cursors.DictCursor,
        }
        self.dbpool = adbapi.ConnectionPool('pymysql', **dbparams)
        # Cached INSERT statement; built lazily by the `sql` property.
        self._sql = None

    @property
    def sql(self):
        """Lazily build and cache the parameterized INSERT statement."""
        if not self._sql:
            self._sql = """
                insert into article(id,title,content) values(null,%s,%s)
            """
        return self._sql

    def process_item(self, item, spider):
        # Turn the synchronous insert into an asynchronous one: schedule
        # it on the connection pool and attach an error handler.
        defer = self.dbpool.runInteraction(self.insert_item, item)
        defer.addErrback(self.handle_error, item, spider)
        # Return the item so later pipelines still receive it.
        return item

    def insert_item(self, cursor, item):
        # Runs in a pool thread with a live cursor supplied by
        # runInteraction; values are bound by the driver (no manual
        # escaping / SQL injection risk).
        cursor.execute(self.sql, (item['title'], item['content']))

    def handle_error(self, error, item, spider):
        # Errback for failed inserts: print the failure so it is not
        # silently swallowed.
        print('=' * 10 + 'error' + '=' * 10)
        print(error)
        print('=' * 10 + 'error' + '=' * 10)
# Register the pipeline in settings.py.
# The key is '<project>.pipelines.<PipelineClass>'; the value is the
# priority (0-1000, lower runs first). Note: the setting name must be
# spelled ITEM_PIPELINES and must be a dict, not a set, or Scrapy
# silently ignores it.
ITEM_PIPELINES = {
    'jianshu_spider.pipelines.JianshuTwistedPipeline': 300,
}