Scrapy 异步向 MySQL 插入数据

我们采用 Twisted 提供的 twisted.enterprise.adbapi 模块中的 ConnectionPool(连接池):把插入数据的操作通过 runInteraction 交给连接池,由连接池在线程中执行,这样数据库写入不会阻塞 Scrapy 的事件循环,插入数据的效率也会相对高一些。

pipelines.py

import pymysql
from twisted.enterprise import adbapi
from pymysql import cursors
class JianshuTwistedPipeline(object):
    """Scrapy item pipeline that inserts articles into MySQL asynchronously.

    Inserts are handed to a Twisted ``adbapi.ConnectionPool`` via
    ``runInteraction`` so that ``process_item`` does not wait for the
    database write to finish.
    """

    # Cached INSERT statement. Declared at class level so the ``sql``
    # property never hits an AttributeError on first access.
    _sql = None

    def __init__(self):
        """Create the connection pool used for every insert."""
        # NOTE(review): credentials are hard-coded as in the original
        # example; consider loading them from the project settings.
        dbparams = {
            'host': '127.0.0.1',
            'port': 3306,
            'user': 'root',
            'password': 'root',
            'database': 'jianshu2',
            'charset': 'utf8',
            'cursorclass': cursors.DictCursor,
        }
        # Build the pool exactly once, with the connection parameters.
        self.dbpool = adbapi.ConnectionPool('pymysql', **dbparams)

    @property
    def sql(self):
        """Return the parameterized INSERT statement, building it once."""
        if not self._sql:
            self._sql = (
                "insert into article(id,title,content) "
                "values(null,%s,%s)"
            )
        return self._sql

    def process_item(self, item, spider):
        """Schedule the insert of ``item`` and pass the item on.

        Args:
            item: mapping with ``'title'`` and ``'content'`` keys.
            spider: the spider that produced the item; only forwarded to
                the error handler.

        Returns:
            The same ``item``, so that later pipelines still receive it.
        """
        # Turn the synchronous insert into an asynchronous one.
        defer = self.dbpool.runInteraction(self.insert_item, item)
        # Report failures instead of letting them pass silently.
        defer.addErrback(self.handle_error, item, spider)
        return item

    def insert_item(self, cursor, item):
        """Execute the INSERT for ``item`` on the given ``cursor``.

        Called by ``runInteraction`` with the cursor as first argument,
        followed by the extra arguments given in ``process_item``.
        """
        cursor.execute(self.sql, (item['title'], item['content']))

    def handle_error(self, error, item, spider):
        """Errback: print the failure between two separator lines."""
        print('='*10+'error'+'='*10)
        print(error)
        print('='*10+'error'+'='*10)

settings.py

# Key format: <project name>.pipelines.<pipeline class name>.
# The value is the pipeline's order: lower numbers run first (0-1000).
ITEM_PIPELINES = {
    'jianshu_spider.pipelines.JianshuTwistedPipeline': 300,
}
posted @ 2019-06-22 21:08  不会玩python  阅读(9)  评论(0)  编辑  收藏  举报