Scrapy用Pipeline写入MySQL
编辑 pipelines.py,添加自定义 Pipeline 类:
# -*- coding: utf-8 -*-
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
# class HongxiuPipeline(object):
# def process_item(self, item, spider):
# return item
import datetime
from twisted.enterprise import adbapi
class HongxiuMysqlPipeline(object):
    """Scrapy item pipeline that writes each item to MySQL asynchronously.

    Inserts are dispatched through a Twisted ``adbapi.ConnectionPool`` so
    that database I/O does not block the crawl. Connection parameters are
    read from the project settings (``MYSQL_HOST``, ``MYSQL_PORT``,
    ``MYSQL_USER``, ``MYSQL_PASSWORD``).
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline, reading connection parameters from settings.

        Args:
            crawler: object exposing the project configuration as
                ``crawler.settings``.

        Returns:
            A new pipeline instance.
        """
        # NOTE: MYSQL_DB_NAME is not read here; the pool is opened without a
        # default database and the INSERT statement in ``insert_db``
        # qualifies the table name with the database name instead.
        cls.HOST = crawler.settings.get("MYSQL_HOST")
        # getint() converts string values (e.g. overrides passed with
        # ``-s MYSQL_PORT=3306``) to int; fall back to MySQL's default port.
        cls.PORT = crawler.settings.getint("MYSQL_PORT", 3306)
        cls.USER = crawler.settings.get("MYSQL_USER")
        cls.PASSWD = crawler.settings.get("MYSQL_PASSWORD")
        return cls()

    def open_spider(self, spider):
        """Create the connection pool when the spider is opened."""
        self.dbpool = adbapi.ConnectionPool(
            'pymysql',
            host=self.HOST,
            port=self.PORT,
            user=self.USER,
            passwd=self.PASSWD,
            charset='utf8',
        )

    def process_item(self, item, spider):
        """Schedule an asynchronous insert for ``item`` and pass it on.

        Args:
            item: the scraped item; forwarded to ``insert_db``.
            spider: the spider that produced the item (unused).

        Returns:
            The same ``item``, unchanged, for later pipelines.
        """
        deferred = self.dbpool.runInteraction(self.insert_db, item)
        # Without an errback a failed insert would never reach handle_error.
        deferred.addErrback(self.handle_error)
        return item

    def handle_error(self, failure):
        """Report an exception raised by an asynchronous insert."""
        print(failure)

    def close_spider(self, spider):
        """Close the connection pool when the spider is closed."""
        self.dbpool.close()

    def insert_db(self, cur, item):
        """Insert one item as a row using the given cursor.

        Called by ``runInteraction`` with a cursor as first argument.

        Args:
            cur: cursor-like object providing ``execute(sql, params)``.
            item: mapping with the keys ``book_id``, ``book_name``,
                ``book_author``, ``book_type``, ``tag``, ``brief`` and
                ``website``.

        Raises:
            KeyError: if ``item`` lacks one of the required keys.
        """
        # The first and last columns are sent as NULL -- presumably an
        # auto-increment id and a column filled in by the database;
        # verify against the table schema.
        values = (
            None,
            item['book_id'],
            item['book_name'],
            item['book_author'],
            item['book_type'],
            item['tag'],
            item['brief'],
            item['website'],
            None,
        )
        # One %s placeholder per value, so the two can never get out of sync.
        # '库名.表名' is the article's "database.table" placeholder.
        placeholders = ','.join(['%s'] * len(values))
        sql = 'INSERT INTO 库名.表名 VALUES (' + placeholders + ')'
        cur.execute(sql, values)
接着在 settings.py 中写入相关配置参数,并将该 Pipeline 添加至 ITEM_PIPELINES 中:
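MYSQL_DB_NAME = 'scrapy_db'
MYSQL_HOST = 'localhost'
MYSQL_PORT = 3306
MYSQL_USER = 'root'
MYSQL_PASSWORD = 'new.1234'

# The key must be "<project_name>.pipelines.<PipelineClassName>"; for the class
# defined above the class name is HongxiuMysqlPipeline -- adjust the path to match.
ITEM_PIPELINES = {
    'toscrape_book.pipelines.MySQLPipeline': 400,
}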
本文章仅供学习参考,如有版权侵犯,请联系作者修改,转载请注明出处!