Scrapy用Pipeline写入MySQL

编辑`pipelines.py`，添加自定义pipelines类：

# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html


# class HongxiuPipeline(object):
#     def process_item(self, item, spider):
#         return item
import datetime
from twisted.enterprise import adbapi


class HongxiuMysqlPipeline(object):
    """Scrapy item pipeline that inserts each item into MySQL asynchronously.

    Inserts are submitted to a Twisted ``adbapi.ConnectionPool`` so that the
    database call does not run inline in ``process_item``.
    """

    # Connection parameters. They are filled in per instance by
    # ``from_crawler``; the class-level values are only placeholders.
    HOST = None
    PORT = None
    USER = None
    PASSWD = None

    @classmethod
    def from_crawler(cls, crawler):
        """Build a pipeline configured from the project settings.

        Args:
            crawler: object whose ``settings.get(name)`` returns the values of
                ``MYSQL_HOST``, ``MYSQL_PORT``, ``MYSQL_USER`` and
                ``MYSQL_PASSWORD``.

        Returns:
            A new pipeline instance carrying those connection parameters.
        """
        settings = crawler.settings
        pipeline = cls()
        # Store the values on the instance rather than on the class, so that
        # building one pipeline does not alter other instances or subclasses.
        # MYSQL_DB_NAME is not read here: the INSERT statement in insert_db
        # qualifies the table name with the database name instead.
        pipeline.HOST = settings.get("MYSQL_HOST")
        pipeline.PORT = settings.get("MYSQL_PORT")
        pipeline.USER = settings.get("MYSQL_USER")
        pipeline.PASSWD = settings.get("MYSQL_PASSWORD")
        return pipeline

    def open_spider(self, spider):
        """Create the connection pool when the spider is opened."""
        self.dbpool = adbapi.ConnectionPool(
            'pymysql',
            host=self.HOST,
            port=self.PORT,
            user=self.USER,
            passwd=self.PASSWD,
            charset='utf8',
        )

    def process_item(self, item, spider):
        """Submit the insert for ``item`` and return the item unchanged.

        The item is returned immediately; the insert completes later inside
        the pool.
        """
        deferred = self.dbpool.runInteraction(self.insert_db, item)
        # Without an errback a failed insert would never reach handle_error.
        deferred.addErrback(self.handle_error)
        return item

    def handle_error(self, failure):
        """Report an exception raised by an asynchronous insert."""
        print(failure)

    def close_spider(self, spider):
        """Close the connection pool when the spider is closed."""
        self.dbpool.close()

    def insert_db(self, cur, item):
        """Execute the INSERT for one item on the given cursor.

        Args:
            cur: cursor-like object exposing ``execute(sql, params)``.
            item: mapping providing ``book_id``, ``book_name``,
                ``book_author``, ``book_type``, ``tag``, ``brief`` and
                ``website``.

        Raises:
            KeyError: if ``item`` lacks one of the fields listed above.
        """
        # The first and last columns are sent as NULL.
        # NOTE(review): presumably an auto-increment id and a timestamp
        # column filled in by the database -- confirm against the table.
        values = (
            None,
            item['book_id'],
            item['book_name'],
            item['book_author'],
            item['book_type'],
            item['tag'],
            item['brief'],
            item['website'],
            None,
        )
        # Derive the placeholder list from the tuple so the two cannot drift.
        placeholders = ','.join(['%s'] * len(values))
        sql = 'INSERT INTO 库名.表名 VALUES (' + placeholders + ')'
        cur.execute(sql, values)

接着在`settings.py`中写入相关配置参数，并将该pipeline添加至`ITEM_PIPELINES`中：

# MySQL connection parameters read by HongxiuMysqlPipeline.from_crawler.
# MYSQL_DB_NAME is defined here but is not currently read by the pipeline.
MYSQL_DB_NAME = 'scrapy_db'
MYSQL_HOST = 'localhost'
MYSQL_PORT = 3306
MYSQL_USER = 'root'
MYSQL_PASSWORD = 'new.1234'

# The key must be the import path of the pipeline class defined in
# pipelines.py, otherwise Scrapy cannot load it.
# NOTE(review): the package name `hongxiu` is assumed from the template class
# name HongxiuPipeline -- replace it with your actual project package.
ITEM_PIPELINES = {
    'hongxiu.pipelines.HongxiuMysqlPipeline': 400,
}

posted @ 2020-01-10 16:39 小君~ 阅读(1189) 评论(0) 收藏举报

刷新页面返回顶部

Scrapy用Pipeline写入MySQL

编辑pipelines.py，添加自定义pipelines类：

接着在settings.py中写入相关配置参数，添加至item_pipelines中：

公告

编辑`pipelines.py`，添加自定义pipelines类：

接着在`settings.py`中写入相关配置参数，添加至item_pipelines中：