Loading

Scrapy连接MySQL

同步存储

import pymysql


class ScrapyMoviePipeline:
    def __init__(self):
        self.conn = pymysql.connect(
            host='127.0.0.1',
            user='root',
            passwd='admin@123',
            db='scrapy_movie'
        )
        self.cur = self.conn.cursor()

    def process_item(self, item, spider):
        sql = 'INSERT INTO tbl_scrapy (name,url) VALUES (%s,%s)'
        self.cur.execute(sql, (item["name"], item["url"]))
        self.conn.commit()

    def close_spider(self, spider):
        self.cur.close()
        self.conn.close()

异步存储

import pymysql
from twisted.enterprise import adbapi


class ScrapyMoviePipeline:
    def __init__(self, dbpool):
        self.dbpool = dbpool

    @classmethod
    def from_settings(cls, settings):  # 函数名固定,会被scrapy调用,直接可用settings的值
        """
        数据库建立连接
        :param settings: 配置参数
        :return: 实例化参数
        """
        params = dict(
            host=settings['MYSQL_HOST'],
            db=settings['MYSQL_DBNAME'],
            user=settings['MYSQL_USER'],
            password=settings['MYSQL_PASSWORD'],
            cursorclass=pymysql.cursors.DictCursor  # 指定cursor类型
        )

        # 连接数据池ConnectionPool,使用pymysql或者Mysqldb连接
        dbpool = adbapi.ConnectionPool('pymysql', **params)

        # 返回实例化参数
        return cls(dbpool)

    def process_item(self, item, spider):
        """
        使用twisted将MySQL插入变成异步执行。通过连接池执行具体的sql操作,返回一个对象
        """
        query = self.dbpool.runInteraction(self.do_database, item)  # 指定操作方法和操作数据
        # 添加异常处理
        query.addCallback(self.handle_error)  # 处理异常

    def do_database(self, cursor, item):
        # 定义sql语句
        insert_sql = 'insert into tbl_scrapy (name,url) values (%s,%s)'
        select_sql = 'select * from tbl_scrapy where name="%s" and url="%s"' % (item["name"], item["url"])
        rep = cursor.execute(select_sql)
        # 判断数据库中是否存在该数据,没有则插入
        if rep == 0:
            cursor.execute(insert_sql, (item["name"], item["url"]))

    def handle_error(self, failure):
        if failure:
            # 打印错误信息
            print(failure)
posted @ 2021-10-30 12:28  北兢王  阅读(344)  评论(0)    收藏  举报