爬虫文件存储-1:mysql
1.连接并创建数据库
import pymysql db = pymysql.connect(host='localhost', user='root', password='root', port=3306) cursor = db.cursor() cursor.execute('SELECT VERSION()') data = cursor.fetchone() print('Database version:', data) cursor.execute("CREATE DATABASE spiders DEFAULT CHARACTER SET utf8") db.close()
执行结果:
Database version: ('5.7.23',)
结果是创建一个名为spiders的数据库
2.创建数据表
注意:这次连接数据库时需要指定创建数据表所在的数据库,也就是db参数的值
import pymysql db = pymysql.connect(host='localhost', user='root', password='root', port=3306, db='spiders') cursor = db.cursor() sql = 'CREATE TABLE IF NOT EXISTS students (id VARCHAR(255) NOT NULL, name VARCHAR(255) NOT NULL, age INT NOT NULL, PRIMARY KEY (id))' cursor.execute(sql) db.close()
3.插入一条数据
import pymysql db = pymysql.connect(host='localhost', user='root', password='root', port=3306, db='spiders') cursor = db.cursor() id = '20120001' user = 'Bob' age = 20 sql = 'INSERT INTO students(id, name, age) values(%s, %s, %s)' try: cursor.execute(sql, (id, user, age)) db.commit() except: db.rollback() db.close()
通用写法:
这样一来,若是写入更多值的话直接给data字典增加数据就行了,就不用其他sql语句了,推荐使用
import pymysql db = pymysql.connect(host='localhost', user='root', password='root', port=3306, db='spiders') cursor = db.cursor() table = 'students' data = { 'id': '20120002', 'name': 'Bob11', 'age': 200 } keys = ', '.join(data.keys()) values = ', '.join(['%s'] * len(data)) sql = 'INSERT INTO {table} ({keys}) VALUES ({values})'.format(table=table, keys=keys, values=values) try: if cursor.execute(sql, tuple(data.values())): print('Successful') db.commit() except: print('Failed') db.rollback() db.close()
4.更新数据
import pymysql db = pymysql.connect(host='localhost', user='root', password='root', port=3306, db='spiders') cursor = db.cursor() sql = 'UPDATE students SET age = %s WHERE name = %s' try: cursor.execute(sql, (25, 'Bob')) db.commit() except: db.rollback() db.close()
通用写法:
根据主键进行判断,若主键存在则更新,若不存在则插入,推荐使用
import pymysql db = pymysql.connect(host='localhost', user='root', password='root', port=3306, db='spiders') cursor = db.cursor() table = 'students' data = { 'id': '20120001', 'name': 'Bob', 'age': 21 } keys = ', '.join(data.keys()) values = ', '.join(['%s'] * len(data)) sql = 'INSERT INTO {table}({keys}) VALUES ({values}) ON DUPLICATE KEY UPDATE'.format(table=table, keys=keys, values=values) update = ','.join([" {key} = %s".format(key=key) for key in data]) sql += update try: if cursor.execute(sql, tuple(data.values()) * 2): print('Successful') db.commit() except: print('Failed') db.rollback() db.close()
5.删除数据
import pymysql db = pymysql.connect(host='localhost', user='root', password='root', port=3306, db='spiders') cursor = db.cursor() table = 'students' condition = 'age > 20' sql = 'DELETE FROM {table} WHERE {condition}'.format(table=table, condition=condition) try: cursor.execute(sql) db.commit() except: db.rollback() db.close()
6.查询数据
import pymysql db = pymysql.connect(host='localhost', user='root', password='root', port=3306, db='spiders') cursor = db.cursor() sql = 'SELECT * FROM students WHERE age >= 20' try: cursor.execute(sql) print('Count:', cursor.rowcount) row = cursor.fetchone() while row: print('Row:', row) row = cursor.fetchone() except: print('Error')