python-pymysql 操作数据库-创建-写入-多线程写入-读取-清空表
python-pymysql 操作数据库
创建数据库-表 #导入pymysql import pymysql # 创建连接 DBHOST = 'localhost' DBUSER = 'root' DBPASS = 'lxl123456' # DBNAME = 'test' # pymysql的接口获取链接 def mysql_conn(host, user, password): # 传参版本 try: conn = pymysql.connect(host=host, user=user, password=password) print('数据库连接成功!') except pymysql.Error as e: print("数据库连接失败:" + str(e)) return conn # 先创建cursor负责操作conn接口 conn = mysql_conn(DBHOST, DBUSER, DBPASS) cursor=conn.cursor() #设置游标 # 创建数据库的sql(如果数据库存在就不创建,防止异常) sql = "CREATE DATABASE IF NOT EXISTS test" # 执行创建数据库的sql cursor.execute(sql) cursor.execute("use test") # 创建表 sql_2 = '''CREATE TABLE `express2` ( `运单号` bigint NOT NULL AUTO_INCREMENT, `物流单号` VARCHAR(80), `物流宝单号` VARCHAR(80), `行业` VARCHAR(80), `货主` VARCHAR(80), `是否保价` VARCHAR(40), `是否催派` VARCHAR(40), `是否工单发起` VARCHAR(40), `是否预售下沉` VARCHAR(40), `停滞状态` VARCHAR(80), `停滞时长` INT, `停滞要求时长` INT, `是否超停滞要求时长` VARCHAR(80), `物流停滞节点` VARCHAR(80), `发货大区` VARCHAR(80), `发货省` VARCHAR(80), `仓code` VARCHAR(80), `仓库` VARCHAR(80), `配送公司` VARCHAR(150), `配送cp` VARCHAR(150), `配送类型` VARCHAR(80), `配送大区` VARCHAR(80), `收货大区` VARCHAR(80), `收货省` VARCHAR(80), `收货市` VARCHAR(80), `收货区县` VARCHAR(80), `区域类型` VARCHAR(80), `一级分拨` VARCHAR(80), `二级分拨` VARCHAR(80), `末分拨` VARCHAR(80), `网点id` VARCHAR(80), `网点` VARCHAR(80), `支付时间` VARCHAR(80), `创建时间` VARCHAR(80), `接单时间` VARCHAR(80), `出库时间` VARCHAR(80), `入交接区时间` VARCHAR(80), `揽收时间` VARCHAR(80), `一级分拨入时间` VARCHAR(80), `一级分拨出时间` VARCHAR(80), `二级分拨入时间` VARCHAR(80), `二级分拨出时间` VARCHAR(80), `末分拨入时间` VARCHAR(80), `末分拨出时间` VARCHAR(80), `网点入时间` VARCHAR(80), `领件时间` VARCHAR(80), `配送成功时间` VARCHAR(80), `配送应签时间` VARCHAR(80), PRIMARY KEY (`运单号`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8; ''' cursor.execute(sql_2) #关闭连接和游标 conn.close() cursor.close()
写入数据库 #导入类 import numpy as np import pandas as pd import pymysql info = pd.read_excel(r'/Users/linxianli/Desktop/数据.xlsx') info.shape ''' (443378, 48) ''' # 将 NAN 转化为 None info = info.astype(object).where(pd.notnull(info), None) # 创建连接 DBHOST = 'localhost' DBUSER = 'root' DBPASS = 'lxl123456' DBNAME = 'test' # pymysql的接口获取链接 def mysql_conn(host, user, password, db, port=3306, charset="utf8"): # 传参版本 try: conn = pymysql.connect(host=host, user=user, password=password, database=db, port=port, charset=charset) print('数据库连接成功!') except pymysql.Error as e: print("数据库连接失败:" + str(e)) return conn def write_database(): # 先创建cursor负责操作conn接口 conn = mysql_conn(DBHOST, DBUSER, DBPASS, DBNAME) cursor=conn.cursor() #设置游标 # # 开启事务 conn.begin() # 构造符合sql语句的列,因为sql语句是带有逗号分隔的,(这个对应上面的sql语句的(column1, column2, column3)) columns = ','.join(list(info.columns)) # 构造每个列对应的数据,对应于上面的((value1, value2, value3)) data_list = [tuple(i) for i in info.values] # 每个元组都是一条数据,根据df行数生成多少元组数据 # 计算一行有多少value值需要用字符串占位 s_count = len(data_list[0]) * "%s," # 构造sql语句 insert_sql = "insert into " + "express2" + " (" + columns + ") values (" + s_count[:-1] + ")" try: res = cursor.executemany(insert_sql, data_list) print('执行sql受影响的行数:',res) conn.commit() cursor.close() conn.close() except Exception as e: # 万一失败了,要进行回滚操作 conn.rollback() cursor.close() conn.close() start_time = time.time() write_database() stop_time = time.time() print('run time is %s' % (stop_time - start_time)) ''' 数据库连接成功! 执行sql受影响的行数: 443378 run time is 96.30252575874329 '''
多线程写入数据库 import numpy as np import pandas as pd import time, requests import pymysql from concurrent.futures import ProcessPoolExecutor info = pd.read_excel(r'/Users/linxianli/Desktop/数据.xlsx') # 将 NAN 转化为 None info = info.astype(object).where(pd.notnull(info), None) # 创建连接 DBHOST = 'localhost' DBUSER = 'root' DBPASS = 'lxl123456' DBNAME = 'test' # pymysql的接口获取链接 def mysql_conn(host, user, password, db, port=3306, charset="utf8"): # 传参版本 try: conn = pymysql.connect(host=host, user=user, password=password, database=db, port=port, charset=charset) print('数据库连接成功!') except pymysql.Error as e: print("数据库连接失败:" + str(e)) return conn def data_handler(info): conn = mysql_conn(DBHOST, DBUSER, DBPASS, DBNAME) cursor=conn.cursor() #设置游标 # # 开启事务 conn.begin() # 构造符合sql语句的列,因为sql语句是带有逗号分隔的,(这个对应上面的sql语句的(column1, column2, column3)) columns = ','.join(list(info.columns)) # 构造每个列对应的数据,对应于上面的((value1, value2, value3)) data_list = [tuple(i) for i in info.values] # 每个元组都是一条数据,根据df行数生成多少元组数据 # 计算一行有多少value值需要用字符串占位 s_count = len(data_list[0]) * "%s," # 构造sql语句 insert_sql = "insert into " + "express2" + " (" + columns + ") values (" + s_count[:-1] + ")" try: res = cursor.executemany(insert_sql, data_list) print('执行sql受影响的行数:',res) conn.commit() cursor.close() conn.close() except Exception as e: # 万一失败了,要进行回滚操作 conn.rollback() cursor.close() conn.close() def run(): urls = [info[0:100000],info[100000:200000],info[200000:300000],info[300000:450000]] with ProcessPoolExecutor() as excute: excute.map(data_handler,urls) ##ProcessPoolExecutor 提供的map函数,可以直接接受可迭代的参数,并且结果可以直接for循环取出 start_time = time.time() run() stop_time = time.time() print('run time is %s' % (stop_time - start_time)) ''' 数据库连接成功! 数据库连接成功! 数据库连接成功! 数据库连接成功! 执行sql受影响的行数: 100000 执行sql受影响的行数: 100000 执行sql受影响的行数: 100000 执行sql受影响的行数: 143378 run time is 57.68310880661011 '''
查询数据库表数据-清空表数据 # 导入类 import numpy as np import pandas as pd import pymysql # 创建连接 DBHOST = 'localhost' DBUSER = 'root' DBPASS = 'lxl123456' DBNAME = 'test' # pymysql的接口获取链接 def mysql_conn(host, user, password, db, port=3306, charset="utf8"): # 传参版本 try: conn = pymysql.connect(host=host, user=user, password=password, database=db,port=port, charset=charset) print('数据库连接成功!') except pymysql.Error as e: print("数据库连接失败:" + str(e)) return conn # 先创建cursor负责操作conn接口 conn = mysql_conn(DBHOST, DBUSER, DBPASS, DBNAME) cursor=conn.cursor() #设置游标 def mysql(sql): cursor.execute(sql) #执行查询语句 jieguo=cursor.fetchall() #查看全部查询结果 cols=cursor.description #类似 desc table_name返回结果 col=[] #创建一个空列表以存放列名 for v in cols: col.append(v[0]) #循环提取列名,并添加到col空列表 dfsql=pd.DataFrame(jieguo,columns=col) #将查询结果转换成DF结构,并给列重新赋值 # if dfsql.empty: # return 'empty set' #判断查询结果为空时返回的值 # else: return dfsql #以DF结构返回查询结构,DF.to_excel...导出查询结果时可以带列名,这样就解决了mysql直接导出结果无列名的问题 df = mysql(''' select * from express2 ''') conn.close() cursor.close() df.shape ''' (443378, 48) ''' # 清空表数据 # 先创建cursor负责操作conn接口 conn = mysql_conn(DBHOST, DBUSER, DBPASS, DBNAME) cursor=conn.cursor() #设置游标 sql = '''TRUNCATE TABLE `express2`;''' cursor.execute(sql) print('清空表数据成功!') conn.close() cursor.close()
'''
数据库连接成功!
清空表数据成功!
'''
代码还有很多地方可以优化的地方可以慢慢调整
参考文档:
https://blog.csdn.net/weixin_42796152/article/details/107931768
https://blog.csdn.net/blog_liuliang/article/details/78724910
请你一定不要停下来 成为你想成为的人
感谢您的阅读,我是LXL