pandas批量插入mysql效率工具

单独遍历插入1w数据耗费十多分钟

import pandas as pd
import pymysql
import time




target_db = pymysql.connect(host='IP', port=3306, user='test', password='123456', db='test',
                     charset='utf8')

src_db = pymysql.connect(host='IP', port=3306, user='test', password='test', db='test',
                      charset='utf8')

time_start = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) # 记录当前时间
print('start time {}',time_start)
tar_cursor=target_db.cursor()
src_cursor = src_db.cursor()

sql="select * from new_news_info;"
src_cursor.execute(sql)
df=src_cursor.fetchall()

infodata = pd.DataFrame(df).values

for i in range(0, len(infodata)):  #
    data_each = []
    data_each = infodata[i]
    # print(data_each)
    sql = "insert into new_news_info values {}".format(tuple(data_each))
    # print(sql)
    try:
        tar_cursor.execute(sql)
        target_db.commit()
        # print(i)
    except:
        print('报错')
time_end = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) # 记录当前时间

print('>>> 共计耗费时间:{} ',time_end-time_start)
# src_df = pd.read_sql(sql, src_cursor)
tar_cursor.close()
target_db.close()

src_cursor.close()
src_db.close()

使用了executemany方法 1s插入


# 方式较快 1s

import pandas as pd
import pymysql
import time

table_name = 'new_vehicle_info'

target_db = pymysql.connect(host='IP', port=3306, user='test', password='123456', db='test',
                     charset='utf8')
src_db = pymysql.connect(host='IP', port=3306, user='test', password='123456', db='test',
                      charset='utf8')
tar_cursor=target_db.cursor()
time_start = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) # 记录当前时间
print("start time {}".format(time_start))
df=pd.read_sql("select * from {}".format(table_name), src_db)

time_start = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) # 记录当前时间
print('start time {}',time_start)

# 将列名称逗号分隔开
columns_delimat=','.join(df.columns.values)
# 这是%s的个数,记着多一个逗号
s_count=len(df.columns.values)* "%s,"
sql="insert into {}({}) values({})".format(table_name,columns_delimat, s_count[:-1])
# 这是为了将值搞成tuple的形式
data_list = [tuple(i) for i in df.values]
tar_cursor.executemany(sql, data_list)

time_end = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) # 记录当前时间

print('>>> 共计耗费时间:{} '.format(str(time_end)))

target_db.commit()
tar_cursor.close()

target_db.close()
src_db.close()


这是做的两个测试


from selenium import webdriver
import time
import re
import requests
import datetime
import openpyxl
import pyautogui
import pyperclip
from urllib.request import urlretrieve
from urllib import request
import pymysql
import pandas
from PIL import Image

# 测试代理是否可以使用的
proxies = {'http': '58.220.95.86:9401', 'https': '58.220.95.86:9401'}
try:
    resp = requests.get('https://httpbin.org/get', proxies=proxies,timeout=5)
    print(resp.status_code)
    if resp.status_code == 200:
        print('usefull')
    else:
        print('not usefull')
except:
    print('fail')
    
# 测试插入mysql()和tupple的差别,tupple只能传入一个参数,适合df的转换那种,()的参数么有限制
db = pymysql.connect(host='IP', port=3306, user='u_import', password='123456', db='dbname',
                     charset='utf8')
cursor = db.cursor()
save_list = []

save_list.append(('58.220.95.88:9401', '1'))
save_list.append(('58.220.95.88:9401', '0'))
sql = "insert into cyb_new_ip_port(ip_port,flag) values(%s,%s);"
cursor.executemany(sql, save_list)
db.commit()

cursor.close()
db.close()

posted @   堕落先锋  阅读(225)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· 地球OL攻略 —— 某应届生求职总结
· 周边上新:园子的第一款马克杯温暖上架
· Open-Sora 2.0 重磅开源!
· 提示词工程——AI应用必不可少的技术
· .NET周刊【3月第1期 2025-03-02】
点击右上角即可分享
微信分享提示