用 Python3 获取线上所有 MySQL 实例中行数大于 1000 万的表的信息
基于其他博主改编
1.获取所有数据库表信息以ip.json格式保存到当前目录
#!/usr/bin/env python3 # coding: utf-8 import pymysql import json hostList=["192.168.1.2","192.168.1.3"] for h in hostList: conn = pymysql.connect( host=h, # mysql ip地址 user="root", passwd="passwd", port=3306, # mysql 端口号,注意:必须是int类型 connect_timeout = 3 # 超时时间 ) cur = conn.cursor() # 创建游标 # 获取mysql中所有数据库 cur.execute('SHOW DATABASES') data_all = cur.fetchall() # 获取执行的返回结果 # print(data_all) dic = {} # 大字典,第一层 for i in data_all: if i[0] not in dic: # 判断库名不在dic中时 # 排序列表,排除mysql自带的数据库 exclude_list = ["sys", "information_schema", "mysql", "performance_schema"] if i[0] not in exclude_list: # 判断不在列表中时 # 写入第二层数据 dic[i[0]] = {'name': i[0], 'table_list': []} ta=i[0] cur.execute("SELECT CONCAT(table_schema,'.',table_name) AS 'Table Name', CONCAT(ROUND(table_rows/100000000,2)) AS 'Number of Rows', CONCAT(ROUND(data_length/(1024*1024*1024),2),'G') AS 'Data Size', CONCAT(ROUND(index_length/(1024*1024*1024),2),'G') AS 'Index Size' , CONCAT(ROUND((data_length+index_length)/(1024*1024*1024),2),'G') AS 'Total' FROM information_schema.TABLES where table_schema='%s' order by ABS(Total) desc;"%ta) ret = cur.fetchall() for k in ret: table_name=k[0].split('.')[1] print({'cdb_name_ip':h,'db_name':i[0],'tname': table_name, 'rows': k[1],'table_size':k[2],'index_size':k[3],'db_size':k[4]}) dic[i[0]]['table_list'].append({'cdb_name_ip':h,'tname': table_name, 'rows': k[1],'table_size':k[2],'index_size':k[3],'db_size':k[4]}) with open('%s.json'%h,'w',encoding='utf-8') as f: f.write(json.dumps(dic))
2.遍历当前目录下ip.json格式的文件,并将大于1000万的表行数记录在trows.xls中
#!/usr/bin/env python3
# coding: utf-8
"""Aggregate the per-host ``<ip>.json`` files produced by step 1 and write
every table with more than 10 million rows into ``trows.xls``.

The 'rows' field is in units of 100 million (yi), so the threshold
10 million corresponds to 0.1.
"""
import json

import xlwt

HEADER = ["cdb_name_ip", "db_name", "tname", "rows",
          "table_size", "index_size", "db_size"]
hostList = ["192.168.1.2", "192.168.1.3"]

workbook = xlwt.Workbook()
sheet1 = workbook.add_sheet('统计', cell_overwrite_ok=True)

# Header row (row 0) — written once, not once per host as before.
for col, title in enumerate(HEADER):
    sheet1.write(0, col, title)

row = 1  # next worksheet row to fill
for h in hostList:
    with open("%s.json" % h, 'r', encoding='utf-8') as load_f:
        load_dict = json.load(load_f)
    # Iterate databases in sorted key order for a stable report
    # (the intermediate OrderedDict of the original was redundant).
    for db_name in sorted(load_dict):
        for j in load_dict[db_name]["table_list"]:
            if j['rows'] is None:
                continue
            # 'rows' is in units of 100 million; 0.1 == 10 million rows.
            if float(j['rows']) > 0.1:
                sheet1.write(row, 0, j['cdb_name_ip'])  # host IP
                sheet1.write(row, 1, db_name)           # database name
                sheet1.write(row, 2, j['tname'])        # table name
                sheet1.write(row, 3, j['rows'])         # row count (0.1 bn units)
                sheet1.write(row, 4, j['table_size'])   # data size
                sheet1.write(row, 5, j['index_size'])   # index size
                sheet1.write(row, 6, j['db_size'])      # data + index size
                row += 1

workbook.save('trows.xls')
3.效果如下,rows 列的单位是亿(0.1 即 1000 万行)