mongo 索引分析
用以分析索引大小、使用率、数量,占整体比率
默认分析所有表,-c 指定表
1 #!/usr/bin/env python 2 # coding=utf-8 3 4 ''' 5 @brief 分析表单索引价值 6 @date 2020-08-10 7 ''' 8 import sys 9 import time 10 import bson 11 import copy 12 import logging 13 import traceback 14 import json 15 import datetime 16 import math 17 18 import getopt 19 20 from pymongo import MongoClient 21 22 23 class IndexAnalysis(object): 24 25 def __init__(self): 26 try: 27 self.client = MongoClient() 28 self.db = self.client['db_name'] 29 except Exception as e: 30 logging.error(str(e)) 31 32 self.level_rule = { 33 'high': 1000, 34 'mid': 100, 35 'low': 1, 36 'no': 0 37 } 38 self.unit = 1 39 self.unit_name = 'B' 40 self.coll_names = [] 41 self.coll_names = [] 42 self.stats = {} 43 44 self.__parse_args() 45 46 if not self.coll_names: 47 self.coll_names = self.__get_coll_names() 48 49 def __parse_args(self): 50 """ 51 python index_analysis.py --level=100,10,1 --unit=KB --coll=Asset,Branch 52 53 unit = KB MB GB 显示单位 54 level = high,mid,low 索引价值划分 55 coll = Asset,Branch,SecEvent 分析哪些表单,默认全部 56 :return: 57 """ 58 try: 59 opts, args = getopt.getopt(sys.argv[1:], 'l:c:u:', ['level=', 'coll=', 'unit=']) 60 61 for opt, value in opts: 62 if opt in ('-c', '--coll'): 63 coll_names = value.split(',') 64 if coll_names and isinstance(coll_names, list): 65 self.coll_names = coll_names 66 67 elif opt in ('-l', '--level'): 68 level_rule = value.split(',') 69 if len(level_rule) >= 3: 70 self.level_rule['high'] = int(level_rule[0]) 71 self.level_rule['mid'] = int(level_rule[1]) 72 self.level_rule['low'] = int(level_rule[2]) 73 74 elif opt in ('-u', '--unit'): 75 map = { 76 'KB': 1024, 77 'MB': 1024 * 1024, 78 'GB': 1024 * 1024 * 1024, 79 } 80 if value and value in map: 81 self.unit_name = value 82 self.unit = int(map[value]) 83 84 except Exception as e: 85 print e 86 87 def run(self): 88 89 if not self.coll_names: 90 raise Exception("stat collections empty!!!") 91 92 for coll_name in self.coll_names: 93 if not coll_name: 94 continue 95 coll = self.db[coll_name] 96 97 try: 98 collStats = self.db.command({'collStats': coll_name}) 99 if collStats is None or collStats['count'] == 0: 100 raise Exception('Collection:[ %-40s ] empty! no analysis...' % (coll_name)) 101 102 indexStats = coll.aggregate([{'$indexStats': {}}]) 103 if indexStats is None: 104 raise Exception('Collection:[ %-40s ] empty! no analysis...' % (coll_name)) 105 106 self.stats[coll_name] = { 107 'indexStats': {}, 108 'collStats': {}, 109 'rate': { 110 'index': { 111 'high': { 112 'name': '高使用率索引>' + str(self.level_rule['high']), 'size': 0, 'n': 0, 'rate_index': 0, 113 'rate_all': 0, 114 }, 115 'mid': { 116 'name': '中使用率索引 >' + str(self.level_rule['mid']), 'size': 0, 'n': 0, 'rate_index': 0, 117 'rate_all': 0, 118 }, 119 'low': { 120 'name': '低使用率索引 >' + str(self.level_rule['low']), 'size': 0, 'n': 0, 'rate_index': 0, 121 'rate_all': 0, 122 }, 123 'no': { 124 'name': '无使用率索引 =0', 'size': 0, 'n': 0, 'rate_index': 0, 'rate_all': 0, 125 }, 126 } 127 }, 128 } 129 self.__format_index_stats(coll_name, indexStats) 130 131 self.__format_coll_stats(coll_name, collStats) 132 133 self.__gen_rate(coll_name) 134 except Exception as e: 135 print e 136 137 self.__print_stat() 138 139 def __print_stat(self): 140 141 col_len = 150 142 title = 'MongoDB 索引价值分析' 143 144 n = int((col_len - 24) / 2) 145 print '*' * col_len 146 print '**' + ' ' * n + title + ' ' * n + '**' 147 print '*' * col_len 148 print '' 149 print '' 150 151 for coll_name, item in self.stats.items(): 152 print ' ' * col_len 153 print '=' * col_len 154 print coll_name 155 print '=' * col_len 156 157 index_data_rate = '%40s:%20s' % ('索引总量/数据总量', self.__rate(item['collStats']['index_data_rate'])) 158 159 print '%20s:%20s' % ('count', item['collStats']['count']), index_data_rate 160 print '%20s:%20s' % ('nindexes', item['collStats']['nindexes']) 161 print '%20s:%20s' % ('size', self.__byte(item['collStats']['size'])) 162 print '%20s:%20s' % ('storageSize', self.__byte(item['collStats']['storageSize'])) 163 print '%20s:%20s' % ('totalIndexSize', self.__byte(item['collStats']['totalIndexSize'])) 164 print '%20s:%20s' % ('avgObjSize', self.__byte(item['collStats']['avgObjSize'])) 165 166 self.__tl(col_len) 167 print '%30s%20s%20s%20s%40s' % ('索引分类', '使用频率', '大小', '数量', '占整体索引的比率') 168 self.__tl(col_len) 169 index_rate = item['rate']['index'] 170 for level in ['high', 'mid', 'low', 'no']: 171 v = index_rate[level] 172 print '%30s%20s%20s%20s%20s' % ( 173 v['name'], level, self.__byte(v['size']), v['n'], self.__rate(v['rate_index'])) 174 175 self.__tl(col_len) 176 print '%72s%20s%20s%35s%15s' % ('索引名称', '大小', '使用次数', '开始统计时间', '使用频率') 177 self.__tl(col_len) 178 179 list = sorted(item['indexStats'].values(), key=lambda tmp: tmp['ops'], reverse=True) 180 for v in list: 181 level = self.__get_rate_level(v['ops']) 182 print '%70s%15s%15s%30s%10s' % (v['name'], self.__byte(v['size']), v['ops'], v['from'], level) 183 184 def __tl(self, col_len): 185 print ' ' * 4 + '-' * (col_len-8) 186 187 def __get_coll_names(self): 188 """ 189 全部集合 190 :return: 191 """ 192 return self.db.list_collection_names() 193 194 def __format_index_stats(self, coll_name, indexStats): 195 """ 196 统计索引价值 197 :param coll_name: 198 :param indexStats: 199 :return: 200 """ 201 if indexStats is None: 202 logging.error('{}:indexStats empty'.format(key)) 203 return True 204 205 for item in indexStats: 206 tmp = { 207 'name': item['name'], 208 'key': item['key'], 209 'ops': item['accesses']['ops'], 210 'from': str(item['accesses']['since']), 211 'szie': 0 212 } 213 self.stats[coll_name]['indexStats'][item['name']] = tmp 214 215 def __format_coll_stats(self, coll_name, collStats): 216 """ 217 统计集合信息 218 :param coll_name: 219 :param collStats: 220 :return: 221 """ 222 223 if collStats.has_key('indexSizes'): 224 225 for k, v in collStats['indexSizes'].items(): 226 227 if k not in self.stats[coll_name]['indexStats']: 228 continue 229 # 每个索引存储大小 230 self.stats[coll_name]['indexStats'][k]['size'] = v 231 232 self.stats[coll_name]['collStats']['count'] = collStats['count'] 233 234 # 索引数量 235 self.stats[coll_name]['collStats']['nindexes'] = collStats['nindexes'] 236 237 # 索引存储总量 238 self.stats[coll_name]['collStats']['totalIndexSize'] = 0 239 if 'totalIndexSize' in collStats: 240 self.stats[coll_name]['collStats']['totalIndexSize'] = collStats['totalIndexSize'] 241 242 # 集合存储大小 243 self.stats[coll_name]['collStats']['storageSize'] = 0 244 if 'storageSize' in collStats: 245 self.stats[coll_name]['collStats']['storageSize'] = collStats['storageSize'] 246 247 # 平均文档大小 248 self.stats[coll_name]['collStats']['avgObjSize'] = 0 249 if 'avgObjSize' in collStats: 250 self.stats[coll_name]['collStats']['avgObjSize'] = collStats['avgObjSize'] 251 252 # 集合实际数据大小 253 self.stats[coll_name]['collStats']['size'] = 0 254 if 'size' in collStats: 255 self.stats[coll_name]['collStats']['size'] = collStats['size'] 256 257 totalIndexSize = self.stats[coll_name]['collStats']['totalIndexSize'] 258 storageSize = self.stats[coll_name]['collStats']['storageSize'] 259 self.stats[coll_name]['collStats']['index_data_rate'] = round((float(totalIndexSize) / storageSize) * 100, 2) 260 261 def __gen_rate(self, coll_name): 262 stats = self.stats[coll_name] 263 264 # 区分高、中、低使用率索引 265 for k, v in stats['indexStats'].items(): 266 level = self.__get_rate_level(v['ops']) 267 268 stats['rate']['index'][level]['size'] += v['size'] 269 stats['rate']['index'][level]['n'] += 1 270 271 for i in ['high', 'mid', 'low', 'no']: 272 stats['rate']['index'][i]['rate_index'] = 0 273 stats['rate']['index'][i]['rate_all'] = 0 274 val = stats['rate']['index'][i] 275 276 if stats['collStats']['totalIndexSize'] > 0: 277 stats['rate']['index'][i]['rate_index'] = round( 278 (float(val['size']) / stats['collStats']['totalIndexSize']) * 100, 2) 279 280 if stats['collStats']['storageSize'] > 0: 281 stats['rate']['index'][i]['rate_all'] = round( 282 (float(val['size']) / stats['collStats']['storageSize']) * 100, 2) 283 284 self.stats[coll_name]['rate'] = stats['rate'] 285 286 def __get_rate_level(self, ops): 287 level = 'no' 288 if ops >= self.level_rule['high']: 289 level = 'high' 290 elif ops >= self.level_rule['mid']: 291 level = 'mid' 292 elif ops >= self.level_rule['low']: 293 level = 'low' 294 295 return level 296 297 def __get_rate_level_name(self, ops): 298 level = self.__get_rate_level(ops) 299 return self.level_name[level] 300 301 def __byte(self, v): 302 303 v = int(v / self.unit) 304 if v == 0: 305 return str(v) 306 307 return str(v) + " "+self.unit_name 308 309 def __rate(self, v): 310 311 if v == 0: 312 return str(int(v)) 313 314 return str(v) + "%" 315 316 317 if __name__ == "__main__": 318 obj = IndexAnalysis() 319 obj.run()
人生还有意义。那一定是还在找存在的理由
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· DeepSeek 开源周回顾「GitHub 热点速览」
· 物流快递公司核心技术能力-地址解析分单基础技术分享
· .NET 10首个预览版发布:重大改进与新特性概览!
· AI与.NET技术实操系列(二):开始使用ML.NET
· 单线程的Redis速度为什么快?