Redis查找bigkey的扫描脚本
众所周知,redis里面的大key存在是非常危险的一件事情。因为最近的工作转移到中间件相关的工作,因此关注了一下bigkey的扫描方法。首先介绍一下阿里云提供的扫描脚本:
具体可见:https://yq.aliyun.com/articles/117042?t=t1
我对这个脚本进行了一次压力测试:redis的内存为15G,key的数量为2000万(2KW),ops在40K到80K之间。在这种情况下,阿里云的脚本完全跑不完(估计耗时以天为单位),主要原因是每检查一个key,都需要与redis交互多次。因此,我对它的脚本进行了改造,加入了pipeline和debug object方法,脚本如下:
具体可见:https://yq.aliyun.com/articles/117042?t=t1
我对这个脚本进行了一次压力测试:redis的内存为15G,key的数量为2000万(2KW),ops在40K到80K之间。在这种情况下,阿里云的脚本完全跑不完(估计耗时以天为单位),主要原因是每检查一个key,都需要与redis交互多次。因此,我对它的脚本进行了改造,加入了pipeline和debug object方法,脚本如下:
import sys
import redis
def find_big_key_normal(db_host, db_port, db_password, db_num,
                        threshold=10240, scan_count=1000):
    """Scan one database of a standalone Redis server and print its big keys.

    The keyspace is walked with SCAN.  For each batch of keys, one
    non-transactional pipeline of DEBUG OBJECT calls is sent, and every key
    whose ``serializedlength`` is greater than ``threshold`` is printed as
    ``<key> <type> <serializedlength>``.

    Args:
        db_host: Redis host name or IP address.
        db_port: Redis port; converted with ``int()`` so a string taken from
            the command line is accepted as well.
        db_password: Password passed to the Redis client.
        db_num: Index of the database to scan.
        threshold: Keys with a ``serializedlength`` above this are reported.
        scan_count: COUNT hint passed to each SCAN call.
    """
    client = redis.StrictRedis(host=db_host, port=int(db_port),
                               password=db_password, db=db_num)
    cursor = 0
    while True:
        cursor, keys = client.scan(cursor=cursor, count=scan_count)
        # The reply that carries cursor 0 can still contain keys, so the
        # batch is processed before the termination test below.
        if keys:
            try:
                pipe = client.pipeline(transaction=False)
                for key in keys:
                    pipe.debug_object(key)
                # raise_on_error=False: a key that disappeared between SCAN
                # and DEBUG OBJECT yields an exception object in the result
                # list instead of aborting the whole batch.
                infos = pipe.execute(raise_on_error=False)
                for key, info in zip(keys, infos):
                    if isinstance(info, Exception):
                        continue
                    length = info.get("serializedlength")
                    if length is not None and length > threshold:
                        key_type = client.type(key)
                        print("%s %s %s" % (key, key_type, length))
            except redis.RedisError as exc:
                # Best effort: report the failure and go on with the next batch.
                print("a exception come: %s" % exc)
        if cursor == 0:
            break
if __name__ == '__main__':
    # Exactly three positional arguments are expected: host, port, password.
    if len(sys.argv) != 4:
        print("%s %s %s" % ('Usage: python ', sys.argv[0], ' host port password '))
        sys.exit(1)
    db_host = sys.argv[1]
    # Convert once so both the INFO client and the scanner get an integer port.
    db_port = int(sys.argv[2])
    db_password = sys.argv[3]
    r = redis.StrictRedis(host=db_host, port=db_port, password=db_password)
    # The keyspace section is keyed by names such as "db0"; stripping the
    # "db" prefix leaves the database index handed to the scanner.
    keyspace_info = r.info("keyspace")
    for db in keyspace_info:
        print("%s %s %s %s" % ('check ', db, ' ', keyspace_info[db]))
        find_big_key_normal(db_host, db_port, db_password, db.replace("db", ""))
import redis
def find_big_key_normal(db_host, db_port, db_password, db_num,
                        threshold=10240, scan_count=1000):
    """Scan one database of a standalone Redis server and print its big keys.

    The keyspace is walked with SCAN.  For each batch of keys, one
    non-transactional pipeline of DEBUG OBJECT calls is sent, and every key
    whose ``serializedlength`` is greater than ``threshold`` is printed as
    ``<key> <type> <serializedlength>``.

    Args:
        db_host: Redis host name or IP address.
        db_port: Redis port; converted with ``int()`` so a string taken from
            the command line is accepted as well.
        db_password: Password passed to the Redis client.
        db_num: Index of the database to scan.
        threshold: Keys with a ``serializedlength`` above this are reported.
        scan_count: COUNT hint passed to each SCAN call.
    """
    client = redis.StrictRedis(host=db_host, port=int(db_port),
                               password=db_password, db=db_num)
    cursor = 0
    while True:
        cursor, keys = client.scan(cursor=cursor, count=scan_count)
        # The reply that carries cursor 0 can still contain keys, so the
        # batch is processed before the termination test below.
        if keys:
            try:
                pipe = client.pipeline(transaction=False)
                for key in keys:
                    pipe.debug_object(key)
                # raise_on_error=False: a key that disappeared between SCAN
                # and DEBUG OBJECT yields an exception object in the result
                # list instead of aborting the whole batch.
                infos = pipe.execute(raise_on_error=False)
                for key, info in zip(keys, infos):
                    if isinstance(info, Exception):
                        continue
                    length = info.get("serializedlength")
                    if length is not None and length > threshold:
                        key_type = client.type(key)
                        print("%s %s %s" % (key, key_type, length))
            except redis.RedisError as exc:
                # Best effort: report the failure and go on with the next batch.
                print("a exception come: %s" % exc)
        if cursor == 0:
            break
if __name__ == '__main__':
    # Exactly three positional arguments are expected: host, port, password.
    if len(sys.argv) != 4:
        print("%s %s %s" % ('Usage: python ', sys.argv[0], ' host port password '))
        sys.exit(1)
    db_host = sys.argv[1]
    # Convert once so both the INFO client and the scanner get an integer port.
    db_port = int(sys.argv[2])
    db_password = sys.argv[3]
    r = redis.StrictRedis(host=db_host, port=db_port, password=db_password)
    # The keyspace section is keyed by names such as "db0"; stripping the
    # "db" prefix leaves the database index handed to the scanner.
    keyspace_info = r.info("keyspace")
    for db in keyspace_info:
        print("%s %s %s %s" % ('check ', db, ' ', keyspace_info[db]))
        find_big_key_normal(db_host, db_port, db_password, db.replace("db", ""))
我对上面的脚本同样进行了一次压力测试:redis的内存为15G,key的数量为2000万(2KW),ops在40K到80K之间。在这种情况下,脚本10分钟跑完,完全可用。
说明:阿里云的脚本是支持cluster的,我这个脚本是只支持单机的,大家有兴趣的可以自己去更改。
下面是我自己改造的版本(使用Python 3的print()语法,并且会处理SCAN返回的最后一批key):
import sys
import redis
import sys
import redis
def find_big_key_normal(db_host, db_port, db_password, db_num,
                        threshold=1, scan_count=2):
    """Scan one database of a standalone Redis server and print its big keys.

    The keyspace is walked once with SCAN.  For each batch of keys, one
    non-transactional pipeline of DEBUG OBJECT calls is sent, and every key
    whose ``serializedlength`` is greater than ``threshold`` is printed as
    ``key type serializedlength``.

    The defaults of ``threshold`` and ``scan_count`` are the values that were
    previously hard-coded in this function.

    Args:
        db_host: Redis host name or IP address.
        db_port: Redis port; converted with ``int()`` so a string taken from
            the command line is accepted as well.
        db_password: Password passed to the Redis client.
        db_num: Index of the database to scan.
        threshold: Keys with a ``serializedlength`` above this are reported.
        scan_count: COUNT hint passed to each SCAN call.
    """
    client = redis.StrictRedis(host=db_host, port=int(db_port),
                               password=db_password, db=db_num)
    cursor = 0
    while True:
        cursor, keys = client.scan(cursor=cursor, count=scan_count)
        # The reply that carries cursor 0 can still contain keys, so the
        # batch is processed before the termination test below.
        if keys:
            try:
                pipe = client.pipeline(transaction=False)
                for key in keys:
                    pipe.debug_object(key)
                # raise_on_error=False: a key that disappeared between SCAN
                # and DEBUG OBJECT yields an exception object in the result
                # list instead of aborting the whole batch.
                infos = pipe.execute(raise_on_error=False)
                for key, info in zip(keys, infos):
                    if isinstance(info, Exception):
                        continue
                    length = info.get("serializedlength")
                    if length is not None and length > threshold:
                        key_type = client.type(key)
                        print(key, key_type, length)
            except redis.RedisError as exc:
                # Best effort: report the failure and go on with the next batch.
                print("a exception come:", exc)
        if cursor == 0:
            break
if __name__ == '__main__':
    # Exactly three positional arguments are expected: host, port, password.
    if len(sys.argv) != 4:
        print('Usage:python ', sys.argv[0], ' host port password')
        sys.exit(1)
    db_host = sys.argv[1]
    # Convert once so both the INFO client and the scanner get an integer port.
    db_port = int(sys.argv[2])
    db_password = sys.argv[3]
    r = redis.StrictRedis(host=db_host, port=db_port, password=db_password)
    # The keyspace section is keyed by names such as "db0"; stripping the
    # "db" prefix leaves the database index handed to the scanner.
    # Each database is scanned exactly once.
    keyspace_info = r.info("keyspace")
    for db in keyspace_info:
        print("check ", db, ' ', keyspace_info[db])
        find_big_key_normal(db_host, db_port, db_password, db.replace("db", ""))