【python】雪花生成器解决主键自增唯一问题

 tidb不支持设置主键自增,只能通过算法生成id,保证主键唯一性。

 

源码:

 

import time 

# 64位ID的划分 
WORKER_ID_BITS = 5 
DATACENTER_ID_BITS = 5 
SEQUENCE_BITS = 12 

# 最大取值计算 
MAX_WORKER_ID = -1 ^ (-1 << WORKER_ID_BITS) # 2**5-1 0b11111 
MAX_DATACENTER_ID = -1 ^ (-1 << DATACENTER_ID_BITS) 

# 移位偏移计算 
WOKER_ID_SHIFT = SEQUENCE_BITS 
DATACENTER_ID_SHIFT = SEQUENCE_BITS + WORKER_ID_BITS 
TIMESTAMP_LEFT_SHIFT = SEQUENCE_BITS + WORKER_ID_BITS + DATACENTER_ID_BITS 

# 序号循环掩码 
SEQUENCE_MASK = -1 ^ (-1 << SEQUENCE_BITS) 

# Twitter元年时间戳 
TWEPOCH = 1288834974657 

#--------------------------------

class IdWorker(object): 
    """ 
    用于生成IDs 
    """ 
    def __init__(self, datacenter_id, worker_id, sequence=0): 
        """ 
        初始化 
        :param datacenter_id: 数据中心(机器区域)ID 
        :param worker_id: 机器ID 
        :param sequence: 实例序号 
        """ 
        # sanity check 
        if worker_id > MAX_WORKER_ID or worker_id < 0: 
            raise ValueError('worker_id值越界') 
            
        if datacenter_id > MAX_DATACENTER_ID or datacenter_id < 0: 
            raise ValueError('datacenter_id值越界') 
        
        self.worker_id = worker_id 
        self.datacenter_id = datacenter_id 
        self.sequence = sequence 
        self.last_timestamp = -1 # 上次计算的时间戳 
        
    def _gen_timestamp(self): 
        """ 
        生成整数时间戳 
        :return:int timestamp 
        """ 
        return int(time.time() * 1000) 
    
    def get_id(self): 
        """ 
        获取新ID :return: 
        """ 
        timestamp = self._gen_timestamp() 
        
        # 时钟回拨 
        if timestamp < self.last_timestamp: 
            raise 
            
        if timestamp == self.last_timestamp: 
            self.sequence = (self.sequence + 1) & SEQUENCE_MASK 
            if self.sequence == 0: 
                timestamp = self._til_next_millis(self.last_timestamp) 
            else: self.sequence = 0 

        self.last_timestamp = timestamp 
        
        new_id = ((timestamp - TWEPOCH) << TIMESTAMP_LEFT_SHIFT) | (self.datacenter_id << DATACENTER_ID_SHIFT) | (self.worker_id << WOKER_ID_SHIFT) | self.sequence 
        return new_id 
        
    def _til_next_millis(self, last_timestamp): 
        """ 
        等到下一毫秒 
        """ 
        timestamp = self._gen_timestamp() 
        while timestamp <= last_timestamp: 
            timestamp = self._gen_timestamp() 
            return timestamp 
            
#================================

if __name__ == '__main__': 
    worker = IdWorker(1, 2, 0) 
    print(worker.get_id())

 

实际:

worker = IdWorker(1, 1, 0)
print(worker.get_id())

生成19位数字:1536169204380733440

 

批量调用:

from IdWorker import IdWorker
import time

 

ids = []
for i in df3.index:
time.sleep(0.00001)
ids.append(IdWorker(1, 1, 0).get_id())
df3['id'] = ids
df3.insert(0, 'id', df3.pop('id'))

 

 

table1='tb_xxx'

dtype1={'id':VARCHAR(19),}

# 更新天表
df3.to_sql(table1,engine,index=False,dtype=dtype1,if_exists='replace',chunksize=5000)

with engine.connect() as con:
con.execute(f'ALTER TABLE `{table1}` ADD PRIMARY KEY (`id`);')

 

-----------------------------

 

当前bug:每次生成1000条大概耗时22s,数据量大以后会存在重复id

posted @ 2022-06-13 14:46  HuaBro  阅读(307)  评论(0编辑  收藏  举报