Python ORM实战之CURD再现
一直以为开发个程序少依赖三方组件是没问题的,但是数据库比如Mysql,这种能算数吗?直到前段时间接个需求,原来数据库在这种环境里也是没有的。
想要CURD的挣扎
问:没有数据库的话,高可用不好实现呀?
答:没问题,初期不用考虑,先实现效果,过两周要用到了。
问:直接放内存里吗,重启就没了?
答:那不行,要落盘的,保证可靠性
问:文件的话,可能性能差一点
答:没问题,先实现
真的只是一个配置文件?
己方:基于文件,Demo上线了,大家看看啦!
甲方:嗯,速度很快,功能不错。不过这个信息有点少,我们补充几个属性
乙方:小事,就一个大JSON,好加
甲方:这个显示不好,按照这个格式都转化下吧
乙方:没问题,我写个转化器
甲方:嗯,不错。这个字段可以和某某关联起来,实现联动,比如...
己方:...(不是说只是个配置文件)?
经过反复的蹂躏,我得出了一个结论,配置文件不太适合管理逻辑比较复杂的对象,如果对象本身还有一堆交互,这就是雪上加霜。如果甲方不体贴,前期写的快,现在突然变慢,立马就要遭受diss,吃力不讨好。默默怀念起CURD的快乐时光。
基于文件的数据库设计
参考Django的ORM和悲惨经历,我把对一个对象的描述与操作总结为三个实现部分:
- Driver
- Manager
- Orm
每个部分有明确区分,层次也是由下到上。
Driver
驱动层主要实现与介质相关,以文件来说,就是序列化和读写语义,参考实现:
# -*- coding: utf-8 -*-
import os

from filelock import FileLock


class FileDriver(object):
    """Flat-file storage driver: one record per line, guarded by a file lock.

    A record is written as ``key1: val1, key2: val2``. The format has no
    escaping, so a value that contains ``,`` cannot be read back correctly
    (``read_dolt`` splits each line on ``,``).
    """

    def __init__(self, path):
        """
        :param path: path of the data file; the lock file lives next to it
        """
        self.path = path
        # Optional column order for ``serializer``; empty means "use the
        # dict's own iteration order".
        self.fields = []
        self.lock = self.init_lock()

    @property
    def lock_path(self):
        """Path of the lock file that accompanies the data file."""
        return '%s.lock' % self.path

    def init_lock(self):
        """Create the ``FileLock`` used by the locking read/write methods."""
        return FileLock(self.lock_path, timeout=1)

    def serializer(self, item):
        """
        Serialize a single record into one text line.

        :param item: dict
        :return: ``key1: val1, key2: val2`` followed by a newline
        :raises KeyError: if ``self.fields`` names a key missing from ``item``
        """
        if self.fields:
            pairs = [(k, item[k]) for k in self.fields]
        else:
            # ``items()`` instead of ``iteritems()``: works on Python 2 and 3.
            pairs = item.items()
        return '%s\n' % ', '.join('%s: %s' % (k, v) for k, v in pairs)

    @classmethod
    def read_dolt(cls, content):
        """
        Parse one serialized line back into a dict of stripped strings.

        :param content: a single line such as ``key1: val1, key2: val2``
        :return: dict mapping each key to its value (both as strings)
        """
        result = {}
        for message in content.split(','):
            # Only the first ':' separates key from value, so values such as
            # ``10:00:00`` keep their colons.
            key, _, value = message.partition(':')
            result[key.strip()] = value.strip()
        return result

    def deserializer(self, content):
        """
        Parse the whole file content into a list of record dicts.

        Blank lines are skipped so they do not turn into ``{'': ''}`` records.

        :param content: full text of the data file
        :return: list of dicts; ``[]`` if parsing fails
        """
        try:
            return [self.read_dolt(line)
                    for line in content.splitlines() if line.strip()]
        except Exception:
            # NOTE: deliberate best-effort kept from the original; unparsable
            # content is reported as "no records" instead of raising.
            return []

    def read(self):
        """Read all records while holding the file lock.

        :return: list of dicts; ``[]`` when the data file does not exist
        """
        if not os.path.isfile(self.path):
            return []
        with self.lock:
            with open(self.path, 'r') as fd:
                content = fd.read()
        return self.deserializer(content)

    def update(self, content):
        """Replace the whole file with a single record, under the lock.

        :param content: dict describing the one record to store
        """
        # Serialize before opening: mode 'w' truncates the file, so a
        # serializer error must not be able to wipe the existing data.
        body = self.serializer(content)
        with self.lock:
            with open(self.path, 'w') as fd:
                fd.write(body)

    def add(self, content):
        """Append one record to the file, under the lock.

        :param content: dict describing the record to append
        """
        body = self.serializer(content)
        with self.lock:
            with open(self.path, 'a+') as fd:
                fd.write(body)

    def read_no_lock(self):
        """Read all records without taking the lock.

        Intended for callers that already hold ``self.lock``.

        :return: list of dicts; ``[]`` when the data file does not exist
                 (the same type ``read`` returns)
        """
        if not os.path.isfile(self.path):
            return []
        with open(self.path, 'r') as fd:
            content = fd.read()
        return self.deserializer(content)

    def update_no_lock(self, content):
        """Replace the whole file with the given records, without locking.

        Intended for callers that already hold ``self.lock``.

        :param content: iterable of record dicts
        """
        # Build the full body first so a bad record cannot leave the file
        # truncated.
        body = ''.join(self.serializer(raw) for raw in content)
        with open(self.path, 'w') as fd:
            fd.write(body)
关键点说明:
- 使用FileLock,确保多进程可用
- `*_no_lock`(即 read_no_lock / update_no_lock)是无锁读写接口,供已经持有锁的调用方使用,是为了实现select for update
- 自定义序列化,以及落盘有顺序,满足VIM党
Manager
管理层,可以说是中间人,负责语义转化,连通ORM与Driver。功能都是按需添加,参考实现:
import os.path

from conf.settings import get_config
from utils.db.f_driver import FileDriver

config = get_config()


class BaseManager(object):
    """Bridge between a model class and its ``FileDriver``.

    An instance is bound to a model by calling it (``manager(model)``);
    after that it offers query/add/delete operations on the model's file.
    """

    def __init__(self, model=None):
        self._model = model
        # Created in ``__call__`` once the model (and so the path) is known.
        self.driver = None

    @property
    def _path(self):
        """Data file path: ``<config.FDB_HOME>/<model.__table__>.fdb``."""
        table_name = self._model.__table__
        return '%s.fdb' % os.path.join(config.FDB_HOME, table_name)

    def __call__(self, model):
        """Bind this manager to ``model`` and create its driver.

        If the model declares ``Meta.fields``, that list is handed to the
        driver as its ``fields`` attribute.
        """
        self._model = model
        self.driver = FileDriver(self._path)
        # save raw by sort
        if hasattr(model, 'Meta'):
            meta = getattr(model, 'Meta')
            if hasattr(meta, 'fields'):
                self.driver.fields = getattr(meta, 'fields')

    def to_obj(self, kwargs):
        """Build a model instance from a raw record dict."""
        return self._model(**kwargs)

    @classmethod
    def filter_raw(cls, raw, condition):
        """Return True when every key/value in ``condition`` equals ``raw``'s.

        The comparison is plain equality against the raw record as returned
        by the driver.
        NOTE(review): the driver presumably yields string values, so a
        non-string condition value would never match - confirm.

        :param raw: dict, one raw record
        :param condition: dict of required key/value pairs
        """
        # ``items()`` instead of ``iteritems()``: works on Python 2 and 3.
        return all(raw.get(k) == v for k, v in condition.items())

    def all(self):
        """Return every stored record as a model instance."""
        return [self.to_obj(line) for line in self.driver.read()]

    def query(self, **kwargs):
        """Return the records whose raw values match all keyword filters."""
        return [self.to_obj(line) for line in self.driver.read()
                if self.filter_raw(line, kwargs)]

    def query_or(self, conditions):
        """Return the records that match at least one of ``conditions``.

        :param conditions: iterable of condition dicts (see ``filter_raw``)
        """
        return [
            self.to_obj(line)
            for line in self.driver.read()
            # ``any`` stops at the first matching condition.
            if any(self.filter_raw(line, condition)
                   for condition in conditions)
        ]

    def add(self, **kwargs):
        """Append one record made of the given keyword values."""
        self.driver.add(kwargs)

    def delete(self, pk):
        """Remove every record whose primary key equals ``pk``.

        The read-filter-rewrite sequence runs while holding the driver lock.
        NOTE(review): ``query`` goes through ``driver.read``, which takes the
        same lock again, so this relies on the lock being re-entrant -
        confirm.
        """
        key = self._model.__primary_key__
        with self.driver.lock:
            remaining = [dict(item) for item in self.query()
                         if getattr(item, key) != pk]
            self.driver.update_no_lock(remaining)
ORM
对象实例定义与抽象,可以说是Model映射层,参考如下:
from utils.db.f_manager import BaseManager


class Field(object):
    """Description of one model column: name, type, default, primary flag."""

    def __init__(self, name, primary=False, type=str, default=None):
        """
        :param name: field name
        :param primary: True if this field is the model's primary key
        :param type: callable used to convert stored values (default ``str``)
        :param default: default value, or a callable producing one
        """
        self.name = name
        self.primary = primary
        self.type = type
        self.default = default

    def __str__(self):
        return '<%s, %s>' % (self.__class__.__name__, self.name)

    def to_python(self, val):
        """Convert ``val`` with ``self.type``.

        :raises ValueError: if the conversion fails
        """
        try:
            return self.type(val)
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt/SystemExit are not turned into ValueError.
            raise ValueError('Field %s must be %s, but got %s' % (self.name, self.type, val))


class FileMetaclass(type):
    """Metaclass that turns ``Field`` attributes into model metadata.

    For every class except ``FileBase`` itself it collects the ``Field``
    attributes into ``__mappings__``, records ``__table__`` (the class name)
    and ``__primary_key__``, and attaches a ``BaseManager`` as ``objects``.
    """

    def __new__(cls, name, bases, attrs):
        if name == 'FileBase':
            return type.__new__(cls, name, bases, attrs)

        mappings = dict()
        primary_key = None
        for k, v in attrs.items():
            if isinstance(v, Field):
                mappings[k] = v
                if v.primary:
                    if primary_key:
                        raise StandardError('Duplicate primary key for field: %s' % k)
                    primary_key = k
        if not primary_key:
            raise StandardError('Primary key not found.')

        # Remove the Field objects from the class namespace so attribute
        # access on instances falls through to ``FileBase.__getattr__``.
        for k in mappings:
            attrs.pop(k)

        attrs['__mappings__'] = mappings
        attrs['__table__'] = name
        attrs['__primary_key__'] = primary_key
        manager = BaseManager()
        attrs['objects'] = manager
        model = type.__new__(cls, name, bases, attrs)
        # Bind the manager only once the finished class exists.
        manager(model)
        return model


class FileBase(dict):
    """Base class for models; values are stored in the dict itself.

    NOTE: ``__metaclass__`` is the Python 2 way of selecting a metaclass.
    """
    __metaclass__ = FileMetaclass

    def __init__(self, *args, **kwargs):
        """
        :raises AttributeError: if a keyword is not a declared field
        """
        for key in kwargs:
            if key not in self.__mappings__:
                raise AttributeError(r"'Model' object has no attribute '%s'" % key)
        super(FileBase, self).__init__(*args, **kwargs)

    def __getattr__(self, key):
        """Return the field value converted by its ``Field.to_python``.

        When no value is stored and the field has a default, the default
        (called first if it is callable) is stored and then returned.

        :raises AttributeError: if ``key`` is not a declared field
        """
        if key not in self.__mappings__:
            raise AttributeError(r"'Model' object has no attribute '%s'" % key)
        value = self.get(key, None)
        field = self.__mappings__[key]
        if value is None:
            if field.default is not None:
                value = field.default() if callable(field.default) else field.default
                setattr(self, key, value)
        return field.to_python(value)

    def __setattr__(self, key, value):
        """Store ``value`` for a declared field after a type check.

        :raises ValueError: if ``value`` is not an instance of the field type
        :raises AttributeError: if ``key`` is not a declared field
        """
        if key in self.__mappings__:
            field = self.__mappings__[key]
            if not isinstance(value, field.type):
                raise ValueError('Field %s must be %s' % (field.name, field.type))
            self[key] = value
        else:
            raise AttributeError('Unknown field "{}"'.format(key))

    def save(self):
        """Append this object as a new record via the model's manager."""
        maps = {}
        for k in self.__mappings__:
            # getattr applies defaults and type conversion per field.
            maps[k] = getattr(self, k)
        self.objects.add(**maps)
其中关键点就是通过元类把Manager以 objects 属性注入到Model类里,实例(例如 save 方法)再通过 self.objects 使用它。
CURD它回来了!
定义一个报警压制策略,报警条目命中,则进行压制。多条命中则选择优先级最高的,并且每条策略有有效期,到期自动删除。
import datetime
import uuid

from utils.db.f_orm import FileBase, Field


def make_id():
    """Return the hex string of a freshly generated UUID4."""
    return uuid.uuid4().hex


class AlertPolicy(FileBase):
    """Alert suppression policy record."""

    class Meta:
        # Column order used when a record is written out.
        fields = ['id', 'type', 'level', 'value', 'delta', 'expire']

    id = Field(name='id', primary=True, default=make_id)
    type = Field(name='type')
    level = Field(name='level', type=int, default=20)
    value = Field(name='value')
    delta = Field(name='delta', type=int, default=200)
    expire = Field(name='expire')

    @property
    def expire_obj(self):
        """
        Parse ``expire`` (e.g. ``2021-10-10 10:10:10``) into a datetime.

        :return: datetime.datetime
        """
        expire_text = self.expire
        return datetime.datetime.strptime(expire_text, '%Y-%m-%d %H:%M:%S')
则操作如下:
- 新增策略
In [3]: obj = AlertPolicy(type='host', value='1.1.1.1', delta=60, expire='2021-12-12 10:00:00')
In [4]: obj.save()

# cat db/AlertPolicy.fdb
id: de3a565490054308b41d1f007812c84f, type: host, level: 20, value: 1.1.1.1, delta: 60, expire: 2021-12-12 10:00:00
- 删除策略
AlertPolicy.objects.delete(pk)
- 查询符合条件优先级最高的策略
def select_top_one(cls, message):
    """Return the matching ``AlertPolicy`` with the highest ``level``.

    One condition is built per supported policy type, pairing the type with
    the value found in ``message`` under the corresponding key; a policy
    matches if it satisfies any of these conditions.

    :param cls: unused here (NOTE(review): presumably this is meant to be a
                classmethod of a class not shown - confirm)
    :param message: dict-like alert message (read with ``.get``)
    :return: the matching policy with the greatest ``level`` (the first one
             on ties), or None when nothing matches
    """
    support_fields = {
        'pool': 'PoolId',
        'volume': 'VolId',
        'tenant_id': 'TenantId',
        'host': 'HostIp',
    }
    # ``items()`` instead of ``iteritems()``: works on Python 2 and 3.
    conditions = [{'type': k, 'value': message.get(v)}
                  for k, v in support_fields.items()]
    items = AlertPolicy.objects.query_or(conditions)
    if not items:
        return None
    # ``max`` keeps the first item among equal levels, like the original
    # strict ``>`` scan did.
    return max(items, key=lambda item: item.level)