django[三] ORM操作进阶 - 迁
ORM的操作
常用操作
__exact 精确等于 like 'aaa' model.objects.filter(field__exact='aaa') __iexact 精确等于 忽略大小写 ilike 'aaa' __contains 包含 like '%aaa%' __icontains 包含 忽略大小写 ilike '%aaa%',但是对于sqlite来说,contains的作用效果等同于icontains。 __gt 大于 __gte 大于等于 __lt 小于 __lte 小于等于 __in 存在于一个list范围内 # User.objects.filter(age__in=[10, 20, 30]) __startswith 以…开头 __istartswith 以…开头 忽略大小写 __endswith 以…结尾 __iendswith 以…结尾,忽略大小写 __range 在…范围内 __year 日期字段的年份 __month 日期字段的月份 __day 日期字段的日 __isnull=True/False #User.objects.filter(username__isnull=True) //查询用户名为空/不为空的用户 __isnull=True 与 __exact=None的区别 User.objects.filter().excute(age=10) // 查询年龄不为10的用户 User.objects.filter().excute(age__in=[10, 20]) // 查询年龄不为在 [10, 20] 的用户
http://www.cnblogs.com/wupeiqi/articles/6216618.html
进阶操作
# 获取个数 # # models.Tb1.objects.filter(name='seven').count() # 大于,小于 # # models.Tb1.objects.filter(id__gt=1) # 获取id大于1的值 # models.Tb1.objects.filter(id__gte=1) # 获取id大于等于1的值 # models.Tb1.objects.filter(id__lt=10) # 获取id小于10的值 # models.Tb1.objects.filter(id__lte=10) # 获取id小于10的值 # models.Tb1.objects.filter(id__lt=10, id__gt=1) # 获取id大于1 且 小于10的值 # in # # models.Tb1.objects.filter(id__in=[11, 22, 33]) # 获取id等于11、22、33的数据 # models.Tb1.objects.exclude(id__in=[11, 22, 33]) # not in # isnull # Entry.objects.filter(pub_date__isnull=True) # contains # # models.Tb1.objects.filter(name__contains="ven") # models.Tb1.objects.filter(name__icontains="ven") # icontains大小写不敏感 # models.Tb1.objects.exclude(name__icontains="ven") # range # # models.Tb1.objects.filter(id__range=[1, 2]) # 范围bettwen and # 其他类似 # # startswith,istartswith, endswith, iendswith, # order by # # models.Tb1.objects.filter(name='seven').order_by('id') # asc # models.Tb1.objects.filter(name='seven').order_by('-id') # desc # group by # # from django.db.models import Count, Min, Max, Sum # models.Tb1.objects.filter(c1=1).values('id').annotate(c=Count('num')) # SELECT "app01_tb1"."id", COUNT("app01_tb1"."num") AS "c" FROM "app01_tb1" WHERE "app01_tb1"."c1" = 1 GROUP BY "app01_tb1"."id" # limit 、offset # # models.Tb1.objects.all()[10:20] # regex正则匹配,iregex 不区分大小写 # # Entry.objects.get(title__regex=r'^(An?|The) +') # Entry.objects.get(title__iregex=r'^(an?|the) +') # date # # Entry.objects.filter(pub_date__date=datetime.date(2005, 1, 1)) # Entry.objects.filter(pub_date__date__gt=datetime.date(2005, 1, 1)) # year # # Entry.objects.filter(pub_date__year=2005) # Entry.objects.filter(pub_date__year__gte=2005) # month # # Entry.objects.filter(pub_date__month=12) # Entry.objects.filter(pub_date__month__gte=6) # day # # Entry.objects.filter(pub_date__day=3) # Entry.objects.filter(pub_date__day__gte=3) # week_day # # Entry.objects.filter(pub_date__week_day=2) # Entry.objects.filter(pub_date__week_day__gte=2) # hour # # Event.objects.filter(timestamp__hour=23) # Event.objects.filter(time__hour=5) # Event.objects.filter(timestamp__hour__gte=12) # minute # # Event.objects.filter(timestamp__minute=29) # Event.objects.filter(time__minute=46) # Event.objects.filter(timestamp__minute__gte=29) # second # # Event.objects.filter(timestamp__second=31) # Event.objects.filter(time__second=2) # Event.objects.filter(timestamp__second__gte=31)
raw sql操作
d=Publish.objects.raw("select * from app01_author;") [ i.name for i in d ] ['张三', '李四', '小龙虾', '小白', 'niubi', 'taotao']
通过raw sql操作,可以直接控制sql语句,
F
假设数据库有一个员工表,表中的年龄都自加“1”,这里就需要到orm的F功能,如下面的代码
from django.db.models import F#首先要导入这个F模models.Uinfo.objects.all().update(age=F("age")+1)#这里的F功能后面的age,它就会让数据表表中的age这列+1
F 就是用来更新获取原来值的功能
Q
数据库的查询条件我们可以使用filter,在filter里面的可以是两个条件他们之间是and的关系,也可以是一个字典,例如下面的代码
models.Uinfo.objects.all().filter(id=1,name='李伟') conditon={ 'id':'1', 'name':'李伟' } models.Uinfo.objects.all().filter(**conditon)
除了上面的方法,我们还可以加Q的对象,例如
方式一:
from django.db.models import Q models.Uinfo.objects.all().filter(Q(id=1))#条件是id为1的时候 models.Uinfo.objects.all().filter(Q(id=1)|Q(id__gt=3))#条件是或的关系,| models.Uinfo.objects.all().filter(Q(id=1) & Q(id=4))# 条件是and的关系
方式二:
#q1 里面的条件都是or的关系 q1=Q() q1.connector = 'OR' q1.children.append(('id',1)) q1.children.append(('id',3)) q1.children.append(('id',6)) #q2里面的条件都是or的关系 q2=Q() q2.connector = 'OR' q2.children.append(('c',2)) q2.children.append(('c',4)) q2.children.append(('c',6)) #con 通过and的条件把q1和q2 联系到一块 con=Q() con.add(q1,'AND') con.add(q2,'AND') models.Tb1.objects.filter(con)
extra -- 构造额外的查询条件或者映射
extra(self, select=None, where=None, params=None, tables=None, order_by=None, select_params=None) Entry.objects.extra(select={'new_id': "select col from sometable where othercol > %s"}, select_params=(1,)) Entry.objects.extra(where=['headline=%s'], params=['Lennon']) Entry.objects.extra(where=["foo='a' OR bar = 'a'", "baz = 'a'"]) Entry.objects.extra(select={'new_id': "select id from tb where id > %s"}, select_params=(1,), order_by=['-nid'])
distinct -- 去重
>>> models.Coupon.objects.values('object_id').distinct() <QuerySet [{'object_id': 1}, {'object_id': 2}]>
order_by -- 排序
# 根据 object_id 升序,越来越大,再根据id降序,越来越小 models.Coupon.objects.all().order_by('object_id','-id') <QuerySet [<Coupon: yhq2>, <Coupon: yhq1>, <Coupon: nike_yhq>]> object_id: 1 1 2 id: 3 1 2
reverse -- 倒序
# 对已排序结果,再做一次倒序 models.Coupon.objects.all().order_by('object_id').reverse()
defer -- 映射中排序某列数据
models.Coupon.objects.defer('brief') # 排除 brief字段?? 好像不管用啊
in_bulk -- 根据主键批量查找
>>> models.Coupon.objects.in_bulk([1,2,3]) {1: <Coupon: yhq1>, 2: <Coupon: nike_yhq>, 3: <Coupon: yhq2>}
get_or_create -- 如果存在,则获取,否则创建
# username是主键,defaults 指定创建时,其他字段的值 obj, created = models.UserInfo.objects.get_or_create(username='root1', defaults={'email': '1111111','u_id': 2, 't_id': 2})
first -- 第一个
last -- 最后一个
delete -- 删除
update -- 更新
exists -- 是否有结果
annotate -- 用于实现聚合group by查询
class Coupon(models.Model): """ id food_id cloth_id …… null null 1 null """ name = models.CharField("活动名称",max_length=64) brief = models.TextField(blank=True,null=True,verbose_name="优惠券介绍") brand = models.ForeignKey("brand",blank=True,null=True,on_delete=models.CASCADE) content_type = models.ForeignKey(ContentType,blank=True,null=True, on_delete=models.CASCADE) # 代表哪个app下的哪张表(默认存储关系数据的表) object_id = models.PositiveIntegerField("绑定商品",blank=True,null=True) # 代表哪张表中的对象id content_obj = GenericForeignKey("content_type","object_id") #不会生成额外的列 def __str__(self): return self.name
from django.db.models import Count, Avg, Max, Min, Sum # 根据object_id group by models.Coupon.objects.values('object_id').annotate(object_id_cnt=Count("object_id")) SELECT "modelsPractice_coupon"."object_id", COUNT("modelsPractice_coupon"."object_id") AS "object_id_cnt" FROM "modelsPractice_coupon" GROUP BY "modelsPractice_coupon"."object_id" LIMIT 21; args=()
结果:
<QuerySet [{'object_id': 1, 'object_id_cnt': 2}, {'object_id': 2, 'object_id_cnt': 1}]>
# 根据结果集 having models.Coupon.objects.values('object_id').annotate(object_id_cnt=Count("object_id")).filter(object_id_cnt__gt=1) 结果: <QuerySet [{'object_id': 1, 'object_id_cnt': 2}]> # group by后做distinct,最后having(看起来没啥意义) models.Coupon.objects.values('object_id').annotate(object_id_cnt=Count("object_id",distinct=True)).filter(object_id_cnt__g te=1) 结果: <QuerySet [{'object_id': 1, 'object_id_cnt': 1}, {'object_id': 2, 'object_id_cnt': 1}]>
ORM查询优化
表结构
from django.db import models class Province(models.Model): name = models.CharField(max_length=10) def __unicode__(self): return self.name def __str__(self): return self.name class City(models.Model): name = models.CharField(max_length=5) province = models.ForeignKey(Province,null=True, blank=True, on_delete=models.CASCADE) def __unicode__(self): return self.name class Order(models.Model): customer = models.ForeignKey("Person", on_delete=models.CASCADE) orderinfo = models.CharField(max_length=50) time = models.DateTimeField(auto_now_add = True) def __unicode__(self): return self.orderinfo class Person(models.Model): firstname = models.CharField(max_length=10) lastname = models.CharField(max_length=10) needs = models.ForeignKey(to=Order, related_name='require',null=True, blank=True,on_delete=models.DO_NOTHING) visitation = models.ManyToManyField(City, related_name="visitor", null=True, blank=True) hometown = models.ForeignKey(City, related_name="birth", on_delete=models.CASCADE) living = models.ForeignKey(City, related_name="citizen", on_delete=models.CASCADE) def __unicode__(self): return self.firstname + self.lastname
select_related的使用
常用
model.tb.objects.all().select_related('外键字段') model.tb.objects.all().select_related('外键字段__外键字段')
概念
对于一对一字段(OneToOneField)和外键字段(ForeignKey),可以使用select_related 来对QuerySet进行优化
在对QuerySet使用select_related()函数后,Django会获取相应外键对应的对象,从而在之后需要的时候不必再查询数据库了
citys = City.objects.all() for c in citys: print(c.province) 这样会导致线性的SQL查询,如果对象数量n太多,每个对象中有k个外键字段的话,就会导致n*k+1次SQL查询。在本例中,因为有3个city对象就导致了4次SQL查询 (0.000) SELECT "select_related_city"."id", "select_related_city"."name", "select_related_city"."province_id" FROM "select_related_city"; args=() (0.000) SELECT "select_related_province"."id", "select_related_province"."name" FROM "select_related_province" WHERE "select_related_province"."id" = 1; args=(1,) (0.000) SELECT "select_related_province"."id", "select_related_province"."name" FROM "select_related_province" WHERE "select_related_province"."id" = 1; args=(1,) (0.000) SELECT "select_related_province"."id", "select_related_province"."name" FROM "select_related_province" WHERE "select_related_province"."id" = 1; args=(1,) 浙江 浙江 浙江
如果我们使用select_related()函数:
citys = City.objects.select_related().all() for c in citys: print(c.province) 就只有一次SQL查询,显然大大减少了SQL查询的次数 (0.001) SELECT "select_related_city"."id", "select_related_city"."name", "select_related_city"."province_id", "select_related_province"."id", "select_related_province"."name" FROM "select_related_city" INNER JOIN "select_related_province" ON ("select_related_city"."province_id" = "select_related_province"."id"); args=() 浙江 浙江 浙江
看到他做了了inner join,把foreign key的表连接过来
这样查询 living做关联的时候就不用再查询一次了 |
select_related() 支持三种方法:
1 指定 *fields 参数
这个参数是需要获取的外键(父表内容)的字段名,用来关联外键的字段名,比如:
- 有外键的外键。 比如这里的
>>> zhu = Person.objects.select_related('living__province').get(firstname="zhu") (0.000) SELECT "select_related_person"."id", "select_related_person"."firstname", "select_related_person"."lastname", "select_related_person"."sex_id", "select_related_pers >>> zhu = Person.objects.select_related('living__province').get(firstname="zhu") >>> zhu.living.province // 没有产生其他sql <Province: 浙江>
然而,未指定的外键则不会被添加到结果中,就会产生一条查询sql
>>> zhu.hometown.province (0.000) SELECT "select_related_province"."id", "select_related_province"."name" FROM "select_related_<Province: 浙江>
- 有多个外键时需要指定到哪个外键做关联(默认全部都关联)
>>> zhu = Person.objects.select_related('living__province').select_related('hometown__province (0.000) SELECT "select_related_person"."id", "select_related_person"."firstname", "select_related_person"."lastname", "select_related_person"."sex_id", "select_related_person"."hometown_id", "select_related_person"."living_id", "select_related_city"."id", "select_related_city"."name", "select_related_city"."province_id", "select_related_province"."id", "select_related_province"."name", T4."id", T4."name", T4."province_id", T5."id", T5."name" FROM "select_related_person" INNER JOIN "select_related_city" ON ("select_related_person"."hometown_id" = "select_related_city"."id") INNER JOIN "select_related_province" ON ("select_related_city"."province_id" = "se >>> zhu.hometown.province <Province: 浙江>
2 指定 depth 参数 (已废弃了)
select_related() 接受depth参数,depth参数可以确定select_related的深度。Django会递归遍历指定深度内的所有的OneToOneField和ForeignKey
3 不指定参数
select_related() 也可以不加参数,这样表示要求Django尽可能深的select_related
- Django本身内置一个上限,对于特别复杂的表关系,Django可能在你不知道的某处跳出递归,从而与你想的做法不一样。具体限制是怎么工作的我表示不清楚。
- Django并不知道你实际要用的字段有哪些,所以会把所有的字段都抓进来,从而会造成不必要的浪费而影响性能。
小结
1 select_related主要针一对一和多对一关系进行优化。
2 select_related使用SQL的JOIN语句进行优化,通过减少SQL查询的次数来进行优化、提高性能。
3 可以通过可变长参数指定需要select_related的字段名。也可以通过使用双下划线“__”连接字段名来实现指定的递归查询。没有指定的字段不会缓存,没有指定的深度不会缓存,如果要访问的话Django会再次进行SQL查询。
4 也接受无参数的调用,Django会尽可能深的递归查询所有的字段。但注意有Django递归的限制和性能的浪
prefetch_related的应用
对于多对多字段(ManyToManyField)和一对多字段,可以使用prefetch_related()来进行优化
作用和方法
prefetch_related()和select_related()的设计目的很相似,都是为了减少SQL查询的数量,但是实现的方式不一样。后者是通过JOIN语句,在SQL查询内解决问题。
但是对于多对多关系,使用SQL语句解决就显得有些不太明智,因为JOIN得到的表将会很长,会导致SQL语句运行时间的增加和内存占用的增加。若有n个对象,每个对象的多对多字段对应Mi条,就会生成Σ(n)Mi 行的结果表。
zhu = Person.objects.prefetch_related().filter(firstname='zhu').first() (0.000) SELECT "select_related_person"."id", "select_related_person"."firstname", "select_related_person"."lastname", "select_related_person"."sex_id", "select_related_person"."hometown_id", "select_related_person"."living_id" FROM "select_related_person" WHERE "select_related_person"."firstname" = 'zhu' ORDER BY "select_related_person"."id" ASC LIMIT 1; args=('zhu',) # 关联外键内容 [ i.name for i in zhu.visitation.all() ] (0.000) SELECT "select_related_city"."id", "select_related_city"."name", "select_related_city"."province_id" FROM ['嘉兴', '温州']
获取所有用户 plist = Person.objects.prefetch_related("visitation") filter出去过嘉兴的数据 [ p.visitation.filter(name=u"嘉兴") for p in plist ] (0.000) SELECT "select_related_city"."id", "select_related_city"."name", "select_related_city"."province_id" FROM "select_related_city" INNER JOIN "select_related_person_visitation" ON ("select_related_city"."id" = "select_related_person_visitation"."city_id") WHERE ("select_related_person_visitation"."person_id" = 1 AND "select_related_city"."name" = '嘉兴') LIMIT 21; args=(1, '嘉兴') (0.000) SELECT "select_related_city"."id", "select_related_city"."name", "select_related_city"."province_id" FROM "select_related_city" INNER JOIN "select_related_person_visitation" ON ("select_related_city"."id" = "select_related_person_visitation"."city_id") WHERE ("select_related_person_visitation"."person_id" = 2 AND "select_related_city"."name" = '嘉兴') LIMIT 21; args=, '嘉兴') (0.000) SELECT "select_related_city"."id", "select_related_city"."name", "select_related_city"."province_id" FROM "sel
注意 QuerySet是lazy的,要用的时候才会去访问数据库。运行到第二行Python代码时,for循环将plist看做iterator,这会触发数据库查询。最初的两次SQL查询就是prefetch_related导致的。
虽然已经查询结果中包含所有所需的city的信息,但因为在循环体中对Person.visitation进行了filter操作,这显然改变了数据库请求。因此这些操作会忽略掉之前缓存到的数据,重新进行SQL查询。
常用使用方法
# 获取所有用户表 # 获取用户类型表where id in (用户表中的查到的所有用户ID) models.UserInfo.objects.prefetch_related('外键字段')
select_related和prefetch_related实际应用
获得所有家乡是浙江的人
方法1:
最无脑的做法是先获得浙江省,再获得浙江的所有城市,最后获得故乡是这个城市的人。就像这样:
zj = Province.objects.get(name="浙江") people = [] for city in zj.city_set.all(): people.append(city.birth.all())
显然这不是一个明智的选择,因为这样做会导致1+(浙江省城市数)次SQL查询。反正是个反例,导致的查询和获得掉结果就不列出来了。
方法2:
利用prefetch_related先查到省和市
zj=Province.objects.prefetch_related('city_set__birth').filter(name='浙江').first() 产生两条sql (0.000) SELECT "select_related_province"."id", "select_related_province"."name" FROM "select_related_province" WHERE "select_related_province"."name" = '浙江' ORDER BYselect_related_province"."id" ASC LIMIT 1; args=('浙江',) (0.000) SELECT "select_related_city"."id", "select_related_city"."name", "select_related_city"."province_id" FROM "select_related_city" WHERE "select_related_city"."province_id" IN (1); args=(1,) people =[] for city in zj.city_set.all() people.extend(city.birth.all()) 这个过程不会产生sql,说明prefetch_related已经把数据缓存下来了
因为是一个深度为2的prefetch,所以会导致2次SQL查询,有么有更好的办法呢
方法3:
利用select_related 直接获取外键缓存
p = list(Person.objects.select_related("hometown__province").filter(hometown__province__name="浙江")) (0.000) SELECT "select_related_person"."id", "select_related_person"."firstname", "select_related_person"."lastname", "select_related_person"."needs_id", "select_related_person"."hometown_id", "select_related_person"."living_id", "select_related_city"."id", "select_related_city"."name", "select_related_city"."province_id", "select_related_province"."id", "select_related_province"."name" FROM "select_related_person" INNER JOIN "select_related_city" ON ("select_related_person"."hometown_id" = "select_related_city"."id") INNER JOIN "select_related_province" ON ("select_related_city"."province_id" = "select_related_province"."id") WHERE "select_related_province"."name" = '浙江'; args=('浙江',) 只会产生一句sql
select_related()的效率要高于prefetch_related()。因此,最好在能用select_related()的地方尽量使用它,也就是说,对于ForeignKey字段,避免使用prefetch_related()。
两者的联用
在我们一直使用的例子上加一个model:Order (订单)
如果我们拿到了一个订单的id 我们要知道这个订单的客户去过的省份。因为有ManyToManyField显然必须要用prefetch_related()。
方法1: 如果只用prefetch_related()会怎样呢?
把客户 城市 省份关联起来,后面可以filter 订单id olist=Order.objects.prefetch_related("customer__visitation__province").all().first() 可以得到一个order列表: <QuerySet [<Order: Order object (1)>, <Order: Order object (2)>, <Order: Order object (3)>]> 关系到了4个表:Order、Person、City、Province,根据prefetch_related()的特性就得有4次SQL查询 (0.000) SELECT "select_related_order"."id", "select_related_order"."customer_id", "select_related_order"."orderinfo", "select_related_order"."time" FROM "select_related_order" LIMIT 21; args=() (0.000) SELECT "select_related_person"."id", "select_related_person"."firstname", "select_related_person"."lastname", "select_related_person"."hometown_id", "select_related_person"."living_id" FROM "select_related_person" WHERE "select_related_person"."id" IN (1, 2); args=(1, 2) (0.000) SELECT ("select_related_person_visitation"."person_id") AS "_prefetch_related_val_person_id", "select_related_city"."id", "select_related_city"."name", "select_related_city"."province_id" FROM "select_related_city" INNER JOIN "select_related_person_visitation" ON ("select_related_city"."id" = "select_related_person_visitation"."city_id") WHERE "select_related_person_visitation"."person_id" IN (1, 2); args=(1, 2) (0.000) SELECT "select_related_province"."id", "select_related_province"."name" FROM "select_related_province" WHERE "select_related_province"."id" IN (1); args=(1,)
>>> [ i.province.name for i in o.customer.visitation.all() ] ['浙江', '浙江']
方法2:
更好的办法是先调用一次select_related()再调用prefetch_related(),最后再select_related()后面的表
olist = Order.objects.select_related("customer").prefetch_related("customer__visitation__province").first() (0.000) SELECT "select_related_order"."id", "select_related_order"."customer_id", "select_related_order"."orderinfo", "select_related_order"."time", "select_related_person"."id", "select_related_person"."firstname", "select_related_person"."lastname", "select_related_person"."hometown_id", "select_related_person"."living_id" FROM "select_related_order" INNER JOIN "select_related_person" ON ("select_related_order"."customer_id" = "select_related_person"."id") ORDER BY "select_related_order"."id" ASC LIMIT 1; args=() (0.000) SELECT ("select_related_person_visitation"."person_id") AS "_prefetch_related_val_person_id", "select_related_city"."id", "select_related_city"."name", "select_related_city"."province_id" FROM "select_related_city" INNER JOIN "select_related_person_visitation" ON ("select_related_city"."id" = "select_related_person_visitation"."city_id") WHERE "select_related_person_visitation"."person_id" IN (1); args=(1,) 获取一个order列表 <Order: Order object (1)>
这样只会有2次SQL查询,Django会先做select_related,之后prefetch_related的时候会利用之前缓存的数据,从而避免了1次额外的SQL查询:
>>> [ c for c in olist.customer.visitation.all() ] [<City: City object (1)>, <City: City object (2)>] >>> [ c.province.name for c in olist.customer.visitation.all() ] ['浙江', '浙江'] >>> set( c.province.name for c in olist.customer.visitation.all() ) {'浙江'}
值得注意的是,可以在调用prefetch_related之前调用select_related,并且Django会按照你想的去做:先select_related,然后利用缓存到的数据prefetch_related。然而一旦prefetch_related已经调用,select_related将不起作用。
小结
- 因为select_related()总是在单次SQL查询中解决问题,而prefetch_related()会对每个相关表进行SQL查询,因此select_related()的效率通常比后者高。
- 鉴于第一条,尽可能的用select_related()解决问题。只有在select_related()不能解决问题的时候再去想prefetch_related()。
- 你可以在一个QuerySet中同时使用select_related()和prefetch_related(),从而减少SQL查询的次数。