MongoDB篇,第四章:数据库知识4
MongoDB 数据库知识4
GridFS 大文件存储
文件的数据库存储
1,在数据库中以 字符串的方式 存储文件在本地的路径;
优点: 节省数据库空间
缺点: 当数据库或者文件位置发生变化时则无法找到文件;
2,将文件以二进制数据的方式存储在数据库中;(GridFS 大文件存储)
优点: 文件和数据库绑定;
缺点:当存储文件大时,空间使用大,提取困难;
GridFS 是mongodb当中存储大文件的一种方案,MongoDB中认为超过16M的文件为大文件;
方案:
将文件存储在MongoDB数据库中,通过两个集合共同完成该文件的存储;
fs.files : 存储文件的相关信息,比如文件名filename, 文件类型 content_type等;
fs.chunks : 实际存储文件内容;以二进制方式分块存储。 将大文件分成多个小块,每个小块占一条文档;
如何存储:
mongofiles -d dbname put file
数据库 要存储的文件
1,查看文件信息; db.fs.files.find()
2,查看具体内容;
db.fs.chunks.find({files_id:ObjectId('xxxxxx')})
fs.chunks 的域 _id
files_id : 值对应于文件在fs.files 集合中的文档的_id 值;
n 分块信息
data 具体文件内容
优点: 存储方便,没有文件个数限制, 方便移植
缺点:读写效率低,只能整体修改不能分块更新;
> > show dbs admin 0.000GB config 0.000GB grid 0.000GB local 0.000GB stu 0.000GB > use grid switched to db grid > show collections fs.chunks fs.files > db.fs.files.find() { "_id" : ObjectId("5b544f000235ac28ba93d4c8"), "chunkSize" : 261120, "uploadDate" : ISODate("2018-07-22T09:31:44.242Z"), "length" : 58, "md5" : "c15048e633b11b58c4c270717426c04a", "filename" : "a.sh" } > show collections fs.chunks fs.files > show dbs admin 0.000GB config 0.000GB grid 0.000GB local 0.000GB stu 0.000GB > use grid switched to db grid > show collections fs.chunks fs.files > db.fs.files.find() { "_id" : ObjectId("5b544f000235ac28ba93d4c8"), "chunkSize" : 261120, "uploadDate" : ISODate("2018-07-22T09:31:44.242Z"), "length" : 58, "md5" : "c15048e633b11b58c4c270717426c04a", "filename" : "a.sh" } { "_id" : ObjectId("5b54500d0235ac28fb90f182"), "chunkSize" : 261120, "uploadDate" : ISODate("2018-07-22T09:36:13.090Z"), "length" : 958, "md5" : "55ddb9cce837265e2f28891a94844b74", "filename" : "anaconda-ks.cfg" } > it no cursor > db.fs.chunks.find() { "_id" : ObjectId("5b544f000235ac28ba93d4c9"), "files_id" : ObjectId("5b544f000235ac28ba93d4c8"), "n" : 0, "data" : BinData(0,"ZWNobyAkMSAkMiAkMyAkNCAkNSAkNiAkNyAkOCAkOSAkezEwfSAkMTEgJDEyICQxMyAkMTQgJDE1Cg==") } { "_id" : ObjectId("5b54500d0235ac28fb90f183"), "files_id" : ObjectId("5b54500d0235ac28fb90f182"), "n" : 0, "data" : 
BinData(0,"I3ZlcnNpb249REVWRUwKIyBTeXN0ZW0gYXV0aG9yaXphdGlvbiBpbmZvcm1hdGlvbgphdXRoIC0tZW5hYmxlc2hhZG93IC0tcGFzc2FsZ289c2hhNTEyCiMgVXNlIENEUk9NIGluc3RhbGxhdGlvbiBtZWRpYQpjZHJvbQojIFVzZSBncmFwaGljYWwgaW5zdGFsbApncmFwaGljYWwKIyBSdW4gdGhlIFNldHVwIEFnZW50IG9uIGZpcnN0IGJvb3QKZmlyc3Rib290IC0tZW5hYmxlCmlnbm9yZWRpc2sgLS1vbmx5LXVzZT1zZGEKIyBLZXlib2FyZCBsYXlvdXRzCmtleWJvYXJkIC0tdmNrZXltYXA9dXMgLS14bGF5b3V0cz0ndXMnCiMgU3lzdGVtIGxhbmd1YWdlCmxhbmcgZW5fVVMuVVRGLTgKCiMgTmV0d29yayBpbmZvcm1hdGlvbgpuZXR3b3JrICAtLWJvb3Rwcm90bz1kaGNwIC0tZGV2aWNlPWVubzE2Nzc3NzM2IC0tb25ib290PW9mZiAtLWlwdjY9YXV0bwpuZXR3b3JrICAtLWhvc3RuYW1lPWxvY2FsaG9zdC5sb2NhbGRvbWFpbgoKIyBSb290IHBhc3N3b3JkCnJvb3RwdyAtLWlzY3J5cHRlZCAkNiQ2cjV4Smx4ZmI4cVNEVVBVJEpoNTVPZW1HOGVoTGJCQXczMTdKUHpBM3ozckhFWG1ZaUdkWnFQZ0gvLk5iRjdJNEJPTHhGU053eFNId2dFUmJ5RThaM3pvV1ZOQ1VTbG53TlhTWEQxCiMgU3lzdGVtIHRpbWV6b25lCnRpbWV6b25lIEFtZXJpY2EvTmV3X1lvcmsgLS1pc1V0YwojIFN5c3RlbSBib290bG9hZGVyIGNvbmZpZ3VyYXRpb24KYm9vdGxvYWRlciAtLWFwcGVuZD0iIGNyYXNoa2VybmVsPWF1dG8iIC0tbG9jYXRpb249bWJyIC0tYm9vdC1kcml2ZT1zZGEKYXV0b3BhcnQgLS10eXBlPWx2bQojIFBhcnRpdGlvbiBjbGVhcmluZyBpbmZvcm1hdGlvbgpjbGVhcnBhcnQgLS1ub25lIC0taW5pdGxhYmVsCgolcGFja2FnZXMKQF5taW5pbWFsCkBjb3JlCmtleGVjLXRvb2xzCgolZW5kCgolYWRkb24gY29tX3JlZGhhdF9rZHVtcCAtLWVuYWJsZSAtLXJlc2VydmUtbWI9J2F1dG8nCgolZW5kCg==") } > db.fs.files.find() { "_id" : ObjectId("5b544f000235ac28ba93d4c8"), "chunkSize" : 261120, "uploadDate" : ISODate("2018-07-22T09:31:44.242Z"), "length" : 58, "md5" : "c15048e633b11b58c4c270717426c04a", "filename" : "a.sh" } { "_id" : ObjectId("5b54500d0235ac28fb90f182"), "chunkSize" : 261120, "uploadDate" : ISODate("2018-07-22T09:36:13.090Z"), "length" : 958, "md5" : "55ddb9cce837265e2f28891a94844b74", "filename" : "anaconda-ks.cfg" } > db.fs.chunks.find({"files_id" : ObjectId("5b544f000235ac28ba93d4c8")}) { "_id" : ObjectId("5b544f000235ac28ba93d4c9"), "files_id" : ObjectId("5b544f000235ac28ba93d4c8"), "n" : 0, "data" : 
BinData(0,"ZWNobyAkMSAkMiAkMyAkNCAkNSAkNiAkNyAkOCAkOSAkezEwfSAkMTEgJDEyICQxMyAkMTQgJDE1Cg==") } > db.fs.chunks.find({"files_id" : ObjectId("5b54500d0235ac28fb90f182")}) { "_id" : ObjectId("5b54500d0235ac28fb90f183"), "files_id" : ObjectId("5b54500d0235ac28fb90f182"), "n" : 0, "data" : BinData(0,"I3ZlcnNpb249REVWRUwKIyBTeXN0ZW0gYXV0aG9yaXphdGlvbiBpbmZvcm1hdGlvbgphdXRoIC0tZW5hYmxlc2hhZG93IC0tcGFzc2FsZ289c2hhNTEyCiMgVXNlIENEUk9NIGluc3RhbGxhdGlvbiBtZWRpYQpjZHJvbQojIFVzZSBncmFwaGljYWwgaW5zdGFsbApncmFwaGljYWwKIyBSdW4gdGhlIFNldHVwIEFnZW50IG9uIGZpcnN0IGJvb3QKZmlyc3Rib290IC0tZW5hYmxlCmlnbm9yZWRpc2sgLS1vbmx5LXVzZT1zZGEKIyBLZXlib2FyZCBsYXlvdXRzCmtleWJvYXJkIC0tdmNrZXltYXA9dXMgLS14bGF5b3V0cz0ndXMnCiMgU3lzdGVtIGxhbmd1YWdlCmxhbmcgZW5fVVMuVVRGLTgKCiMgTmV0d29yayBpbmZvcm1hdGlvbgpuZXR3b3JrICAtLWJvb3Rwcm90bz1kaGNwIC0tZGV2aWNlPWVubzE2Nzc3NzM2IC0tb25ib290PW9mZiAtLWlwdjY9YXV0bwpuZXR3b3JrICAtLWhvc3RuYW1lPWxvY2FsaG9zdC5sb2NhbGRvbWFpbgoKIyBSb290IHBhc3N3b3JkCnJvb3RwdyAtLWlzY3J5cHRlZCAkNiQ2cjV4Smx4ZmI4cVNEVVBVJEpoNTVPZW1HOGVoTGJCQXczMTdKUHpBM3ozckhFWG1ZaUdkWnFQZ0gvLk5iRjdJNEJPTHhGU053eFNId2dFUmJ5RThaM3pvV1ZOQ1VTbG53TlhTWEQxCiMgU3lzdGVtIHRpbWV6b25lCnRpbWV6b25lIEFtZXJpY2EvTmV3X1lvcmsgLS1pc1V0YwojIFN5c3RlbSBib290bG9hZGVyIGNvbmZpZ3VyYXRpb24KYm9vdGxvYWRlciAtLWFwcGVuZD0iIGNyYXNoa2VybmVsPWF1dG8iIC0tbG9jYXRpb249bWJyIC0tYm9vdC1kcml2ZT1zZGEKYXV0b3BhcnQgLS10eXBlPWx2bQojIFBhcnRpdGlvbiBjbGVhcmluZyBpbmZvcm1hdGlvbgpjbGVhcnBhcnQgLS1ub25lIC0taW5pdGxhYmVsCgolcGFja2FnZXMKQF5taW5pbWFsCkBjb3JlCmtleGVjLXRvb2xzCgolZW5kCgolYWRkb24gY29tX3JlZGhhdF9rZHVtcCAtLWVuYWJsZSAtLXJlc2VydmUtbWI9J2F1dG8nCgolZW5kCg==") } >
游标
通过获取操作数据库的返回结果,得到返回结果的对象。 通过该游标对象可以进一步得到数据库内容;
为何使用游标?
1,防止网络拥塞,造成数据传输慢;
2,避免用户解析带来的体验差,可以进行后端解析;
使用方法:
var cursor = db.class0.find() 创建游标cursor
cursor.hasNext() 查看是否有下一条文档
cursor.next() 获取下一条文档内容
###################################################
通过python操作mongodb数据库
接口:pymongo
安装:pip3 install pymongo
增 、删、改、查、索引、聚合、文件的存储;
cat mongodb_1.py #!/usr/local/bin/python3 from pymongo import MongoClient import pymongo conn = MongoClient('localhost',27017) db = conn.stu my_set = db.class0 print(my_set) ######## ]# cat mongodb_2.py from pymongo import MongoClient import pymongo conn = MongoClient('localhost',27017) db = conn['stu'] my_set = db['class0'] print(my_set) ################# python3 mongodb_1.py Collection(Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'stu'), 'class0')
操作步骤
1. 创建mongodb数据库连接对象
conn = pymongo.MongoClient('localhost',27017)
2. 生成要操作的数据库对象 (__getitem__)
db = conn.stu
db = conn['stu']
3. 获取集合对象
myset = db.class0
myset = db['class0']
4. 通过集合对象操作mongodb数据库
5. 关闭数据库连接
conn.close()
#########
增删改查索引聚合文件操作
(1)#增加
insert() insert_many() insert_one() save()
(2)#删除操作
remove()
(3)插入数据
insert() 参数用法同mongoshell中insert
insert_many() insert_one()
save()
(4)查找操作
find()
功能 : 查找数据库内容
参数 : 同mongo shell find()
返回值 : 返回一个结果游标cursor, 可迭代的游标对象,通过for循环取值;
* 在pymongo中使用操作符的方法和在mongoshell中一样,只需要加引号以字符串的方式给出
find_one() 返回一个字典
find({'age':{'$gt':20}})
find({'$or':[{'name':'xx'},{'age':{'$lt':20}}]})
(5)cursor属性函数
游标对象属性函数
next() 取下一个文档
limit() 显示多少文档
skip() 跳过多少文档
count() 统计多少文档
sort() 排序
pymongo :sort([('age',-1),('name',1)])
mongo shell : sort({age:-1,name:1})
* 当游标使用了next或for取值后,就不能再进行limit、skip或者sort操作了
(6)修改操作
update(query,update,upsert=False,multi=False)
update_many() 当匹配到多个文档时全部修改
update_one() 只能修改第一条匹配到的文档
(7)删除操作
remove(query,multi = True)
multi默认为True表示删除所有符合条件的数据
设置为False表示只删除一条
--------------------
索引
创建单一索引
ensure_index()
#index = my_set.ensure_index('name')
创建复合索引 (1表示升序,-1表示降序)
create_indexes()
功能: 创建多个索引
1,创建索引条件对象 from pymongo import IndexModel
#index1 = IndexModel([('name',1),('king',-1)])
index2 = IndexModel([('king',1)])
2, 将索引对象使用create_indexes生成索引
#indexes = my_set.create_indexes([index1])
#print(indexes)
3, 创建唯一索引
index = cls.ensure_index('name', unique = True, sparse = True)
(unique = True 唯一索引参数)
(sparse = True 创建稀疏索引)
4, list_indexes()
功能: 查看指定集合中的索引 (getIndexes)
返回值: 迭代对象, 每个值为一个索引信息
5, drop_index()
功能:删除一个索引
参数:索引的名称(索引的名称不允许重复)
drop_indexes()
删除指定集合中所有索引
6,聚合
aggregate([])
功能: 实现mongo的聚合操作
参数: 和mongoshell中写法一致,在使用聚合操作符时用引号变为字符串即可;
返回值 : 返回一个迭代器同find
练习1
# vim mongodb_1.py #my_set.insert_many([{'name':'a1','king':'huang1'},{'name':'a2','king':'huang2'}]) #my_set.insert_one({'name':'b1','king':'huang3'}) #my_set.save({'name':'b2','king':'huang4'}) #my_set.remove({'name':'a1','king':'huang1'}) #my_set.remove({'name':'a2'}) #my_set.remove() cursor = my_set.find() print(cursor) #for i in cursor: # print(i['name'],'-----',i['king']) # print(i) #cls = db.class0 #for i in cls.find({'age':{'$lt':20}}): # print(i['name'],'-----',i['age']) print(cursor.count()) #for i in cursor.skip(2).limit(3): # print(i) #for i in cursor.sort([('name',1)]): # print(i) #my_set.update({'name':'b2'},{'$set':{'name':'lili'}}) #my_set.update({'name':'aa1'},{'$set':{'king':'huang001'}},upsert = True) #my_set.update({'king':'huang4'},{'$set':{'king_name':'huang002'}},upsert = False, multi = True) #my_set.update_many({'name':'a1'},{'$set':{'name':'b1','king':'huang001'}}) #my_set.update_one({'name':'b1'},{'$set':{'name':'cici','king':'huang003'}}) my_set.find_one_and_delete({'king':'huang003'})
练习2
# cat mongodb_3.py #!/usr/local/bin/python3 from pymongo import MongoClient,IndexModel conn = MongoClient('localhost',27017) db = conn.stu my_set = db.class3 cls = db.class0 #index = my_set.ensure_index('name') #print(index) #index = my_set.ensure_index([('name',1),('king',1)]) #print(index) #index1 = IndexModel([('name',1),('king',-1)]) #indexes = my_set.create_indexes([index1]) #print(indexes) #index3 = cls.ensure_index('name',unique = True) #print(index3) #for i in my_set.list_indexes(): # print(i) #my_set.drop_index('name_1_king_-1') #my_set.drop_indexes() #print('#'*50) #for i in my_set.list_indexes(): # print(i) #l = [{'$group':{'_id':'$king','count':{'$sum':1}}}] l = [{'$group':{'_id':'$king','count':{'$sum':1}}},\ {'$match':{'count':{'$gt':1}}}] cursor = my_set.aggregate(l) for i in cursor: print(i)
练习3
# cat grid.py #!/usr/local/bin/python3 from pymongo import MongoClient import gridfs conn = MongoClient('localhost',27017) db = conn.grid fs = gridfs.GridFS(db) files = fs.find() print(files) print(files.count()) #for file in files: # print(file.filename) for file in files: with open(file.filename,'wb') as f: while True: data = file.read(64) if not data: break f.write(data) conn.close()
练习4
# cat savefile.py #!/usr/local/bin/python3 from pymongo import MongoClient import bson.binary conn = MongoClient('localhost',27017) db = conn.savefile my_set = db.image #file = '/root/mongodb/anaconda-ks.png' #f = open(file, 'rb') #dic = dict(content = bson.binary.Binary(f.read()),filename = 'img.png') #""" #{'content':bson.binary.Binary(f.read()),'filename':'img.png'} #""" #my_set.save(dic) data = my_set.find_one({'filename':'img.png'}) with open('img.png','wb') as f: f.write(data['content']) conn.close()
xxx
from pymongo import MongoClient #创建数据库链接 conn = MongoClient("localhost",27017) #创建数据库对象 db = conn.stu # db = conn['stu'] #获得集合对象 myset = db.class4 # myset = db['class4'] #数据库增删改查 # print(dir(myset)) #插入操作 # myset.insert({'name':'张铁林','King':'乾隆'}) # myset.insert([{'name':'张国立','King':'康熙'},\ # {'name':'陈道明','King':'康熙'}]) # myset.insert_many([{'name':'唐国强','King':'雍正'},\ # {'name':'陈建斌','King':'雍正'}]) # myset.insert_one({'name':'郑少秋','King':'乾隆'}) # myset.save({'name':'吴奇隆','King':'四爷'}) #查找操作 # cursor = myset.find({},{'_id':0}) #i为每条文档转换的字典 # for i in cursor: # print(i['name'],"----",i['King']) # myset = db.class1 # cursor = myset.find({'age':{'$gt':17}},{'_id':0}) # for i in cursor: # print(i) # print(cursor.next()) # print(cursor.count()) # print(cursor.limit(2)) # print(cursor.skip(2)) # for i in cursor.sort([('age',-1),('name',1)]): # print(i) # dic = {'$or':[{'gender':'m'},{'age':{'$lt':18}}]} # data = myset.find_one(dic,{'_id':0}) # print(data) #修改操作 # myset.update({'name':'张国立'},\ # {'$set':{'name':'国立'}}) #当要修改文档不存在的时候插入 # myset.update({'name':'冰冰'},\ # {'$set':{'King':'武则天'}},upsert = True) #可以同时修改多条文档 # myset.update({'King':'康熙'},\ # {'$set':{"king_name":'玄烨'}},multi = True) #删除操作 # myset.remove({'name':'冰冰'}) # myset.remove({'King':'康熙'},multi = False) #查找并删除.返回查找到的文档字典 print\ (myset.find_one_and_delete({'King':'四爷'})) #关闭数据库链接 conn.close() ################ from pymongo import MongoClient conn = MongoClient("localhost",27017) db = conn.stu myset = db.class4 #索引操作 # index = myset.ensure_index('name') # print(index) #返回索引名称 #创建复合索引 # index = myset.ensure_index\ # ([('name',1),('King',-1)]) #创建唯一索引稀疏索引 # index = myset.ensure_index\ # ('King',unique = True,sparse = True) #查看当前索引 # for i in myset.list_indexes(): # print(i) # #删除一个索引 # myset.drop_index('name_1') # #删除所有索引 # myset.drop_indexes() #聚合操作 l = [{'$group':{'_id':'$King','count':{'$sum':1}}}, {'$match':{'count':{'$gt':1}}} ] cursor = myset.aggregate(l) for i in cursor: print(i) conn.close() 
################## from pymongo import MongoClient #和pymongo 绑定 import gridfs conn = MongoClient('localhost',27017) db = conn.grid #获取gridfs 对象 #fs拥有fs.files fs.chunks两者的属性 fs = gridfs.GridFS(db) #生成文件游标 files = fs.find() #获取每一个文件 for file in files: print(file.filename) if file.filename == 'file.jpg': with open(file.filename,'wb') as f: #从数据库读取文件 data = file.read() #写入到本地 f.write(data) conn.close() ############### from pymongo import MongoClient #pymongo 附带 import bson.binary conn = MongoClient('localhost',27017) db = conn.images myset = db.img # #存储 # f = open('file.jpg','rb') # #转换为mongo能存储的格式 # content = bson.binary.Binary(f.read()) # myset.insert\ # ({'filename':'file.jpg','data':content}) #取出文件 data = myset.find_one({'filename':'file.jpg'}) #读取内容写入本地 with open(data['filename'],'wb') as f: f.write(data['data']) conn.close()