MongoDB 聚合操作(aggregate)

聚合操作

聚合函数:db.collection.aggregate()

  • 语法: db.collection.aggregate(pipeline, options)
    • <pipeline> 文档定义了操作中使用的聚合管道阶段和聚合操作符
    • <options> 文档声明了一些和聚合操作的参数
    • 返回值是游标

聚合表达式

  1. 字段路径表达式

    $<field>: 使用 $ 来表示路径

    $<field>.<sub-field> 使用 $ 和 . 来指示内嵌文档字段路径

  2. 系统变量表达式

    $$<variable>: 使用 $$ 指示系统变量

    $$CURRENT: 指示管道中当前操作的文档, $$CURRENT.<field>$<field> 是等效的

  3. 常量表达式

    $literal: <value>: 指示常量

    $literal: "$name": 指示常量字符串 "\(name"(这里的\)被当做常量处理,而不是字段路径表达式)

聚合管道阶段

$project: 对输入文档进行再次投影

  • 可以用了灵活的的控制输出文档的格式

  • 也可用了剔除不相关的字段,以优化聚合管道操作的性能

    > db.accounts.find()
    { "_id" : ObjectId("634d622b5799d43c905c7195"), "name" : { "firstName" : "alice", "lastName" : "wong" }, "balance" : 20 }
    { "_id" : ObjectId("634d622b5799d43c905c7196"), "name" : { "firstName" : "bob", "lastName" : "yang" }, "balance" : 20 }
    >
    // 对账户文档进行重新映射
    > db.accounts.aggregate([
    ...     {
    ...         $project: {
    ...             _id: 0,
    ...             balance: 1,
    ...             clientName: "$name.firstName"
    ...         }
    ...     }
    ... ])
    { "balance" : 20, "clientName" : "alice" }
    { "balance" : 20, "clientName" : "bob" }
    
    > db.accounts.aggregate([
    ...     {
    ...         $project: {
    ...             _id: 0,
    ...             balance: 1,
    ...             nameArrage: ["$name.firstName", "$name.middleName", "$name.lastName"]
    ...         }
    ...     }
    ... ])
    { "balance" : 20, "nameArrage" : [ "alice", null, "wong" ] }
    { "balance" : 20, "nameArrage" : [ "bob", null, "yang" ] }
    

$match: 对输入文档进行筛选

  • $match 中使用的文档筛选语法,和读取文档时的筛选语法相同

  • 应该尽量在聚合管道开始的时候使用 $match 这样可以减少后续阶段中需要处理的文档数量,优化聚合操作的性能

    > db.accounts.aggregate([
    ...     {
    ...         $match: {
    ...             $or: [
    ...                 {balance: {$gt: 10, $lt: 30}},
    ...                 {"name.lastName": "yang"}
    ...             ]
    ...         }
    ...     },
    ...     {
    ...         $project: {
    ...             _id: 0
    ...         }
    ...     }
    ... ])
    { "name" : { "firstName" : "alice", "lastName" : "wong" }, "balance" : 20 }
    { "name" : { "firstName" : "bob", "lastName" : "yang" }, "balance" : 20 }
    

$limit: 筛选出管道中前 N 篇文档

$skip: 跳过管道内前 N 篇文档

> db.accounts.aggregate([
...     {
...         $limit: 1
...     }
... ])
{ "_id" : ObjectId("634d622b5799d43c905c7195"), "name" : { "firstName" : "alice", "lastName" : "wong" }, "balance" : 20 }
>
>
> db.accounts.aggregate([
...     {
...         $skip: 1
...     }
... ])
{ "_id" : ObjectId("634d622b5799d43c905c7196"), "name" : { "firstName" : "bob", "lastName" : "yang" }, "balance" : 20 }
>

$unwind: 展开输入文档中的数组字段

  • 就是把指定数据的元素单独拿出来,与其他文档字段组成新的文档,一变多
  • 默认展开文档时,如果文档中指定数组字段不存在,或者为null, 或者为空[], 将会被过滤掉
  • 如果展示数组时保留空数组,或者不存在数组字段的文档, 同(preserveNullAndEmptyArrays: true)实现
    > db.accounts.find()
    { "_id" : ObjectId("634d622b5799d43c905c7195"), "name" : { "firstName" : "alice", "lastName" : "wong" }, "balance" : 20, "currency" : [ "CNY", "USD" ] }
    { "_id" : ObjectId("634d622b5799d43c905c7196"), "name" : { "firstName" : "bob", "lastName" : "yang" }, "balance" : 20, "currency" : "GBP" }
    >
    >
    > db.accounts.aggregate([
    ...     {
    ...         $unwind: {
    ...             path: "$currency"
    ...         }
    ...     }
    ... ])
    { "_id" : ObjectId("634d622b5799d43c905c7195"), "name" : { "firstName" : "alice", "lastName" : "wong" }, "balance" : 20, "currency" : "CNY" }
    { "_id" : ObjectId("634d622b5799d43c905c7195"), "name" : { "firstName" : "alice", "lastName" : "wong" }, "balance" : 20, "currency" : "USD" }
    { "_id" : ObjectId("634d622b5799d43c905c7196"), "name" : { "firstName" : "bob", "lastName" : "yang" }, "balance" : 20, "currency" : "GBP" }
    >
    
    // 展开数组时添加元素的位置
    > db.accounts.aggregate([
    ...     {
    ...         $unwind: {
    ...             path: "$currency",
    ...             includeArrayIndex: "ccyIndex"
    ...         }
    ...     }
    ... ])
    { "_id" : ObjectId("634d622b5799d43c905c7195"), "name" : { "firstName" : "alice", "lastName" : "wong" }, "balance" : 20, "currency" : "CNY", "ccyIndex" : NumberLong(0) }
    { "_id" : ObjectId("634d622b5799d43c905c7195"), "name" : { "firstName" : "alice", "lastName" : "wong" }, "balance" : 20, "currency" : "USD", "ccyIndex" : NumberLong(1) }
    { "_id" : ObjectId("634d622b5799d43c905c7196"), "name" : { "firstName" : "bob", "lastName" : "yang" }, "balance" : 20, "currency" : "GBP", "ccyIndex" : null }
    >
    
    // 默认展开文档时,如果文档中指定数组字段不存在,或者为null, 或者为空[], 将会被过滤掉
    > db.accounts.find()
    { "_id" : ObjectId("634d622b5799d43c905c7195"), "name" : { "firstName" : "alice", "lastName" : "wong" }, "balance" : 20, "currency" : [ "CNY", "USD" ] }
    { "_id" : ObjectId("634d622b5799d43c905c7196"), "name" : { "firstName" : "bob", "lastName" : "yang" }, "balance" : 20, "currency" : "GBP" }
    { "_id" : ObjectId("634d6ec45799d43c905c7197"), "name" : { "firstName" : "charlie", "lastName" : "gordon" }, "balance" : 40 }
    { "_id" : ObjectId("634d6ec45799d43c905c7198"), "name" : { "firstName" : "david", "lastName" : "wu" }, "balance" : 60, "currency" : [ ] }
    { "_id" : ObjectId("634d6ec45799d43c905c7199"), "name" : { "firstName" : "eddie", "lastName" : "kim" }, "balance" : 100, "currency" : null }
    >
    > db.accounts.aggregate([
    ...     {
    ...         $unwind: {
    ...             path: "$currency"
    ...         }
    ...     }
    ... ])
    { "_id" : ObjectId("634d622b5799d43c905c7195"), "name" : { "firstName" : "alice", "lastName" : "wong" }, "balance" : 20, "currency" : "CNY" }
    { "_id" : ObjectId("634d622b5799d43c905c7195"), "name" : { "firstName" : "alice", "lastName" : "wong" }, "balance" : 20, "currency" : "USD" }
    { "_id" : ObjectId("634d622b5799d43c905c7196"), "name" : { "firstName" : "bob", "lastName" : "yang" }, "balance" : 20, "currency" : "GBP" }
    
    // 如果展示数组时保留空数组,或者不存在数组字段的文档, 同(preserveNullAndEmptyArrays: true)实现
    > db.accounts.aggregate([
    ...     {
    ...         $unwind: {
    ...             path: "$currency",
    ...             preserveNullAndEmptyArrays: true
    ...         }
    ...     }
    ... ])
    { "_id" : ObjectId("634d622b5799d43c905c7195"), "name" : { "firstName" : "alice", "lastName" : "wong" }, "balance" : 20, "currency" : "CNY" }
    { "_id" : ObjectId("634d622b5799d43c905c7195"), "name" : { "firstName" : "alice", "lastName" : "wong" }, "balance" : 20, "currency" : "USD" }
    { "_id" : ObjectId("634d622b5799d43c905c7196"), "name" : { "firstName" : "bob", "lastName" : "yang" }, "balance" : 20, "currency" : "GBP" }
    { "_id" : ObjectId("634d6ec45799d43c905c7197"), "name" : { "firstName" : "charlie", "lastName" : "gordon" }, "balance" : 40 }
    { "_id" : ObjectId("634d6ec45799d43c905c7198"), "name" : { "firstName" : "david", "lastName" : "wu" }, "balance" : 60 }
    { "_id" : ObjectId("634d6ec45799d43c905c7199"), "name" : { "firstName" : "eddie", "lastName" : "kim" }, "balance" : 100, "currency" : null }
    

$sort: 对输入文档进行排序

> db.accounts.aggregate([
...     {
...         $sort: {balance: -1, "name.LastName": 1}
...     }
... ])
{ "_id" : ObjectId("634d6ec45799d43c905c7199"), "name" : { "firstName" : "eddie", "lastName" : "kim" }, "balance" : 100, "currency" : null }
{ "_id" : ObjectId("634d6ec45799d43c905c7198"), "name" : { "firstName" : "david", "lastName" : "wu" }, "balance" : 60, "currency" : [ ] }
{ "_id" : ObjectId("634d6ec45799d43c905c7197"), "name" : { "firstName" : "charlie", "lastName" : "gordon" }, "balance" : 40 }
{ "_id" : ObjectId("634d622b5799d43c905c7195"), "name" : { "firstName" : "alice", "lastName" : "wong" }, "balance" : 20, "currency" : [ "CNY", "USD" ] }
{ "_id" : ObjectId("634d622b5799d43c905c7196"), "name" : { "firstName" : "bob", "lastName" : "yang" }, "balance" : 20, "currency" : "GBP" }

$lookup: 类似于join操作,对其他 collection 进行查询,结果写入管道文档中

官方文档地址

语法1:

{
   $lookup:
     {
       from: <collection to join>,
       localField: <field from the input documents>,
       foreignField: <field from the documents of the "from" collection>,
       as: <output array field>
     }
}

语法2:

{
   $lookup:
      {
         from: <joined collection>,
         let: { <var_1>: <expression>, …, <var_n>: <expression> },
         pipeline: [ <pipeline to run on joined collection> ],
         as: <output array field>
      }
} 
  • pipeline: 对查询集合中的文档使用聚合阶段进行处理
  • let(可参数): 对查询集合中的文档使用聚合阶段进行处理时,如果需要参考管道文档中的字段,则必须使用let参数先对字段进行声明

示例1:

> db.forex.find()
{ "_id" : ObjectId("634d73585799d43c905c719a"), "ccy" : "USD", "rate" : 6.91, "data" : ISODate("2018-12-12T00:00:00Z") }
{ "_id" : ObjectId("634d73585799d43c905c719b"), "ccy" : "GBP", "rate" : 8.72, "data" : ISODate("2018-12-12T00:00:00Z") }
{ "_id" : ObjectId("634d73585799d43c905c719c"), "ccy" : "CNY", "rate" : 1, "data" : ISODate("2018-12-12T00:00:00Z") }
>
> db.account.find()
> db.accounts.find()
{ "_id" : ObjectId("634d622b5799d43c905c7195"), "name" : { "firstName" : "alice", "lastName" : "wong" }, "balance" : 20, "currency" : [ "CNY", "USD" ] }
{ "_id" : ObjectId("634d622b5799d43c905c7196"), "name" : { "firstName" : "bob", "lastName" : "yang" }, "balance" : 20, "currency" : "GBP" }
{ "_id" : ObjectId("634d6ec45799d43c905c7197"), "name" : { "firstName" : "charlie", "lastName" : "gordon" }, "balance" : 40 }
{ "_id" : ObjectId("634d6ec45799d43c905c7198"), "name" : { "firstName" : "david", "lastName" : "wu" }, "balance" : 60, "currency" : [ ] }
{ "_id" : ObjectId("634d6ec45799d43c905c7199"), "name" : { "firstName" : "eddie", "lastName" : "kim" }, "balance" : 100, "currency" : null }
>
> db.accounts.aggregate([
...     {
...         $lookup: {
...             from: "forex",
...             localField: "currency",
...             foreignField: "ccy",
...             as: "forexData"
...         }
...     }
... ])
{ "_id" : ObjectId("634d622b5799d43c905c7195"), "name" : { "firstName" : "alice", "lastName" : "wong" }, "balance" : 20, "currency" : [ "CNY", "USD" ], "forexData" : [ { "_id" : ObjectId("634d73585799d43c905c719a"), "ccy" : "USD", "rate" : 6.91, "data" : ISODate("2018-12-12T00:00:00Z") }, { "_id" : ObjectId("634d73585799d43c905c719c"), "ccy" : "CNY", "rate" : 1, "data" : ISODate("2018-12-12T00:00:00Z") } ] }
{ "_id" : ObjectId("634d622b5799d43c905c7196"), "name" : { "firstName" : "bob", "lastName" : "yang" }, "balance" : 20, "currency" : "GBP", "forexData" : [ { "_id" : ObjectId("634d73585799d43c905c719b"), "ccy" : "GBP", "rate" : 8.72, "data" : ISODate("2018-12-12T00:00:00Z") } ] }
{ "_id" : ObjectId("634d6ec45799d43c905c7197"), "name" : { "firstName" : "charlie", "lastName" : "gordon" }, "balance" : 40, "forexData" : [ ] }
{ "_id" : ObjectId("634d6ec45799d43c905c7198"), "name" : { "firstName" : "david", "lastName" : "wu" }, "balance" : 60, "currency" : [ ], "forexData" : [ ] }
{ "_id" : ObjectId("634d6ec45799d43c905c7199"), "name" : { "firstName" : "eddie", "lastName" : "kim" }, "balance" : 100, "currency" : null, "forexData" : [ ] }

> db.accounts.aggregate([
...     {
...         $unwind: {
...             path: "$currency"
...         }
...     },
...     {
...         $lookup: {
...             from: "forex",
...             localField: "currency",
...             foreignField: "ccy",
...             as: "forexData"
...         }
...     }
... ])
{ "_id" : ObjectId("634d622b5799d43c905c7195"), "name" : { "firstName" : "alice", "lastName" : "wong" }, "balance" : 20, "currency" : "CNY", "forexData" : [ { "_id" : ObjectId("634d73585799d43c905c719c"), "ccy" : "CNY", "rate" : 1, "data" : ISODate("2018-12-12T00:00:00Z") } ] }
{ "_id" : ObjectId("634d622b5799d43c905c7195"), "name" : { "firstName" : "alice", "lastName" : "wong" }, "balance" : 20, "currency" : "USD", "forexData" : [ { "_id" : ObjectId("634d73585799d43c905c719a"), "ccy" : "USD", "rate" : 6.91, "data" : ISODate("2018-12-12T00:00:00Z") } ] }
{ "_id" : ObjectId("634d622b5799d43c905c7196"), "name" : { "firstName" : "bob", "lastName" : "yang" }, "balance" : 20, "currency" : "GBP", "forexData" : [ { "_id" : ObjectId("634d73585799d43c905c719b"), "ccy" : "GBP", "rate" : 8.72, "data" : ISODate("2018-12-12T00:00:00Z") } ] }

示例2:

# 将特定日期外汇汇率写入银行账户文档 (不相关查询 3.6版本开始支持)
 db.forex.find()
{ "_id" : ObjectId("634d73585799d43c905c719a"), "ccy" : "USD", "rate" : 6.91, "data" : ISODate("2018-12-13T00:00:00Z") }
{ "_id" : ObjectId("634d73585799d43c905c719b"), "ccy" : "GBP", "rate" : 8.72, "data" : ISODate("2018-12-12T00:00:00Z") }
{ "_id" : ObjectId("634d73585799d43c905c719c"), "ccy" : "CNY", "rate" : 1, "data" : ISODate("2018-12-13T00:00:00Z") }
>
> db.accounts.find()
{ "_id" : ObjectId("634d622b5799d43c905c7195"), "name" : { "firstName" : "alice", "lastName" : "wong" }, "balance" : 20, "currency" : [ "CNY", "USD" ] }
{ "_id" : ObjectId("634d622b5799d43c905c7196"), "name" : { "firstName" : "bob", "lastName" : "yang" }, "balance" : 20, "currency" : "GBP" }
{ "_id" : ObjectId("634d6ec45799d43c905c7197"), "name" : { "firstName" : "charlie", "lastName" : "gordon" }, "balance" : 40 }
{ "_id" : ObjectId("634d6ec45799d43c905c7198"), "name" : { "firstName" : "david", "lastName" : "wu" }, "balance" : 60, "currency" : [ ] }
{ "_id" : ObjectId("634d6ec45799d43c905c7199"), "name" : { "firstName" : "eddie", "lastName" : "kim" }, "balance" : 100, "currency" : null }
>
> db.accounts.aggregate([
...     {
...         $lookup: {
...             from: "forex",
...             pipeline: [
...                 {
...                     $match: {
...                         data: new Date("2018-12-12")
...                     }
...                 }
...             ],
...             as: "forexData"
...         }
...     }
... ])
{ "_id" : ObjectId("634d622b5799d43c905c7195"), "name" : { "firstName" : "alice", "lastName" : "wong" }, "balance" : 20, "currency" : [ "CNY", "USD" ], "forexData" : [ { "_id" : ObjectId("634d73585799d43c905c719b"), "ccy" : "GBP", "rate" : 8.72, "data" : ISODate("2018-12-12T00:00:00Z") } ] }
{ "_id" : ObjectId("634d622b5799d43c905c7196"), "name" : { "firstName" : "bob", "lastName" : "yang" }, "balance" : 20, "currency" : "GBP", "forexData" : [ { "_id" : ObjectId("634d73585799d43c905c719b"), "ccy" : "GBP", "rate" : 8.72, "data" : ISODate("2018-12-12T00:00:00Z") } ] }
{ "_id" : ObjectId("634d6ec45799d43c905c7197"), "name" : { "firstName" : "charlie", "lastName" : "gordon" }, "balance" : 40, "forexData" : [ { "_id" : ObjectId("634d73585799d43c905c719b"), "ccy" : "GBP", "rate" : 8.72, "data" : ISODate("2018-12-12T00:00:00Z") } ] }
{ "_id" : ObjectId("634d6ec45799d43c905c7198"), "name" : { "firstName" : "david", "lastName" : "wu" }, "balance" : 60, "currency" : [ ], "forexData" : [ { "_id" : ObjectId("634d73585799d43c905c719b"), "ccy" : "GBP", "rate" : 8.72, "data" : ISODate("2018-12-12T00:00:00Z") } ] }
{ "_id" : ObjectId("634d6ec45799d43c905c7199"), "name" : { "firstName" : "eddie", "lastName" : "kim" }, "balance" : 100, "currency" : null, "forexData" : [ { "_id" : ObjectId("634d73585799d43c905c719b"), "ccy" : "GBP", "rate" : 8.72, "data" : ISODate("2018-12-12T00:00:00Z") } ] }
>

# 将特定日期的外汇汇率写入余额大于50的银行账户

> db.accounts.aggregate([
...     {
...         $lookup: {
...             from: "forex",
...             let: {bal: "$balance"},
...             pipeline: [
...                 { $match:
...                     { $expr:
...                         { $and:
...                             [
...                                 {$eq: ["$data", new Date("2018-12-12")]},
...                                 {$gt: ["$$bal", 50]}
...                             ]
...
...                         }
...
...                     }
...                 }
...             ],
...             as: "forexData"
...         }
...     }
... ])
{ "_id" : ObjectId("634d622b5799d43c905c7195"), "name" : { "firstName" : "alice", "lastName" : "wong" }, "balance" : 20, "currency" : [ "CNY", "USD" ], "forexData" : [ ] }
{ "_id" : ObjectId("634d622b5799d43c905c7196"), "name" : { "firstName" : "bob", "lastName" : "yang" }, "balance" : 20, "currency" : "GBP", "forexData" : [ ] }
{ "_id" : ObjectId("634d6ec45799d43c905c7197"), "name" : { "firstName" : "charlie", "lastName" : "gordon" }, "balance" : 40, "forexData" : [ ] }
{ "_id" : ObjectId("634d6ec45799d43c905c7198"), "name" : { "firstName" : "david", "lastName" : "wu" }, "balance" : 60, "currency" : [ ], "forexData" : [ { "_id" : ObjectId("634d73585799d43c905c719b"), "ccy" : "GBP", "rate" : 8.72, "data" : ISODate("2018-12-12T00:00:00Z") } ] }
{ "_id" : ObjectId("634d6ec45799d43c905c7199"), "name" : { "firstName" : "eddie", "lastName" : "kim" }, "balance" : 100, "currency" : null, "forexData" : [ { "_id" : ObjectId("634d73585799d43c905c719b"), "ccy" : "GBP", "rate" : 8.72, "data" : ISODate("2018-12-12T00:00:00Z") } ] }

$group: 对输入文档进行分组

官方文档地址

语法:

{
  $group:
    {
      _id: <expression>, // Group key
      <field1>: { <accumulator1> : <expression1> },
      ...
    }
 }
  • _id: 定义分组规则, 就是通过那个字段进行分组
  • <field1>: 可以使用聚合操作符来定义新字段(求和,平均 ...)
  • $group: 可以返回文档中某一字段的所有(不重复)值
  • 使用聚合操作符计算分组聚合值
  • 使用聚合操作符计算所有文档的聚合值(_id: null), 相当于只分了一组
  • 使用聚合操作符创建数组字段,并将指定字段,push到数组中
# 按照交易货币来分组交易记录(可以返回文档中某一字段的所有(不重复)值)
> db.transactions.find()
{ "_id" : ObjectId("635d46341b8b77383133f015"), "symbol" : "600519", "qty" : 100, "price" : 567.4, "currency" : "CNY" }
{ "_id" : ObjectId("635d46341b8b77383133f016"), "symbol" : "AMZN", "qty" : 1, "price" : 1377.5, "currency" : "USD" }
{ "_id" : ObjectId("635d46341b8b77383133f017"), "symbol" : "AAPL", "qty" : 2, "price" : 150.7, "currency" : "USD" }
>
> db.transactions.aggregate([
...     {
...         $group: {
...             _id: "$currency"
...         }
...     }
... ])
{ "_id" : "USD" }
{ "_id" : "CNY" }

# 使用聚合操作符计算分组聚合值
> db.transactions.aggregate([
...     {
...         $group: {
...             _id: "$currency",
...             totalQty: {$sum: "$qty"},
...             totalNotional: {$sum: { $multiply: ["$price", "$qty"]}},
...             avgPrice: {$avg: "$price"},
...             count: {$sum: 1},
...             maxNotional: {$max: {$multiply: ["$price", "$qty"]}},
...             minNotional: {$min: {$multiply: ["$price", "$qty"]}}
...         }
...     }
... ])
{ "_id" : "USD", "totalQty" : 3, "totalNotional" : 1678.9, "avgPrice" : 764.1, "count" : 2, "maxNotional" : 1377.5, "minNotional" : 301.4 }
{ "_id" : "CNY", "totalQty" : 100, "totalNotional" : 56740, "avgPrice" : 567.4, "count" : 1, "maxNotional" : 56740, "minNotional" : 56740 }

# 使用聚合操作符计算所有文档的聚合值(`_id: null`), 相当于只分了一个组

> db.transactions.aggregate([
...     {
...         $group: {
...             _id: null,
...             totalQty: {$sum: "$qty"},
...             totalNotional: {$sum: { $multiply: ["$price", "$qty"]}},
...             avgPrice: {$avg: "$price"},
...             count: {$sum: 1},
...             maxNotional: {$max: {$multiply: ["$price", "$qty"]}},
...             minNotional: {$min: {$multiply: ["$price", "$qty"]}}
...         }
...     }
... ])
{ "_id" : null, "totalQty" : 103, "totalNotional" : 58418.9, "avgPrice" : 698.5333333333333, "count" : 3, "maxNotional" : 56740, "minNotional" : 301.4 }

# 使用聚合操作符创建数组字段,并将指定字段,push到数组中
> db.transactions.aggregate([
...     {
...         $group: {
...             _id: "$currency",
...             "symbols": {$push: "$symbol"}
...         }
...     }
... ])
{ "_id" : "USD", "symbols" : [ "AMZN", "AAPL" ] }
{ "_id" : "CNY", "symbols" : [ "600519" ] }

$out: 获取聚合管道返回的文档并将它们写入指定的集合。从 MongoDB 4.4 开始,您可以指定输出数据库

官方文档地址

语法:

{ $out: { db: "<output-db>", coll: "<output-collection>" } }
  • 将聚合管道中的文档写入一个新集合中
  • 将聚合管道中的文档写入一个已经存在的集合中,已存在的集合会被清空,然后写入的值,原集合中的数据会丢失,非常危险
  • 如果聚合管道操作遇到错误,管道阶段不会创建新集合或是覆盖已存在的集合内容
    示例:
# 将聚合管道中的文档写入一个新集合中
> db.transactions.aggregate([
...     {
...         $group: {
...             _id: "$currency",
...             "symbols": {$push: "$symbol"}
...         }
...     },
...     {$out: "output"}
... ])
>
> db.output.find()
{ "_id" : "USD", "symbols" : [ "AMZN", "AAPL" ] }
{ "_id" : "CNY", "symbols" : [ "600519" ] }

# 将聚合管道中的文档写入一个已经存在的集合中,已存在的集合会被清空,然后写入的值,原集合中的数据会丢失,非常危险
> db.transactions.aggregate([
...     {
...         $group: {
...             _id: "$symbol",
...             totalNotional: {$sum: {$multiply: ["$price", "$qty"]}}
...         }
...     },
...     {$out: "output"}
... ])
> db.output.find()
{ "_id" : "AMZN", "totalNotional" : 1377.5 }
{ "_id" : "AAPL", "totalNotional" : 301.4 }
{ "_id" : "600519", "totalNotional" : 56740 }
>

聚合操作 <options>

  • 语法: db.<collection>.aggregate(<pipeline>, <options>)
    • <pipeline> 文档定义了操作中使用的聚合管道阶段和聚合操作符
    • <options> 文档声明了一些和聚合操作的参数

allowDiskUse: <boolean: default(false)>

  • 每个聚合管道阶段使用的内存不能超过100MB
  • 如果数据量较大,为了防止聚合管道阶段超出内存上限并抛出错误,可以启用allowDiskUse选项
  • allowDiskUse 启用后,聚合阶段可以在内存容量不足时,将操作数据写入临时文件中
  • 临时文件会被写入 dbPath 下的 _tmp 文件夹,dbPath 的默认值为 /data/db
> db.transactions.aggregate([
...         {
...             $group: {
...                 _id: "$currency",
...                 "symbols": {$push: "$symbol"}
...             }
...         }
...     ],
...     {allowDiskUse: true}
... )
{ "_id" : "CNY", "symbols" : [ "600519" ] }
{ "_id" : "USD", "symbols" : [ "AMZN", "AAPL" ] }

聚合操作的优化

  • 聚合阶段顺序的优化

    • $project + $match, $match 阶段会在 $project 阶段前运行
      
      db.transactions.aggregate([
          {
              $project: {
                  _id: 0, symbol: 1, currency: 1,
                  notional: {$multiply: ["$price", "$qty"]}
              }
          },
          {
              $match: {
                  currency: "USD",
                  notional: {$gt: 1000}
              }
          }
      ])
      
      # 实际执行时,会优化成如下
      db.transactions.aggregate([
          {
              $match: {
                  currency: "USD",
              }
          },
          {
              $project: {
                  _id: 0, symbol: 1, currency: 1,
                  notional: {$multiply: ["$price", "$qty"]}
              }
          },
          {
              $match: {
                  notional: {$gt: 1000}
              }
          }
      ])
      
    • $sort + $match, $match 阶段会在 $sort 阶段之前运行
    • $project + $skip, $skip 阶段会在 $project 阶段之前运行
  • 聚合阶段合并的优化

    • $sort + $limit, 如果两者之间没有夹杂会改变文档数量的聚合阶段, $sort 和 $limit 阶段可以合并
      db.transactions.aggregate([
          {
              $sort: {price: 1}
          },
          {
              $project: {
                  _id: 0, symbol: 1, currency: 1,
                  notional: {$multiply: ["$price", "$qty"]}
              }
          },
          {
              $limit: 2
          }
      ])
      
      # 实际执行
      db.transactions.aggregate([
          {
              $sort: {price: 1} + $limit: 2   # 伪代码
          },
          {
              $project: {
                  _id: 0, symbol: 1, currency: 1,
                  notional: {$multiply: ["$price", "$qty"]}
              }
          }
      ])
      
    • $limit + $limit $skip + $skip $match + $match: 连续 \(limit,\)skip,$match 阶段排列在一起时,可以合并为一个阶段
      { $limit: 10} + { $limit: 5} = { $limit: 5}
      { $skip: 10} + { $skip: 5} = { $skip: 15}
      
      # $match 合并
      { $match: {currency: "USD"}} 
      { $match: {qty: 1}
      
      {
          $match: {
              $and: [
                  {"currency": "USD"},
                  {"qty": 1}
              ]
          }
      }
      
    • $lookup + $unwind, 连续排列在一起的 $lookup 和 $unwind 阶段,如果 $unwind 应用在 $lookup 阶段创建的 as 字段上,则两者可以合并
      db.accounts.aggregate([
          {
              $lookup: {
                  from: "forex",
                  localField: "currency",
                  foreignField: "ccy",
                  as: "forexData"
              }
          },
          {
              $unwind: "$forexData"
          }
      ])
      
      db.accounts.aggregate([
          {
              $lookup: {
                  from: "forex",
                  localField: "currency",
                  foreignField: "ccy",
                  as: "forexData"
              }
          }
          +                  // 伪码,合并后可以节省临时性文档的创建和更改
          {
              $unwind: "$forexData"
          }
      ])
      
posted @ 2022-10-30 20:00  郭赫伟  阅读(67)  评论(0编辑  收藏  举报