MongoDB - 聚合运算:group(), aggregate(), mapReduce()
程序员文章站
2022-05-08 16:09:38
...
1 区别
- group() - 分组统计
aggregate() - 简单聚合
mapReduce() - 强大统计 - group 需要手写聚合函数的业务逻辑
- group 不支持分布式运算
分布式可以用 aggregate (version 2.2)
或者 mapReduce (version 2.4)
2 语法
-
group()
db.collection_name.group({ key: {key1: 1, key2: 2}, //分组字段 cond: {}, //查询条件 reduce: function(curr, result) { //聚合函数 }, initial: {}, //初始化 finalize: function() { //统计一组后的回调函数 } })
-
aggregate()
db.collection_name.aggregate([ //AGGREGATE_OPERATION {$match: {}}, {$group: {}}, {$sort: {}}, {$limit: {}} ]);
-
mapReduce()
使用 MapReduce 要实现两个函数 Map 函数和 Reduce 函数:Map 函数调用 emit(key, value), 遍历 collection 中所有的记录, 将 key 与 value 传递给 Reduce 函数进行处理。db.collection_name.mapReduce( function() { emit(key,value); }, //map 函数 function(key,values) { return reduceFunction }, //reduce 函数 { out: collection, //统计结果存放集合 query: document, //筛选条件 sort: document, limit: number } )
示例:在集合 orders 中查找 status:“A” 的数据,并根据 cust_id 来分组,并计算 amount 的总和
3 实例
3.1 count
- 计算每个栏目下的商品个数
-
mysql
select cat_id.count(*) from goods group by cat_id;
-
group
db.goods.group({ key: {cat_id: 1}, cond: {}, reduce: function(curr, result) { result.total += 1; }, initial: {total: 0} });
-
aggregate
db.goods.aggregate([ {$group: {_id: "$cat_id", total: {$sum: 1}}} ]);
-
mapReduce
db.goods.mapReduce( function() { emit(this.cat_id, 1) }, function(cat_id, total) { return Array.sum(total); }, {out: 'res'} );
- 查询每个栏目下价格大于50元的商品个数
- mysql
... where price>50 ...
- group
cond: {price: {$gt: 50}},
- aggregate
{$match: {price: {$gt: 50}}}, {$group: ...}
- 查询每个栏目下 价格大于50元的商品个数 大于等于3的栏目
- mysql
select cat_id.count(*) as cnt from goods where price > 50 group by cat_id having cnt >= 3;
- aggregate
db.goods.aggregate([ {$match: {price: {$gt: 50}}}, {$group: {_id: "$cat_id", total: {$sum: 1}}}, {$match: {total: {$gte: 3}}} ]);
3.2 sum
- 计算每个栏目下的商品库存量
-
mysql
select cat_id.sum(goods_number) from goods group by cat_id;
-
group
db.goods.group({ key: {cat_id: 1}, cond: {}, reduce: function(curr, result) { result.num += curr.goods_number; }, initial: {num: 0} });
-
aggregate
db.goods.aggregate([ {$group: {_id: "$cat_id", total: {$sum: "$goods_number"}}} ]);
-
mapReduce
var map = function() { emit(this.cat_id, this.goods_number); } var reduce = function(cat_id, numbers) { return Array.sum(numbers); } db.goods.mapReduce(map, reduce, {out: 'res'}); db.res.find();
- 查询 每个栏目下的商品库存量 前三名
- mysql
... order by total limit 3;
- aggregate
{$sort: {total: 1}}, {$limit: 3}
3.3 max, min
查询每个栏目下的最贵的商品价格
- mysql
select cat_id.max(price) from goods group by cat_id;
- group
db.goods.group({ key: {cat_id: 1}, cond: {}, reduce: function(curr, result) { if (curr.price > result.max) { result.max = curr.price; } }, initial: {max: 0} });
3.4 avg
- 查询每个栏目下商品的平均价格
-
mysql
select cat_id.avg(price) as avg from goods group by cat_id;
-
group
db.goods.group({ key: {cat_id: 1}, cond: {}, reduce: function(curr, result) { if (curr.price > result.max) { result.total += 1; result.sum += curr.price; } }, initial: {total: 0, sum: 0}, finalize: function(){ result.avg = result.sum / result.total; } });
-
aggregate
db.goods.aggregate([ {$group: {_id: "cat_id", avg: {$avg: "$price"}}}, ]);
-
mapReduce
var map = function() { emit(this.cat_id, this.price); } var reduce = function(cat_id, values) { return Array.avg(values); } db.goods.mapReduce(map, reduce, {out: 'res'}); db.res.find();
- 查询每个栏目下商品的平均价格,并从高到低排序
- mysql
select cat_id.avg(price) as avg from goods group by cat_id order by avg desc;
- aggregate
db.goods.aggregate([ {$group: {_id: "cat_id", avg: {$avg: "$price"}}}, {$sort: {avg: -1}} ]);