Facets: Introduction
Facets: Single Facet Query
db.companies.findOne()
db.companies.createIndex({'description': 'text', 'overview': 'text'})
db.companies.aggregate([{$match: {$text :{ $search: 'network' }}}])
// $sortByCount
mongos> db.companies.aggregate([
... {"$match": { "$text": {"$search": "network"} } },
... {"$sortByCount": "$category_code"}] )
{ "_id" : "web", "count" : 788 }
{ "_id" : "software", "count" : 463 }
{ "_id" : "network_hosting", "count" : 306 }
{ "_id" : "games_video", "count" : 276 }
{ "_id" : "mobile", "count" : 264 }
{ "_id" : "advertising", "count" : 205 }
{ "_id" : "enterprise", "count" : 151 }
{ "_id" : "other", "count" : 150 }
{ "_id" : null, "count" : 138 }
{ "_id" : "ecommerce", "count" : 101 }
{ "_id" : "hardware", "count" : 89 }
{ "_id" : "consulting", "count" : 88 }
{ "_id" : "public_relations", "count" : 86 }
{ "_id" : "security", "count" : 55 }
{ "_id" : "semiconductor", "count" : 41 }
{ "_id" : "search", "count" : 34 }
{ "_id" : "social", "count" : 30 }
{ "_id" : "cleantech", "count" : 17 }
{ "_id" : "news", "count" : 14 }
{ "_id" : "biotech", "count" : 13 }
Type "it" for more
mongos> db.companies.aggregate([
... {"$match": { "$text": {"$search": "network"} } } ,
... {"$unwind": "$offices"},
... {"$match": { "offices.city": {"$ne": ""} }} ,
... {"$sortByCount": "$offices.city"}] )
{ "_id" : "San Francisco", "count" : 245 }
{ "_id" : "New York", "count" : 218 }
{ "_id" : "London", "count" : 133 }
{ "_id" : "Los Angeles", "count" : 66 }
{ "_id" : "Palo Alto", "count" : 62 }
{ "_id" : "Sunnyvale", "count" : 58 }
{ "_id" : "San Jose", "count" : 53 }
{ "_id" : "Santa Clara", "count" : 49 }
{ "_id" : "Seattle", "count" : 44 }
{ "_id" : "Mountain View", "count" : 41 }
{ "_id" : "Paris", "count" : 41 }
{ "_id" : "Chicago", "count" : 36 }
{ "_id" : "Austin", "count" : 34 }
{ "_id" : "Boston", "count" : 33 }
{ "_id" : "San Diego", "count" : 33 }
{ "_id" : "Toronto", "count" : 32 }
{ "_id" : "Cambridge", "count" : 31 }
{ "_id" : "Santa Monica", "count" : 28 }
{ "_id" : "Atlanta", "count" : 24 }
{ "_id" : null, "count" : 23 }
Type "it" for more
Single query facets are supported by the new aggregation pipeline stage $sortByCount
.
Facets: Manual Buckets
Histogram
// create manual buckets using $ bucket
db.companies.aggregate( [
{ "$match": {"founded_year": {"$gt": 1980}, "number_of_employees": {"$ne": null}} },
{"$bucket": {
"groupBy": "$number_of_employees",
"boundaries": [ 0, 20, 50, 100, 500, 1000, Infinity ]}
}] )
// reproduce error message for non matching documents
db.coll.insert({ x: "a" });
db.coll.aggregate([{ $bucket: {groupBy: "$x", boundaries: [0, 50, 100]}}])
// set `default` option to collect documents that do not match boundaries
db.companies.aggregate( [
{ "$match": {"founded_year": {"$gt": 1980}}},
{ "$bucket": {
"groupBy": "$number_of_employees",
"boundaries": [ 0, 20, 50, 100, 500, 1000, Infinity ],
"default": "Other" }
}] )
// reproduce error message for inconsitent boundaries datatype
db.coll.aggregate([{ $bucket: {groupBy: "$x", boundaries: ["a", "b", 100]}}])
// set `output` option for $bucket stage
db.companies.aggregate([
{ "$match":
{"founded_year": {"$gt": 1980}}
},
{ "$bucket": {
"groupBy": "$number_of_employees",
"boundaries": [ 0, 20, 50, 100, 500, 1000, Infinity ],
"default": "Other",
"output": {
"total": {"$sum":1},
"average": {"$avg": "$number_of_employees" },
"categories": {"$addToSet": "$category_code"}
}
}
}
]
)
Facets: Auto Buckets
#!/bin/sh
// generate buckets automatically with $bucktAuto stage
db.companies.aggregate([
{ "$match": {"offices.city": "New York" }},
{"$bucketAuto": {
"groupBy": "$founded_year",
"buckets": 5
}}])
//set `output` option for $bucketAuto
db.companies.aggregate([
{ "$match": {"offices.city": "New York" }},
{"$bucketAuto": {
"groupBy": "$founded_year",
"buckets": 5,
"output": {
"total": {"$sum":1},
"average": {"$avg": "$number_of_employees" } }}}
])
// default $buckeAuto behaviour
for(i=1; i <= 1000; i++) { db.series.insert( {_id: i} ) };
db.series.aggregate(
{$bucketAuto:
{groupBy: "$_id", buckets: 5 }
})
// generate automatic buckets using granularity numerical series R20
db.series.aggregate(
{$bucketAuto:
{groupBy: "$_id", buckets: 5 , granularity: "R20"}
})
Auto Bucketing will, given a number of buckets, try to distribute documents evenly across buckets. Auto Bucketing will adhere bucket boundaries to a numerical series set by the granularity option
Facets: Multiple Facets
// render several different facets using $facet stage
db.companies.aggregate( [
{"$match": { "$text": {"$search": "Databases"} } },
{ "$facet": {
"Categories": [{"$sortByCount": "$category_code"}],
"Employees": [
{ "$match": {"founded_year": {"$gt": 1980}}},
{"$bucket": {
"groupBy": "$number_of_employees",
"boundaries": [ 0, 20, 50, 100, 500, 1000, Infinity ],
"default": "Other"
}}],
"Founded": [
{ "$match": {"offices.city": "New York" }},
{"$bucketAuto": {
"groupBy": "$founded_year",
"buckets": 5 }
}
]
}}]).pretty()
-
The $facet stage allows several sub-pipelines to be executed to produce multiple facets.
-
The $facet stage allows the applications to generate several different facets with one single database request. The $facet stage allows other stages to be included on the sub-pipelines, except for:
-
$facet
-
$out
-
$geoNear
-
$indexStats
-
$collStats Also, the sub-pipelines, defined for each individual facet, cannot share their output accross other parallel facets. Each sub-pipeline will receive the same input data set but does not share the result dataset with parallel facets.
Lab - $facets
Problem:
How many movies are in both the top ten highest rated movies according to the imdb.rating and the metacritic fields? We should get these results with exactly one access to the database.
db.movies.aggregate([
{$project:{"imdb.rating":1, metacritic:1}}
])
db.movies.aggregate([
{
$match: {
metacritic: { $gte: 0 },
"imdb.rating": { $gte: 0 }
}
},
{
$project: {
_id: 0,
metacritic: 1,
imdb: 1,
title: 1
}
},
{
$facet: {
top_metacritic: [
{
$sort: {
metacritic: -1,
title: 1
}
},
{
$limit: 10
},
{
$project: {
title: 1
}
}
],
top_imdb: [
{
$sort: {
"imdb.rating": -1,
title: 1
}
},
{
$limit: 10
},
{
$project: {
title: 1
}
}
]
}
},
{
$project: {
movies_in_both: {
$setIntersection: ["$top_metacritic", "$top_imdb"]
}
}
}
])
The $sortByCount
Stage
// performing a group followed by a sort to rank occurence
db.movies.aggregate([
{
"$group": {
"_id": "$imdb.rating",
"count": { "$sum": 1 }
}
},
{
"$sort": { "count": -1 }
}
])
// sortByCount is equivalent to the above. In fact, if you execute this pipeline
// with { explain: true } you will see that it is transformed to the above!
db.movies.aggregate([
{
"$sortByCount": "$imdb.rating"
}
])