Extended Reference Pattern
- Joins are expensive
- so store the must-have fields of the parent inside the child document to avoid joins.
- This creates data duplication, yes, but it's a cost we can pay in exchange for performance.
- The duplicated data should be data that does not change frequently.
- If the data does need to be updated, in most cases a real-time update is not necessary, so a periodic data sync is more than enough.
// Reshapes the documents of the "reviews" collection in place: the matching
// book's product_type and title are copied into each review under "product",
// and the review's own fields are grouped under "review", so that reading a
// review no longer needs a $lookup against "books".
var reshape_review_docs_pipeline = [
  {
    // Only process reviews that still carry a top-level product_id. Once a
    // review has been reshaped that field lives at product.product_id, and
    // $lookup treats a missing localField as null -- which would match any
    // book lacking product_id and let $merge overwrite the reshaped review
    // with a document whose review.* fields are all absent.
    $match: {
      product_id: { $exists: true },
    },
  },
  {
    // Attach the matching book document(s) as the array "product_info".
    $lookup: {
      from: "books",
      localField: "product_id",
      foreignField: "product_id",
      as: "product_info",
    },
  },
  {
    // Turn the one-element array into an embedded document. Reviews with no
    // matching book are dropped from the pipeline here and therefore stay
    // untouched in the collection.
    // (The previous includeArrayIndex: "string" was an unfilled placeholder;
    // the field it produced was discarded by $project anyway.)
    $unwind: {
      path: "$product_info",
      preserveNullAndEmptyArrays: false,
    },
  },
  {
    // Build the new document shape; any field not listed here is dropped.
    $project: {
      _id: "$_id",
      "product.product_id": "$product_id",
      "product.product_type": "$product_info.product_type",
      "product.title": "$product_info.title",
      "review.user_id": "$user_id",
      "review.reviewTitle": "$reviewTitle",
      "review.reviewBody": "$reviewBody",
      "review.date": "$date",
      "review.stars": "$stars",
    },
  },
  {
    // Write the reshaped documents back over the originals, matched by _id.
    // "discard" guarantees this run never inserts new reviews.
    // NOTE(review): $merge into the collection being aggregated needs
    // MongoDB 4.4+ -- confirm the server version.
    $merge: {
      into: "reviews",
      on: "_id",
      whenMatched: "replace",
      whenNotMatched: "discard",
    },
  },
];

db.reviews.aggregate(reshape_review_docs_pipeline);