Group records by month and count them - Mongoose, nodeJs, mongoDb

二次信任 提交于 2021-02-02 10:00:25

问题


I need to query in database (mongoose) and get back the number of sales made each month of the year for one particular product (within one year period).

I am new to node and mongoDb and I have come with a 'dummy' solution where I query in database and get back all the results for one product and than I use 3 loops to split the results in months but I think that it uses more resources than it should and it will use even more if it gets filled with more data, so I need help making a database query to solve that problem.

Here is the part of my code that does that:

Assuming that the results are required from 17-02-2020 to 17-02-2019, I know that if it is from January to December it will go into one loop for nothing but I have another part of code which controls if it requires 1 year result ex: 01-01-2020 to 31-12-2020 it will not execute the code below, that code I am talking about has only one loop lol.

let startTime = performance.now();
Sales.find({productId:req.params.productId, "created_at": { "$gte": oneYearFromNow, "$lte": dateNow}}).then(result => {
        let newMonthsArray= new Array();
        let monthsArray = ['January','February','March','April','May','June','July','August','September','October', 'November','December'];
        let months = {};
        for(let i=parseInt(req.params.startDate.substring(5,7))-1; i<12; i++){
            let year = parseInt(req.params.startDate.substring(0,4))-1;
            let month = parseInt(req.params.startDate.substring(5,7));
            newMonth = monthsArray[i] + '-' + year;
            newMonthsArray.push(newMonth);
            months[newMonth] = 0; 
        }

        for(let i=0; i<parseInt(req.params.startDate.substring(5,7)); i++){
            let year = parseInt(req.params.startDate.substring(0,4));
            let month = parseInt(req.params.startDate.substring(5,7));
            newMonth = monthsArray[i] + '-' + year;
            newMonthsArray.push(newMonth);
            months[newMonth] = 0; 
          }

        for(i=0; i<result.length; i++){
            let getDate = result[i].created_at.toISOString();
            let year = getDate.substring(0,4);
            let month = parseInt(getDate.substring(5,7));
            let monthName = monthsArray[month-1];
            let date =  monthName + '-' + year;
            let count = Number(months[date]) + 1;
            months[date] = count;
        }

        let endTime = performance.now();
        res.status(200).send({Data: months, 'Execution time': endTime - startTime + ' mls'});
    });

I hope everything is clear, I think I need to use aggregation but I'm not sure how!

Sample data:

{
    {
        "created_at": "2020-04-04T17:02:07.832Z",
        "updated_at": "2020-04-04T17:02:07.832Z",
        "_id": "5e88bdcda3080736ac70f9c1",
        "price": 16800,
        "productId": "5e88bf90b9e5102ae46b154e",
        "__v": 0
    },
    {
        "created_at": "2020-04-04T17:02:07.832Z",
        "updated_at": "2020-04-04T17:02:07.832Z",
        "_id": "5e88bdf9a3080736ac70f9c2",
        "price": 12800,
        "productId": "5e88bf90b9e5102ae46b154e",
        "__v": 0
    }
}

Desired result:


回答1:


Here is an aggregation query which returns the expected output. Some sample documents:

[
  { created_at: "2020-04-04T17:02:07.832Z", productId: 1 },
  { created_at: "2020-02-01T17:02:07.832Z", productId: 1 },
  { created_at: "2020-02-19T17:02:07.832Z", productId: 1 },
  { created_at: "2019-05-22T17:02:07.832Z", productId: 1 },
  { created_at: "2020-01-15T17:02:07.832Z", productId: 1 },
  { created_at: "2020-01-30T17:02:07.832Z", productId: 2 },  // not selected
  { created_at: "2019-03-15T17:02:07.832Z", productId: 1 }   // not selected
]

The input variables and the aggregation:

let TODAY = "2020-04-06T23:59:59"
let YEAR_BEFORE = "2019-04-07T00:00:00"
let req = { params: { productId: 1 } }
const monthsArray = [ 'January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December' ]

db.sales.aggregate( [
  { 
      $match: { 
          productId: req.params.productId, 
          created_at: { $gte: YEAR_BEFORE, $lte: TODAY }
      }
  },
  { 
      $group: {
          _id: { "year_month": { $substrCP: [ "$created_at", 0, 7 ] } }, 
          count: { $sum: 1 }
      } 
  },
  {
      $sort: { "_id.year_month": 1 }
  },
  { 
      $project: { 
          _id: 0, 
          count: 1, 
          month_year: { 
              $concat: [ 
                 { $arrayElemAt: [ monthsArray, { $subtract: [ { $toInt: { $substrCP: [ "$_id.year_month", 5, 2 ] } }, 1 ] } ] },
                 "-", 
                 { $substrCP: [ "$_id.year_month", 0, 4 ] }
              ] 
          }
      } 
  },
  { 
      $group: { 
          _id: null, 
          data: { $push: { k: "$month_year", v: "$count" } }
      } 
  },
  {
      $project: { 
          data: { $arrayToObject: "$data" }, 
          _id: 0 
      } 
  }
] )

The output:

{
        "data" : {
                "May-2019" : 1,
                "January-2020" : 1,
                "February-2020" : 2,
                "April-2020" : 1
        }
}



Here is the updated aggregation.

Note the following changes: (1) new constants FIRST_MONTH and LAST_MONTH, (2) changed the monthsArray variable name to MONTHS_ARRAY, (3) added 3 new pipeline stages.

The first two pipeline stages (new) build a template with all the months (covering the from and to input date range). The third new stage merges the template with the output data derived from the previous aggregation.

const FIRST_MONTH = 1
const LAST_MONTH = 12
const MONTHS_ARRAY = [ 'January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December' ]

let TODAY = "2020-04-06T23:59:59"
let YEAR_BEFORE = "2019-04-07T00:00:00"

db.sales.aggregate( [
  { 
      $match: { 
          productId: req.params.productId, 
          created_at: { $gte: YEAR_BEFORE, $lte: TODAY }
      }
  },
  { 
      $group: {
          _id: { "year_month": { $substrCP: [ "$created_at", 0, 7 ] } }, 
          count: { $sum: 1 }
      } 
  },
  {
      $sort: { "_id.year_month": 1 }
  },
  { 
      $project: { 
          _id: 0, 
          count: 1, 
          month_year: { 
              $concat: [ 
                 { $arrayElemAt: [ monthsArray, { $subtract: [ { $toInt: { $substrCP: [ "$_id.year_month", 5, 2 ] } }, 1 ] } ] },
                 "-", 
                 { $substrCP: [ "$_id.year_month", 0, 4 ] }
              ] 
          }
      } 
  },
  { 
      $group: { 
          _id: null, 
          data: { $push: { k: "$month_year", v: "$count" } }
      } 
  },
  { 
      $addFields: { 
          start_year: { $substrCP: [ YEAR_BEFORE, 0, 4 ] }, 
          end_year: { $substrCP: [ TODAY, 0, 4 ] },
          months1: { $range: [ { $toInt: { $substrCP: [ YEAR_BEFORE, 5, 2 ] } }, { $add: [ LAST_MONTH, 1 ] } ] },
          months2: { $range: [ FIRST_MONTH, { $add: [ { $toInt: { $substrCP: [ TODAY, 5, 2 ] } }, 1 ] } ] }
      } 
  },
  { 
      $addFields: { 
          template_data: { 
              $concatArrays: [ 
                  { $map: { 
                       input: "$months1", as: "m1",
                       in: {
                           count: 0,
                           month_year: { 
                               $concat: [ { $arrayElemAt: [ MONTHS_ARRAY, { $subtract: [ "$$m1", 1 ] } ] }, "-",  "$start_year" ] 
                           }                                            
                       }
                  } }, 
                  { $map: { 
                       input: "$months2", as: "m2",
                       in: {
                           count: 0,
                           month_year: { 
                               $concat: [ { $arrayElemAt: [ MONTHS_ARRAY, { $subtract: [ "$$m2", 1 ] } ] }, "-",  "$end_year" ] 
                           }                                            
                       }
                  } }
              ] 
         }
      }
  },
  { 
      $addFields: { 
          data: { 
             $map: { 
                 input: "$template_data", as: "t",
                 in: {   
                     k: "$$t.month_year",
                     v: { 
                         $reduce: { 
                             input: "$data", initialValue: 0, 
                             in: {
                                 $cond: [ { $eq: [ "$$t.month_year", "$$this.k"] },
                                              { $add: [ "$$this.v", "$$value" ] },
                                              { $add: [ 0, "$$value" ] }
                                 ]
                             }
                         } 
                     }
                 }
              }
          }
      }
  },
  {
      $project: { 
          data: { $arrayToObject: "$data" }, 
          _id: 0 
      } 
  }
] )

The output:

{
        "data" : {
                "April-2019" : 0,
                "May-2019" : 1,
                "June-2019" : 0,
                "July-2019" : 0,
                "August-2019" : 0,
                "September-2019" : 0,
                "October-2019" : 0,
                "November-2019" : 0,
                "December-2019" : 0,
                "January-2020" : 1,
                "February-2020" : 2,
                "March-2020" : 0,
                "April-2020" : 1
        }
}



回答2:


Yes, you are right that you need to use aggregation. This should work:

// NOTE: It is important that the the month strings start from the second element in this array  
// becuase the $month operator returns month values as numbers from 1 to 12.
const monthStrings = ["", "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"];
Sales.aggregate([
  {
    $match: {
      // Match only salses with a specific productId
      productId: req.params.productId,
      // Match only salses that fufils the date constraint below
      $expr: {
        $and: [
          { $gt: ["$created_at", oneYearFromNow] },
          { $lt: ["$created_at", dateNow] }
        ],
      }
    }
  },
  {
    $group: {
       // Group by both month and year of the sale
      _id: {
        month: { $month: "$created_at" },
        year: { $year: "$created_at" },
      },
      // Count the no of sales
      count: {
        $sum: 1
      }
    }
  },
  // Adding a project here to just to format the group date better
  {
    $project: {
      _id: {
        $concat: [
          {
            $arrayElemAt: [
              monthStrings,
              "$_id.month"
            ]
          },
          "-",
          "$_id.year"
        ]
      },
      count: 1,
    }
  }
])

You can test the aggregation pipeline in this playground.

The output would be an array of objects like this:

{ "_id": *,  "count": * }

Where the value of _id is a string in the format <month>-<year>(e.g April-2019) representing the month and year of sale. The count value is the number of sales that happened that month/year.

I used a combination of $match, $group and $project stages in the aggregation pipeline, you can find more details on how the stages work here, here, and here respectively.



来源:https://stackoverflow.com/questions/61051225/group-records-by-month-and-count-them-mongoose-nodejs-mongodb

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!