My MongoDB collection `impressions` has documents in the following format:
{ _uid: 10, "impressions": [ { "pos": 6, "id": 123, "service": "furniture" }, { "pos": 0, "id": 128, "service": "electronics" }, { "pos": 2, "id": 127, "service": "furniture" }, { "pos": 2, "id": 125, "service": "electronics" }, { "pos": 10, "id": 124, "service": "electronics" } ] }, { _uid: 11, "impressions": [ { "pos": 1, "id": 124, "service": "furniture" }, { "pos": 10, "id": 124, "service": "electronics" }, { "pos": 1, "id": 123, "service": "furniture" }, { "pos": 21, "id": 122, "service": "furniture" }, { "pos": 3, "id": 125, "service": "electronics" }, { "pos": 10, "id": 121, "service": "electronics" } ] }, ... Each document in the collection has an "impressions" key holding an array of dictionaries. In each dictionary, "id" is the id of the entity, "service" is the service type, and "pos" is the position of the item in the search page results. My aim is to find the number of impressions for every "id" in each category. For the above data, with "service" == "furniture", I want the aggregation results to be:
[ {"id": 123,"impressions_count":2}, {"id": 127,"impressions_count":1}, {"id": 124,"impressions_count":1}, {"id": 122,"impressions_count":1} ] I tried to aggregate on "id" using MapReduce via the following function in a Python script:
def fetch_impressions(): try: imp_collection = get_mongo_connection('impressions') map = Code(""" function(){ for( x in this.impressions){ var flat_id = x['id']; var service_type = x['service'] emit(parseInt(flat_id),1); } }; """) """) reduce = Code(""" function(a,b){ return Array.sum(b); }; """) results = imp_collection.map_reduce(map, reduce, 'aggregation_result') return results except Exception as e: raise Exception(e) But I'm getting None as the results, because of a faulty map function. I'm new to JavaScript and MongoDB; kindly help!
You can use the aggregation framework:
import pymongo conn = pymongo.MongoClient() db = conn.test col = db.collection for doc in col.aggregate([{'$unwind': '$impressions'}, {'$match': {'impressions.service': 'furniture'}}, {'$group': {'_id': '$impressions.id', 'impressions_count': {'$sum': 1}}}, ]): print(doc) Or, more efficiently, using the $map and $setDifference operators:
col.aggregate([ { "$project": { "impressions": {"$setDifference": [{ "$map": { "input": "$impressions", "as": "imp", "in": { "$cond": { "if": { "$eq": [ "$$imp.service", "furniture" ] }, "then": "$$imp.id", "else": 0 }}}}, [0]]}}}, { "$unwind": "$impressions" }, { "$group": { "_id": "$impressions", "impressions_count": { "$sum": 1 }}} ]) Which yields:
{'_id': 122.0, 'impressions_count': 1} {'_id': 124.0, 'impressions_count': 1} {'_id': 127.0, 'impressions_count': 1} {'_id': 123.0, 'impressions_count': 2}
Comments
Post a Comment