使用pymongo时Map reduce执行失败,但在mongo shell中成功

0 投票
2 回答
1314 浏览
提问于 2025-04-16 23:17

我遇到了一个 MongoDB map reduce 的问题:同样的操作在 mongo shell 中可以成功执行,但改用 pymongo 执行时却报错(完整的错误信息见下文)。

下面是我的集合中一个文档的 JSON 示例。

{ "_id" : ObjectId( "4e41661ecacbd10e00012600" ),
  "timestamp" : "20110809",
  "variants" : { "407" : { "number_of_ad_clicks" : 101,
      "number_of_search_keywords" : 20,
      "total_duration" : 4,
      "os" : { "os_2" : 2,
        "os_1" : 1,
        "os_0" : 0 },
      "countries" : { "ge" : 2,
        "ca" : 7,
        "fr" : 2,
        "uk" : 5,
        "us" : 2 },
      "screen_resolutions" : { "(320, 240)" : 5,
        "(640, 480)" : 7,
        "(1024, 960)" : 9,
        "(1280, 768)" : 6 },
      "widgets" : { "widget_1" : 1,
        "widget_0" : 0 },
      "languages" : { "ua_uk" : 8,
        "ca_en" : 6,
        "ca_fr" : 8,
        "us_en" : 1 },
      "search_keywords" : { "search_keyword_15" : 15,
        "search_keyword_14" : 14,
        "search_keyword_17" : 17,
        "search_keyword_16" : 16,
        "search_keyword_11" : 11,
        "search_keyword_10" : 10,
        "search_keyword_13" : 13,
        "search_keyword_12" : 12,
        "search_keyword_19" : 19,
        "search_keyword_18" : 18,
        "search_keyword_9" : 9,
        "search_keyword_8" : 8,
        "search_keyword_5" : 5,
        "search_keyword_4" : 4,
        "search_keyword_7" : 7,
        "search_keyword_6" : 6,
        "search_keyword_1" : 1,
        "search_keyword_3" : 3,
        "search_keyword_2" : 2 },
      "number_of_pageviews" : 38,
      "browsers" : { "browser_4" : 4,
        "browser_0" : 0,
        "browser_1" : 1,
        "browser_2" : 2,
        "browser_3" : 3 },
      "keywords" : { "keyword_5" : 5,
        "keyword_4" : 4,
        "keyword_1" : 1,
        "keyword_0" : 0,
        "keyword_3" : 3,
        "keyword_2" : 2 },
      "number_of_keyword_clicks" : 205,
      "number_of_visits" : 91 },
    "306" : { "number_of_ad_clicks" : 29,
      "number_of_search_keywords" : 4,
      "total_duration" : 4,
      "os" : { "os_2" : 2,
        "os_1" : 1,
        "os_0" : 0 },
      "countries" : { "ge" : 7,
        "ca" : 7,
        "fr" : 6,
        "uk" : 1,
        "us" : 3 },
      "screen_resolutions" : { "(320, 240)" : 2,
        "(640, 480)" : 1,
        "(1024, 960)" : 9,
        "(1280, 768)" : 5 },
      "widgets" : { "widget_1" : 1,
        "widget_0" : 0 },
      "languages" : { "ua_uk" : 2,
        "ca_en" : 8,
        "ca_fr" : 5,
        "us_en" : 4 },
      "search_keywords" : { "search_keyword_1" : 1,
        "search_keyword_3" : 3,
        "search_keyword_2" : 2 },
      "number_of_pageviews" : 35,
      "browsers" : { "browser_4" : 4,
        "browser_0" : 0,
        "browser_1" : 1,
        "browser_2" : 2,
        "browser_3" : 3 },
      "keywords" : { "keyword_5" : 5,
        "keyword_4" : 4,
        "keyword_1" : 1,
        "keyword_0" : 0,
        "keyword_3" : 3,
        "keyword_2" : 2 },
      "number_of_keyword_clicks" : 18,
      "number_of_visits" : 57 },
    "408" : { "number_of_ad_clicks" : 180,
      "number_of_search_keywords" : 41,
      "total_duration" : 7,
      "os" : { "os_2" : 2,
        "os_1" : 1,
        "os_0" : 0 },
      "countries" : { "ge" : 3,
        "ca" : 6,
        "fr" : 3,
        "uk" : 9,
        "us" : 9 },
      "screen_resolutions" : { "(320, 240)" : 9,
        "(640, 480)" : 9,
        "(1024, 960)" : 5,
        "(1280, 768)" : 10 },
      "widgets" : { "widget_1" : 1,
        "widget_0" : 0 },
      "languages" : { "ua_uk" : 3,
        "ca_en" : 2,
        "ca_fr" : 10,
        "us_en" : 7 },
      "search_keywords" : { "search_keyword_37" : 37,
        "search_keyword_36" : 36,
        "search_keyword_28" : 28,
        "search_keyword_29" : 29,
        "search_keyword_24" : 24,
        "search_keyword_25" : 25,
        "search_keyword_26" : 26,
        "search_keyword_27" : 27,
        "search_keyword_20" : 20,
        "search_keyword_21" : 21,
        "search_keyword_22" : 22,
        "search_keyword_23" : 23,
        "search_keyword_39" : 39,
        "search_keyword_38" : 38,
        "search_keyword_40" : 40,
        "search_keyword_15" : 15,
        "search_keyword_14" : 14,
        "search_keyword_17" : 17,
        "search_keyword_16" : 16,
        "search_keyword_11" : 11,
        "search_keyword_10" : 10,
        "search_keyword_13" : 13,
        "search_keyword_12" : 12,
        "search_keyword_33" : 33,
        "search_keyword_32" : 32,
        "search_keyword_31" : 31,
        "search_keyword_30" : 30,
        "search_keyword_19" : 19,
        "search_keyword_18" : 18,
        "search_keyword_35" : 35,
        "search_keyword_34" : 34,
        "search_keyword_9" : 9,
        "search_keyword_8" : 8,
        "search_keyword_5" : 5,
        "search_keyword_4" : 4,
        "search_keyword_7" : 7,
        "search_keyword_6" : 6,
        "search_keyword_1" : 1,
        "search_keyword_3" : 3,
        "search_keyword_2" : 2 },
      "number_of_pageviews" : 25,
      "browsers" : { "browser_4" : 4,
        "browser_0" : 0,
        "browser_1" : 1,
        "browser_2" : 2,
        "browser_3" : 3 },
      "keywords" : { "keyword_5" : 5,
        "keyword_4" : 4,
        "keyword_1" : 1,
        "keyword_0" : 0,
        "keyword_3" : 3,
        "keyword_2" : 2 },
      "number_of_keyword_clicks" : 15,
      "number_of_visits" : 19 } },
  "site_name" : "radiotiempo.com",
  "number_of_variants" : 3 }

这是我在 mongo shell 中使用的 map reduce 代码。

// Map step: emits one value per document, always under the constant key 1,
// so every emitted value ends up in the same group. Only the document's
// `variants` sub-document is passed along.
map = function(){
    emit(1, {variants:this.variants});
}

// Reduce step: totals `number_of_ad_clicks` over every value in `vals` and
// every variant id listed in `variant_ids`.
//
// `variant_ids` is not declared here; it is supplied through the "scope"
// option of the mapReduce call.
//
// Returns an object of the form {clicks: <sum>}. Note that this shape differs
// from what map emits ({variants: ...}).
reduce = function(key, vals) {
    var returnValue = { 
      clicks: 0, 
     };
    for(var j = 0 ; j < vals.length; j++){
       for(var i = 0 ; i < variant_ids.length; i++){
          try{
             // Throws when the value has no entry for this variant id
             // (or no `variants` field at all); see the catch below.
             returnValue.clicks += vals[j].variants[variant_ids[i]].number_of_ad_clicks;
          }catch(err)
          // Deliberately empty: a missing variant is skipped silently and
          // contributes nothing to the total.
          {}
       }
    }
    return returnValue;
}


// Shell-side debugging helper named `emit`: instead of emitting anything it
// prints the key and the tojson() rendering of the value.
function emit(k, v) {
    print("emit");
    print("  k:" + k + " v:" + tojson(v));
}

// Runs the map-reduce over variant_daily_collection:
//   - "scope" supplies the `variant_ids` array that the reduce function reads,
//   - "out" names the collection that receives the result ("myoutput"),
//   - "query" restricts the input documents by site_name and timestamp range.
// "scope", "out" and "query" are all top-level keys of the options object.
res = db.variant_daily_collection.mapReduce(map, reduce, {"scope": {'variant_ids': ['4519','4518']}, "out" : "myoutput", "query":{"site_name": {'$in':['julie2.com','julie3.com']}, 'timestamp': {'$gte':'20110601','$lte':'20110603'}}})
// Show the contents of the output collection.
db.myoutput.find()

接下来是我使用pymongo库的Python代码。

# JavaScript map function wrapped in a Code object (presumably bson.code.Code;
# the import is not shown). The adjacent string literals are concatenated by
# Python into one single-line JavaScript function.
# It emits each document's `variants` sub-document under the constant key 1.
map = Code("function () {"
                   "    emit(1, {variants:this.variants});"
                   "}")
        # JavaScript reduce function: sums number_of_ad_clicks over all values
        # and all ids in `variant_ids`, which must be provided to the server
        # via the map-reduce scope.
        # Unlike the shell version, the lookup below is not wrapped in
        # try/catch, so a value without an entry for a given variant id raises
        # the "has no properties" TypeError instead of being skipped.
        # NOTE: the indentation of this pasted snippet is inconsistent with the
        # `map = ...` statement above; it is reproduced as posted.
        reduce = Code("function (key, values) {"
                      "   var result = {"
                      "       clicks: 0"
                      "   };"
                      "   for (var i = 0; i < values.length; i++) {"
                      "       for(var j = 0 ; j < variant_ids.length; j++){"
                      "           result.clicks += values[i].variants[variant_ids[j]].number_of_ad_clicks;"
                      "       }"
                      "    }"
                      "    return result;"
                      "}")

以下是pymongo返回的错误信息。

In [103]: db.variant_daily_collection.map_reduce(map, reduce, out = 'output',full_response = True, fields = {"scope": {'variant_ids': ['398']}, "query":{"site_name": 'routeplanner.net', 'timestamp': '20110809'}} )
---------------------------------------------------------------------------
OperationFailure                          Traceback (most recent call last)
/workspace/construction/<ipython console> in <module>()

/workspace/construction/.ve/lib/python2.7/site-packages/pymongo-2.0-py2.7-macosx-10.7-intel.egg/pymongo/collection.py in map_reduce(self, map, reduce, out, merge_output, reduce_output, full_response, **kwargs)
   1031         response = self.__database.command("mapreduce", self.__name,
   1032                                            map=map, reduce=reduce,
-> 1033                                            out=out_conf, **kwargs)
   1034 
   1035         if full_response or not response.get('result'):

/workspace/construction/.ve/lib/python2.7/site-packages/pymongo-2.0-py2.7-macosx-10.7-intel.egg/pymongo/database.py in command(self, command, value, check, allowable_errors, **kwargs)
    338             msg = "command %r failed: %%s" % command
    339             helpers._check_command_response(result, self.connection.disconnect,
--> 340                                             msg, allowable_errors)
    341 
    342         return result

/workspace/construction/.ve/lib/python2.7/site-packages/pymongo-2.0-py2.7-macosx-10.7-intel.egg/pymongo/helpers.py in _check_command_response(response, reset, msg, allowable_errors)
    123                     ex_msg += (", assertionCode: %d" %
    124                                (response["assertionCode"],))
--> 125                 raise OperationFailure(ex_msg, response.get("assertionCode"))
    126             raise OperationFailure(msg % response["errmsg"])
    127 

OperationFailure: db assertion failure, assertion: 'invoke failed: JS Error: TypeError: values[i].variants[variant_ids[j]] has no properties nofile_b:0', assertionCode: 9004

In [104]: 

2 个回答

0

我看到这里有几个潜在的问题,除了我在上面的评论中提到的:

  1. 在你的示例中,你覆盖了 emit,这意味着你的归约函数实际上不会被执行(如果没有发出任何东西,就没有东西可以归约)
  2. 你发出的格式和你的 reduce 函数返回的格式不一样。reduce 需要返回和 emit 的第二个参数相同格式的值,因为 reduce 可能会被多次调用,其输入也可能包含它自己之前返回的结果(可以参考 Map-Reduce 文档)。
  3. 你总是用键 1 来发出,这可能会根据你想要的工作方式造成问题
  4. 如上所述,你的两段代码使用的循环变量并不一致;我没有逐行比较以确认是否还有其他差异,但最稳妥的做法是把你的 JavaScript 函数定义原样复制到 pymongo 的 Code 中。

另外,如果你能展示一下你是如何调用 map_reduce 的,以及你的 map 和 reduce 函数,那会更有帮助。

3

问题在于你把 'scope'(以及 'query')放进了 'fields' 参数里传递,而它们应该作为 map_reduce 各自独立的关键字参数传入。

这样做是可以的:

# Corrected call: `query` and `scope` are passed as their own keyword
# arguments rather than nested inside a `fields=` dict, so `variant_ids`
# is visible to the reduce function.
db.variant_daily_collection.map_reduce(m, r, out="myoutput", query={"site_name": {'$in':['julie2.com','julie3.com','radiotiempo.com']}, 'timestamp': {'$gte':'20110601','$lte':'20110809'}}, scope={'variant_ids': ['4519','4518']})

这里的m是你之前提到的map函数,r是你之前提到的reduce函数。我只是把你的示例文档插入了三次来进行测试。

撰写回答