Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2019/04/25 06:47:39 UTC

[GitHub] [spark] pntuananh commented on a change in pull request #24455: [SPARK-27540][MLlib] Add 'meanAveragePrecision_at_k' metric to RankingMetrics

pntuananh commented on a change in pull request #24455: [SPARK-27540][MLlib] Add 'meanAveragePrecision_at_k' metric to RankingMetrics
URL: https://github.com/apache/spark/pull/24455#discussion_r278413766
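
For context, the PR proposes a k-truncated variant of mean average precision on RankingMetrics. Below is a minimal usage sketch, assuming a local SparkContext; the existing untruncated metric is shown, and the commented-out call illustrates the proposed k-truncated variant, whose exact public name (e.g. meanAveragePrecisionAt) was still under discussion on this PR:

    import org.apache.spark.{SparkConf, SparkContext}
    import org.apache.spark.mllib.evaluation.RankingMetrics

    object RankingMetricsExample {
      def main(args: Array[String]): Unit = {
        val sc = new SparkContext(
          new SparkConf().setAppName("map-at-k").setMaster("local[*]"))
        // Each element pairs a predicted ranking with its ground-truth items.
        val predictionAndLabels = sc.parallelize(Seq(
          (Array(1, 2, 3, 4, 5), Array(1, 3, 6)),
          (Array(4, 1, 2), Array(2, 4))))
        val metrics = new RankingMetrics(predictionAndLabels)
        println(metrics.meanAveragePrecision) // existing, untruncated MAP
        // Hypothetical name for the k-truncated metric added by this PR:
        // println(metrics.meanAveragePrecisionAt(3))
        sc.stop()
      }
    }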
 
 

 ##########
 File path: mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala
 ##########
 @@ -70,26 +70,42 @@ class RankingMetrics[T: ClassTag](predictionAndLabels: RDD[(Array[T], Array[T])]
    */
   lazy val meanAveragePrecision: Double = {
     predictionAndLabels.map { case (pred, lab) =>
-      val labSet = lab.toSet
+      computeMeanAveragePrecisionAtK(pred, lab, pred.length)
+    }.mean()
+  }
 
-      if (labSet.nonEmpty) {
-        var i = 0
-        var cnt = 0
-        var precSum = 0.0
-        val n = pred.length
-        while (i < n) {
-          if (labSet.contains(pred(i))) {
-            cnt += 1
-            precSum += cnt.toDouble / (i + 1)
-          }
-          i += 1
+  /**
+   * Computes the average precision of a single query, truncated at position k.
+   *
+   * If the query has an empty ground truth set, the value is zero and a log
+   * warning is generated.
+   *
+   * @param pred predicted ranking
+   * @param lab ground truth
+   * @param k use the top k of the predicted ranking; must be positive
+   * @return average precision over the first k ranking positions
+   */
+  private def computeMeanAveragePrecisionAtK(pred: Array[T],
+                                             lab: Array[T],
+                                             k: Int): Double = {
+    val labSet = lab.toSet
+    if (labSet.nonEmpty) {
+      var i = 0
+      var cnt = 0
+      var precSum = 0.0
+      val n = math.min(pred.length, k)
+      while (i < n) {
+        if (labSet.contains(pred(i))) {
+          cnt += 1
+          precSum += cnt.toDouble / (i + 1)
         }
-        precSum / labSet.size
 
 Review comment:
   @qb-tarushg: based on the definition of Average Precision, we should divide by the number of relevant items, not by k.
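
   For reference, average precision at k for a single query is commonly written as AP@k = (1 / |relevant|) * sum_{i=1..k} P(i) * rel(i), where P(i) is the precision of the top i predictions and rel(i) is 1 if the item at rank i is relevant. The following is a minimal, self-contained sketch of the per-query computation with the denominator this comment suggests; it illustrates the point, not the PR's final code, and note that some MAP@k definitions instead divide by min(|relevant|, k):

       object AveragePrecisionSketch {
         // Average precision of a single query's ranking, truncated at position k.
         def averagePrecisionAtK[T](pred: Array[T], lab: Array[T], k: Int): Double = {
           require(k > 0, "k must be positive")
           val labSet = lab.toSet
           if (labSet.isEmpty) {
             0.0 // the PR logs a warning for an empty ground truth set; omitted here
           } else {
             var i = 0
             var cnt = 0       // relevant items seen so far
             var precSum = 0.0 // running sum of precision at each hit
             val n = math.min(pred.length, k)
             while (i < n) {
               if (labSet.contains(pred(i))) {
                 cnt += 1
                 precSum += cnt.toDouble / (i + 1)
               }
               i += 1
             }
             // Divide by the number of relevant items, not by k; some
             // definitions cap the denominator at math.min(labSet.size, k).
             precSum / labSet.size
           }
         }

         def main(args: Array[String]): Unit = {
           // Hits at ranks 1 and 3 of the top 3: (1/1 + 2/3) / 3 ≈ 0.556
           println(averagePrecisionAtK(Array(1, 2, 3, 4, 5), Array(1, 3, 6), 3))
         }
       }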
