From 9eeeca8a3f030536d79e89c9f03c936ee3a5ec24 Mon Sep 17 00:00:00 2001 From: Shahid Date: Sat, 19 Jan 2019 02:54:17 +0530 Subject: [PATCH 1/6] update doc for evaluation metrics --- docs/mllib-evaluation-metrics.md | 12 ++++++------ .../spark/mllib/evaluation/RankingMetrics.scala | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/mllib-evaluation-metrics.md b/docs/mllib-evaluation-metrics.md index c65ecdcb67ee..fa4242d8d4cb 100644 --- a/docs/mllib-evaluation-metrics.md +++ b/docs/mllib-evaluation-metrics.md @@ -439,10 +439,10 @@ $$rel_D(r) = \begin{cases}1 & \text{if $r \in D$}, \\ 0 & \text{otherwise}.\end{ Precision at k - $p(k)=\frac{1}{M} \sum_{i=0}^{M-1} {\frac{1}{k} \sum_{j=0}^{\text{min}(\left|D\right|, k) - 1} rel_{D_i}(R_i(j))}$ + $p(k)=\frac{1}{M} \sum_{i=0}^{M-1} {\frac{1}{k} \sum_{j=0}^{\text{min}(\left|R_i\right|, k) - 1} rel_{D_i}(R_i(j))}$ - Precision at k is a measure of + Precision at k is a measure of how many of the first k recommended documents are in the set of true relevant documents averaged across all users. In this metric, the order of the recommendations is not taken into account. @@ -450,10 +450,10 @@ $$rel_D(r) = \begin{cases}1 & \text{if $r \in D$}, \\ 0 & \text{otherwise}.\end{ Mean Average Precision - $MAP=\frac{1}{M} \sum_{i=0}^{M-1} {\frac{1}{\left|D_i\right|} \sum_{j=0}^{Q-1} \frac{rel_{D_i}(R_i(j))}{j + 1}}$ + $MAP=\frac{1}{M} \sum_{i=0}^{M-1} {\frac{1}{\left|R_i\right|} \sum_{j=0}^{\left|R_i\right|-1} \frac{rel_{D_i}(R_i(j))}{j + 1}}$ - MAP is a measure of how + MAP is a measure of how many of the recommended documents are in the set of true relevant documents, where the order of the recommendations is taken into account (i.e. penalty for highly relevant documents is higher). @@ -462,10 +462,10 @@ $$rel_D(r) = \begin{cases}1 & \text{if $r \in D$}, \\ 0 & \text{otherwise}.\end{ Normalized Discounted Cumulative Gain $NDCG(k)=\frac{1}{M} \sum_{i=0}^{M-1} {\frac{1}{IDCG(D_i, k)}\sum_{j=0}^{n-1} - \frac{rel_{D_i}(R_i(j))}{\text{ln}(j+2)}} \\ + \frac{rel_{D_i}(R_i(j))}{log_2(j+2)}} \\ \text{Where} \\ \hspace{5 mm} n = \text{min}\left(\text{max}\left(|R_i|,|D_i|\right),k\right) \\ - \hspace{5 mm} IDCG(D, k) = \sum_{j=0}^{\text{min}(\left|D\right|, k) - 1} \frac{1}{\text{ln}(j+2)}$ + \hspace{5 mm} IDCG(D, k) = \sum_{j=0}^{\text{min}(\left|D\right|, k) - 1} \frac{1}{log_2(j+2)}$ NDCG at k is a diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala index b98aa0534152..a5540faf37ec 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala @@ -100,7 +100,7 @@ class RankingMetrics[T: ClassTag](predictionAndLabels: RDD[(Array[T], Array[T])] } i += 1 } - precSum / labSet.size + precSum / n } else { logWarning("Empty ground truth set, check input data") 0.0 From c25b342ef452d51cc65df06896a1f1460d694b68 Mon Sep 17 00:00:00 2001 From: Shahid Date: Sat, 19 Jan 2019 03:48:43 +0530 Subject: [PATCH 2/6] UT correction --- .../apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java | 2 +- .../org/apache/spark/mllib/evaluation/RankingMetricsSuite.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mllib/src/test/java/org/apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java b/mllib/src/test/java/org/apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java index e9d7e4fdbe8c..765571d0d819 100644 --- 
a/mllib/src/test/java/org/apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java @@ -49,7 +49,7 @@ public void setUp() throws IOException { public void rankingMetrics() { @SuppressWarnings("unchecked") RankingMetrics metrics = RankingMetrics.of(predictionAndLabels); - Assert.assertEquals(0.355026, metrics.meanAveragePrecision(), 1e-5); + Assert.assertEquals(0.147989, metrics.meanAveragePrecision(), 1e-5); Assert.assertEquals(0.75 / 3.0, metrics.precisionAt(4), 1e-5); } } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/RankingMetricsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/RankingMetricsSuite.scala index f334be2c2ba8..3c3a138ea8bf 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/RankingMetricsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/RankingMetricsSuite.scala @@ -43,7 +43,7 @@ class RankingMetricsSuite extends SparkFunSuite with MLlibTestSparkContext { assert(metrics.precisionAt(10) ~== 0.8/3 absTol eps) assert(metrics.precisionAt(15) ~== 8.0/45 absTol eps) - assert(map ~== 0.355026 absTol eps) + assert(map ~== 0.147989 absTol eps) assert(metrics.ndcgAt(3) ~== 1.0/3 absTol eps) assert(metrics.ndcgAt(5) ~== 0.328788 absTol eps) From a160561d8e77e2164ef1b36bafb19f82964eb1af Mon Sep 17 00:00:00 2001 From: Shahid Date: Sat, 19 Jan 2019 10:02:47 +0530 Subject: [PATCH 3/6] python correction --- python/pyspark/mllib/evaluation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/mllib/evaluation.py b/python/pyspark/mllib/evaluation.py index 6ca6df672f30..de03ee790e7a 100644 --- a/python/pyspark/mllib/evaluation.py +++ b/python/pyspark/mllib/evaluation.py @@ -324,7 +324,7 @@ class RankingMetrics(JavaModelWrapper): >>> metrics.precisionAt(15) 0.17... >>> metrics.meanAveragePrecision - 0.35... + 0.14... >>> metrics.ndcgAt(3) 0.33... >>> metrics.ndcgAt(10) From b763babdd4580f8170440df00922872897d3262a Mon Sep 17 00:00:00 2001 From: Shahid Date: Sat, 19 Jan 2019 14:46:53 +0530 Subject: [PATCH 4/6] add comment --- .../org/apache/spark/mllib/evaluation/RankingMetrics.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala index a5540faf37ec..32f81c723ee3 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala @@ -138,6 +138,8 @@ class RankingMetrics[T: ClassTag](predictionAndLabels: RDD[(Array[T], Array[T])] var dcg = 0.0 var i = 0 while (i < n) { + // Base of the log doesn't matter for calculating NDCG, + // if the relevance value is binary. 
val gain = 1.0 / math.log(i + 2) if (i < pred.length && labSet.contains(pred(i))) { dcg += gain From 2003599acd27fd129047964d71a61ada41d4b633 Mon Sep 17 00:00:00 2001 From: Shahid Date: Sat, 19 Jan 2019 21:54:11 +0530 Subject: [PATCH 5/6] address comment --- docs/mllib-evaluation-metrics.md | 6 +++--- .../org/apache/spark/mllib/evaluation/RankingMetrics.scala | 2 +- .../spark/mllib/evaluation/JavaRankingMetricsSuite.java | 2 +- .../apache/spark/mllib/evaluation/RankingMetricsSuite.scala | 2 +- python/pyspark/mllib/evaluation.py | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/mllib-evaluation-metrics.md b/docs/mllib-evaluation-metrics.md index fa4242d8d4cb..7ad10161487b 100644 --- a/docs/mllib-evaluation-metrics.md +++ b/docs/mllib-evaluation-metrics.md @@ -450,7 +450,7 @@ $$rel_D(r) = \begin{cases}1 & \text{if $r \in D$}, \\ 0 & \text{otherwise}.\end{ Mean Average Precision - $MAP=\frac{1}{M} \sum_{i=0}^{M-1} {\frac{1}{\left|R_i\right|} \sum_{j=0}^{\left|R_i\right|-1} \frac{rel_{D_i}(R_i(j))}{j + 1}}$ + $MAP=\frac{1}{M} \sum_{i=0}^{M-1} {\frac{1}{\left|D_i\right|} \sum_{j=0}^{\left|R_i\right|-1} \frac{rel_{D_i}(R_i(j))}{j + 1}}$ MAP is a measure of how @@ -462,10 +462,10 @@ $$rel_D(r) = \begin{cases}1 & \text{if $r \in D$}, \\ 0 & \text{otherwise}.\end{ Normalized Discounted Cumulative Gain $NDCG(k)=\frac{1}{M} \sum_{i=0}^{M-1} {\frac{1}{IDCG(D_i, k)}\sum_{j=0}^{n-1} - \frac{rel_{D_i}(R_i(j))}{log_2(j+2)}} \\ + \frac{rel_{D_i}(R_i(j))}{\text{log}_2(j+2)}} \\ \text{Where} \\ \hspace{5 mm} n = \text{min}\left(\text{max}\left(|R_i|,|D_i|\right),k\right) \\ - \hspace{5 mm} IDCG(D, k) = \sum_{j=0}^{\text{min}(\left|D\right|, k) - 1} \frac{1}{log_2(j+2)}$ + \hspace{5 mm} IDCG(D, k) = \sum_{j=0}^{\text{min}(\left|D\right|, k) - 1} \frac{1}{\text{log}_2(j+2)}$ NDCG at k is a diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala index 32f81c723ee3..4935d1141113 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala @@ -100,7 +100,7 @@ class RankingMetrics[T: ClassTag](predictionAndLabels: RDD[(Array[T], Array[T])] } i += 1 } - precSum / n + precSum / labSet.size } else { logWarning("Empty ground truth set, check input data") 0.0 diff --git a/mllib/src/test/java/org/apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java b/mllib/src/test/java/org/apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java index 765571d0d819..e9d7e4fdbe8c 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java @@ -49,7 +49,7 @@ public void setUp() throws IOException { public void rankingMetrics() { @SuppressWarnings("unchecked") RankingMetrics metrics = RankingMetrics.of(predictionAndLabels); - Assert.assertEquals(0.147989, metrics.meanAveragePrecision(), 1e-5); + Assert.assertEquals(0.355026, metrics.meanAveragePrecision(), 1e-5); Assert.assertEquals(0.75 / 3.0, metrics.precisionAt(4), 1e-5); } } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/RankingMetricsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/RankingMetricsSuite.scala index 3c3a138ea8bf..f334be2c2ba8 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/RankingMetricsSuite.scala +++ 
b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/RankingMetricsSuite.scala @@ -43,7 +43,7 @@ class RankingMetricsSuite extends SparkFunSuite with MLlibTestSparkContext { assert(metrics.precisionAt(10) ~== 0.8/3 absTol eps) assert(metrics.precisionAt(15) ~== 8.0/45 absTol eps) - assert(map ~== 0.147989 absTol eps) + assert(map ~== 0.355026 absTol eps) assert(metrics.ndcgAt(3) ~== 1.0/3 absTol eps) assert(metrics.ndcgAt(5) ~== 0.328788 absTol eps) diff --git a/python/pyspark/mllib/evaluation.py b/python/pyspark/mllib/evaluation.py index de03ee790e7a..6ca6df672f30 100644 --- a/python/pyspark/mllib/evaluation.py +++ b/python/pyspark/mllib/evaluation.py @@ -324,7 +324,7 @@ class RankingMetrics(JavaModelWrapper): >>> metrics.precisionAt(15) 0.17... >>> metrics.meanAveragePrecision - 0.14... + 0.35... >>> metrics.ndcgAt(3) 0.33... >>> metrics.ndcgAt(10) From d89fd0dbd5345ff5f42918241dbccbbddb2f7589 Mon Sep 17 00:00:00 2001 From: Shahid Date: Sun, 20 Jan 2019 10:11:37 +0530 Subject: [PATCH 6/6] address comment --- docs/mllib-evaluation-metrics.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/mllib-evaluation-metrics.md b/docs/mllib-evaluation-metrics.md index 7ad10161487b..896d95bd4884 100644 --- a/docs/mllib-evaluation-metrics.md +++ b/docs/mllib-evaluation-metrics.md @@ -413,13 +413,13 @@ A ranking system usually deals with a set of $M$ users $$U = \left\{u_0, u_1, ..., u_{M-1}\right\}$$ -Each user ($u_i$) having a set of $N$ ground truth relevant documents +Each user ($u_i$) having a set of $N_i$ ground truth relevant documents -$$D_i = \left\{d_0, d_1, ..., d_{N-1}\right\}$$ +$$D_i = \left\{d_0, d_1, ..., d_{N_i-1}\right\}$$ -And a list of $Q$ recommended documents, in order of decreasing relevance +And a list of $Q_i$ recommended documents, in order of decreasing relevance -$$R_i = \left[r_0, r_1, ..., r_{Q-1}\right]$$ +$$R_i = \left[r_0, r_1, ..., r_{Q_i-1}\right]$$ The goal of the ranking system is to produce the most relevant set of documents for each user. The relevance of the sets and the effectiveness of the algorithms can be measured using the metrics listed below. 
@@ -439,7 +439,7 @@ $$rel_D(r) = \begin{cases}1 & \text{if $r \in D$}, \\ 0 & \text{otherwise}.\end{ Precision at k - $p(k)=\frac{1}{M} \sum_{i=0}^{M-1} {\frac{1}{k} \sum_{j=0}^{\text{min}(\left|R_i\right|, k) - 1} rel_{D_i}(R_i(j))}$ + $p(k)=\frac{1}{M} \sum_{i=0}^{M-1} {\frac{1}{k} \sum_{j=0}^{\text{min}(Q_i, k) - 1} rel_{D_i}(R_i(j))}$ Precision at k is a measure of @@ -450,7 +450,7 @@ $$rel_D(r) = \begin{cases}1 & \text{if $r \in D$}, \\ 0 & \text{otherwise}.\end{ Mean Average Precision - $MAP=\frac{1}{M} \sum_{i=0}^{M-1} {\frac{1}{\left|D_i\right|} \sum_{j=0}^{\left|R_i\right|-1} \frac{rel_{D_i}(R_i(j))}{j + 1}}$ + $MAP=\frac{1}{M} \sum_{i=0}^{M-1} {\frac{1}{N_i} \sum_{j=0}^{Q_i-1} \frac{rel_{D_i}(R_i(j))}{j + 1}}$ MAP is a measure of how @@ -462,10 +462,10 @@ $$rel_D(r) = \begin{cases}1 & \text{if $r \in D$}, \\ 0 & \text{otherwise}.\end{ Normalized Discounted Cumulative Gain $NDCG(k)=\frac{1}{M} \sum_{i=0}^{M-1} {\frac{1}{IDCG(D_i, k)}\sum_{j=0}^{n-1} - \frac{rel_{D_i}(R_i(j))}{\text{log}_2(j+2)}} \\ + \frac{rel_{D_i}(R_i(j))}{\text{log}(j+2)}} \\ \text{Where} \\ - \hspace{5 mm} n = \text{min}\left(\text{max}\left(|R_i|,|D_i|\right),k\right) \\ - \hspace{5 mm} IDCG(D, k) = \sum_{j=0}^{\text{min}(\left|D\right|, k) - 1} \frac{1}{\text{log}_2(j+2)}$ + \hspace{5 mm} n = \text{min}\left(\text{max}\left(Q_i, N_i\right),k\right) \\ + \hspace{5 mm} IDCG(D, k) = \sum_{j=0}^{\text{min}(\left|D\right|, k) - 1} \frac{1}{\text{log}(j+2)}$ NDCG at k is a
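
---
Note on the log base, for the comment introduced in PATCH 4/6 and the $\text{ln}$ / $\text{log}_2$ / $\text{log}$ edits in the documentation table above: with the binary relevance used here, the base of the discount logarithm cancels between DCG and its ideal value. A short change-of-base step in the notation of the table (a sketch of the argument, not text taken from the patches): since $\text{log}_b(j+2) = \text{ln}(j+2) / \text{ln}(b)$, every term of both sums picks up the same constant factor $\text{ln}(b)$,

$\frac{\sum_{j=0}^{n-1} \frac{rel_{D_i}(R_i(j))}{\text{log}_b(j+2)}}{\sum_{j=0}^{\text{min}(\left|D_i\right|, k) - 1} \frac{1}{\text{log}_b(j+2)}}
= \frac{\text{ln}(b) \sum_{j=0}^{n-1} \frac{rel_{D_i}(R_i(j))}{\text{ln}(j+2)}}{\text{ln}(b) \sum_{j=0}^{\text{min}(\left|D_i\right|, k) - 1} \frac{1}{\text{ln}(j+2)}}$

so each user's ratio, and therefore $NDCG(k)$, is unchanged for any base $b$.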
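
For context on how these metrics are consumed, a minimal Scala sketch is below. It assumes a local SparkContext created inline; the object name and the prediction/label ids are illustrative only and are not the data or expected values asserted in the test suites changed above.

import org.apache.spark.SparkContext
import org.apache.spark.mllib.evaluation.RankingMetrics

object RankingMetricsSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext("local[2]", "RankingMetricsSketch")

    // One (ranked recommendations, ground-truth relevant documents) pair per user;
    // the ids are made up for illustration.
    val predictionAndLabels = sc.parallelize(Seq(
      (Array(1, 6, 2, 7, 8, 3, 9, 10, 4, 5), Array(1, 2, 3, 4, 5)),
      (Array(4, 1, 5, 6, 2, 7, 3, 8, 9, 10), Array(1, 2, 3))))

    val metrics = new RankingMetrics(predictionAndLabels)

    // Precision at k ignores ordering inside the top k: the fraction of the first k
    // recommendations that are in the ground-truth set, averaged over users.
    println(s"precision@5 = ${metrics.precisionAt(5)}")

    // Mean average precision is order-sensitive and, per the corrected documentation,
    // is normalized per user by the number of ground-truth relevant documents.
    println(s"MAP = ${metrics.meanAveragePrecision}")

    // NDCG at k divides DCG by the ideal DCG, so the base of the discount log cancels.
    println(s"NDCG@5 = ${metrics.ndcgAt(5)}")

    sc.stop()
  }
}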