Commit 872d03d

[MRG] FIX Revert the addition of ndcg_score and dcg_score (scikit-learn#9932)
1 parent 063eb15 commit 872d03d

File tree

  doc/modules/classes.rst
  doc/modules/model_evaluation.rst
  sklearn/metrics/__init__.py
  sklearn/metrics/ranking.py
  sklearn/metrics/tests/test_ranking.py

5 files changed: 1 addition, 136 deletions

doc/modules/classes.rst
Lines changed: 0 additions & 2 deletions

@@ -783,15 +783,13 @@ details.
    metrics.classification_report
    metrics.cohen_kappa_score
    metrics.confusion_matrix
-   metrics.dcg_score
    metrics.f1_score
    metrics.fbeta_score
    metrics.hamming_loss
    metrics.hinge_loss
    metrics.jaccard_similarity_score
    metrics.log_loss
    metrics.matthews_corrcoef
-   metrics.ndcg_score
    metrics.precision_recall_curve
    metrics.precision_recall_fscore_support
    metrics.precision_score

doc/modules/model_evaluation.rst
Lines changed: 0 additions & 8 deletions

@@ -308,14 +308,6 @@ Some also work in the multilabel case:
    recall_score
    zero_one_loss
 
-Some are typically used for ranking:
-
-.. autosummary::
-   :template: function.rst
-
-   dcg_score
-   ndcg_score
-
 And some work with binary and multilabel (but not multiclass) problems:
 
 .. autosummary::

sklearn/metrics/__init__.py
Lines changed: 0 additions & 4 deletions

@@ -12,8 +12,6 @@
 from .ranking import precision_recall_curve
 from .ranking import roc_auc_score
 from .ranking import roc_curve
-from .ranking import dcg_score
-from .ranking import ndcg_score
 
 from .classification import accuracy_score
 from .classification import classification_report
@@ -118,6 +116,4 @@
     'v_measure_score',
     'zero_one_loss',
     'brier_score_loss',
-    'dcg_score',
-    'ndcg_score'
 ]
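
With these imports and __all__ entries removed, dcg_score and ndcg_score are no longer part of the public sklearn.metrics namespace. A minimal sanity check, assuming a scikit-learn build that includes this revert:

    # Minimal check that the two names are gone from the public namespace.
    # Assumes a scikit-learn build that includes this revert.
    import sklearn.metrics

    for name in ("dcg_score", "ndcg_score"):
        print(name, "available:", hasattr(sklearn.metrics, name))  # expected: False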

sklearn/metrics/ranking.py
Lines changed: 1 addition & 89 deletions

@@ -26,7 +26,7 @@
 
 from ..utils import assert_all_finite
 from ..utils import check_consistent_length
-from ..utils import column_or_1d, check_array, check_X_y
+from ..utils import column_or_1d, check_array
 from ..utils.multiclass import type_of_target
 from ..utils.extmath import stable_cumsum
 from ..utils.sparsefuncs import count_nonzero
@@ -788,91 +788,3 @@ def label_ranking_loss(y_true, y_score, sample_weight=None):
     loss[np.logical_or(n_positives == 0, n_positives == n_labels)] = 0.
 
     return np.average(loss, weights=sample_weight)
-
-
-def dcg_score(y_true, y_score, k=5):
-    """Discounted cumulative gain (DCG) at rank K.
-
-    Parameters
-    ----------
-    y_true : array, shape = [n_samples]
-        Ground truth (true relevance labels).
-    y_score : array, shape = [n_samples]
-        Predicted scores.
-    k : int
-        Rank.
-
-    Returns
-    -------
-    score : float
-
-    References
-    ----------
-    .. [1] `Wikipedia entry for the Discounted Cumulative Gain
-           <https://en.wikipedia.org/wiki/Discounted_cumulative_gain>`_
-    """
-    order = np.argsort(y_score)[::-1]
-    y_true = np.take(y_true, order[:k])
-
-    gain = 2 ** y_true - 1
-
-    discounts = np.log2(np.arange(len(y_true)) + 2)
-    return np.sum(gain / discounts)
-
-
-def ndcg_score(y_true, y_score, k=5):
-    """Normalized discounted cumulative gain (NDCG) at rank K.
-
-    Normalized Discounted Cumulative Gain (NDCG) measures the performance of a
-    recommendation system based on the graded relevance of the recommended
-    entities. It varies from 0.0 to 1.0, with 1.0 representing the ideal
-    ranking of the entities.
-
-    Parameters
-    ----------
-    y_true : array, shape = [n_samples]
-        Ground truth (true labels represended as integers).
-    y_score : array, shape = [n_samples, n_classes]
-        Predicted probabilities.
-    k : int
-        Rank.
-
-    Returns
-    -------
-    score : float
-
-    Examples
-    --------
-    >>> y_true = [1, 0, 2]
-    >>> y_score = [[0.15, 0.55, 0.2], [0.7, 0.2, 0.1], [0.06, 0.04, 0.9]]
-    >>> ndcg_score(y_true, y_score, k=2)
-    1.0
-    >>> y_score = [[0.9, 0.5, 0.8], [0.7, 0.2, 0.1], [0.06, 0.04, 0.9]]
-    >>> ndcg_score(y_true, y_score, k=2)
-    0.66666666666666663
-
-    References
-    ----------
-    .. [1] `Kaggle entry for the Normalized Discounted Cumulative Gain
-           <https://www.kaggle.com/wiki/NormalizedDiscountedCumulativeGain>`_
-    """
-    y_score, y_true = check_X_y(y_score, y_true)
-
-    # Make sure we use all the labels (max between the length and the higher
-    # number in the array)
-    lb = LabelBinarizer()
-    lb.fit(np.arange(max(np.max(y_true) + 1, len(y_true))))
-    binarized_y_true = lb.transform(y_true)
-
-    if binarized_y_true.shape != y_score.shape:
-        raise ValueError("y_true and y_score have different value ranges")
-
-    scores = []
-
-    # Iterate over each y_value_true and compute the DCG score
-    for y_value_true, y_value_score in zip(binarized_y_true, y_score):
-        actual = dcg_score(y_value_true, y_value_score, k)
-        best = dcg_score(y_value_true, y_value_true, k)
-        scores.append(actual / best)
-
-    return np.mean(scores)
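
For reference, the computation performed by the removed functions can be reproduced with plain NumPy. The sketch below is illustrative only: it is not scikit-learn API, and it simplifies the label handling by assuming integer labels that index the columns of the score matrix directly (the removed code used LabelBinarizer instead). It reproduces the first example from the removed docstring.

    import numpy as np

    def dcg_at_k(relevance, scores, k=5):
        # Rank items by predicted score (descending) and keep the top k.
        top = np.take(relevance, np.argsort(scores)[::-1][:k])
        # Exponential gain with a logarithmic position discount.
        gains = 2 ** top - 1
        discounts = np.log2(np.arange(len(top)) + 2)
        return np.sum(gains / discounts)

    def ndcg_at_k(y_true, y_score, k=5):
        # One-hot encode the integer labels (one row per sample) and average
        # each sample's DCG@k normalized by its ideal (best possible) DCG@k.
        y_true = np.asarray(y_true)
        y_score = np.asarray(y_score)
        onehot = np.eye(y_score.shape[1])[y_true]
        return np.mean([dcg_at_k(t, s, k) / dcg_at_k(t, t, k)
                        for t, s in zip(onehot, y_score)])

    # Reproduces the first docstring example: every sample's true class is
    # ranked first, so NDCG@2 is 1.0.
    print(ndcg_at_k([1, 0, 2],
                    [[0.15, 0.55, 0.2], [0.7, 0.2, 0.1], [0.06, 0.04, 0.9]],
                    k=2))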

sklearn/metrics/tests/test_ranking.py
Lines changed: 0 additions & 33 deletions

@@ -29,7 +29,6 @@
 from sklearn.metrics import label_ranking_loss
 from sklearn.metrics import roc_auc_score
 from sklearn.metrics import roc_curve
-from sklearn.metrics import ndcg_score
 
 from sklearn.exceptions import UndefinedMetricWarning
 
@@ -738,38 +737,6 @@ def check_zero_or_all_relevant_labels(lrap_score):
                                    [[0.5], [0.5], [0.5], [0.5]]), 1.)
 
 
-def test_ndcg_score():
-    # Check perfect ranking
-    y_true = [1, 0, 2]
-    y_score = [
-        [0.15, 0.55, 0.2],
-        [0.7, 0.2, 0.1],
-        [0.06, 0.04, 0.9]
-    ]
-    perfect = ndcg_score(y_true, y_score)
-    assert_equal(perfect, 1.0)
-
-    # Check bad ranking with a small K
-    y_true = [0, 2, 1]
-    y_score = [
-        [0.15, 0.55, 0.2],
-        [0.7, 0.2, 0.1],
-        [0.06, 0.04, 0.9]
-    ]
-    short_k = ndcg_score(y_true, y_score, k=1)
-    assert_equal(short_k, 0.0)
-
-    # Check a random scoring
-    y_true = [2, 1, 0]
-    y_score = [
-        [0.15, 0.55, 0.2],
-        [0.7, 0.2, 0.1],
-        [0.06, 0.04, 0.9]
-    ]
-    average_ranking = ndcg_score(y_true, y_score, k=2)
-    assert_almost_equal(average_ranking, 0.63092975)
-
-
 def check_lrap_error_raised(lrap_score):
     # Raise value error if not appropriate format
     assert_raises(ValueError, lrap_score,
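
As a cross-check of the removed expectation: in the "random scoring" case the true class of every sample lands at rank 2 of its top-two ranking, so each per-sample NDCG@2 is (2^1 - 1) / log2(2 + 1) = 1 / log2(3), which is where the 0.63092975 constant comes from. A quick confirmation with plain NumPy (not scikit-learn API):

    import numpy as np

    # The true class appears at position 2 in each sample's top-2 ranking,
    # so the per-sample NDCG@2 (and therefore the mean) is 1 / log2(3).
    print(1.0 / np.log2(3))  # ~0.63092975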

0 commit comments