Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
update since version in spark.evaluation
  • Loading branch information
mengxr committed Aug 25, 2015
commit 9d1499daa16b9a2aacf3e038e51e6ca6a75f217f
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,11 @@ import org.apache.spark.sql.DataFrame
* be smaller as a result, meaning there may be an extra sample at
* partition boundaries.
*/
@Since("1.3.0")
@Since("1.0.0")
@Experimental
class BinaryClassificationMetrics(
val scoreAndLabels: RDD[(Double, Double)],
val numBins: Int) extends Logging {
class BinaryClassificationMetrics @Since("1.3.0") (
@Since("1.3.0") val scoreAndLabels: RDD[(Double, Double)],
@Since("1.3.0") val numBins: Int) extends Logging {

require(numBins >= 0, "numBins must be nonnegative")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ import org.apache.spark.sql.DataFrame
*/
@Since("1.1.0")
@Experimental
class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
class MulticlassMetrics @Since("1.1.0") (predictionAndLabels: RDD[(Double, Double)]) {

/**
* An auxiliary constructor taking a DataFrame.
Expand Down Expand Up @@ -140,6 +140,7 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
/**
 * Returns precision: the total true-positive count summed over all classes,
 * divided by the total number of labeled instances. For multiclass
 * classification this is the micro-averaged precision, and it equals
 * `recall` and `fMeasure` (see those members).
 */
@Since("1.1.0")
lazy val precision: Double = tpByClass.values.sum.toDouble / labelCount

/**
 * Returns recall. Identical to `precision` for multiclass classification:
 * every misclassified instance is simultaneously a false positive for the
 * predicted class and a false negative for the true class, so the totals of
 * false positives and false negatives are equal.
 */
@Since("1.1.0")
lazy val recall: Double = precision

/**
 * Returns f-measure. Since `precision` and `recall` are equal for multiclass
 * classification, their harmonic mean (the f-measure) equals both as well.
 */
@Since("1.1.0")
lazy val fMeasure: Double = precision

/**
 * Returns the weighted true positive rate. The true positive rate of a class
 * is its recall, so this is an alias for `weightedRecall`.
 */
@Since("1.1.0")
lazy val weightedTruePositiveRate: Double = weightedRecall

/**
 * Returns the false positive rate averaged over all classes, each class
 * weighted by its share of the total label count.
 */
@Since("1.1.0")
lazy val weightedFalsePositiveRate: Double =
  labelCountByClass.foldLeft(0.0) { case (acc, (label, n)) =>
    acc + falsePositiveRate(label) * n.toDouble / labelCount
  }
Expand All @@ -173,13 +178,15 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
/**
 * Returns recall averaged over all classes, each class weighted by its share
 * of the total label count (equals weighted precision and weighted f-measure
 * per the surrounding API's documented convention).
 */
@Since("1.1.0")
lazy val weightedRecall: Double =
  labelCountByClass.foldLeft(0.0) { case (acc, (label, n)) =>
    acc + recall(label) * n.toDouble / labelCount
  }

/**
 * Returns precision averaged over all classes, each class weighted by its
 * share of the total label count.
 */
@Since("1.1.0")
lazy val weightedPrecision: Double =
  labelCountByClass.foldLeft(0.0) { case (acc, (label, n)) =>
    acc + precision(label) * n.toDouble / labelCount
  }
Expand All @@ -196,12 +203,14 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
/**
 * Returns the F1-measure (beta = 1.0) averaged over all classes, each class
 * weighted by its share of the total label count.
 */
@Since("1.1.0")
lazy val weightedFMeasure: Double =
  labelCountByClass.foldLeft(0.0) { case (acc, (label, n)) =>
    acc + fMeasure(label, 1.0) * n.toDouble / labelCount
  }

/**
 * Returns the distinct class labels (the keys of the per-class true-positive
 * counts) in ascending order.
 */
@Since("1.1.0")
lazy val labels: Array[Double] = tpByClass.keysIterator.toArray.sorted
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ import org.apache.spark.sql.DataFrame
* both are non-null Arrays, each with unique elements.
*/
@Since("1.2.0")
class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]) {
class MultilabelMetrics @Since("1.2.0") (predictionAndLabels: RDD[(Array[Double], Array[Double])]) {

/**
* An auxiliary constructor taking a DataFrame.
Expand All @@ -46,13 +46,15 @@ class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]
/**
 * Returns subset accuracy: the fraction of documents whose predicted label
 * array equals the true label array exactly. Uses `.deep` equality so the
 * arrays are compared element-wise (boxed), as in the original contract.
 */
@Since("1.2.0")
lazy val subsetAccuracy: Double = {
  val exactMatches = predictionAndLabels.filter { case (preds, lbls) =>
    preds.deep == lbls.deep
  }
  exactMatches.count().toDouble / numDocs
}

/**
 * Returns accuracy: the Jaccard similarity between the predicted and true
 * label sets (|intersection| / |union|) averaged over all documents.
 */
@Since("1.2.0")
lazy val accuracy: Double = predictionAndLabels.map { case (predictions, labels) =>
  // Compute the intersection once; the previous version evaluated
  // labels.intersect(predictions) twice per document (O(n*m) each time).
  val overlap = labels.intersect(predictions).size
  overlap.toDouble / (labels.size + predictions.size - overlap)
}.sum / numDocs
Expand All @@ -61,13 +63,15 @@ class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]
/**
 * Returns the Hamming loss: the number of mismatched labels (missed true
 * labels plus spurious predictions) summed over documents, normalized by
 * the total number of document-label slots.
 */
@Since("1.2.0")
lazy val hammingLoss: Double = predictionAndLabels.map { case (preds, lbls) =>
  val hits = lbls.intersect(preds).size
  lbls.size + preds.size - 2 * hits
}.sum / (numDocs * numLabels)

/**
* Returns document-based precision averaged by the number of documents
*/
@Since("1.2.0")
lazy val precision: Double = predictionAndLabels.map { case (predictions, labels) =>
if (predictions.size > 0) {
predictions.intersect(labels).size.toDouble / predictions.size
Expand All @@ -79,13 +83,15 @@ class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]
/**
 * Returns document-based recall averaged over all documents: for each
 * document, the fraction of its true labels that were also predicted.
 */
@Since("1.2.0")
lazy val recall: Double = {
  val perDocument = predictionAndLabels.map { case (preds, lbls) =>
    lbls.intersect(preds).size.toDouble / lbls.size
  }
  perDocument.sum / numDocs
}

/**
 * Returns document-based F1-measure averaged over all documents:
 * 2 * |predictions intersect labels| / (|predictions| + |labels|) per document.
 */
@Since("1.2.0")
lazy val f1Measure: Double = {
  val perDocument = predictionAndLabels.map { case (preds, lbls) =>
    2.0 * preds.intersect(lbls).size / (preds.size + lbls.size)
  }
  perDocument.sum / numDocs
}
Expand Down Expand Up @@ -143,6 +149,7 @@ class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]
* Returns micro-averaged label-based precision
* (equals to micro-averaged document-based precision)
*/
@Since("1.2.0")
lazy val microPrecision: Double = {
val sumFp = fpPerClass.foldLeft(0L){ case(cum, (_, fp)) => cum + fp}
sumTp.toDouble / (sumTp + sumFp)
Expand All @@ -152,6 +159,7 @@ class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]
* Returns micro-averaged label-based recall
* (equals to micro-averaged document-based recall)
*/
@Since("1.2.0")
lazy val microRecall: Double = {
val sumFn = fnPerClass.foldLeft(0.0){ case(cum, (_, fn)) => cum + fn}
sumTp.toDouble / (sumTp + sumFn)
Expand All @@ -161,10 +169,12 @@ class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]
/**
 * Returns the micro-averaged label-based f1-measure, computed from the
 * true-positive, false-positive, and false-negative counts pooled across all
 * labels (equals the micro-averaged document-based f1-measure).
 */
@Since("1.2.0")
lazy val microF1Measure: Double = 2.0 * sumTp / (2 * sumTp + sumFnClass + sumFpClass)

/**
 * Returns the distinct labels observed (the keys of the per-label
 * true-positive counts) in ascending order.
 */
@Since("1.2.0")
lazy val labels: Array[Double] = tpPerClass.keysIterator.toArray.sorted
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ import org.apache.spark.sql.DataFrame
*/
@Since("1.2.0")
@Experimental
class RegressionMetrics(predictionAndObservations: RDD[(Double, Double)]) extends Logging {
class RegressionMetrics @Since("1.2.0") (
predictionAndObservations: RDD[(Double, Double)]) extends Logging {

/**
* An auxiliary constructor taking a DataFrame.
Expand Down