modified LogisticRegressionSummary and LogisticRegressionModel in cla…

…ssification.py
apache · jmwdpk · Sep 4, 2017 · Sep 5, 2017 · Sep 7, 2017 · Sep 8, 2017
commit 60579d5f36ef26f6e3ec675a795ccc86d6a71c8d
diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py
@@ -529,8 +529,13 @@ def summary(self):
         """
         if self.hasSummary:
             java_blrt_summary = self._call_java("summary")
-            # Note: Once multiclass is added, update this to return correct summary
-            return BinaryLogisticRegressionTrainingSummary(java_blrt_summary)
+            java_blrt_interceptVector = self._call_java("interceptVector")
+            java_blrt_numClasses = self._call_java("numClasses")
+            java_blrt_binarysummary = self._call_java("binarySummary")
+            if (len(java_blrt_interceptVector) == 1):
+                return BinaryLogisticRegressionTrainingSummary(java_blrt_binarysummary)
+            else:
+                return LogisticRegressionTrainingSummary(java_blrt_summary)
         else:
             raise RuntimeError("No training summary available for this %s" %
                                self.__class__.__name__)
@@ -611,6 +616,112 @@ def featuresCol(self):
         """
         return self._call_java("featuresCol")
 
+    @property
+    @since("2.3.0")
+    def labels(self):
+        """
+        Returns the sequence of labels in ascending order. This order matches the order used
+        in metrics which are specified as arrays over labels, e.g., truePositiveRateByLabel.
+
+        Note: In most cases, it will be values {0.0, 1.0, ..., numClasses-1}, However, if the
+        training set is missing a label, then all of the arrays over labels
+        (e.g., from truePositiveRateByLabel) will be of length numClasses-1 instead of the
+        expected numClasses.
+        """
+        return self._call_java("labels")
+
+    @property
+    @since("2.3.0")
+    def truePositiveRateByLabel(self):
+        """
+        Returns true positive rate for each label (category).
+        """
+        return self._call_java("truePositiveRateByLabel")
+
+    @property
+    @since("2.3.0")
+    def falsePositiveRateByLabel(self):
+        """
+        Returns false positive rate for each label (category).
+        """
+        return self._call_java("falsePositiveRateByLabel")
+
+    @property
+    @since("2.3.0")
+    def precisionByLabel(self):
+        """
+        Returns precision for each label (category).
+        """
+        return self._call_java("precisionByLabel")
+
+    @property
+    @since("2.3.0")
+    def recallByLabel(self):
+        """
+        Returns recall for each label (category).
+        """
+        return self._call_java("recallByLabel")
+
+    @property
+    @since("2.3.0")
+    def fMeasureByLabel(self, beta=1.0):
+        """
+        Returns f-measure for each label (category).
+        """
+        return self._call_java("fMeasureByLabel", beta)
+
+    @property
+    @since("2.3.0")
+    def accuracy(self):
+        """
+        Returns accuracy.
+        (equals to the total number of correctly classified instances
+        out of the total number of instances.)
+        """
+        return self._call_java("accuracy")
+
+    @property
+    @since("2.3.0")
+    def weightedTruePositiveRate(self):
+        """
+        Returns weighted true positive rate.
+        (equals to precision, recall and f-measure)
+        """
+        return self._call_java("weightedTruePositiveRate")
+
+    @property
+    @since("2.3.0")
+    def weightedFalsePositiveRate(self):
+        """
+        Returns weighted false positive rate.
+        """
+        return self._call_java("weightedFalsePositiveRate")
+
+    @property
+    @since("2.3.0")
+    def weightedRecall(self):
+        """
+        Returns weighted averaged recall.
+        (equals to precision, recall and f-measure)
+        """
+        return self._call_java("weightedRecall")
+
+    @property
+    @since("2.3.0")
+    def weightedPrecision(self):
+        """
+        Returns weighted averaged precision.
+        """
+        return self._call_java("weightedPrecision")
+
+    @property
+    @since("2.3.0")
+    def weightedFMeasure(self, beta=1.0):
+        """
+        Returns weighted averaged f-measure.
+        """
+        return self._call_java("weightedFMeasure", beta)
+
 
 @inherit_doc
 class LogisticRegressionTrainingSummary(LogisticRegressionSummary):