Skip to content
Closed
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
update version
  • Loading branch information
WeichenXu123 committed Mar 1, 2018
commit b3e9dddc5eff082a892d109ad959369d5f5510a9
22 changes: 11 additions & 11 deletions python/pyspark/ml/stat.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,70 +177,70 @@ class Summarizer(object):
+--------------+
<BLANKLINE>

.. versionadded:: 2.3.0
.. versionadded:: 2.4.0

"""
def __init__(self, js):
    """
    Store the wrapped object on the instance.

    :param js: object kept as ``self._js``; presumably the JVM-side
        summary builder -- TODO confirm against ``metrics``.
    """
    self._js = js

@staticmethod
@since("2.3.0")
@since("2.4.0")
def mean(col, weightCol=None):
    """
    Return a column of mean summary.

    :param col: column to summarize.
    :param weightCol: optional weight column; defaults to None.
    :return: result of ``Summarizer._get_single_metric`` for the
        ``"mean"`` metric.
    """
    metric_name = "mean"
    return Summarizer._get_single_metric(col, weightCol, metric_name)

@staticmethod
@since("2.3.0")
@since("2.4.0")
def variance(col, weightCol=None):
    """
    Return a column of variance summary.

    :param col: column to summarize.
    :param weightCol: optional weight column; defaults to None.
    :return: result of ``Summarizer._get_single_metric`` for the
        ``"variance"`` metric.
    """
    metric_name = "variance"
    return Summarizer._get_single_metric(col, weightCol, metric_name)

@staticmethod
@since("2.3.0")
@since("2.4.0")
def count(col, weightCol=None):
    """
    Return a column of count summary.

    :param col: column to summarize.
    :param weightCol: optional weight column; defaults to None.
    :return: result of ``Summarizer._get_single_metric`` for the
        ``"count"`` metric.
    """
    metric_name = "count"
    return Summarizer._get_single_metric(col, weightCol, metric_name)

@staticmethod
@since("2.3.0")
@since("2.4.0")
def numNonZeros(col, weightCol=None):
    """
    Return a column of numNonZeros summary.

    :param col: column to summarize.
    :param weightCol: optional weight column; defaults to None.
    :return: result of ``Summarizer._get_single_metric`` for the
        ``"numNonZeros"`` metric.
    """
    metric_name = "numNonZeros"
    return Summarizer._get_single_metric(col, weightCol, metric_name)

@staticmethod
@since("2.3.0")
@since("2.4.0")
def max(col, weightCol=None):
    """
    Return a column of max summary.

    :param col: column to summarize.
    :param weightCol: optional weight column; defaults to None.
    :return: result of ``Summarizer._get_single_metric`` for the
        ``"max"`` metric.
    """
    metric_name = "max"
    return Summarizer._get_single_metric(col, weightCol, metric_name)

@staticmethod
@since("2.3.0")
@since("2.4.0")
def min(col, weightCol=None):
    """
    Return a column of min summary.

    :param col: column to summarize.
    :param weightCol: optional weight column; defaults to None.
    :return: result of ``Summarizer._get_single_metric`` for the
        ``"min"`` metric.
    """
    metric_name = "min"
    return Summarizer._get_single_metric(col, weightCol, metric_name)

@staticmethod
@since("2.3.0")
@since("2.4.0")
def normL1(col, weightCol=None):
    """
    Return a column of normL1 summary.

    :param col: column to summarize.
    :param weightCol: optional weight column; defaults to None.
    :return: result of ``Summarizer._get_single_metric`` for the
        ``"normL1"`` metric.
    """
    metric_name = "normL1"
    return Summarizer._get_single_metric(col, weightCol, metric_name)

@staticmethod
@since("2.3.0")
@since("2.4.0")
def normL2(col, weightCol=None):
"""
return a column of normL2 summary
Expand All @@ -262,7 +262,7 @@ def _get_single_metric(col, weightCol, metric):
col._jc, weightCol._jc))

@staticmethod
@since("2.3.0")
@since("2.4.0")
def metrics(*metrics):
"""
Given a list of metrics, provides a builder that it turns computes metrics from a column.
Expand Down Expand Up @@ -290,7 +290,7 @@ def metrics(*metrics):
_to_seq(sc, metrics))
return Summarizer(js)

@since("2.3.0")
@since("2.4.0")
def summary(self, featureCol, weightCol=None):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We might want to move the "summary" method into another class and have Summarizer contain only static methods. That will help with autocomplete, making it clear that you're not meant to write Summarizer.metrics("min").mean(features).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds reasonable.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto: naming should match Scala: "featuresCol"

"""
Returns an aggregate object that contains the summary of the column with the requested
Expand Down