Skip to content

Commit 20968c1

Browse files
committed
address comments
1 parent f7cec51 commit 20968c1

File tree

1 file changed

+22
-11
lines changed

1 file changed

+22
-11
lines changed

python/pyspark/ml/stat.py

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -308,12 +308,12 @@ def normL2(col, weightCol=None):
308308
return Summarizer._get_single_metric(col, weightCol, "normL2")
309309

310310
@staticmethod
311-
def _check_param(featureCol, weightCol):
311+
def _check_param(featuresCol, weightCol):
312312
if weightCol is None:
313313
weightCol = lit(1.0)
314-
if not isinstance(featureCol, Column) or not isinstance(weightCol, Column):
314+
if not isinstance(featuresCol, Column) or not isinstance(weightCol, Column):
315315
raise TypeError("featuresCol and weightCol should be a Column")
316-
return featureCol, weightCol
316+
return featuresCol, weightCol
317317

318318
@staticmethod
319319
def _get_single_metric(col, weightCol, metric):
@@ -339,26 +339,28 @@ def metrics(*metrics):
339339
- normL2: the Euclidean norm for each coefficient.
340340
- normL1: the L1 norm of each coefficient (sum of the absolute values).
341341
342-
:param metrics metrics that can be provided.
343-
:return a Summarizer
342+
:param metrics:
343+
metrics that can be provided.
344+
:return:
345+
an object of :py:class:`pyspark.ml.stat.SummaryBuilder`
344346
345347
Note: Currently, the performance of this interface is about 2x~3x slower than using the RDD
346348
interface.
347349
"""
348350
sc = SparkContext._active_spark_context
349351
js = JavaWrapper._new_java_obj("org.apache.spark.ml.stat.Summarizer.metrics",
350352
_to_seq(sc, metrics))
351-
return SummarizerBuilder(js)
353+
return SummaryBuilder(js)
352354

353355

354-
class SummarizerBuilder(object):
356+
class SummaryBuilder(JavaWrapper):
355357
"""
356358
.. note:: Experimental
357359
358360
A builder object that provides summary statistics about a given column.
359361
360362
Users should not directly create such builders, but instead use one of the methods in
361-
:py:class:`pyspark.ml.stat.Summary`
363+
:py:class:`pyspark.ml.stat.Summarizer`
362364
363365
.. versionadded:: 2.4.0
364366
@@ -367,13 +369,22 @@ def __init__(self, js):
367369
self._js = js
368370

369371
@since("2.4.0")
370-
def summary(self, featureCol, weightCol=None):
372+
def summary(self, featuresCol, weightCol=None):
371373
"""
372374
Returns an aggregate object that contains the summary of the column with the requested
373375
metrics.
376+
377+
:param featuresCol:
378+
a column that contains features Vector object.
379+
:param weightCol:
380+
a column that contains weight value. Default weight is 1.0.
381+
:return:
382+
an aggregate column that contains the statistics. The exact content of this
383+
structure is determined during the creation of the builder.
384+
374385
"""
375-
featureCol, weightCol = Summarizer._check_param(featureCol, weightCol)
376-
return Column(self._js.summary(featureCol._jc, weightCol._jc))
386+
featuresCol, weightCol = Summarizer._check_param(featuresCol, weightCol)
387+
return Column(self._js.summary(featuresCol._jc, weightCol._jc))
377388

378389

379390
if __name__ == "__main__":

0 commit comments

Comments
 (0)