@@ -308,12 +308,12 @@ def normL2(col, weightCol=None):
308308 return Summarizer ._get_single_metric (col , weightCol , "normL2" )
309309
310310 @staticmethod
311- def _check_param (featureCol , weightCol ):
311+ def _check_param (featuresCol , weightCol ):
312312 if weightCol is None :
313313 weightCol = lit (1.0 )
314- if not isinstance (featureCol , Column ) or not isinstance (weightCol , Column ):
314+ if not isinstance (featuresCol , Column ) or not isinstance (weightCol , Column ):
315315 raise TypeError ("featureCol and weightCol should be a Column" )
316- return featureCol , weightCol
316+ return featuresCol , weightCol
317317
318318 @staticmethod
319319 def _get_single_metric (col , weightCol , metric ):
@@ -339,26 +339,28 @@ def metrics(*metrics):
339339 - normL2: the Euclidean norm for each coefficient.
340340 - normL1: the L1 norm of each coefficient (sum of the absolute values).
341341
342- :param metrics metrics that can be provided.
343- :return a Summarizer
342+ :param metrics:
343+ metrics that can be provided.
344+ :return:
345+ an object of :py:class:`pyspark.ml.stat.SummaryBuilder`
344346
345347 Note: Currently, the performance of this interface is about 2x~3x slower than using the RDD
346348 interface.
347349 """
348350 sc = SparkContext ._active_spark_context
349351 js = JavaWrapper ._new_java_obj ("org.apache.spark.ml.stat.Summarizer.metrics" ,
350352 _to_seq (sc , metrics ))
351- return SummarizerBuilder (js )
353+ return SummaryBuilder (js )
352354
353355
354- class SummarizerBuilder ( object ):
356+ class SummaryBuilder ( JavaWrapper ):
355357 """
356358 .. note:: Experimental
357359
358360 A builder object that provides summary statistics about a given column.
359361
360362 Users should not directly create such builders, but instead use one of the methods in
361- :py:class:`pyspark.ml.stat.Summary `
363+ :py:class:`pyspark.ml.stat.Summarizer`
362364
363365 .. versionadded:: 2.4.0
364366
@@ -367,13 +369,22 @@ def __init__(self, js):
367369 self ._js = js
368370
369371 @since ("2.4.0" )
370- def summary (self , featureCol , weightCol = None ):
372+ def summary (self , featuresCol , weightCol = None ):
371373 """
372374 Returns an aggregate object that contains the summary of the column with the requested
373375 metrics.
376+
377+ :param featuresCol:
378+ a column that contains feature Vector objects.
379+ :param weightCol:
380+ a column that contains weight values. Default weight is 1.0.
381+ :return:
382+ an aggregate column that contains the statistics. The exact content of this
383+ structure is determined during the creation of the builder.
384+
374385 """
375- featureCol , weightCol = Summarizer ._check_param (featureCol , weightCol )
376- return Column (self ._js .summary (featureCol ._jc , weightCol ._jc ))
386+ featuresCol , weightCol = Summarizer ._check_param (featuresCol , weightCol )
387+ return Column (self ._js .summary (featuresCol ._jc , weightCol ._jc ))
377388
378389
379390if __name__ == "__main__" :
0 commit comments