@@ -2930,7 +2930,7 @@ setMethod("saveAsTable",
29302930 invisible (callJMethod(write , " saveAsTable" , tableName ))
29312931 })
29322932
2933- # ' summary
2933+ # ' describe
29342934# '
29352935# ' Computes statistics for numeric and string columns.
29362936# ' If no columns are given, this function computes statistics for all numerical or string columns.
@@ -2941,7 +2941,7 @@ setMethod("saveAsTable",
29412941# ' @return A SparkDataFrame.
29422942# ' @family SparkDataFrame functions
29432943# ' @aliases describe,SparkDataFrame,character-method describe,SparkDataFrame,ANY-method
2944- # ' @rdname summary
2944+ # ' @rdname describe
29452945# ' @name describe
29462946# ' @export
29472947# ' @examples
@@ -2953,6 +2953,7 @@ setMethod("saveAsTable",
29532953# ' describe(df, "col1")
29542954# ' describe(df, "col1", "col2")
29552955# ' }
2956+ # ' @seealso Use \code{\link{summary}} for expanded statistics and control over which statistics to compute.
29562957# ' @note describe(SparkDataFrame, character) since 1.4.0
29572958setMethod ("describe ",
29582959 signature(x = " SparkDataFrame" , col = " character" ),
@@ -2962,7 +2963,7 @@ setMethod("describe",
29622963 dataFrame(sdf )
29632964 })
29642965
2965- # ' @rdname summary
2966+ # ' @rdname describe
29662967# ' @name describe
29672968# ' @aliases describe,SparkDataFrame-method
29682969# ' @note describe(SparkDataFrame) since 1.4.0
@@ -2973,15 +2974,47 @@ setMethod("describe",
29732974 dataFrame(sdf )
29742975 })
29752976
2977+ # ' summary
2978+ # '
2979+ # ' Computes specified statistics for numeric and string columns.
2980+ # '
2981+ # ' Available statistics are:
2982+ # '
2983+ # ' - count
2984+ # ' - mean
2985+ # ' - stddev
2986+ # ' - min
2987+ # ' - max
2988+ # ' - arbitrary approximate percentiles specified as a percentage (e.g., 75%)
2989+ # '
2990+ # ' If no statistics are given, this function computes count, mean, stddev, min,
2991+ # ' approximate quartiles (percentiles at 25%, 50%, and 75%), and max.
2992+ # '
2993+ # ' This function is meant for exploratory data analysis, as we make no guarantee about the
2994+ # ' backward compatibility of the schema of the resulting Dataset. If you want to
2995+ # ' programmatically compute summary statistics, use the \code{agg} function instead.
2996+ # '
2997+ # '
29762998# ' @param object a SparkDataFrame to be summarized.
29772999# ' @param ... (optional) statistics to be computed for all columns.
29783000# ' @rdname summary
29793001# ' @name summary
29803002# ' @aliases summary,SparkDataFrame-method
3003+ # ' @export
3004+ # ' @examples
3005+ # '\dontrun{
3006+ # ' sparkR.session()
3007+ # ' path <- "path/to/file.json"
3008+ # ' df <- read.json(path)
3009+ # ' summary(df)
3010+ # ' summary(df, "min", "25%", "75%", "max")
3011+ # ' summary(select(df, "age", "height"))
3012+ # ' }
29813013# ' @note summary(SparkDataFrame) since 1.5.0
2982- # ' @note the statistics provided by this method were change in 2.3.0 use describe for previous defaults.
3014+ # ' @note The statistics provided by \code{summary} were changed in 2.3.0; use \code{\link{describe}} for previous defaults.
3015+ # ' @seealso \code{\link{describe}}
29833016setMethod ("summary ",
2984- signature(object = " SparkDataFrame" , ... = " character" ),
3017+ signature(object = " SparkDataFrame" ), # , ... = "character"),
29853018 function (object , ... ) {
29863019 statisticsList <- list (... )
29873020 sdf <- callJMethod(object @ sdf , " summary" , statisticsList )
0 commit comments