Skip to content

Commit 8c9cd0a

Browse files
keypointt authored and mengxr committed
[SPARK-16140][MLLIB][SPARKR][DOCS] Group k-means method in generated R doc
https://issues.apache.org/jira/browse/SPARK-16140 ## What changes were proposed in this pull request? Group the R doc of spark.kmeans, predict(KM), summary(KM), read/write.ml(KM) under Rd spark.kmeans. The example code was updated. ## How was this patch tested? Tested on my local machine. On my laptop, `jekyll build` is failing to build the API docs, so here I can only show the HTML I manually generated from the Rd files; no CSS is applied, but the doc content should be there. ![screenshotkmeans](https://cloud.githubusercontent.com/assets/3925641/16403203/c2c9ca1e-3ca7-11e6-9e29-f2164aee75fc.png) Author: Xin Ren <iamshrek@126.com> Closes #13921 from keypointt/SPARK-16140.
1 parent c6a220d commit 8c9cd0a

File tree

2 files changed

+35
-39
lines changed

2 files changed

+35
-39
lines changed

R/pkg/R/generics.R

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1247,6 +1247,7 @@ setGeneric("spark.glm", function(data, formula, ...) { standardGeneric("spark.gl
12471247
#' @export
12481248
setGeneric("glm")
12491249

1250+
#' predict
12501251
#' @rdname predict
12511252
#' @export
12521253
setGeneric("predict", function(object, ...) { standardGeneric("predict") })
@@ -1271,6 +1272,7 @@ setGeneric("spark.naiveBayes", function(data, formula, ...) { standardGeneric("s
12711272
#' @export
12721273
setGeneric("spark.survreg", function(data, formula, ...) { standardGeneric("spark.survreg") })
12731274

1275+
#' write.ml
12741276
#' @rdname write.ml
12751277
#' @export
12761278
setGeneric("write.ml", function(object, path, ...) { standardGeneric("write.ml") })

R/pkg/R/mllib.R

Lines changed: 33 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -267,9 +267,10 @@ setMethod("summary", signature(object = "NaiveBayesModel"),
267267
return(list(apriori = apriori, tables = tables))
268268
})
269269

270-
#' Fit a k-means model
270+
#' K-Means Clustering Model
271271
#'
272-
#' Fit a k-means model, similarly to R's kmeans().
272+
#' Fits a k-means clustering model against a SparkDataFrame, similarly to R's kmeans().
273+
#' Users can print the produced model, make predictions with it, and save it to the input path.
273274
#'
274275
#' @param data SparkDataFrame for training
275276
#' @param formula A symbolic description of the model to be fitted. Currently only a few formula
@@ -278,14 +279,32 @@ setMethod("summary", signature(object = "NaiveBayesModel"),
278279
#' @param k Number of centers
279280
#' @param maxIter Maximum iteration number
280281
#' @param initMode The initialization algorithm chosen to fit the model
281-
#' @return A fitted k-means model
282+
#' @return \code{spark.kmeans} returns a fitted k-means model
282283
#' @rdname spark.kmeans
284+
#' @name spark.kmeans
283285
#' @export
284286
#' @examples
285287
#' \dontrun{
286-
#' model <- spark.kmeans(data, ~ ., k = 4, initMode = "random")
288+
#' sparkR.session()
289+
#' data(iris)
290+
#' df <- createDataFrame(iris)
291+
#' model <- spark.kmeans(df, Sepal_Length ~ Sepal_Width, k = 4, initMode = "random")
292+
#' summary(model)
293+
#'
294+
#' # fitted values on training data
295+
#' fitted <- predict(model, df)
296+
#' head(select(fitted, "Sepal_Length", "prediction"))
297+
#'
298+
#' # save fitted model to input path
299+
#' path <- "path/to/model"
300+
#' write.ml(model, path)
301+
#'
302+
#' # can also read back the saved model and print
303+
#' savedModel <- read.ml(path)
304+
#' summary(savedModel)
287305
#' }
288306
#' @note spark.kmeans since 2.0.0
307+
#' @seealso \link{predict}, \link{read.ml}, \link{write.ml}
289308
setMethod("spark.kmeans", signature(data = "SparkDataFrame", formula = "formula"),
290309
function(data, formula, k = 2, maxIter = 20, initMode = c("k-means||", "random")) {
291310
formula <- paste(deparse(formula), collapse = "")
@@ -301,7 +320,7 @@ setMethod("spark.kmeans", signature(data = "SparkDataFrame", formula = "formula"
301320
#' Note: A saved-loaded model does not support this method.
302321
#'
303322
#' @param object A fitted k-means model
304-
#' @return SparkDataFrame containing fitted values
323+
#' @return \code{fitted} returns a SparkDataFrame containing fitted values
305324
#' @rdname fitted
306325
#' @export
307326
#' @examples
@@ -323,20 +342,12 @@ setMethod("fitted", signature(object = "KMeansModel"),
323342
}
324343
})
325344

326-
#' Get the summary of a k-means model
327-
#'
328-
#' Returns the summary of a k-means model produced by spark.kmeans(),
329-
#' similarly to R's summary().
345+
# Get the summary of a k-means model
330346
#'
331-
#' @param object a fitted k-means model
332-
#' @return the model's coefficients, size and cluster
333-
#' @rdname summary
347+
#' @param object A fitted k-means model
348+
#' @return \code{summary} returns the model's coefficients, size and cluster
349+
#' @rdname spark.kmeans
334350
#' @export
335-
#' @examples
336-
#' \dontrun{
337-
#' model <- spark.kmeans(trainingData, ~ ., 2)
338-
#' summary(model)
339-
#' }
340351
#' @note summary(KMeansModel) since 2.0.0
341352
setMethod("summary", signature(object = "KMeansModel"),
342353
function(object, ...) {
@@ -358,19 +369,11 @@ setMethod("summary", signature(object = "KMeansModel"),
358369
cluster = cluster, is.loaded = is.loaded))
359370
})
360371

361-
#' Predicted values based on model
362-
#'
363-
#' Makes predictions from a k-means model or a model produced by spark.kmeans().
372+
# Predicted values based on a k-means model
364373
#'
365-
#' @param object A fitted k-means model
366-
#' @rdname predict
374+
#' @return \code{predict} returns the predicted values based on a k-means model
375+
#' @rdname spark.kmeans
367376
#' @export
368-
#' @examples
369-
#' \dontrun{
370-
#' model <- spark.kmeans(trainingData, ~ ., 2)
371-
#' predicted <- predict(model, testData)
372-
#' showDF(predicted)
373-
#' }
374377
#' @note predict(KMeansModel) since 2.0.0
375378
setMethod("predict", signature(object = "KMeansModel"),
376379
function(object, newData) {
@@ -477,24 +480,15 @@ setMethod("write.ml", signature(object = "GeneralizedLinearRegressionModel", pat
477480
invisible(callJMethod(writer, "save", path))
478481
})
479482

480-
#' Save fitted MLlib model to the input path
481-
#'
482-
#' Save the k-means model to the input path.
483+
# Save fitted MLlib model to the input path
483484
#'
484-
#' @param object A fitted k-means model
485485
#' @param path The directory where the model is saved
486486
#' @param overwrite Overwrites or not if the output path already exists. Default is FALSE
487487
#' which means throw exception if the output path exists.
488488
#'
489-
#' @rdname write.ml
489+
#' @rdname spark.kmeans
490490
#' @name write.ml
491491
#' @export
492-
#' @examples
493-
#' \dontrun{
494-
#' model <- spark.kmeans(trainingData, ~ ., k = 2)
495-
#' path <- "path/to/model"
496-
#' write.ml(model, path)
497-
#' }
498492
#' @note write.ml(KMeansModel, character) since 2.0.0
499493
setMethod("write.ml", signature(object = "KMeansModel", path = "character"),
500494
function(object, path, overwrite = FALSE) {

0 commit comments

Comments
 (0)