#' @param x A SparkDataFrame
#' @param withReplacement Sampling with replacement or not
#' @param fraction The (rough) sample target fraction
#' @param seed Randomness seed value. Default is a random seed.
#'
#' @family SparkDataFrame functions
#' @aliases sample,SparkDataFrame-method
#' @rdname sample
#' @name sample
#' @export
#' @examples
#'\dontrun{
#' sparkR.session()
#' path <- "path/to/file.json"
#' df <- read.json(path)
#' collect(sample(df, fraction = 0.5))
#' collect(sample(df, FALSE, 0.5))
#' collect(sample(df, TRUE, 0.5, seed = 3))
#'}
#' @note sample since 1.4.0
setMethod("sample",
          signature(x = "SparkDataFrame"),
          function(x, withReplacement = FALSE, fraction, seed) {
            # Validate user-supplied arguments up front so the error surfaces in R
            # rather than as an opaque JVM exception.
            if (!is.numeric(fraction)) {
              stop(paste("fraction must be numeric; however, got", class(fraction)))
            }
            if (!is.logical(withReplacement)) {
              stop(paste("withReplacement must be logical; however, got",
                         class(withReplacement)))
            }

            if (!missing(seed)) {
              # An explicitly supplied seed must be a usable scalar.
              if (is.null(seed)) {
                stop("seed must not be NULL or NA; however, got NULL")
              }
              if (is.na(seed)) {
                stop("seed must not be NULL or NA; however, got NA")
              }

              # TODO : Figure out how to send integer as java.lang.Long to JVM so
              # we can send seed as an argument through callJMethod
              sdf <- handledCallJMethod(x@sdf, "sample", as.logical(withReplacement),
                                        as.numeric(fraction), as.integer(seed))
            } else {
              # No seed given: let the JVM side pick a random seed.
              sdf <- handledCallJMethod(x@sdf, "sample",
                                        as.logical(withReplacement), as.numeric(fraction))
            }
            dataFrame(sdf)
          })
10191034
10201035# ' @rdname sample
1021- # ' @aliases sample_frac,SparkDataFrame,logical,numeric -method
1036+ # ' @aliases sample_frac,SparkDataFrame-method
10221037# ' @name sample_frac
10231038# ' @note sample_frac since 1.4.0
setMethod("sample_frac",
          signature(x = "SparkDataFrame"),
          function(x, withReplacement = FALSE, fraction, seed) {
            # Pure alias for sample(); a missing `seed` promise propagates through
            # the call, so sample()'s missing(seed) branch still works.
            sample(x, withReplacement, fraction, seed)
          })
10301044
@@ -2683,7 +2697,7 @@ generateAliasesForIntersectedCols <- function (x, intersectedColNames, suffix) {
#' @rdname union
#' @name union
#' @aliases union,SparkDataFrame,SparkDataFrame-method
#' @seealso \link{rbind} \link{unionByName}
#' @export
#' @examples
#'\dontrun{
@@ -2714,6 +2728,40 @@ setMethod("unionAll",
27142728 union(x , y )
27152729 })
27162730
#' Return a new SparkDataFrame containing the union of rows, matched by column names
#'
#' Return a new SparkDataFrame containing the union of rows in this SparkDataFrame
#' and another SparkDataFrame. This is different from \code{union} function, and both
#' \code{UNION ALL} and \code{UNION DISTINCT} in SQL as column positions are not taken
#' into account. Input SparkDataFrames can have different data types in the schema.
#'
#' Note: This does not remove duplicate rows across the two SparkDataFrames.
#' This function resolves columns by name (not by position).
#'
#' @param x A SparkDataFrame
#' @param y A SparkDataFrame
#' @return A SparkDataFrame containing the result of the union.
#' @family SparkDataFrame functions
#' @rdname unionByName
#' @name unionByName
#' @aliases unionByName,SparkDataFrame,SparkDataFrame-method
#' @seealso \link{rbind} \link{union}
#' @export
#' @examples
#'\dontrun{
#' sparkR.session()
#' df1 <- select(createDataFrame(mtcars), "carb", "am", "gear")
#' df2 <- select(createDataFrame(mtcars), "am", "gear", "carb")
#' head(unionByName(df1, df2))
#' }
#' @note unionByName since 2.3.0
setMethod("unionByName",
          signature(x = "SparkDataFrame", y = "SparkDataFrame"),
          function(x, y) {
            # Delegate to Dataset.unionByName on the JVM, which matches columns
            # by name rather than by position, then wrap the result.
            merged <- callJMethod(x@sdf, "unionByName", y@sdf)
            dataFrame(merged)
          })
2764+
#' Union two or more SparkDataFrames
#'
#' Union two or more SparkDataFrames by row. As in R's \code{rbind}, this method
@@ -2730,7 +2778,7 @@ setMethod("unionAll",
#' @aliases rbind,SparkDataFrame-method
#' @rdname rbind
#' @name rbind
#' @seealso \link{union} \link{unionByName}
#' @export
#' @examples
#'\dontrun{
0 commit comments