Skip to content

Commit 1801e96

Browse files
committed
merge conflicts
2 parents 7bedeb6 + 21e1fc7 commit 1801e96

File tree

362 files changed

+10253
-2300
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

362 files changed

+10253
-2300
lines changed

R/pkg/NAMESPACE

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,9 @@ exportMethods("%<=>%",
204204
"array_max",
205205
"array_min",
206206
"array_position",
207+
"array_repeat",
207208
"array_sort",
209+
"arrays_overlap",
208210
"asc",
209211
"ascii",
210212
"asin",
@@ -302,6 +304,7 @@ exportMethods("%<=>%",
302304
"lower",
303305
"lpad",
304306
"ltrim",
307+
"map_entries",
305308
"map_keys",
306309
"map_values",
307310
"max",

R/pkg/R/DataFrame.R

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2297,6 +2297,8 @@ setMethod("rename",
22972297

22982298
setClassUnion("characterOrColumn", c("character", "Column"))
22992299

2300+
setClassUnion("numericOrColumn", c("numeric", "Column"))
2301+
23002302
#' Arrange Rows by Variables
23012303
#'
23022304
#' Sort a SparkDataFrame by the specified column(s).

R/pkg/R/client.R

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ generateSparkSubmitArgs <- function(args, sparkHome, jars, sparkSubmitOpts, pack
6363
checkJavaVersion <- function() {
6464
javaBin <- "java"
6565
javaHome <- Sys.getenv("JAVA_HOME")
66-
javaReqs <- utils::packageDescription(utils::packageName(), fields=c("SystemRequirements"))
66+
javaReqs <- utils::packageDescription(utils::packageName(), fields = c("SystemRequirements"))
6767
sparkJavaVersion <- as.numeric(tail(strsplit(javaReqs, "[(=)]")[[1]], n = 1L))
6868
if (javaHome != "") {
6969
javaBin <- file.path(javaHome, "bin", javaBin)
@@ -82,15 +82,16 @@ checkJavaVersion <- function() {
8282
})
8383
javaVersionFilter <- Filter(
8484
function(x) {
85-
grepl("java version", x)
85+
grepl(" version", x)
8686
}, javaVersionOut)
8787

8888
javaVersionStr <- strsplit(javaVersionFilter[[1]], "[\"]")[[1L]][2]
8989
# javaVersionStr is of the form 1.8.0_92.
9090
# Extract 8 from it to compare to sparkJavaVersion
9191
javaVersionNum <- as.integer(strsplit(javaVersionStr, "[.]")[[1L]][2])
9292
if (javaVersionNum != sparkJavaVersion) {
93-
stop(paste("Java version", sparkJavaVersion, "is required for this package; found version:", javaVersionStr))
93+
stop(paste("Java version", sparkJavaVersion, "is required for this package; found version:",
94+
javaVersionStr))
9495
}
9596
}
9697

R/pkg/R/functions.R

Lines changed: 69 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,7 @@ NULL
189189
#' the map or array of maps.
190190
#' \item \code{from_json}: it is the column containing the JSON string.
191191
#' }
192+
#' @param y Column to compute on.
192193
#' @param value A value to compute on.
193194
#' \itemize{
194195
#' \item \code{array_contains}: a value to be checked if contained in the column.
@@ -207,18 +208,20 @@ NULL
207208
#' tmp <- mutate(df, v1 = create_array(df$mpg, df$cyl, df$hp))
208209
#' head(select(tmp, array_contains(tmp$v1, 21), size(tmp$v1)))
209210
#' head(select(tmp, array_max(tmp$v1), array_min(tmp$v1)))
210-
#' head(select(tmp, array_position(tmp$v1, 21), array_sort(tmp$v1)))
211-
#' head(select(tmp, flatten(tmp$v1)))
211+
#' head(select(tmp, array_position(tmp$v1, 21), array_repeat(df$mpg, 3), array_sort(tmp$v1)))
212+
#' head(select(tmp, flatten(tmp$v1), reverse(tmp$v1)))
212213
#' tmp2 <- mutate(tmp, v2 = explode(tmp$v1))
213214
#' head(tmp2)
214215
#' head(select(tmp, posexplode(tmp$v1)))
215216
#' head(select(tmp, slice(tmp$v1, 2L, 2L)))
216217
#' head(select(tmp, sort_array(tmp$v1)))
217218
#' head(select(tmp, sort_array(tmp$v1, asc = FALSE)))
218219
#' tmp3 <- mutate(df, v3 = create_map(df$model, df$cyl))
219-
#' head(select(tmp3, map_keys(tmp3$v3)))
220-
#' head(select(tmp3, map_values(tmp3$v3)))
221-
#' head(select(tmp3, element_at(tmp3$v3, "Valiant")))}
220+
#' head(select(tmp3, map_entries(tmp3$v3), map_keys(tmp3$v3), map_values(tmp3$v3)))
221+
#' head(select(tmp3, element_at(tmp3$v3, "Valiant")))
222+
#' tmp4 <- mutate(df, v4 = create_array(df$mpg, df$cyl), v5 = create_array(df$cyl, df$hp))
223+
#' head(select(tmp4, concat(tmp4$v4, tmp4$v5), arrays_overlap(tmp4$v4, tmp4$v5)))
224+
#' head(select(tmp, concat(df$mpg, df$cyl, df$hp)))}
222225
NULL
223226

224227
#' Window functions for Column operations
@@ -1260,9 +1263,9 @@ setMethod("quarter",
12601263
})
12611264

12621265
#' @details
1263-
#' \code{reverse}: Reverses the string column and returns it as a new string column.
1266+
#' \code{reverse}: Returns a reversed string or an array with reverse order of elements.
12641267
#'
1265-
#' @rdname column_string_functions
1268+
#' @rdname column_collection_functions
12661269
#' @aliases reverse reverse,Column-method
12671270
#' @note reverse since 1.5.0
12681271
setMethod("reverse",
@@ -2055,20 +2058,10 @@ setMethod("countDistinct",
20552058

20562059
#' @details
20572060
#' \code{concat}: Concatenates multiple input columns together into a single column.
2058-
#' If all inputs are binary, concat returns an output as binary. Otherwise, it returns as string.
2061+
#' The function works with strings, binary and compatible array columns.
20592062
#'
2060-
#' @rdname column_string_functions
2063+
#' @rdname column_collection_functions
20612064
#' @aliases concat concat,Column-method
2062-
#' @examples
2063-
#'
2064-
#' \dontrun{
2065-
#' # concatenate strings
2066-
#' tmp <- mutate(df, s1 = concat(df$Class, df$Sex),
2067-
#' s2 = concat(df$Class, df$Sex, df$Age),
2068-
#' s3 = concat(df$Class, df$Sex, df$Age, df$Class),
2069-
#' s4 = concat_ws("_", df$Class, df$Sex),
2070-
#' s5 = concat_ws("+", df$Class, df$Sex, df$Age, df$Survived))
2071-
#' head(tmp)}
20722065
#' @note concat since 1.5.0
20732066
setMethod("concat",
20742067
signature(x = "Column"),
@@ -2409,6 +2402,13 @@ setMethod("shiftRightUnsigned", signature(y = "Column", x = "numeric"),
24092402
#' @param sep separator to use.
24102403
#' @rdname column_string_functions
24112404
#' @aliases concat_ws concat_ws,character,Column-method
2405+
#' @examples
2406+
#'
2407+
#' \dontrun{
2408+
#' # concatenate strings
2409+
#' tmp <- mutate(df, s1 = concat_ws("_", df$Class, df$Sex),
2410+
#' s2 = concat_ws("+", df$Class, df$Sex, df$Age, df$Survived))
2411+
#' head(tmp)}
24122412
#' @note concat_ws since 1.5.0
24132413
setMethod("concat_ws", signature(sep = "character", x = "Column"),
24142414
function(sep, x, ...) {
@@ -3048,6 +3048,26 @@ setMethod("array_position",
30483048
column(jc)
30493049
})
30503050

3051+
#' @details
3052+
#' \code{array_repeat}: Creates an array containing \code{x} repeated the number of times
3053+
#' given by \code{count}.
3054+
#'
3055+
#' @param count a Column or constant determining the number of repetitions.
3056+
#' @rdname column_collection_functions
3057+
#' @aliases array_repeat array_repeat,Column,numericOrColumn-method
3058+
#' @note array_repeat since 2.4.0
3059+
setMethod("array_repeat",
3060+
signature(x = "Column", count = "numericOrColumn"),
3061+
function(x, count) {
3062+
if (class(count) == "Column") {
3063+
count <- count@jc
3064+
} else {
3065+
count <- as.integer(count)
3066+
}
3067+
jc <- callJStatic("org.apache.spark.sql.functions", "array_repeat", x@jc, count)
3068+
column(jc)
3069+
})
3070+
30513071
#' @details
30523072
#' \code{array_sort}: Sorts the input array in ascending order. The elements of the input array
30533073
#' must be orderable. NA elements will be placed at the end of the returned array.
@@ -3063,7 +3083,23 @@ setMethod("array_sort",
30633083
})
30643084

30653085
#' @details
3066-
#' \code{flatten}: Transforms an array of arrays into a single array.
3086+
#' \code{arrays_overlap}: Returns true if the input arrays have at least one non-null element in
3087+
#' common. If not and both arrays are non-empty and any of them contains a null, it returns null.
3088+
#' It returns false otherwise.
3089+
#'
3090+
#' @rdname column_collection_functions
3091+
#' @aliases arrays_overlap arrays_overlap,Column-method
3092+
#' @note arrays_overlap since 2.4.0
3093+
setMethod("arrays_overlap",
3094+
signature(x = "Column", y = "Column"),
3095+
function(x, y) {
3096+
jc <- callJStatic("org.apache.spark.sql.functions", "arrays_overlap", x@jc, y@jc)
3097+
column(jc)
3098+
})
3099+
3100+
#' @details
3101+
#' \code{flatten}: Creates a single array from an array of arrays.
3102+
#' If a structure of nested arrays is deeper than two levels, only one level of nesting is removed.
30673103
#'
30683104
#' @rdname column_collection_functions
30693105
#' @aliases flatten flatten,Column-method
@@ -3075,6 +3111,19 @@ setMethod("flatten",
30753111
column(jc)
30763112
})
30773113

3114+
#' @details
3115+
#' \code{map_entries}: Returns an unordered array of all entries in the given map.
3116+
#'
3117+
#' @rdname column_collection_functions
3118+
#' @aliases map_entries map_entries,Column-method
3119+
#' @note map_entries since 2.4.0
3120+
setMethod("map_entries",
3121+
signature(x = "Column"),
3122+
function(x) {
3123+
jc <- callJStatic("org.apache.spark.sql.functions", "map_entries", x@jc)
3124+
column(jc)
3125+
})
3126+
30783127
#' @details
30793128
#' \code{map_keys}: Returns an unordered array containing the keys of the map.
30803129
#'

R/pkg/R/generics.R

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -624,7 +624,7 @@ setGeneric("summarize", function(x, ...) { standardGeneric("summarize") })
624624
#' @rdname summary
625625
setGeneric("summary", function(object, ...) { standardGeneric("summary") })
626626

627-
setGeneric("toJSON", function(x) { standardGeneric("toJSON") })
627+
setGeneric("toJSON", function(x, ...) { standardGeneric("toJSON") })
628628

629629
setGeneric("toRDD", function(x) { standardGeneric("toRDD") })
630630

@@ -769,10 +769,18 @@ setGeneric("array_min", function(x) { standardGeneric("array_min") })
769769
#' @name NULL
770770
setGeneric("array_position", function(x, value) { standardGeneric("array_position") })
771771

772+
#' @rdname column_collection_functions
773+
#' @name NULL
774+
setGeneric("array_repeat", function(x, count) { standardGeneric("array_repeat") })
775+
772776
#' @rdname column_collection_functions
773777
#' @name NULL
774778
setGeneric("array_sort", function(x) { standardGeneric("array_sort") })
775779

780+
#' @rdname column_collection_functions
781+
#' @name NULL
782+
setGeneric("arrays_overlap", function(x, y) { standardGeneric("arrays_overlap") })
783+
776784
#' @rdname column_string_functions
777785
#' @name NULL
778786
setGeneric("ascii", function(x) { standardGeneric("ascii") })
@@ -817,7 +825,7 @@ setGeneric("collect_set", function(x) { standardGeneric("collect_set") })
817825
#' @rdname column
818826
setGeneric("column", function(x) { standardGeneric("column") })
819827

820-
#' @rdname column_string_functions
828+
#' @rdname column_collection_functions
821829
#' @name NULL
822830
setGeneric("concat", function(x, ...) { standardGeneric("concat") })
823831

@@ -1034,6 +1042,10 @@ setGeneric("lpad", function(x, len, pad) { standardGeneric("lpad") })
10341042
#' @name NULL
10351043
setGeneric("ltrim", function(x, trimString) { standardGeneric("ltrim") })
10361044

1045+
#' @rdname column_collection_functions
1046+
#' @name NULL
1047+
setGeneric("map_entries", function(x) { standardGeneric("map_entries") })
1048+
10371049
#' @rdname column_collection_functions
10381050
#' @name NULL
10391051
setGeneric("map_keys", function(x) { standardGeneric("map_keys") })
@@ -1134,7 +1146,7 @@ setGeneric("regexp_replace",
11341146
#' @name NULL
11351147
setGeneric("repeat_string", function(x, n) { standardGeneric("repeat_string") })
11361148

1137-
#' @rdname column_string_functions
1149+
#' @rdname column_collection_functions
11381150
#' @name NULL
11391151
setGeneric("reverse", function(x) { standardGeneric("reverse") })
11401152

R/pkg/R/sparkR.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ sparkR.sparkContext <- function(
194194

195195
# Don't use readString() so that we can provide a useful
196196
# error message if the R and Java versions are mismatched.
197-
authSecretLen = readInt(f)
197+
authSecretLen <- readInt(f)
198198
if (length(authSecretLen) == 0 || authSecretLen == 0) {
199199
stop("Unexpected EOF in JVM connection data. Mismatched versions?")
200200
}

R/pkg/tests/fulltests/test_sparkSQL.R

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1479,7 +1479,7 @@ test_that("column functions", {
14791479
df5 <- createDataFrame(list(list(a = "010101")))
14801480
expect_equal(collect(select(df5, conv(df5$a, 2, 16)))[1, 1], "15")
14811481

1482-
# Test array_contains(), array_max(), array_min(), array_position() and element_at()
1482+
# Test array_contains(), array_max(), array_min(), array_position(), element_at() and reverse()
14831483
df <- createDataFrame(list(list(list(1L, 2L, 3L)), list(list(6L, 5L, 4L))))
14841484
result <- collect(select(df, array_contains(df[[1]], 1L)))[[1]]
14851485
expect_equal(result, c(TRUE, FALSE))
@@ -1496,6 +1496,28 @@ test_that("column functions", {
14961496
result <- collect(select(df, element_at(df[[1]], 1L)))[[1]]
14971497
expect_equal(result, c(1, 6))
14981498

1499+
result <- collect(select(df, reverse(df[[1]])))[[1]]
1500+
expect_equal(result, list(list(3L, 2L, 1L), list(4L, 5L, 6L)))
1501+
1502+
df2 <- createDataFrame(list(list("abc")))
1503+
result <- collect(select(df2, reverse(df2[[1]])))[[1]]
1504+
expect_equal(result, "cba")
1505+
1506+
# Test array_repeat()
1507+
df <- createDataFrame(list(list("a", 3L), list("b", 2L)))
1508+
result <- collect(select(df, array_repeat(df[[1]], df[[2]])))[[1]]
1509+
expect_equal(result, list(list("a", "a", "a"), list("b", "b")))
1510+
1511+
result <- collect(select(df, array_repeat(df[[1]], 2L)))[[1]]
1512+
expect_equal(result, list(list("a", "a"), list("b", "b")))
1513+
1514+
# Test arrays_overlap()
1515+
df <- createDataFrame(list(list(list(1L, 2L), list(3L, 1L)),
1516+
list(list(1L, 2L), list(3L, 4L)),
1517+
list(list(1L, NA), list(3L, 4L))))
1518+
result <- collect(select(df, arrays_overlap(df[[1]], df[[2]])))[[1]]
1519+
expect_equal(result, c(TRUE, FALSE, NA))
1520+
14991521
# Test array_sort() and sort_array()
15001522
df <- createDataFrame(list(list(list(2L, 1L, 3L, NA)), list(list(NA, 6L, 5L, NA, 4L))))
15011523

@@ -1512,14 +1534,25 @@ test_that("column functions", {
15121534
result <- collect(select(df, slice(df[[1]], 2L, 2L)))[[1]]
15131535
expect_equal(result, list(list(2L, 3L), list(5L)))
15141536

1515-
# Test flattern
1537+
# Test concat()
1538+
df <- createDataFrame(list(list(list(1L, 2L, 3L), list(4L, 5L, 6L)),
1539+
list(list(7L, 8L, 9L), list(10L, 11L, 12L))))
1540+
result <- collect(select(df, concat(df[[1]], df[[2]])))[[1]]
1541+
expect_equal(result, list(list(1L, 2L, 3L, 4L, 5L, 6L), list(7L, 8L, 9L, 10L, 11L, 12L)))
1542+
1543+
# Test flatten()
15161544
df <- createDataFrame(list(list(list(list(1L, 2L), list(3L, 4L))),
15171545
list(list(list(5L, 6L), list(7L, 8L)))))
15181546
result <- collect(select(df, flatten(df[[1]])))[[1]]
15191547
expect_equal(result, list(list(1L, 2L, 3L, 4L), list(5L, 6L, 7L, 8L)))
15201548

1521-
# Test map_keys(), map_values() and element_at()
1549+
# Test map_entries(), map_keys(), map_values() and element_at()
15221550
df <- createDataFrame(list(list(map = as.environment(list(x = 1, y = 2)))))
1551+
result <- collect(select(df, map_entries(df$map)))[[1]]
1552+
expected_entries <- list(listToStruct(list(key = "x", value = 1)),
1553+
listToStruct(list(key = "y", value = 2)))
1554+
expect_equal(result, list(expected_entries))
1555+
15231556
result <- collect(select(df, map_keys(df$map)))[[1]]
15241557
expect_equal(result, list(list("x", "y")))
15251558

core/src/main/scala/org/apache/spark/SSLOptions.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ private[spark] case class SSLOptions(
128128
}
129129

130130
/** Returns a string representation of this SSLOptions with all the passwords masked. */
131-
override def toString: String = s"SSLOptions{enabled=$enabled, " +
131+
override def toString: String = s"SSLOptions{enabled=$enabled, port=$port, " +
132132
s"keyStore=$keyStore, keyStorePassword=${keyStorePassword.map(_ => "xxx")}, " +
133133
s"trustStore=$trustStore, trustStorePassword=${trustStorePassword.map(_ => "xxx")}, " +
134134
s"protocol=$protocol, enabledAlgorithms=$enabledAlgorithms}"
@@ -142,6 +142,7 @@ private[spark] object SSLOptions extends Logging {
142142
*
143143
* The following settings are allowed:
144144
* $ - `[ns].enabled` - `true` or `false`, to enable or disable SSL respectively
145+
* $ - `[ns].port` - the port where to bind the SSL server
145146
* $ - `[ns].keyStore` - a path to the key-store file; can be relative to the current directory
146147
* $ - `[ns].keyStorePassword` - a password to the key-store file
147148
* $ - `[ns].keyPassword` - a password to the private key

0 commit comments

Comments
 (0)