Skip to content

Commit 5f9ce73

Browse files
Sun Rui and shivaram
authored and committed
[SPARK-8844] [SPARKR] head/collect is broken in SparkR.
This is a WIP patch for SPARK-8844 for collecting reviews. This bug is about reading an empty DataFrame: in readCol(), the call `lapply(1:numRows, function(x) {` does not take into consideration the case where numRows = 0 (in R, `1:0` evaluates to `c(1, 0)` rather than an empty sequence, so the loop body still runs). Will add a unit test case. Author: Sun Rui <rui.sun@intel.com> Closes #7419 from sun-rui/SPARK-8844.
1 parent 182f9b7 commit 5f9ce73

File tree

2 files changed

+30
-6
lines changed

2 files changed

+30
-6
lines changed

R/pkg/R/deserialize.R

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -176,10 +176,14 @@ readRow <- function(inputCon) {
176176

177177
# Take a single column as Array[Byte] and deserialize it into an atomic vector
178178
readCol <- function(inputCon, numRows) {
179-
# sapply can not work with POSIXlt
180-
do.call(c, lapply(1:numRows, function(x) {
181-
value <- readObject(inputCon)
182-
# Replace NULL with NA so we can coerce to vectors
183-
if (is.null(value)) NA else value
184-
}))
179+
if (numRows > 0) {
180+
# sapply can not work with POSIXlt
181+
do.call(c, lapply(1:numRows, function(x) {
182+
value <- readObject(inputCon)
183+
# Replace NULL with NA so we can coerce to vectors
184+
if (is.null(value)) NA else value
185+
}))
186+
} else {
187+
vector()
188+
}
185189
}

R/pkg/inst/tests/test_sparkSQL.R

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -408,6 +408,14 @@ test_that("collect() returns a data.frame", {
408408
expect_equal(names(rdf)[1], "age")
409409
expect_equal(nrow(rdf), 3)
410410
expect_equal(ncol(rdf), 2)
411+
412+
# collect() returns data correctly from a DataFrame with 0 row
413+
df0 <- limit(df, 0)
414+
rdf <- collect(df0)
415+
expect_true(is.data.frame(rdf))
416+
expect_equal(names(rdf)[1], "age")
417+
expect_equal(nrow(rdf), 0)
418+
expect_equal(ncol(rdf), 2)
411419
})
412420

413421
test_that("limit() returns DataFrame with the correct number of rows", {
@@ -492,6 +500,18 @@ test_that("head() and first() return the correct data", {
492500

493501
testFirst <- first(df)
494502
expect_equal(nrow(testFirst), 1)
503+
504+
# head() and first() return the correct data on
505+
# a DataFrame with 0 row
506+
df0 <- limit(df, 0)
507+
508+
testHead <- head(df0)
509+
expect_equal(nrow(testHead), 0)
510+
expect_equal(ncol(testHead), 2)
511+
512+
testFirst <- first(df0)
513+
expect_equal(nrow(testFirst), 0)
514+
expect_equal(ncol(testFirst), 2)
495515
})
496516

497517
test_that("distinct() and unique on DataFrames", {

0 commit comments

Comments
 (0)