Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion R/pkg/R/DataFrame.R
Original file line number Diff line number Diff line change
Expand Up @@ -2152,7 +2152,7 @@ setMethod("with",
})

#' Returns the column types of a DataFrame.
#'
#'
#' @name coltypes
#' @title Get column types of a DataFrame
#' @family dataframe_funcs
Expand Down
2 changes: 1 addition & 1 deletion R/pkg/R/functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -2204,7 +2204,7 @@ setMethod("denseRank",
#' @export
#' @examples \dontrun{lag(df$c)}
setMethod("lag",
signature(x = "characterOrColumn", offset = "numeric", defaultValue = "ANY"),
signature(x = "characterOrColumn"),
function(x, offset, defaultValue = NULL) {
col <- if (class(x) == "Column") {
x@jc
Expand Down
4 changes: 2 additions & 2 deletions R/pkg/R/generics.R
Original file line number Diff line number Diff line change
Expand Up @@ -539,7 +539,7 @@ setGeneric("showDF", function(x,...) { standardGeneric("showDF") })

# @rdname subset
# @export
setGeneric("subset", function(x, subset, select, ...) { standardGeneric("subset") })
setGeneric("subset", function(x, ...) { standardGeneric("subset") })

#' @rdname agg
#' @export
Expand Down Expand Up @@ -790,7 +790,7 @@ setGeneric("kurtosis", function(x) { standardGeneric("kurtosis") })

#' @rdname lag
#' @export
setGeneric("lag", function(x, offset, defaultValue = NULL) { standardGeneric("lag") })
setGeneric("lag", function(x, ...) { standardGeneric("lag") })

#' @rdname last
#' @export
Expand Down
5 changes: 5 additions & 0 deletions R/pkg/inst/tests/test_mllib.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@ test_that("glm and predict", {
model <- glm(Sepal_Width ~ Sepal_Length, training, family = "gaussian")
prediction <- predict(model, test)
expect_equal(typeof(take(select(prediction, "prediction"), 1)$prediction), "double")

# Test stats::predict is working
x <- rnorm(15)
y <- x + rnorm(15)
expect_equal(length(predict(lm(y ~ x))), 15)
})

test_that("glm should work with long formula", {
Expand Down
33 changes: 32 additions & 1 deletion R/pkg/inst/tests/test_sparkSQL.R
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,10 @@ test_that("table() returns a new DataFrame", {
expect_is(tabledf, "DataFrame")
expect_equal(count(tabledf), 3)
dropTempTable(sqlContext, "table1")

# Test base::table is working
#a <- letters[1:3]
#expect_equal(class(table(a, sample(a))), "table")
})

test_that("toRDD() returns an RRDD", {
Expand Down Expand Up @@ -673,6 +677,9 @@ test_that("sample on a DataFrame", {
# Also test sample_frac
sampled3 <- sample_frac(df, FALSE, 0.1, 0) # set seed for predictable result
expect_true(count(sampled3) < 3)

# Test base::sample is working
#expect_equal(length(sample(1:12)), 12)
})

test_that("select operators", {
Expand Down Expand Up @@ -753,6 +760,9 @@ test_that("subsetting", {
df6 <- subset(df, df$age %in% c(30), c(1,2))
expect_equal(count(df6), 1)
expect_equal(columns(df6), c("name", "age"))

# Test base::subset is working
expect_equal(nrow(subset(airquality, Temp > 80, select = c(Ozone, Temp))), 68)
})

test_that("selectExpr() on a DataFrame", {
Expand Down Expand Up @@ -888,6 +898,9 @@ test_that("column functions", {
expect_equal(result, list(list(3L, 2L, 1L), list(6L, 5L, 4L)))
result <- collect(select(df, sort_array(df[[1]])))[[1]]
expect_equal(result, list(list(1L, 2L, 3L), list(4L, 5L, 6L)))

# Test that stats::lag is working
expect_equal(length(lag(ldeaths, 12)), 72)
})
#
test_that("column binary mathfunctions", {
Expand Down Expand Up @@ -1086,7 +1099,7 @@ test_that("group by, agg functions", {
gd3_local <- collect(agg(gd3, var(df8$age)))
expect_equal(162, gd3_local[gd3_local$name == "Justin",][1, 2])

# make sure base:: or stats::sd, var are working
# Test stats::sd, stats::var are working
expect_true(abs(sd(1:2) - 0.7071068) < 1e-6)
expect_true(abs(var(1:5, 1:5) - 2.5) < 1e-6)

Expand Down Expand Up @@ -1138,6 +1151,9 @@ test_that("filter() on a DataFrame", {
expect_equal(count(filtered5), 1)
filtered6 <- where(df, df$age %in% c(19, 30))
expect_equal(count(filtered6), 2)

# Test stats::filter is working
#expect_true(is.ts(filter(1:100, rep(1, 3))))
})

test_that("join() and merge() on a DataFrame", {
Expand Down Expand Up @@ -1284,6 +1300,12 @@ test_that("unionAll(), rbind(), except(), and intersect() on a DataFrame", {
expect_is(unioned, "DataFrame")
expect_equal(count(intersected), 1)
expect_equal(first(intersected)$name, "Andy")

# Test base::rbind is working
expect_equal(length(rbind(1:4, c = 2, a = 10, 10, deparse.level = 0)), 16)

# Test base::intersect is working
expect_equal(length(intersect(1:20, 3:23)), 18)
})

test_that("withColumn() and withColumnRenamed()", {
Expand Down Expand Up @@ -1365,6 +1387,9 @@ test_that("describe() and summarize() on a DataFrame", {
stats2 <- summary(df)
expect_equal(collect(stats2)[4, "name"], "Andy")
expect_equal(collect(stats2)[5, "age"], "30")

# Test base::summary is working
expect_equal(length(summary(attenu, digits = 4)), 35)
})

test_that("dropna() and na.omit() on a DataFrame", {
Expand Down Expand Up @@ -1448,6 +1473,9 @@ test_that("dropna() and na.omit() on a DataFrame", {
expect_identical(expected, actual)
actual <- collect(na.omit(df, minNonNulls = 3, cols = c("name", "age", "height")))
expect_identical(expected, actual)

# Test stats::na.omit is working
expect_equal(nrow(na.omit(data.frame(x = c(0, 10, NA)))), 2)
})

test_that("fillna() on a DataFrame", {
Expand Down Expand Up @@ -1510,6 +1538,9 @@ test_that("cov() and corr() on a DataFrame", {
expect_true(abs(result - 1.0) < 1e-12)
result <- corr(df, "singles", "doubles", "pearson")
expect_true(abs(result - 1.0) < 1e-12)

# Test stats::cov is working
#expect_true(abs(max(cov(swiss)) - 1739.295) < 1e-3)
})

test_that("freqItems() on a DataFrame", {
Expand Down
37 changes: 36 additions & 1 deletion docs/sparkr.md
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ head(teenagers)

# Machine Learning

SparkR allows the fitting of generalized linear models over DataFrames using the [glm()](api/R/glm.html) function. Under the hood, SparkR uses MLlib to train a model of the specified family. Currently the gaussian and binomial families are supported. We support a subset of the available R formula operators for model fitting, including '~', '.', ':', '+', and '-'.
SparkR allows the fitting of generalized linear models over DataFrames using the [glm()](api/R/glm.html) function. Under the hood, SparkR uses MLlib to train a model of the specified family. Currently the gaussian and binomial families are supported. We support a subset of the available R formula operators for model fitting, including '~', '.', ':', '+', and '-'.

The [summary()](api/R/summary.html) function gives the summary of a model produced by [glm()](api/R/glm.html).

Expand Down Expand Up @@ -351,3 +351,38 @@ summary(model)
##Sepal_Width 0.404655
{% endhighlight %}
</div>

# R Function Name Conflicts

When loading and attaching a new package in R, it is possible to have a name [conflict](https://stat.ethz.ch/R-manual/R-devel/library/base/html/library.html), where a
function in the newly attached package masks a function of the same name in another package.

The following functions are masked by the SparkR package:

<table class="table">
<tr><th>Masked function</th><th>How to Access</th></tr>
<tr>
<td><code>cov</code> in <code>package:stats</code></td>
<td><code><pre>stats::cov(x, y = NULL, use = "everything",
method = c("pearson", "kendall", "spearman"))</pre></code></td>
</tr>
<tr>
<td><code>filter</code> in <code>package:stats</code></td>
<td><code><pre>stats::filter(x, filter, method = c("convolution", "recursive"),
sides = 2, circular = FALSE, init)</pre></code></td>
</tr>
<tr>
<td><code>sample</code> in <code>package:base</code></td>
<td><code>base::sample(x, size, replace = FALSE, prob = NULL)</code></td>
</tr>
<tr>
<td><code>table</code> in <code>package:base</code></td>
<td><code><pre>base::table(...,
exclude = if (useNA == "no") c(NA, NaN),
useNA = c("no", "ifany", "always"),
dnn = list.names(...), deparse.level = 1)</pre></code></td>
</tr>
</table>

You can inspect the search path in R with [`search()`](https://stat.ethz.ch/R-manual/R-devel/library/base/html/search.html).