Skip to content

Commit 184fcd8

Browse files
committed
Merge remote-tracking branch 'remotes/origin/master' into rebase-parquet-datetime
# Conflicts: # sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala # sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
2 parents 5b52735 + 57fcc49 commit 184fcd8

File tree

271 files changed

+4358
-4164
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

271 files changed

+4358
-4164
lines changed

R/pkg/NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,7 @@ exportMethods("%<=>%",
345345
"over",
346346
"overlay",
347347
"percent_rank",
348+
"percentile_approx",
348349
"pmod",
349350
"posexplode",
350351
"posexplode_outer",

R/pkg/R/functions.R

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1410,6 +1410,52 @@ setMethod("quarter",
14101410
column(jc)
14111411
})
14121412

1413+
#' @details
1414+
#' \code{percentile_approx}: Returns the approximate percentile value of
1415+
#' a numeric column at the given percentage.
1416+
#'
1417+
#' @param percentage Numeric percentage at which percentile should be computed.
1418+
#' All values should be between 0 and 1.
1419+
#' If length equals 1, the resulting column is of type double;
1420+
#' otherwise, it is an array of doubles.
1421+
#' @param accuracy A positive numeric literal (default: 10000) which
1422+
#' controls approximation accuracy at the cost of memory.
1423+
#' A higher value of accuracy yields better accuracy; 1.0/accuracy
1424+
#' is the relative error of the approximation.
1425+
#'
1426+
#' @rdname column_aggregate_functions
1427+
#' @aliases percentile_approx percentile_approx,Column-method
1428+
#' @note percentile_approx since 3.1.0
1429+
setMethod("percentile_approx",
1430+
signature(x = "characterOrColumn", percentage = "numericOrColumn"),
1431+
function(x, percentage, accuracy = 10000) {
1432+
col <- if (class(x) == "Column") {
1433+
x@jc
1434+
} else {
1435+
column(x)@jc
1436+
}
1437+
1438+
percentage <- if (class(percentage) == "Column") {
1439+
percentage@jc
1440+
} else if (length(percentage) > 1) {
1441+
do.call(create_array, lapply(percentage, lit))@jc
1442+
} else {
1443+
lit(percentage)@jc
1444+
}
1445+
1446+
accuracy <- if (class(accuracy) == "Column") {
1447+
accuracy@jc
1448+
} else {
1449+
lit(as.integer(accuracy))@jc
1450+
}
1451+
1452+
jc <- callJStatic(
1453+
"org.apache.spark.sql.functions", "percentile_approx",
1454+
col, percentage, accuracy
1455+
)
1456+
column(jc)
1457+
})
1458+
14131459
#' @details
14141460
#' \code{reverse}: Returns a reversed string or an array with reverse order of elements.
14151461
#'

R/pkg/R/generics.R

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1189,6 +1189,11 @@ setGeneric("overlay", function(x, replace, pos, ...) { standardGeneric("overlay"
11891189
#' @name NULL
11901190
setGeneric("percent_rank", function(x = "missing") { standardGeneric("percent_rank") })
11911191

1192+
#' @rdname column_aggregate_functions
1193+
#' @name NULL
1194+
setGeneric("percentile_approx",
1195+
function(x, percentage, ...) { standardGeneric("percentile_approx") })
1196+
11921197
#' @rdname column_math_functions
11931198
#' @name NULL
11941199
setGeneric("pmod", function(y, x) { standardGeneric("pmod") })

R/pkg/tests/fulltests/test_sparkSQL.R

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1772,6 +1772,28 @@ test_that("column functions", {
17721772
collect(select(df, alias(not(df$is_true), "is_false"))),
17731773
data.frame(is_false = c(FALSE, TRUE, NA))
17741774
)
1775+
1776+
# Test percentile_approx
1777+
actual <- lapply(
1778+
list(
1779+
percentile_approx(column("foo"), 0.5),
1780+
percentile_approx(column("bar"), lit(0.25), lit(42L)),
1781+
percentile_approx(column("bar"), c(0.25, 0.5, 0.75)),
1782+
percentile_approx(column("foo"), c(0.05, 0.95), 100L),
1783+
percentile_approx("foo", c(0.5)),
1784+
percentile_approx("bar", c(0.1, 0.9), 10L)),
1785+
function(x) SparkR:::callJMethod(x@jc, "toString"))
1786+
1787+
expected <- list(
1788+
"percentile_approx(foo, 0.5, 10000)",
1789+
"percentile_approx(bar, 0.25, 42)",
1790+
"percentile_approx(bar, array(0.25, 0.5, 0.75), 10000)",
1791+
"percentile_approx(foo, array(0.05, 0.95), 100)",
1792+
"percentile_approx(foo, 0.5, 10000)",
1793+
"percentile_approx(bar, array(0.1, 0.9), 10)"
1794+
)
1795+
1796+
expect_equal(actual, expected)
17751797
})
17761798

17771799
test_that("column binary mathfunctions", {

core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,8 +96,7 @@ public final class BytesToBytesMap extends MemoryConsumer {
9696
* since that's the largest power-of-2 that's less than Integer.MAX_VALUE. We need two long array
9797
* entries per key, giving us a maximum capacity of (1 &lt;&lt; 29).
9898
*/
99-
@VisibleForTesting
100-
static final int MAX_CAPACITY = (1 << 29);
99+
public static final int MAX_CAPACITY = (1 << 29);
101100

102101
// This choice of page table size and page size means that we can address up to 500 gigabytes
103102
// of memory.

core/src/main/resources/org/apache/spark/ui/static/bootstrap.bundle.min.js

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

core/src/main/resources/org/apache/spark/ui/static/bootstrap.bundle.min.js.map

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

core/src/main/resources/org/apache/spark/ui/static/bootstrap.min.css

100755100644
Lines changed: 6 additions & 873 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

core/src/main/resources/org/apache/spark/ui/static/bootstrap.min.css.map

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)