doc
felixcheung committed Feb 15, 2017
commit 3593794710423c2699188c5d22f34f3eb79cd43e
3 changes: 2 additions & 1 deletion R/pkg/R/DataFrame.R
@@ -683,7 +683,8 @@ setMethod("storageLevel",
#' Returns a new SparkDataFrame that has exactly \code{numPartitions} partitions.
#' This operation results in a narrow dependency, e.g. if you go from 1000 partitions to 100
#' partitions, there will not be a shuffle, instead each of the 100 new partitions will claim 10 of
#' the current partitions.
#' the current partitions. If a larger number of partitions is requested, it will stay at the
#' current number of partitions.
#'
#' @param numPartitions the number of partitions to use.
#'
3 changes: 2 additions & 1 deletion R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -2491,7 +2491,7 @@ test_that("repartition by columns on DataFrame", {
("Please, specify the number of partitions and/or a column\\(s\\)", retError), TRUE)

# repartition by column and number of partitions
actual <- repartition(df, 3L, col = df$"a")
actual <- repartition(df, 3, col = df$"a")

# Checking that at least the dimensions are identical
expect_identical(dim(df), dim(actual))
@@ -2502,6 +2502,7 @@ test_that("repartition by columns on DataFrame", {
expect_identical(dim(df), dim(actual))
expect_equal(getNumPartitions(actual), 13L)

expect_equal(getNumPartitions(coalesce(actual, 14)), 13L)
expect_equal(getNumPartitions(coalesce(actual, 1L)), 1L)

# a test case with a column and dapply
3 changes: 2 additions & 1 deletion core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -423,7 +423,8 @@ abstract class RDD[T: ClassTag](
*
* This results in a narrow dependency, e.g. if you go from 1000 partitions
* to 100 partitions, there will not be a shuffle, instead each of the 100
* new partitions will claim 10 of the current partitions.
* new partitions will claim 10 of the current partitions. If a larger number
* of partitions is requested, it will stay at the current number of partitions.
*
* However, if you're doing a drastic coalesce, e.g. to numPartitions = 1,
* this may result in your computation taking place on fewer nodes than
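The RDD doc above describes two behaviors: shrinking the partition count is a narrow dependency with no shuffle, and asking for more partitions without a shuffle leaves the count unchanged. A minimal sketch of both, assuming a local `SparkContext` named `sc` (illustration only, not part of this diff):

```scala
val rdd = sc.parallelize(1 to 1000, 10)   // start with 10 partitions

// Shrinking is a narrow dependency: no shuffle is triggered.
println(rdd.coalesce(5).getNumPartitions)                   // 5

// Requesting more partitions without a shuffle is a no-op on the count.
println(rdd.coalesce(20).getNumPartitions)                  // 10, not 20

// Passing shuffle = true actually redistributes to the requested number.
println(rdd.coalesce(20, shuffle = true).getNumPartitions)  // 20
```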
3 changes: 2 additions & 1 deletion python/pyspark/sql/dataframe.py
@@ -515,7 +515,8 @@ def coalesce(self, numPartitions):
Similar to coalesce defined on an :class:`RDD`, this operation results in a
narrow dependency, e.g. if you go from 1000 partitions to 100 partitions,
there will not be a shuffle, instead each of the 100 new partitions will
claim 10 of the current partitions.
claim 10 of the current partitions. If a larger number of partitions is requested,
it will stay at the current number of partitions.

>>> df.coalesce(1).rdd.getNumPartitions()
1
3 changes: 2 additions & 1 deletion sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -2432,7 +2432,8 @@ class Dataset[T] private[sql](
* Returns a new Dataset that has exactly `numPartitions` partitions.
* Similar to coalesce defined on an `RDD`, this operation results in a narrow dependency, e.g.
* if you go from 1000 partitions to 100 partitions, there will not be a shuffle, instead each of
* the 100 new partitions will claim 10 of the current partitions.
* the 100 new partitions will claim 10 of the current partitions. If a larger number of
* partitions is requested, it will stay at the current number of partitions.
A contributor left a review comment on this line:
So we seem to have left out the warning from RDD about drastic coalesces in the Dataset coalesce. Since we are updating the docstrings now anyway, would it maybe make sense to include that warning here as well? (Looking at the implementation of CoalesceExec, it seems like it would still apply unless I'm missing something.)

*
* @group typedrel
* @since 1.6.0
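The Dataset variant documented above behaves the same way. A hedged sketch using the Scala API, assuming a `SparkSession` named `spark` (illustration only, not part of this diff):

```scala
val ds = spark.range(0, 100, 1, 10)            // Dataset[java.lang.Long] with 10 partitions

// Coalescing down avoids a shuffle, as in the Python doctest above.
println(ds.coalesce(1).rdd.getNumPartitions)   // 1

// Requesting more partitions leaves the count unchanged; use repartition(20),
// which shuffles, when more partitions are genuinely needed.
println(ds.coalesce(20).rdd.getNumPartitions)  // 10
```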
@@ -541,7 +541,8 @@ case class UnionExec(children: Seq[SparkPlan]) extends SparkPlan {
* Physical plan for returning a new RDD that has exactly `numPartitions` partitions.
* Similar to coalesce defined on an [[RDD]], this operation results in a narrow dependency, e.g.
* if you go from 1000 partitions to 100 partitions, there will not be a shuffle, instead each of
* the 100 new partitions will claim 10 of the current partitions.
* the 100 new partitions will claim 10 of the current partitions. If a larger number of partitions
* is requested, it will stay at the current number of partitions.
*/
case class CoalesceExec(numPartitions: Int, child: SparkPlan) extends UnaryExecNode {
override def output: Seq[Attribute] = child.output