Commit d9c5903

Merge remote-tracking branch 'origin/master' into SPARK-27676
2 parents 58e9544 + 54da3bb

File tree: 855 files changed (+47268 / -11160 lines)


.github/PULL_REQUEST_TEMPLATE

Lines changed: 1 addition & 1 deletion
@@ -7,4 +7,4 @@
 (Please explain how this patch was tested. E.g. unit tests, integration tests, manual tests)
 (If this patch involves UI changes, please attach a screenshot; otherwise, remove this)
 
-Please review http://spark.apache.org/contributing.html before opening a pull request.
+Please review https://spark.apache.org/contributing.html before opening a pull request.

CONTRIBUTING.md

Lines changed: 2 additions & 2 deletions
@@ -1,12 +1,12 @@
 ## Contributing to Spark
 
 *Before opening a pull request*, review the
-[Contributing to Spark guide](http://spark.apache.org/contributing.html).
+[Contributing to Spark guide](https://spark.apache.org/contributing.html).
 It lists steps that are required before creating a PR. In particular, consider:
 
 - Is the change important and ready enough to ask the community to spend time reviewing?
 - Have you searched for existing, related JIRAs and pull requests?
-- Is this a new feature that can stand alone as a [third party project](http://spark.apache.org/third-party-projects.html) ?
+- Is this a new feature that can stand alone as a [third party project](https://spark.apache.org/third-party-projects.html) ?
 - Is the change being proposed clearly explained and motivated?
 
 When you contribute code, you affirm that the contribution is your original work and that you

LICENSE-binary

Lines changed: 2 additions & 1 deletion
@@ -368,6 +368,8 @@ org.eclipse.jetty:jetty-servlets
 org.eclipse.jetty:jetty-util
 org.eclipse.jetty:jetty-webapp
 org.eclipse.jetty:jetty-xml
+org.scala-lang.modules:scala-xml_2.12
+org.opencypher:okapi-shade
 
 core/src/main/java/org/apache/spark/util/collection/TimSort.java
 core/src/main/resources/org/apache/spark/ui/static/bootstrap*
@@ -412,7 +414,6 @@ org.scala-lang:scala-compiler
 org.scala-lang:scala-library
 org.scala-lang:scala-reflect
 org.scala-lang.modules:scala-parser-combinators_2.12
-org.scala-lang.modules:scala-xml_2.12
 org.fusesource.leveldbjni:leveldbjni-all
 net.sourceforge.f2j:arpack_combined_all
 xmlenc:xmlenc

NOTICE-binary

Lines changed: 15 additions & 0 deletions
@@ -1163,3 +1163,18 @@ Copyright 2014 The Apache Software Foundation
 
 Apache Mahout (http://mahout.apache.org/)
 Copyright 2014 The Apache Software Foundation
+
+scala-xml
+Copyright (c) 2002-2019 EPFL
+Copyright (c) 2011-2019 Lightbend, Inc.
+
+scala-xml includes software developed at
+LAMP/EPFL (https://lamp.epfl.ch/) and
+Lightbend, Inc. (https://www.lightbend.com/).
+
+Licensed under the Apache License, Version 2.0 (the "License").
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.

R/README.md

Lines changed: 4 additions & 4 deletions
@@ -17,7 +17,7 @@ export R_HOME=/home/username/R
 
 #### Build Spark
 
-Build Spark with [Maven](http://spark.apache.org/docs/latest/building-spark.html#buildmvn) and include the `-Psparkr` profile to build the R package. For example to use the default Hadoop versions you can run
+Build Spark with [Maven](https://spark.apache.org/docs/latest/building-spark.html#buildmvn) and include the `-Psparkr` profile to build the R package. For example to use the default Hadoop versions you can run
 
 ```bash
 build/mvn -DskipTests -Psparkr package
@@ -35,15 +35,15 @@ SparkContext, you can run
 
 ./bin/sparkR --master "local[2]"
 
-To set other options like driver memory, executor memory etc. you can pass in the [spark-submit](http://spark.apache.org/docs/latest/submitting-applications.html) arguments to `./bin/sparkR`
+To set other options like driver memory, executor memory etc. you can pass in the [spark-submit](https://spark.apache.org/docs/latest/submitting-applications.html) arguments to `./bin/sparkR`
 
 #### Using SparkR from RStudio
 
 If you wish to use SparkR from RStudio, please refer [SparkR documentation](https://spark.apache.org/docs/latest/sparkr.html#starting-up-from-rstudio).
 
 #### Making changes to SparkR
 
-The [instructions](http://spark.apache.org/contributing.html) for making contributions to Spark also apply to SparkR.
+The [instructions](https://spark.apache.org/contributing.html) for making contributions to Spark also apply to SparkR.
 If you only make R file changes (i.e. no Scala changes) then you can just re-install the R package using `R/install-dev.sh` and test your changes.
 Once you have made your changes, please include unit tests for them and run existing unit tests using the `R/run-tests.sh` script as described below.
 
@@ -58,7 +58,7 @@ To run one of them, use `./bin/spark-submit <filename> <args>`. For example:
 ```bash
 ./bin/spark-submit examples/src/main/r/dataframe.R
 ```
-You can run R unit tests by following the instructions under [Running R Tests](http://spark.apache.org/docs/latest/building-spark.html#running-r-tests).
+You can run R unit tests by following the instructions under [Running R Tests](https://spark.apache.org/docs/latest/building-spark.html#running-r-tests).
 
 ### Running on YARN
 

R/WINDOWS.md

Lines changed: 6 additions & 6 deletions
@@ -20,19 +20,19 @@ license: |
 
 To build SparkR on Windows, the following steps are required
 
-1. Install R (>= 3.1) and [Rtools](http://cran.r-project.org/bin/windows/Rtools/). Make sure to
+1. Install R (>= 3.1) and [Rtools](https://cloud.r-project.org/bin/windows/Rtools/). Make sure to
 include Rtools and R in `PATH`. Note that support for R prior to version 3.4 is deprecated as of Spark 3.0.0.
 
 2. Install
-[JDK8](http://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html) and set
+[JDK8](https://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html) and set
 `JAVA_HOME` in the system environment variables.
 
-3. Download and install [Maven](http://maven.apache.org/download.html). Also include the `bin`
+3. Download and install [Maven](https://maven.apache.org/download.html). Also include the `bin`
 directory in Maven in `PATH`.
 
-4. Set `MAVEN_OPTS` as described in [Building Spark](http://spark.apache.org/docs/latest/building-spark.html).
+4. Set `MAVEN_OPTS` as described in [Building Spark](https://spark.apache.org/docs/latest/building-spark.html).
 
-5. Open a command shell (`cmd`) in the Spark directory and build Spark with [Maven](http://spark.apache.org/docs/latest/building-spark.html#buildmvn) and include the `-Psparkr` profile to build the R package. For example to use the default Hadoop versions you can run
+5. Open a command shell (`cmd`) in the Spark directory and build Spark with [Maven](https://spark.apache.org/docs/latest/building-spark.html#buildmvn) and include the `-Psparkr` profile to build the R package. For example to use the default Hadoop versions you can run
 
 ```bash
 mvn.cmd -DskipTests -Psparkr package
@@ -52,7 +52,7 @@ To run the SparkR unit tests on Windows, the following steps are required —ass
 
 4. Set the environment variable `HADOOP_HOME` to the full path to the newly created `hadoop` directory.
 
-5. Run unit tests for SparkR by running the command below. You need to install the needed packages following the instructions under [Running R Tests](http://spark.apache.org/docs/latest/building-spark.html#running-r-tests) first:
+5. Run unit tests for SparkR by running the command below. You need to install the needed packages following the instructions under [Running R Tests](https://spark.apache.org/docs/latest/building-spark.html#running-r-tests) first:
 
 ```
 .\bin\spark-submit2.cmd --conf spark.hadoop.fs.defaultFS="file:///" R\pkg\tests\run-all.R

R/pkg/R/DataFrame.R

Lines changed: 4 additions & 4 deletions
@@ -1179,16 +1179,16 @@ setMethod("collect",
           function(x, stringsAsFactors = FALSE) {
             connectionTimeout <- as.numeric(Sys.getenv("SPARKR_BACKEND_CONNECTION_TIMEOUT", "6000"))
             useArrow <- FALSE
-            arrowEnabled <- sparkR.conf("spark.sql.execution.arrow.enabled")[[1]] == "true"
+            arrowEnabled <- sparkR.conf("spark.sql.execution.arrow.sparkr.enabled")[[1]] == "true"
             if (arrowEnabled) {
               useArrow <- tryCatch({
                 checkSchemaInArrow(schema(x))
                 TRUE
               }, error = function(e) {
                 warning(paste0("The conversion from Spark DataFrame to R DataFrame was attempted ",
                                "with Arrow optimization because ",
-                               "'spark.sql.execution.arrow.enabled' is set to true; however, ",
-                               "failed, attempting non-optimization. Reason: ",
+                               "'spark.sql.execution.arrow.sparkr.enabled' is set to true; ",
+                               "however, failed, attempting non-optimization. Reason: ",
                                e))
                 FALSE
               })
@@ -1476,7 +1476,7 @@ dapplyInternal <- function(x, func, schema) {
     schema <- structType(schema)
   }
 
-  arrowEnabled <- sparkR.conf("spark.sql.execution.arrow.enabled")[[1]] == "true"
+  arrowEnabled <- sparkR.conf("spark.sql.execution.arrow.sparkr.enabled")[[1]] == "true"
   if (arrowEnabled) {
     if (inherits(schema, "structType")) {
       checkSchemaInArrow(schema)

R/pkg/R/SQLContext.R

Lines changed: 2 additions & 2 deletions
@@ -259,7 +259,7 @@ getSchema <- function(schema, firstRow = NULL, rdd = NULL) {
 createDataFrame <- function(data, schema = NULL, samplingRatio = 1.0,
                             numPartitions = NULL) {
   sparkSession <- getSparkSession()
-  arrowEnabled <- sparkR.conf("spark.sql.execution.arrow.enabled")[[1]] == "true"
+  arrowEnabled <- sparkR.conf("spark.sql.execution.arrow.sparkr.enabled")[[1]] == "true"
   useArrow <- FALSE
   firstRow <- NULL
 
@@ -302,7 +302,7 @@ createDataFrame <- function(data, schema = NULL, samplingRatio = 1.0,
     },
     error = function(e) {
       warning(paste0("createDataFrame attempted Arrow optimization because ",
-                     "'spark.sql.execution.arrow.enabled' is set to true; however, ",
+                     "'spark.sql.execution.arrow.sparkr.enabled' is set to true; however, ",
                      "failed, attempting non-optimization. Reason: ",
                      e))
       FALSE

R/pkg/R/group.R

Lines changed: 1 addition & 1 deletion
@@ -229,7 +229,7 @@ gapplyInternal <- function(x, func, schema) {
   if (is.character(schema)) {
     schema <- structType(schema)
   }
-  arrowEnabled <- sparkR.conf("spark.sql.execution.arrow.enabled")[[1]] == "true"
+  arrowEnabled <- sparkR.conf("spark.sql.execution.arrow.sparkr.enabled")[[1]] == "true"
  if (arrowEnabled) {
    if (inherits(schema, "structType")) {
      checkSchemaInArrow(schema)

R/pkg/R/mllib_classification.R

Lines changed: 1 addition & 1 deletion
@@ -50,7 +50,7 @@ setClass("NaiveBayesModel", representation(jobj = "jobj"))
 #'
 #' @param data SparkDataFrame for training.
 #' @param formula A symbolic description of the model to be fitted. Currently only a few formula
-#'                operators are supported, including '~', '.', ':', '+', and '-'.
+#'                operators are supported, including '~', '.', ':', '+', '-', '*', and '^'.
 #' @param regParam The regularization parameter. Only supports L2 regularization currently.
 #' @param maxIter Maximum iteration number.
 #' @param tol Convergence tolerance of iterations.
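The doc change above records that the R formula interface now also accepts the `'*'` (main effects plus interaction) and `'^'` (term crossing up to a degree) operators. A hedged illustration using `spark.glm`, which goes through the same formula handling; the choice of wrapper and dataset is mine, not taken from this commit:

```r
library(SparkR)
sparkR.session(master = "local[2]")

df <- createDataFrame(mtcars)

# mpg ~ wt * hp expands to wt + hp + wt:hp (main effects plus interaction)
model <- spark.glm(df, mpg ~ wt * hp, family = "gaussian")
summary(model)

# (wt + hp + disp)^2 crosses the terms up to second order
model2 <- spark.glm(df, mpg ~ (wt + hp + disp)^2, family = "gaussian")

sparkR.session.stop()
```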
