
Commit 059b47a

Author: Yuval Itzchakov
Merge remote-tracking branch 'origin/master'
2 parents: 46af335 + 53ca975

File tree: 52 files changed (+1542 −212 lines)


core/src/main/scala/org/apache/spark/BarrierTaskContext.scala

Lines changed: 57 additions & 3 deletions
@@ -17,26 +17,80 @@
 
 package org.apache.spark
 
+import java.util.Properties
+
 import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.executor.TaskMetrics
+import org.apache.spark.memory.TaskMemoryManager
+import org.apache.spark.metrics.MetricsSystem
 
 /** A [[TaskContext]] with extra info and tooling for a barrier stage. */
-trait BarrierTaskContext extends TaskContext {
+class BarrierTaskContext(
+    override val stageId: Int,
+    override val stageAttemptNumber: Int,
+    override val partitionId: Int,
+    override val taskAttemptId: Long,
+    override val attemptNumber: Int,
+    override val taskMemoryManager: TaskMemoryManager,
+    localProperties: Properties,
+    @transient private val metricsSystem: MetricsSystem,
+    // The default value is only used in tests.
+    override val taskMetrics: TaskMetrics = TaskMetrics.empty)
+  extends TaskContextImpl(stageId, stageAttemptNumber, partitionId, taskAttemptId, attemptNumber,
+    taskMemoryManager, localProperties, metricsSystem, taskMetrics) {
 
   /**
    * :: Experimental ::
    * Sets a global barrier and waits until all tasks in this stage hit this barrier. Similar to
    * MPI_Barrier function in MPI, the barrier() function call blocks until all tasks in the same
    * stage have reached this routine.
+   *
+   * CAUTION! In a barrier stage, each task must have the same number of barrier() calls, in all
+   * possible code branches. Otherwise, you may get the job hanging or a SparkException after
+   * timeout. Some examples of misuses listed below:
+   * 1. Only call barrier() function on a subset of all the tasks in the same barrier stage, it
+   * shall lead to timeout of the function call.
+   * {{{
+   *   rdd.barrier().mapPartitions { (iter, context) =>
+   *       if (context.partitionId() == 0) {
+   *           // Do nothing.
+   *       } else {
+   *           context.barrier()
+   *       }
+   *       iter
+   *   }
+   * }}}
+   *
+   * 2. Include barrier() function in a try-catch code block, this may lead to timeout of the
+   * second function call.
+   * {{{
+   *   rdd.barrier().mapPartitions { (iter, context) =>
+   *       try {
+   *           // Do something that might throw an Exception.
+   *           doSomething()
+   *           context.barrier()
+   *       } catch {
+   *           case e: Exception => logWarning("...", e)
+   *       }
+   *       context.barrier()
+   *       iter
+   *   }
+   * }}}
    */
   @Experimental
   @Since("2.4.0")
-  def barrier(): Unit
+  def barrier(): Unit = {
+    // TODO SPARK-24817 implement global barrier.
+  }
 
   /**
    * :: Experimental ::
    * Returns the all task infos in this barrier stage, the task infos are ordered by partitionId.
    */
   @Experimental
   @Since("2.4.0")
-  def getTaskInfos(): Array[BarrierTaskInfo]
+  def getTaskInfos(): Array[BarrierTaskInfo] = {
+    val addressesStr = localProperties.getProperty("addresses", "")
+    addressesStr.split(",").map(_.trim()).map(new BarrierTaskInfo(_))
+  }
 }
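For contrast with the misuse examples in the Scaladoc above, here is a minimal usage sketch (not part of this commit, and assuming the (iter, context) form of RDDBarrier.mapPartitions shown there): every task calls barrier() exactly once on every code path, so no task is left waiting at the barrier.

  // Hypothetical sketch: all tasks, in all branches, reach the single barrier() call below.
  val prepared = rdd.barrier().mapPartitions { (iter, context) =>
    if (context.partitionId() == 0) {
      // Partition 0 could do some one-off setup work here.
    }
    context.barrier()  // reached exactly once by every task in the stage
    iter
  }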

core/src/main/scala/org/apache/spark/BarrierTaskContextImpl.scala

Lines changed: 0 additions & 49 deletions
This file was deleted.

core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala

Lines changed: 1 addition & 0 deletions
@@ -181,6 +181,7 @@ private[spark] class SparkSubmit extends Logging {
     if (args.isStandaloneCluster && args.useRest) {
       try {
         logInfo("Running Spark using the REST application submission protocol.")
+        doRunMain()
       } catch {
         // Fail over to use the legacy submission gateway
         case e: SubmitRestConnectionException =>

core/src/main/scala/org/apache/spark/rdd/RDDBarrier.scala

Lines changed: 1 addition & 1 deletion
@@ -28,7 +28,7 @@ class RDDBarrier[T: ClassTag](rdd: RDD[T]) {
 
   /**
    * :: Experimental ::
-   * Maps partitions together with a provided BarrierTaskContext.
+   * Maps partitions together with a provided [[org.apache.spark.BarrierTaskContext]].
    *
    * `preservesPartitioning` indicates whether the input function preserves the partitioner, which
    * should be `false` unless `rdd` is a pair RDD and the input function doesn't modify the keys.

core/src/main/scala/org/apache/spark/scheduler/Task.scala

Lines changed: 1 addition & 1 deletion
@@ -83,7 +83,7 @@ private[spark] abstract class Task[T](
     // TODO SPARK-24874 Allow create BarrierTaskContext based on partitions, instead of whether
     // the stage is barrier.
     context = if (isBarrier) {
-      new BarrierTaskContextImpl(
+      new BarrierTaskContext(
         stageId,
         stageAttemptId, // stageAttemptId and stageAttemptNumber are semantically equal
         partitionId,

dev/deps/spark-deps-hadoop-2.6

Lines changed: 1 addition & 1 deletion
@@ -191,7 +191,7 @@ stax-api-1.0.1.jar
 stream-2.7.0.jar
 stringtemplate-3.2.1.jar
 super-csv-2.2.0.jar
-univocity-parsers-2.6.3.jar
+univocity-parsers-2.7.3.jar
 validation-api-1.1.0.Final.jar
 xbean-asm6-shaded-4.8.jar
 xercesImpl-2.9.1.jar

dev/deps/spark-deps-hadoop-2.7

Lines changed: 1 addition & 1 deletion
@@ -192,7 +192,7 @@ stax-api-1.0.1.jar
 stream-2.7.0.jar
 stringtemplate-3.2.1.jar
 super-csv-2.2.0.jar
-univocity-parsers-2.6.3.jar
+univocity-parsers-2.7.3.jar
 validation-api-1.1.0.Final.jar
 xbean-asm6-shaded-4.8.jar
 xercesImpl-2.9.1.jar

dev/deps/spark-deps-hadoop-3.1

Lines changed: 1 addition & 1 deletion
@@ -212,7 +212,7 @@ stream-2.7.0.jar
 stringtemplate-3.2.1.jar
 super-csv-2.2.0.jar
 token-provider-1.0.1.jar
-univocity-parsers-2.6.3.jar
+univocity-parsers-2.7.3.jar
 validation-api-1.1.0.Final.jar
 woodstox-core-5.0.3.jar
 xbean-asm6-shaded-4.8.jar

docs/sql-programming-guide.md

Lines changed: 1 addition & 0 deletions
@@ -1876,6 +1876,7 @@ working with timestamps in `pandas_udf`s to get the best performance, see
 
 ## Upgrading From Spark SQL 2.3 to 2.4
 
+  - Since Spark 2.4, Spark will evaluate the set operations referenced in a query by following a precedence rule as per the SQL standard. If the order is not specified by parentheses, set operations are performed from left to right with the exception that all INTERSECT operations are performed before any UNION, EXCEPT or MINUS operations. The old behaviour of giving equal precedence to all the set operations is preserved under a newly added configuration `spark.sql.legacy.setopsPrecedence.enabled` with a default value of `false`. When this property is set to `true`, Spark will evaluate the set operators from left to right as they appear in the query, given no explicit ordering is enforced by usage of parentheses.
   - Since Spark 2.4, Spark will display table description column Last Access value as UNKNOWN when the value was Jan 01 1970.
   - Since Spark 2.4, Spark maximizes the usage of a vectorized ORC reader for ORC files by default. To do that, `spark.sql.orc.impl` and `spark.sql.orc.filterPushdown` change their default values to `native` and `true` respectively.
   - In PySpark, when Arrow optimization is enabled, previously `toPandas` just failed when Arrow optimization is unable to be used whereas `createDataFrame` from Pandas DataFrame allowed the fallback to non-optimization. Now, both `toPandas` and `createDataFrame` from Pandas DataFrame allow the fallback by default, which can be switched off by `spark.sql.execution.arrow.fallback.enabled`.
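A quick illustration of the new set-operation precedence rule (a hypothetical example, not part of this commit; it assumes a SparkSession `spark` and existing tables t1, t2 and t3): the INTERSECT below now binds tighter than the UNION, and the legacy flag restores plain left-to-right evaluation.

  // Under the SQL-standard rule this is read as: t1 UNION (t2 INTERSECT t3).
  spark.sql("SELECT c FROM t1 UNION SELECT c FROM t2 INTERSECT SELECT c FROM t3")

  // With the legacy flag set, the same query is evaluated left to right: (t1 UNION t2) INTERSECT t3.
  spark.conf.set("spark.sql.legacy.setopsPrecedence.enabled", "true")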

external/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala

Lines changed: 13 additions & 2 deletions
@@ -23,6 +23,7 @@ import scala.collection.JavaConverters._
 import scala.collection.mutable.ArrayBuffer
 
 import org.apache.avro.{Schema, SchemaBuilder}
+import org.apache.avro.LogicalTypes.{TimestampMicros, TimestampMillis}
 import org.apache.avro.Schema.Type._
 import org.apache.avro.generic._
 import org.apache.avro.util.Utf8
@@ -86,8 +87,18 @@ class AvroDeserializer(rootAvroType: Schema, rootCatalystType: DataType) {
     case (LONG, LongType) => (updater, ordinal, value) =>
       updater.setLong(ordinal, value.asInstanceOf[Long])
 
-    case (LONG, TimestampType) => (updater, ordinal, value) =>
-      updater.setLong(ordinal, value.asInstanceOf[Long] * 1000)
+    case (LONG, TimestampType) => avroType.getLogicalType match {
+      case _: TimestampMillis => (updater, ordinal, value) =>
+        updater.setLong(ordinal, value.asInstanceOf[Long] * 1000)
+      case _: TimestampMicros => (updater, ordinal, value) =>
+        updater.setLong(ordinal, value.asInstanceOf[Long])
+      case null => (updater, ordinal, value) =>
+        // For backward compatibility, if the Avro type is Long and it is not logical type,
+        // the value is processed as timestamp type with millisecond precision.
+        updater.setLong(ordinal, value.asInstanceOf[Long] * 1000)
+      case other => throw new IncompatibleSchemaException(
+        s"Cannot convert Avro logical type ${other} to Catalyst Timestamp type.")
+    }
 
     case (LONG, DateType) => (updater, ordinal, value) =>
       updater.setInt(ordinal, (value.asInstanceOf[Long] / DateTimeUtils.MILLIS_PER_DAY).toInt)
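The change hinges on a unit conversion: Catalyst's TimestampType stores microseconds since the epoch, so an Avro timestamp-millis value has to be scaled by 1000 while a timestamp-micros value is stored unchanged. A small sketch of that arithmetic (illustrative values, not from the commit):

  // 2018-08-01 00:00:00.123 UTC expressed in the two Avro logical types.
  val avroMillis = 1533081600123L         // timestamp-millis: milliseconds since the epoch
  val avroMicros = 1533081600123000L      // timestamp-micros: microseconds since the epoch

  // Catalyst TimestampType expects microseconds, so only the millis value is scaled.
  val catalystFromMillis = avroMillis * 1000  // 1533081600123000L
  val catalystFromMicros = avroMicros         // already in microseconds: 1533081600123000L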
