
Commit a2abc51

Author: Chris Cope (committed)
Merge remote-tracking branch 'upstream/master'
2 parents: e623b78 + 0c7b452


76 files changed: +1167 -599 lines


README.md

Lines changed: 9 additions & 0 deletions
@@ -115,6 +115,15 @@ If your project is built with Maven, add this to your POM file's `<dependencies>
     </dependency>
 
 
+## A Note About Thrift JDBC server and CLI for Spark SQL
+
+Spark SQL supports Thrift JDBC server and CLI.
+See sql-programming-guide.md for more information about those features.
+You can use those features by setting `-Phive-thriftserver` when building Spark as follows.
+
+    $ sbt/sbt -Phive-thriftserver assembly
+
+
 ## Configuration
 
 Please refer to the [Configuration guide](http://spark.apache.org/docs/latest/configuration.html)

core/src/main/scala/org/apache/spark/io/CompressionCodec.scala

Lines changed: 9 additions & 2 deletions
@@ -46,17 +46,24 @@ trait CompressionCodec {
 
 
 private[spark] object CompressionCodec {
+
+  private val shortCompressionCodecNames = Map(
+    "lz4" -> classOf[LZ4CompressionCodec].getName,
+    "lzf" -> classOf[LZFCompressionCodec].getName,
+    "snappy" -> classOf[SnappyCompressionCodec].getName)
+
   def createCodec(conf: SparkConf): CompressionCodec = {
     createCodec(conf, conf.get("spark.io.compression.codec", DEFAULT_COMPRESSION_CODEC))
   }
 
   def createCodec(conf: SparkConf, codecName: String): CompressionCodec = {
-    val ctor = Class.forName(codecName, true, Utils.getContextOrSparkClassLoader)
+    val codecClass = shortCompressionCodecNames.getOrElse(codecName.toLowerCase, codecName)
+    val ctor = Class.forName(codecClass, true, Utils.getContextOrSparkClassLoader)
       .getConstructor(classOf[SparkConf])
     ctor.newInstance(conf).asInstanceOf[CompressionCodec]
   }
 
-  val DEFAULT_COMPRESSION_CODEC = classOf[SnappyCompressionCodec].getName
+  val DEFAULT_COMPRESSION_CODEC = "snappy"
 }
 
 

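With the lookup table above, `spark.io.compression.codec` accepts the short names `lz4`, `lzf`, and `snappy` (the configured name is lower-cased before the lookup) as well as fully qualified class names, and the default is now the short name `snappy`. A minimal sketch of how the resolution behaves, in the spirit of the new tests added below; `CompressionCodec` is `private[spark]`, so a snippet like this only compiles inside the Spark source tree, and the wrapper object is purely illustrative:

    package org.apache.spark.io

    import org.apache.spark.SparkConf

    // Illustrative wrapper, not part of the commit.
    object ShortCodecNameExample {
      def main(args: Array[String]): Unit = {
        val conf = new SparkConf(false)

        // Short form: resolved through shortCompressionCodecNames.
        val lz4 = CompressionCodec.createCodec(conf, "lz4")

        // Fully qualified class names still work; unknown keys fall through to Class.forName.
        val lzf = CompressionCodec.createCodec(conf, classOf[LZFCompressionCodec].getName)

        // With no explicit codec configured, DEFAULT_COMPRESSION_CODEC ("snappy") is used.
        val default = CompressionCodec.createCodec(conf)

        println(Seq(lz4, lzf, default).map(_.getClass.getSimpleName).mkString(", "))
      }
    }
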
core/src/main/scala/org/apache/spark/storage/MemoryStore.scala

Lines changed: 2 additions & 2 deletions
@@ -238,7 +238,7 @@ private[spark] class MemoryStore(blockManager: BlockManager, maxMemory: Long)
           // If our vector's size has exceeded the threshold, request more memory
           val currentSize = vector.estimateSize()
           if (currentSize >= memoryThreshold) {
-            val amountToRequest = (currentSize * (memoryGrowthFactor - 1)).toLong
+            val amountToRequest = (currentSize * memoryGrowthFactor - memoryThreshold).toLong
             // Hold the accounting lock, in case another thread concurrently puts a block that
             // takes up the unrolling space we just ensured here
             accountingLock.synchronized {
@@ -254,7 +254,7 @@ private[spark] class MemoryStore(blockManager: BlockManager, maxMemory: Long)
               }
             }
             // New threshold is currentSize * memoryGrowthFactor
-            memoryThreshold = currentSize + amountToRequest
+            memoryThreshold += amountToRequest
           }
         }
         elementsUnrolled += 1

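The old code requested `currentSize * (memoryGrowthFactor - 1)` bytes but then set the threshold to `currentSize + amountToRequest`, so whenever the vector had already grown past the previous threshold, the threshold advanced by more than was actually reserved. The new code requests exactly the gap up to `currentSize * memoryGrowthFactor` and advances the threshold by the same amount. A worked example with made-up numbers (a growth factor of 1.5, a previous threshold of 1000 bytes, and a vector that has reached 1200 bytes):

    // Made-up numbers; only the arithmetic of the two formulas is compared.
    object ThresholdGrowthExample extends App {
      val memoryGrowthFactor = 1.5
      val previousThreshold = 1000L
      val currentSize = 1200L

      // Old: request 1200 * 0.5 = 600 bytes, then jump the threshold to 1200 + 600 = 1800,
      // i.e. the threshold moves up by 800 while only 600 bytes were reserved.
      val oldRequest = (currentSize * (memoryGrowthFactor - 1)).toLong
      val oldThreshold = currentSize + oldRequest

      // New: request the full gap 1200 * 1.5 - 1000 = 800 bytes and advance the threshold
      // by exactly that amount, so the reservation and the threshold stay in step.
      val newRequest = (currentSize * memoryGrowthFactor - previousThreshold).toLong
      val newThreshold = previousThreshold + newRequest

      println(s"old: request=$oldRequest threshold=$oldThreshold")   // 600, 1800
      println(s"new: request=$newRequest threshold=$newThreshold")   // 800, 1800
    }
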
core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala

Lines changed: 1 addition & 0 deletions
@@ -153,6 +153,7 @@ class JobProgressListener(conf: SparkConf) extends SparkListener with Logging {
     val (errorMessage, metrics): (Option[String], Option[TaskMetrics]) =
       taskEnd.reason match {
         case org.apache.spark.Success =>
+          stageData.completedIndices.add(info.index)
           stageData.numCompleteTasks += 1
           (None, Option(taskEnd.taskMetrics))
         case e: ExceptionFailure => // Handle ExceptionFailure because we might have metrics

core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala

Lines changed: 1 addition & 1 deletion
@@ -168,7 +168,7 @@ private[ui] class StageTableBase(
       <td valign="middle">{submissionTime}</td>
       <td sorttable_customkey={duration.getOrElse(-1).toString}>{formattedDuration}</td>
       <td class="progress-cell">
-        {makeProgressBar(stageData.numActiveTasks, stageData.numCompleteTasks,
+        {makeProgressBar(stageData.numActiveTasks, stageData.completedIndices.size,
         stageData.numFailedTasks, s.numTasks)}
       </td>
       <td sorttable_customekey={inputRead.toString}>{inputReadWithUnit}</td>

core/src/main/scala/org/apache/spark/ui/jobs/UIData.scala

Lines changed: 2 additions & 0 deletions
@@ -19,6 +19,7 @@ package org.apache.spark.ui.jobs
 
 import org.apache.spark.executor.TaskMetrics
 import org.apache.spark.scheduler.{AccumulableInfo, TaskInfo}
+import org.apache.spark.util.collection.OpenHashSet
 
 import scala.collection.mutable.HashMap
 
@@ -38,6 +39,7 @@ private[jobs] object UIData {
   class StageUIData {
     var numActiveTasks: Int = _
     var numCompleteTasks: Int = _
+    var completedIndices = new OpenHashSet[Int]()
     var numFailedTasks: Int = _
 
     var executorRunTime: Long = _

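Together with the JobProgressListener and StageTable changes above, `completedIndices` lets the progress bar count each task index at most once even if the same index reports success more than once (for example after re-run or speculative attempts), while `numCompleteTasks` keeps the raw event count. A small sketch of the deduplication idea, using the standard library's `mutable.HashSet` in place of Spark's `OpenHashSet`:

    import scala.collection.mutable

    // Standard-library stand-in for OpenHashSet; the counting logic is the point.
    object CompletedIndicesExample extends App {
      var numCompleteTasks = 0
      val completedIndices = mutable.HashSet.empty[Int]

      // Five success events, but task index 3 succeeds twice.
      for (index <- Seq(0, 1, 2, 3, 3)) {
        numCompleteTasks += 1
        completedIndices.add(index)
      }

      // The raw counter reaches 5, while the set of distinct indices stays at 4,
      // which is why the stage table now renders completedIndices.size.
      println(s"numCompleteTasks = $numCompleteTasks, completedIndices.size = ${completedIndices.size}")
    }
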
core/src/main/scala/org/apache/spark/util/Utils.scala

Lines changed: 22 additions & 7 deletions
@@ -284,17 +284,32 @@ private[spark] object Utils extends Logging {
   /** Copy all data from an InputStream to an OutputStream */
   def copyStream(in: InputStream,
                  out: OutputStream,
-                 closeStreams: Boolean = false)
+                 closeStreams: Boolean = false): Long =
   {
+    var count = 0L
     try {
-      val buf = new Array[Byte](8192)
-      var n = 0
-      while (n != -1) {
-        n = in.read(buf)
-        if (n != -1) {
-          out.write(buf, 0, n)
+      if (in.isInstanceOf[FileInputStream] && out.isInstanceOf[FileOutputStream]) {
+        // When both streams are File stream, use transferTo to improve copy performance.
+        val inChannel = in.asInstanceOf[FileInputStream].getChannel()
+        val outChannel = out.asInstanceOf[FileOutputStream].getChannel()
+        val size = inChannel.size()
+
+        // In case transferTo method transferred less data than we have required.
+        while (count < size) {
+          count += inChannel.transferTo(count, size - count, outChannel)
+        }
+      } else {
+        val buf = new Array[Byte](8192)
+        var n = 0
+        while (n != -1) {
+          n = in.read(buf)
+          if (n != -1) {
+            out.write(buf, 0, n)
+            count += n
+          }
         }
       }
+      count
     } finally {
       if (closeStreams) {
         try {

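`copyStream` now returns the number of bytes copied and, when both ends are file streams, hands the copy to `FileChannel.transferTo`, which lets the OS move the data without going through a user-space buffer. A self-contained sketch of the same channel-to-channel loop using only JDK APIs; the file paths are placeholders:

    import java.io.{FileInputStream, FileOutputStream}

    object TransferToExample extends App {
      val in = new FileInputStream("input.bin")     // placeholder path
      val out = new FileOutputStream("output.bin")  // placeholder path
      try {
        val inChannel = in.getChannel
        val outChannel = out.getChannel
        val size = inChannel.size()
        var copied = 0L
        // transferTo may move fewer bytes than requested, so loop until everything
        // is copied, exactly as the new copyStream implementation does.
        while (copied < size) {
          copied += inChannel.transferTo(copied, size - copied, outChannel)
        }
        println(s"copied $copied bytes")
      } finally {
        in.close()
        out.close()
      }
    }
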
core/src/main/scala/org/apache/spark/util/collection/ExternalSorter.scala

Lines changed: 3 additions & 4 deletions
@@ -745,12 +745,11 @@ private[spark] class ExternalSorter[K, V, C](
     try {
       out = new FileOutputStream(outputFile)
       for (i <- 0 until numPartitions) {
-        val file = partitionWriters(i).fileSegment().file
-        in = new FileInputStream(file)
-        org.apache.spark.util.Utils.copyStream(in, out)
+        in = new FileInputStream(partitionWriters(i).fileSegment().file)
+        val size = org.apache.spark.util.Utils.copyStream(in, out, false)
         in.close()
         in = null
-        lengths(i) = file.length()
+        lengths(i) = size
         offsets(i + 1) = offsets(i) + lengths(i)
       }
     } finally {

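Because `copyStream` now reports how many bytes it wrote, the per-partition lengths can be taken straight from the copy instead of re-reading `file.length()` afterwards, and the `offsets` array remains a running sum of those lengths. A tiny sketch of that bookkeeping, with made-up partition sizes standing in for `copyStream`'s return values:

    object PartitionOffsetsExample extends App {
      // Made-up byte counts, one per partition, standing in for copyStream's results.
      val lengths = Array(120L, 0L, 340L, 75L)
      val offsets = new Array[Long](lengths.length + 1)

      // offsets(i + 1) = offsets(i) + lengths(i), as in the loop shown in the diff above.
      for (i <- lengths.indices) {
        offsets(i + 1) = offsets(i) + lengths(i)
      }

      println(offsets.mkString(", "))  // 0, 120, 120, 460, 535
    }
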
core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala

Lines changed: 18 additions & 0 deletions
@@ -56,15 +56,33 @@ class CompressionCodecSuite extends FunSuite {
     testCodec(codec)
   }
 
+  test("lz4 compression codec short form") {
+    val codec = CompressionCodec.createCodec(conf, "lz4")
+    assert(codec.getClass === classOf[LZ4CompressionCodec])
+    testCodec(codec)
+  }
+
   test("lzf compression codec") {
     val codec = CompressionCodec.createCodec(conf, classOf[LZFCompressionCodec].getName)
     assert(codec.getClass === classOf[LZFCompressionCodec])
     testCodec(codec)
   }
 
+  test("lzf compression codec short form") {
+    val codec = CompressionCodec.createCodec(conf, "lzf")
+    assert(codec.getClass === classOf[LZFCompressionCodec])
+    testCodec(codec)
+  }
+
   test("snappy compression codec") {
     val codec = CompressionCodec.createCodec(conf, classOf[SnappyCompressionCodec].getName)
     assert(codec.getClass === classOf[SnappyCompressionCodec])
     testCodec(codec)
   }
+
+  test("snappy compression codec short form") {
+    val codec = CompressionCodec.createCodec(conf, "snappy")
+    assert(codec.getClass === classOf[SnappyCompressionCodec])
+    testCodec(codec)
+  }
 }

docs/building-with-maven.md

Lines changed: 9 additions & 0 deletions
@@ -96,6 +96,15 @@ mvn -Pyarn -Phadoop-2.4 -Dhadoop.version=2.4.0 -DskipTests clean package
 mvn -Pyarn-alpha -Phadoop-2.3 -Dhadoop.version=2.3.0 -Dyarn.version=0.23.7 -DskipTests clean package
 {% endhighlight %}
 
+# Building Thrift JDBC server and CLI for Spark SQL
+
+Spark SQL supports Thrift JDBC server and CLI.
+See sql-programming-guide.md for more information about those features.
+You can use those features by setting `-Phive-thriftserver` when building Spark as follows.
+{% highlight bash %}
+mvn -Phive-thriftserver assembly
+{% endhighlight %}
+
 # Spark Tests in Maven
 
 Tests are run by default via the [ScalaTest Maven plugin](http://www.scalatest.org/user_guide/using_the_scalatest_maven_plugin).
