From 86efdade980f63cce9f87696716c5d0c74eef245 Mon Sep 17 00:00:00 2001
From: yaron
Date: Wed, 23 Dec 2015 21:32:57 +0200
Subject: [PATCH 1/8] add default RDD name for one created via sc.textFile

The feature was first added at commit
7b877b27053bfb7092e250e01a3b887e1b50a109 but was later removed
(probably by mistake) at commit fc8b58195afa67fbb75b4c8303e022f703cbf007.

Here is the symptom, using spark-1.5.2-bin-hadoop2.6:
=================================
scala> sc.textFile("/home/root/.bashrc").name
res5: String = null

scala> sc.binaryFiles("/home/root/.bashrc").name
res6: String = /home/root/.bashrc

while using Spark 1.3.1:
=================================
scala> sc.textFile("/home/root/.bashrc").name
res0: String = /home/root/.bashrc

scala> sc.binaryFiles("/home/root/.bashrc").name
res1: String = /home/root/.bashrc
---
 core/src/main/scala/org/apache/spark/SparkContext.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index 67230f4207b8..6af543639d13 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -836,7 +836,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
       minPartitions: Int = defaultMinPartitions): RDD[String] = withScope {
     assertNotStopped()
     hadoopFile(path, classOf[TextInputFormat], classOf[LongWritable], classOf[Text],
-      minPartitions).map(pair => pair._2.toString)
+      minPartitions).map(pair => pair._2.toString).setName(path)
   }
 
   /**
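Note: the fix appends setName(path) after the map() rather than naming the
underlying HadoopRDD, because textFile hands the caller the mapped RDD, whose
name would otherwise stay null. The chaining works because RDD.setName
returns the RDD itself. A minimal spark-shell sketch of the pattern (the data
here is arbitrary, used only for illustration):

    scala> val rdd = sc.parallelize(Seq(1, 2, 3)).map(_ * 2).setName("doubled")
    scala> rdd.name
    res0: String = doubled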
From 081a723c3e5604f161d6440a105b44cc2350480d Mon Sep 17 00:00:00 2001
From: Yaron Weinsberg
Date: Thu, 24 Dec 2015 05:46:39 -0800
Subject: [PATCH 2/8] add unit test for setting default RDD name to its path
 (SPARK-12517)

This change extends SparkContextSuite to verify that RDDs created from
file paths have their default name set to the path.
---
 .../scala/org/apache/spark/SparkContextSuite.scala | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
index d4f2ea87650a..e0d62cf468b5 100644
--- a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
+++ b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
@@ -274,6 +274,19 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext {
     }
   }
 
+  test("Default path for file based RDDs is properly set (SPARK-12517)") {
+    sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local"))
+
+    // Test textFile, wholeTextFiles and binaryFiles for default paths
+    val mockPath = "default/path/for/wholeTextFile/"
+    assert(sc.textFile(mockPath + "textFile").name == mockPath + "textFile")
+    assert(sc.wholeTextFiles(mockPath + "wholeTextFile").name == mockPath + "wholeTextFile")
+    assert(sc.binaryFiles(mockPath + "binaryFiles").name == mockPath + "binaryFiles")
+    assert(sc.hadoopFile(mockPath + "hadoopFile").name == mockPath + "hadoopFile")
+    assert(sc.newAPIHadoopFile(mockPath + "newAPIHadoopFile").name == mockPath + "newAPIHadoopFile")
+    sc.stop()
+  }
+
   test("calling multiple sc.stop() must not throw any exception") {
     noException should be thrownBy {
       sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local"))

From 1bc5ff8f0628c609c6cd9ba8ff409b231bbce737 Mon Sep 17 00:00:00 2001
From: Yaron Weinsberg
Date: Thu, 24 Dec 2015 05:46:39 -0800
Subject: [PATCH 3/8] add unit test for setting default RDD name to its path
 (SPARK-12517)

This change extends SparkContextSuite to verify that RDDs created from
file paths have their default name set to the path.
---
 .../scala/org/apache/spark/SparkContextSuite.scala | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
index d4f2ea87650a..e0d62cf468b5 100644
--- a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
+++ b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
@@ -274,6 +274,19 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext {
     }
   }
 
+  test("Default path for file based RDDs is properly set (SPARK-12517)") {
+    sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local"))
+
+    // Test textFile, wholeTextFiles and binaryFiles for default paths
+    val mockPath = "default/path/for/wholeTextFile/"
+    assert(sc.textFile(mockPath + "textFile").name == mockPath + "textFile")
+    assert(sc.wholeTextFiles(mockPath + "wholeTextFile").name == mockPath + "wholeTextFile")
+    assert(sc.binaryFiles(mockPath + "binaryFiles").name == mockPath + "binaryFiles")
+    assert(sc.hadoopFile(mockPath + "hadoopFile").name == mockPath + "hadoopFile")
+    assert(sc.newAPIHadoopFile(mockPath + "newAPIHadoopFile").name == mockPath + "newAPIHadoopFile")
+    sc.stop()
+  }
+
   test("calling multiple sc.stop() must not throw any exception") {
     noException should be thrownBy {
       sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local"))

From fe3d87d5051b1ba57c9d7dd2e7718cfb0a1961ec Mon Sep 17 00:00:00 2001
From: Yaron Weinsberg
Date: Thu, 24 Dec 2015 06:13:53 -0800
Subject: [PATCH 4/8] add unit test for setting default RDD name to its path
 (SPARK-12517)

---
 core/src/test/scala/org/apache/spark/SparkContextSuite.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
index e0d62cf468b5..39d871f757cf 100644
--- a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
+++ b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
@@ -278,7 +278,7 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext {
     sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local"))
 
     // Test textFile, wholeTextFiles and binaryFiles for default paths
-    val mockPath = "default/path/for/wholeTextFile/"
+    val mockPath = "default/path/for/"
     assert(sc.textFile(mockPath + "textFile").name == mockPath + "textFile")
     assert(sc.wholeTextFiles(mockPath + "wholeTextFile").name == mockPath + "wholeTextFile")
     assert(sc.binaryFiles(mockPath + "binaryFiles").name == mockPath + "binaryFiles")
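Note: the mock paths in this test never need to exist on disk. All of these
SparkContext methods build their RDDs lazily, so the file system is not
touched until an action runs, and the test only reads the name field. A
spark-shell sketch of the behavior being relied on (the path is deliberately
nonexistent):

    scala> val rdd = sc.textFile("no/such/path")  // succeeds: nothing is read yet
    scala> rdd.name
    res0: String = no/such/path
    scala> rdd.count()  // only an action touches the path, and fails
    org.apache.hadoop.mapred.InvalidInputException: Input path does not exist: ...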
From a2f5f0733b019e036ca1ea2ac30dac53620310fe Mon Sep 17 00:00:00 2001
From: yaron
Date: Thu, 24 Dec 2015 16:45:32 +0200
Subject: [PATCH 5/8] restructure code to overcome 100 chars per line limit

---
 .../org/apache/spark/SparkContextSuite.scala | 24 ++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
index 39d871f757cf..693bfb58c547 100644
--- a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
+++ b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
@@ -277,13 +277,25 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext {
   test("Default path for file based RDDs is properly set (SPARK-12517)") {
     sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local"))
 
-    // Test textFile, wholeTextFiles and binaryFiles for default paths
+    // Test filetextFile, wholeTextFiles, binaryFiles, hadoopFile and
+    // newAPIHadoopFile for setting the default path as the RDD name
     val mockPath = "default/path/for/"
-    assert(sc.textFile(mockPath + "textFile").name == mockPath + "textFile")
-    assert(sc.wholeTextFiles(mockPath + "wholeTextFile").name == mockPath + "wholeTextFile")
-    assert(sc.binaryFiles(mockPath + "binaryFiles").name == mockPath + "binaryFiles")
-    assert(sc.hadoopFile(mockPath + "hadoopFile").name == mockPath + "hadoopFile")
-    assert(sc.newAPIHadoopFile(mockPath + "newAPIHadoopFile").name == mockPath + "newAPIHadoopFile")
+
+    var targetPath = mockPath + "textFile"
+    assert(sc.textFile(targetPath).name == targetPath")
+
+    targetPath = mockPath + "wholeTextFile"
+    assert(sc.wholeTextFile(targetPath).name == targetPath")
+
+    targetPath = mockPath + "binaryFiles"
+    assert(sc.binaryFiles(targetPath).name == targetPath")
+
+    targetPath = mockPath + "hadoopFile"
+    assert(sc.hadoopFile(targetPath).name == targetPath")
+
+    targetPath = mockPath + "newAPIHadoopFile"
+    assert(sc.newAPIHadoopFile(targetPath).name == targetPath")
+
     sc.stop()
   }
 

From 50cd1190d21c3721dc94d12288b677b7ef8276cd Mon Sep 17 00:00:00 2001
From: Yaron Weinsberg
Date: Thu, 24 Dec 2015 07:19:55 -0800
Subject: [PATCH 6/8] fix compile error in test unit

---
 .../scala/org/apache/spark/SparkContextSuite.scala | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
index 693bfb58c547..6376d9d88c39 100644
--- a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
+++ b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
@@ -282,19 +282,19 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext {
     val mockPath = "default/path/for/"
 
     var targetPath = mockPath + "textFile"
-    assert(sc.textFile(targetPath).name == targetPath")
+    assert(sc.textFile(targetPath).name == targetPath)
 
     targetPath = mockPath + "wholeTextFile"
-    assert(sc.wholeTextFile(targetPath).name == targetPath")
+    assert(sc.wholeTextFile(targetPath).name == targetPath)
 
     targetPath = mockPath + "binaryFiles"
-    assert(sc.binaryFiles(targetPath).name == targetPath")
+    assert(sc.binaryFiles(targetPath).name == targetPath)
 
     targetPath = mockPath + "hadoopFile"
-    assert(sc.hadoopFile(targetPath).name == targetPath")
+    assert(sc.hadoopFile(targetPath).name == targetPath)
 
     targetPath = mockPath + "newAPIHadoopFile"
-    assert(sc.newAPIHadoopFile(targetPath).name == targetPath")
+    assert(sc.newAPIHadoopFile(targetPath).name == targetPath)
 
     sc.stop()
   }
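Note: the restructuring in PATCH 5/8 exists because Spark's Scalastyle checks
reject lines over 100 characters. A hypothetical alternative (not what the
patch does) would drive the checks from a table of suffix/factory pairs and
avoid the mutable targetPath; hadoopFile and newAPIHadoopFile need explicit
type parameters, though, so they would not fold into such a table as cleanly,
which favors the patch's uniform var-based version. Sketch, assuming it sits
inside the same test body (sc and mockPath in scope):

    import org.apache.spark.rdd.RDD
    // Illustrative only: covers the two methods that take just a path.
    val factories: Seq[(String, String => RDD[_])] = Seq(
      "textFile" -> (sc.textFile(_)),
      "binaryFiles" -> (sc.binaryFiles(_)))
    factories.foreach { case (suffix, create) =>
      val targetPath = mockPath + suffix
      assert(create(targetPath).name === targetPath)
    }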
From 825f3f3d38128b520544de690fc8c04605ffa51c Mon Sep 17 00:00:00 2001
From: Yaron Weinsberg
Date: Thu, 24 Dec 2015 08:21:15 -0800
Subject: [PATCH 7/8] yet another compile and style fix for unit test

---
 .../test/scala/org/apache/spark/SparkContextSuite.scala | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
index 6376d9d88c39..c776c2c6fe21 100644
--- a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
+++ b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
@@ -284,15 +284,15 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext {
     var targetPath = mockPath + "textFile"
     assert(sc.textFile(targetPath).name == targetPath)
 
-    targetPath = mockPath + "wholeTextFile"
-    assert(sc.wholeTextFile(targetPath).name == targetPath)
+    targetPath = mockPath + "wholeTextFiles"
+    assert(sc.wholeTextFiles(targetPath).name == targetPath)
 
     targetPath = mockPath + "binaryFiles"
     assert(sc.binaryFiles(targetPath).name == targetPath)
-    
+
     targetPath = mockPath + "hadoopFile"
     assert(sc.hadoopFile(targetPath).name == targetPath)
-    
+
     targetPath = mockPath + "newAPIHadoopFile"
     assert(sc.newAPIHadoopFile(targetPath).name == targetPath)
 
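Note: the rename above is needed because SparkContext has no wholeTextFile
method; the API is wholeTextFiles, and it differs from textFile in what one
RDD element represents. A spark-shell sketch (the directory name is
illustrative):

    scala> sc.textFile("somedir")        // RDD[String]: one element per line
    scala> sc.wholeTextFiles("somedir")  // RDD[(String, String)]: one (path, content) pair per file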
From 08d3fe25e4621812d963af502815a508da8930c8 Mon Sep 17 00:00:00 2001
From: Yaron Weinsberg
Date: Thu, 24 Dec 2015 11:24:11 -0800
Subject: [PATCH 8/8] also fix default path for sc.wholeTextFiles and
 indentation based on CR

---
 .../scala/org/apache/spark/SparkContext.scala |  2 +-
 .../org/apache/spark/SparkContextSuite.scala  | 32 +++++++++----------
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index 5eb892c6317f..bbdc9158d8e2 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -885,7 +885,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
       classOf[Text],
       classOf[Text],
       updateConf,
-      minPartitions).setName(path).map(record => (record._1.toString, record._2.toString))
+      minPartitions).map(record => (record._1.toString, record._2.toString)).setName(path)
   }
 
   /**
diff --git a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
index c776c2c6fe21..172ef050cc27 100644
--- a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
+++ b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
@@ -274,29 +274,29 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext {
     }
   }
 
-    test("Default path for file based RDDs is properly set (SPARK-12517)") {
-      sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local"))
+  test("Default path for file based RDDs is properly set (SPARK-12517)") {
+    sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local"))
 
-      // Test filetextFile, wholeTextFiles, binaryFiles, hadoopFile and
-      // newAPIHadoopFile for setting the default path as the RDD name
-      val mockPath = "default/path/for/"
+    // Test filetextFile, wholeTextFiles, binaryFiles, hadoopFile and
+    // newAPIHadoopFile for setting the default path as the RDD name
+    val mockPath = "default/path/for/"
 
-      var targetPath = mockPath + "textFile"
-      assert(sc.textFile(targetPath).name == targetPath)
+    var targetPath = mockPath + "textFile"
+    assert(sc.textFile(targetPath).name === targetPath)
 
-      targetPath = mockPath + "wholeTextFiles"
-      assert(sc.wholeTextFiles(targetPath).name == targetPath)
+    targetPath = mockPath + "wholeTextFiles"
+    assert(sc.wholeTextFiles(targetPath).name === targetPath)
 
-      targetPath = mockPath + "binaryFiles"
-      assert(sc.binaryFiles(targetPath).name == targetPath)
+    targetPath = mockPath + "binaryFiles"
+    assert(sc.binaryFiles(targetPath).name === targetPath)
 
-      targetPath = mockPath + "hadoopFile"
-      assert(sc.hadoopFile(targetPath).name == targetPath)
+    targetPath = mockPath + "hadoopFile"
+    assert(sc.hadoopFile(targetPath).name === targetPath)
 
-      targetPath = mockPath + "newAPIHadoopFile"
-      assert(sc.newAPIHadoopFile(targetPath).name == targetPath)
+    targetPath = mockPath + "newAPIHadoopFile"
+    assert(sc.newAPIHadoopFile(targetPath).name === targetPath)
 
-      sc.stop()
+    sc.stop()
   }
 
   test("calling multiple sc.stop() must not throw any exception") {
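Note: besides mirroring the PATCH 1/8 fix for wholeTextFiles (setName moved
after the map, so the RDD the caller receives carries the name), the final
patch switches the asserts from == to ===. Inside SparkFunSuite, ScalaTest's
=== reports both operands in the failure message, whereas a plain == may
yield only a generic assertion failure, depending on the ScalaTest version.
An illustrative sketch of what a failing check would print:

    // With ===, a failure reads roughly:
    //   "null did not equal default/path/for/textFile"
    assert(sc.textFile(targetPath).name === targetPath)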