You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by sa...@apache.org on 2015/12/28 21:19:21 UTC
spark git commit: [SPARK-12517] add default RDD name for one created
via sc.textFile
Repository: spark
Updated Branches:
refs/heads/master fd50df413 -> 73b70f076
[SPARK-12517] add default RDD name for one created via sc.textFile
The feature was first added at commit: 7b877b27053bfb7092e250e01a3b887e1b50a109 but was later removed (probably by mistake) at commit: fc8b58195afa67fbb75b4c8303e022f703cbf007.
This change sets the default name of RDDs created via sc.textFile(...) to the path argument.
Here is the symptom:
* Using spark-1.5.2-bin-hadoop2.6:
scala> sc.textFile("/home/root/.bashrc").name
res5: String = null
scala> sc.binaryFiles("/home/root/.bashrc").name
res6: String = /home/root/.bashrc
* while using Spark 1.3.1:
scala> sc.textFile("/home/root/.bashrc").name
res0: String = /home/root/.bashrc
scala> sc.binaryFiles("/home/root/.bashrc").name
res1: String = /home/root/.bashrc
Author: Yaron Weinsberg <wy...@gmail.com>
Author: yaron <ya...@il.ibm.com>
Closes #10456 from wyaron/master.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/73b70f07
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/73b70f07
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/73b70f07
Branch: refs/heads/master
Commit: 73b70f076d4e22396b7e145f2ce5974fbf788048
Parents: fd50df4
Author: Yaron Weinsberg <wy...@gmail.com>
Authored: Tue Dec 29 05:19:11 2015 +0900
Committer: Kousuke Saruta <sa...@oss.nttdata.co.jp>
Committed: Tue Dec 29 05:19:11 2015 +0900
----------------------------------------------------------------------
.../scala/org/apache/spark/SparkContext.scala | 4 ++--
.../org/apache/spark/SparkContextSuite.scala | 25 ++++++++++++++++++++
2 files changed, 27 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/73b70f07/core/src/main/scala/org/apache/spark/SparkContext.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index d506782..bbdc915 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -836,7 +836,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
minPartitions: Int = defaultMinPartitions): RDD[String] = withScope {
assertNotStopped()
hadoopFile(path, classOf[TextInputFormat], classOf[LongWritable], classOf[Text],
- minPartitions).map(pair => pair._2.toString)
+ minPartitions).map(pair => pair._2.toString).setName(path)
}
/**
@@ -885,7 +885,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
classOf[Text],
classOf[Text],
updateConf,
- minPartitions).setName(path).map(record => (record._1.toString, record._2.toString))
+ minPartitions).map(record => (record._1.toString, record._2.toString)).setName(path)
}
/**
http://git-wip-us.apache.org/repos/asf/spark/blob/73b70f07/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
index d4f2ea8..172ef05 100644
--- a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
+++ b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
@@ -274,6 +274,31 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext {
}
}
+ test("Default path for file based RDDs is properly set (SPARK-12517)") {
+ sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local"))
+
+ // Test textFile, wholeTextFiles, binaryFiles, hadoopFile and
+ // newAPIHadoopFile for setting the default path as the RDD name
+ val mockPath = "default/path/for/"
+
+ var targetPath = mockPath + "textFile"
+ assert(sc.textFile(targetPath).name === targetPath)
+
+ targetPath = mockPath + "wholeTextFiles"
+ assert(sc.wholeTextFiles(targetPath).name === targetPath)
+
+ targetPath = mockPath + "binaryFiles"
+ assert(sc.binaryFiles(targetPath).name === targetPath)
+
+ targetPath = mockPath + "hadoopFile"
+ assert(sc.hadoopFile(targetPath).name === targetPath)
+
+ targetPath = mockPath + "newAPIHadoopFile"
+ assert(sc.newAPIHadoopFile(targetPath).name === targetPath)
+
+ sc.stop()
+ }
+
test("calling multiple sc.stop() must not throw any exception") {
noException should be thrownBy {
sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local"))
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org