You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by rx...@apache.org on 2014/12/22 20:20:04 UTC
spark git commit: [SPARK-4918][Core] Reuse Text in saveAsTextFile
Repository: spark
Updated Branches:
refs/heads/master 6ee6aa70b -> 93b2f3a88
[SPARK-4918][Core] Reuse Text in saveAsTextFile
Reuse a single Text instance per partition in saveAsTextFile, instead of allocating a new Text for every record, to reduce GC pressure.
/cc rxin
Author: zsxwing <zs...@gmail.com>
Closes #3762 from zsxwing/SPARK-4918 and squashes the following commits:
59f03eb [zsxwing] Reuse Text in saveAsTextFile
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/93b2f3a8
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/93b2f3a8
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/93b2f3a8
Branch: refs/heads/master
Commit: 93b2f3a8826e189f6398c9b30bc00de205a3c64a
Parents: 6ee6aa7
Author: zsxwing <zs...@gmail.com>
Authored: Mon Dec 22 11:20:00 2014 -0800
Committer: Reynold Xin <rx...@databricks.com>
Committed: Mon Dec 22 11:20:00 2014 -0800
----------------------------------------------------------------------
core/src/main/scala/org/apache/spark/rdd/RDD.scala | 16 ++++++++++++++--
1 file changed, 14 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/93b2f3a8/core/src/main/scala/org/apache/spark/rdd/RDD.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
index a942069..f47c2d1 100644
--- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -1186,7 +1186,13 @@ abstract class RDD[T: ClassTag](
// same bytecodes for `saveAsTextFile`.
val nullWritableClassTag = implicitly[ClassTag[NullWritable]]
val textClassTag = implicitly[ClassTag[Text]]
- val r = this.map(x => (NullWritable.get(), new Text(x.toString)))
+ val r = this.mapPartitions { iter =>
+ val text = new Text()
+ iter.map { x =>
+ text.set(x.toString)
+ (NullWritable.get(), text)
+ }
+ }
RDD.rddToPairRDDFunctions(r)(nullWritableClassTag, textClassTag, null)
.saveAsHadoopFile[TextOutputFormat[NullWritable, Text]](path)
}
@@ -1198,7 +1204,13 @@ abstract class RDD[T: ClassTag](
// https://issues.apache.org/jira/browse/SPARK-2075
val nullWritableClassTag = implicitly[ClassTag[NullWritable]]
val textClassTag = implicitly[ClassTag[Text]]
- val r = this.map(x => (NullWritable.get(), new Text(x.toString)))
+ val r = this.mapPartitions { iter =>
+ val text = new Text()
+ iter.map { x =>
+ text.set(x.toString)
+ (NullWritable.get(), text)
+ }
+ }
RDD.rddToPairRDDFunctions(r)(nullWritableClassTag, textClassTag, null)
.saveAsHadoopFile[TextOutputFormat[NullWritable, Text]](path, codec)
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org