Posted to commits@spark.apache.org by pw...@apache.org on 2013/12/05 00:58:37 UTC
[1/2] git commit: Merge pull request #223 from rxin/transient
Updated Branches:
refs/heads/branch-0.8 31da065b1 -> cc33f9fc8
Merge pull request #223 from rxin/transient
Mark the partitioner, name, and generator fields in RDD as @transient,
as part of the effort to reduce serialized task size.
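For illustration, here is a minimal, hypothetical sketch (plain JVM serialization, no Spark classes; the class and field names are made up) of why marking a field @transient shrinks the serialized payload: a @transient field is simply skipped when the object is written out, which is the same mechanism that reduces the size of the task bytes shipped to executors.

import java.io.{ByteArrayOutputStream, ObjectOutputStream}

// Hypothetical example: a field marked @transient is skipped by Java
// serialization, so it contributes nothing to the serialized payload.
class ExampleRecord(val id: Int) extends Serializable {
  // Driver-side metadata; not needed on executors, so excluded from the bytes.
  @transient var name: String = null
}

object TransientDemo {
  def serializedSize(obj: AnyRef): Int = {
    val bytes = new ByteArrayOutputStream()
    val out = new ObjectOutputStream(bytes)
    out.writeObject(obj)
    out.close()
    bytes.size()
  }

  def main(args: Array[String]): Unit = {
    val rec = new ExampleRecord(1)
    rec.name = "a very long descriptive name" * 100
    // The large string is not written out, because the field is @transient.
    println(s"serialized size: ${serializedSize(rec)} bytes")
  }
}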
(cherry picked from commit d6e5473872f405a6f4e466705e33cf893af915c1)
Signed-off-by: Patrick Wendell <pw...@gmail.com>
Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/ba44f21d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/ba44f21d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/ba44f21d
Branch: refs/heads/branch-0.8
Commit: ba44f21dd7848f1689948d032a34ae92ffadeb31
Parents: 31da065
Author: Matei Zaharia <ma...@eecs.berkeley.edu>
Authored: Wed Dec 4 10:28:50 2013 -0800
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Wed Dec 4 15:56:58 2013 -0800
----------------------------------------------------------------------
core/src/main/scala/org/apache/spark/rdd/RDD.scala | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/ba44f21d/core/src/main/scala/org/apache/spark/rdd/RDD.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
index 852c131..717247b 100644
--- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -101,7 +101,7 @@ abstract class RDD[T: ClassManifest](
protected def getPreferredLocations(split: Partition): Seq[String] = Nil
/** Optionally overridden by subclasses to specify how they are partitioned. */
- val partitioner: Option[Partitioner] = None
+ @transient val partitioner: Option[Partitioner] = None
// =======================================================================
// Methods and fields available on all RDDs
@@ -114,7 +114,7 @@ abstract class RDD[T: ClassManifest](
val id: Int = sc.newRddId()
/** A friendly name for this RDD */
- var name: String = null
+ @transient var name: String = null
/** Assign a name to this RDD */
def setName(_name: String) = {
@@ -123,7 +123,7 @@ abstract class RDD[T: ClassManifest](
}
/** User-defined generator of this RDD*/
- var generator = Utils.getCallSiteInfo.firstUserClass
+ @transient var generator = Utils.getCallSiteInfo.firstUserClass
/** Reset generator*/
def setGenerator(_generator: String) = {
@@ -925,7 +925,7 @@ abstract class RDD[T: ClassManifest](
private var storageLevel: StorageLevel = StorageLevel.NONE
/** Record user function generating this RDD. */
- private[spark] val origin = Utils.formatSparkCallSite
+ @transient private[spark] val origin = Utils.formatSparkCallSite
private[spark] def elementClassManifest: ClassManifest[T] = classManifest[T]
@@ -940,7 +940,7 @@ abstract class RDD[T: ClassManifest](
def context = sc
// Avoid handling doCheckpoint multiple times to prevent excessive recursion
- private var doCheckpointCalled = false
+ @transient private var doCheckpointCalled = false
/**
* Performs the checkpointing of this RDD by saving this. It is called by the DAGScheduler
[2/2] git commit: Merge pull request #227 from pwendell/master
Posted by pw...@apache.org.
Merge pull request #227 from pwendell/master
Fix a small bug in the web UI and do some minor clean-up.
There was a bug where the sort order didn't work correctly for the write time
metric. I also cleaned up some earlier code that fixed the same issue for the
read and write bytes columns.
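A minimal sketch (hypothetical names, not the actual StagePage code) of the pattern this fix applies in the diff below: compute the raw metric once as an Option, then derive both a numeric sort key and a human-readable string from it. The raw value is placed in the table cell's sorttable_customkey attribute so the column sorts numerically even though the cell displays a formatted duration.

// Hypothetical helper illustrating the Option-based refactor; formatDuration
// stands in for parent.formatDuration in the real page.
def writeTimeCells(maybeWriteTimeNanos: Option[Long],
                   formatDuration: Long => String): (String, String) = {
  val sortable = maybeWriteTimeNanos.map(_.toString).getOrElse("")
  val readable = maybeWriteTimeNanos
    .map(_ / (1000 * 1000))                       // nanoseconds -> milliseconds
    .map(ms => if (ms == 0) "" else formatDuration(ms))
    .getOrElse("")
  (sortable, readable)                            // (sort key, displayed text)
}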
(cherry picked from commit 182f9baeed8e4cc62ca14ae04413394477a7ccfb)
Signed-off-by: Patrick Wendell <pw...@gmail.com>
Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/cc33f9fc
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/cc33f9fc
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/cc33f9fc
Branch: refs/heads/branch-0.8
Commit: cc33f9fc8a5b33addf23d6ce491aeee2c2ad1b43
Parents: ba44f21
Author: Patrick Wendell <pw...@gmail.com>
Authored: Wed Dec 4 15:52:07 2013 -0800
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Wed Dec 4 15:57:47 2013 -0800
----------------------------------------------------------------------
.../org/apache/spark/ui/jobs/StagePage.scala | 29 +++++++++-----------
1 file changed, 13 insertions(+), 16 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/cc33f9fc/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
index c1c7aa7..c862571 100644
--- a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
@@ -152,21 +152,18 @@ private[spark] class StagePage(parent: JobProgressUI) {
else metrics.map(m => parent.formatDuration(m.executorRunTime)).getOrElse("")
val gcTime = metrics.map(m => m.jvmGCTime).getOrElse(0L)
- var shuffleReadSortable: String = ""
- var shuffleReadReadable: String = ""
- if (shuffleRead) {
- shuffleReadSortable = metrics.flatMap{m => m.shuffleReadMetrics}.map{s => s.remoteBytesRead}.toString()
- shuffleReadReadable = metrics.flatMap{m => m.shuffleReadMetrics}.map{s =>
- Utils.bytesToString(s.remoteBytesRead)}.getOrElse("")
- }
+ val maybeShuffleRead = metrics.flatMap{m => m.shuffleReadMetrics}.map{s => s.remoteBytesRead}
+ val shuffleReadSortable = maybeShuffleRead.map(_.toString).getOrElse("")
+ val shuffleReadReadable = maybeShuffleRead.map{Utils.bytesToString(_)}.getOrElse("")
- var shuffleWriteSortable: String = ""
- var shuffleWriteReadable: String = ""
- if (shuffleWrite) {
- shuffleWriteSortable = metrics.flatMap{m => m.shuffleWriteMetrics}.map{s => s.shuffleBytesWritten}.toString()
- shuffleWriteReadable = metrics.flatMap{m => m.shuffleWriteMetrics}.map{s =>
- Utils.bytesToString(s.shuffleBytesWritten)}.getOrElse("")
- }
+ val maybeShuffleWrite = metrics.flatMap{m => m.shuffleWriteMetrics}.map{s => s.shuffleBytesWritten}
+ val shuffleWriteSortable = maybeShuffleWrite.map(_.toString).getOrElse("")
+ val shuffleWriteReadable = maybeShuffleWrite.map{Utils.bytesToString(_)}.getOrElse("")
+
+ val maybeWriteTime = metrics.flatMap{m => m.shuffleWriteMetrics}.map{s => s.shuffleWriteTime}
+ val writeTimeSortable = maybeWriteTime.map(_.toString).getOrElse("")
+ val writeTimeReadable = maybeWriteTime.map{ t => t / (1000 * 1000)}.map{ ms =>
+ if (ms == 0) "" else parent.formatDuration(ms)}.getOrElse("")
<tr>
<td>{info.index}</td>
@@ -187,8 +184,8 @@ private[spark] class StagePage(parent: JobProgressUI) {
</td>
}}
{if (shuffleWrite) {
- <td>{metrics.flatMap{m => m.shuffleWriteMetrics}.map{s =>
- parent.formatDuration(s.shuffleWriteTime / (1000 * 1000))}.getOrElse("")}
+ <td sorttable_customkey={writeTimeSortable}>
+ {writeTimeReadable}
</td>
<td sorttable_customkey={shuffleWriteSortable}>
{shuffleWriteReadable}