You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by an...@apache.org on 2015/06/18 07:31:33 UTC
spark git commit: [SPARK-8392] RDDOperationGraph: getting cached nodes is slow
Repository: spark
Updated Branches:
refs/heads/master 22732e1ec -> e2cdb0568
[SPARK-8392] RDDOperationGraph: getting cached nodes is slow
```
def getAllNodes: Seq[RDDOperationNode] =
{ _childNodes ++ _childClusters.flatMap(_.childNodes) }
```
When `_childClusters` contains many nodes, the process hangs. I think we can improve the efficiency here.
Author: xutingjun <xu...@huawei.com>
Closes #6839 from XuTingjun/DAGImprove and squashes the following commits:
53b03ea [xutingjun] change code to more concise and easier to read
f98728b [xutingjun] fix words: node -> nodes
f87c663 [xutingjun] put the filter inside
81f9fd2 [xutingjun] put the filter inside
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e2cdb056
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e2cdb056
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e2cdb056
Branch: refs/heads/master
Commit: e2cdb0568b14df29bbdb1ee9a13ee361c9ddad9c
Parents: 22732e1
Author: xutingjun <xu...@huawei.com>
Authored: Wed Jun 17 22:31:01 2015 -0700
Committer: Andrew Or <an...@databricks.com>
Committed: Wed Jun 17 22:31:01 2015 -0700
----------------------------------------------------------------------
core/src/main/scala/org/apache/spark/ui/UIUtils.scala | 2 +-
.../scala/org/apache/spark/ui/scope/RDDOperationGraph.scala | 6 +++---
2 files changed, 4 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/e2cdb056/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
index 65162f4..7898039 100644
--- a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
+++ b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
@@ -362,7 +362,7 @@ private[spark] object UIUtils extends Logging {
{ g.incomingEdges.map { e => <div class="incoming-edge">{e.fromId},{e.toId}</div> } }
{ g.outgoingEdges.map { e => <div class="outgoing-edge">{e.fromId},{e.toId}</div> } }
{
- g.rootCluster.getAllNodes.filter(_.cached).map { n =>
+ g.rootCluster.getCachedNodes.map { n =>
<div class="cached-rdd">{n.id}</div>
}
}
http://git-wip-us.apache.org/repos/asf/spark/blob/e2cdb056/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala b/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala
index d6a5085..ffea981 100644
--- a/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala
+++ b/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala
@@ -66,9 +66,9 @@ private[ui] class RDDOperationCluster(val id: String, private var _name: String)
_childClusters += childCluster
}
- /** Return all the nodes container in this cluster, including ones nested in other clusters. */
- def getAllNodes: Seq[RDDOperationNode] = {
- _childNodes ++ _childClusters.flatMap(_.childNodes)
+ /** Return all the nodes which are cached. */
+ def getCachedNodes: Seq[RDDOperationNode] = {
+ _childNodes.filter(_.cached) ++ _childClusters.flatMap(_.getCachedNodes)
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org