You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "Edward Seidl (JIRA)" <ji...@apache.org> on 2016/01/05 01:26:40 UTC

[jira] [Commented] (SPARK-12431) add local checkpointing to GraphX

    [ https://issues.apache.org/jira/browse/SPARK-12431?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15082081#comment-15082081 ] 

Edward Seidl commented on SPARK-12431:
--------------------------------------

Here's a patch demonstrating what I mean:

{code}
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
index 869caa3..10a3d7c 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
@@ -103,6 +103,7 @@ abstract class Graph[VD: ClassTag, ED: ClassTag] protected () extends Serializab
    * memory, otherwise saving it on a file will require recomputation.
    */
   def checkpoint(): Unit
+  def localCheckpoint(): Unit
 
   /**
    * Return whether this Graph has been checkpointed or not.
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala
index c88b2f6..65e24e5 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala
@@ -76,6 +76,11 @@ class EdgeRDDImpl[ED: ClassTag, VD: ClassTag] private[graphx] (
     partitionsRDD.checkpoint()
   }
 
+  override def localCheckpoint() : this.type = {
+      partitionsRDD.localCheckpoint()
+      this
+  }
+
   override def isCheckpointed: Boolean = {
     firstParent[(PartitionID, EdgePartition[ED, VD])].isCheckpointed
   }
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala
index da95314..cc228ef 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala
@@ -70,6 +70,11 @@ class GraphImpl[VD: ClassTag, ED: ClassTag] protected (
     replicatedVertexView.edges.checkpoint()
   }
 
+  override def localCheckpoint(): Unit = {
+      vertices.localCheckpoint()
+      replicatedVertexView.edges.localCheckpoint()
+  }
+
   override def isCheckpointed: Boolean = {
     vertices.isCheckpointed && replicatedVertexView.edges.isCheckpointed
   }
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala
index 7f4e7e9..54e8406 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala
@@ -77,6 +77,11 @@ class VertexRDDImpl[VD] private[graphx] (
     partitionsRDD.checkpoint()
   }
 
+  override def localCheckpoint(): this.type = {
+      partitionsRDD.localCheckpoint()
+      this
+  }
+
   override def isCheckpointed: Boolean = {
     firstParent[ShippableVertexPartition[VD]].isCheckpointed
   }
{code}

> add local checkpointing to GraphX
> ---------------------------------
>
>                 Key: SPARK-12431
>                 URL: https://issues.apache.org/jira/browse/SPARK-12431
>             Project: Spark
>          Issue Type: Improvement
>          Components: GraphX
>    Affects Versions: 1.5.2
>            Reporter: Edward Seidl
>
> Local checkpointing was added to RDD to speed up iterative Spark jobs, but this capability hasn't been added to GraphX. Adding localCheckpoint to GraphImpl, EdgeRDDImpl, and VertexRDDImpl greatly improved the speed of a k-core algorithm I'm using (at the cost of fault tolerance, of course).



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org