You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by pw...@apache.org on 2014/01/02 22:21:16 UTC

[12/13] git commit: Added Apache boilerplate and class docs to PartitionerAwareUnionRDD.

Added Apache boilerplate and class docs to PartitionerAwareUnionRDD.


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/5fde4566
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/5fde4566
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/5fde4566

Branch: refs/heads/master
Commit: 5fde4566ea48e5c6d6c50af032a29eaded2d7c43
Parents: 577c8cc
Author: Tathagata Das <ta...@gmail.com>
Authored: Thu Dec 26 14:33:37 2013 -0800
Committer: Tathagata Das <ta...@gmail.com>
Committed: Thu Dec 26 14:33:37 2013 -0800

----------------------------------------------------------------------
 .../spark/rdd/PartitionerAwareUnionRDD.scala    | 36 ++++++++++++++++++--
 1 file changed, 33 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/5fde4566/core/src/main/scala/org/apache/spark/rdd/PartitionerAwareUnionRDD.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/rdd/PartitionerAwareUnionRDD.scala b/core/src/main/scala/org/apache/spark/rdd/PartitionerAwareUnionRDD.scala
index 3cbf3b4..4c625d0 100644
--- a/core/src/main/scala/org/apache/spark/rdd/PartitionerAwareUnionRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/PartitionerAwareUnionRDD.scala
@@ -1,9 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.spark.rdd
 
-import org.apache.spark.{TaskContext, OneToOneDependency, SparkContext, Partition}
 import scala.reflect.ClassTag
 import java.io.{ObjectOutputStream, IOException}
+import org.apache.spark.{TaskContext, OneToOneDependency, SparkContext, Partition}
+
 
+/**
+ * Class representing partitions of PartitionerAwareUnionRDD, which maintains the list of corresponding partitions
+ * of parent RDDs.
+ */
 private[spark]
 class PartitionerAwareUnionRDDPartition(
     @transient val rdds: Seq[RDD[_]],
@@ -22,6 +44,14 @@ class PartitionerAwareUnionRDDPartition(
   }
 }
 
+/**
+ * Class representing an RDD that can take multiple RDDs partitioned by the same partitioner and
+ * unify them into a single RDD while preserving the partitioner. So m RDDs with p partitions each
+ * will be unified to a single RDD with p partitions and the same partitioner. The preferred
+ * location for each partition of the unified RDD will be the most common preferred location
+ * of the corresponding partitions of the parent RDDs. For example, the location of partition 0
+ * of the unified RDD will be where most of partition 0 of the parent RDDs are located.
+ */
 private[spark]
 class PartitionerAwareUnionRDD[T: ClassTag](
     sc: SparkContext,
@@ -54,7 +84,7 @@ class PartitionerAwareUnionRDD[T: ClassTag](
     val location = if (locations.isEmpty) {
       None
     } else  {
-      // Find the location where maximum number of parent partitions prefer 
+      // Find the location that the maximum number of parent partitions prefer
       Some(locations.groupBy(x => x).maxBy(_._2.length)._1)
     }
     logDebug("Selected location for " + this + ", partition " + s.index + " = " + location)
@@ -73,7 +103,7 @@ class PartitionerAwareUnionRDD[T: ClassTag](
     rdds = null
   }
 
-  // gets the *current* preferred locations from the DAGScheduler (as opposed to the static ones)
+  // Get the *current* preferred locations from the DAGScheduler (as opposed to the static ones)
   private def currPrefLocs(rdd: RDD[_], part: Partition): Seq[String] = {
     rdd.context.getPreferredLocs(rdd, part.index).map(tl => tl.host)
   }