You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by an...@apache.org on 2014/12/08 04:37:29 UTC
spark git commit: [SPARK-4646] Replace Scala.util.Sorting.quickSort
with Sorter(TimSort) in Spark
Repository: spark
Updated Branches:
refs/heads/master e895e0cbe -> 2e6b736b0
[SPARK-4646] Replace Scala.util.Sorting.quickSort with Sorter(TimSort) in Spark
This patch just replaces a native quick sorter with Sorter(TimSort) in Spark.
It could get performance gains by ~8% in my quick experiments.
Author: Takeshi Yamamuro <li...@gmail.com>
Closes #3507 from maropu/TimSortInEdgePartitionBuilderSpike and squashes the following commits:
8d4e5d2 [Takeshi Yamamuro] Remove a wildcard import
3527e00 [Takeshi Yamamuro] Replace Scala.util.Sorting.quickSort with Sorter(TimSort) in Spark
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2e6b736b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2e6b736b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2e6b736b
Branch: refs/heads/master
Commit: 2e6b736b0e6e5920d0523533c87832a53211db42
Parents: e895e0c
Author: Takeshi Yamamuro <li...@gmail.com>
Authored: Sun Dec 7 19:36:08 2014 -0800
Committer: Ankur Dave <an...@gmail.com>
Committed: Sun Dec 7 19:37:14 2014 -0800
----------------------------------------------------------------------
.../scala/org/apache/spark/graphx/Edge.scala | 30 +++++++++++++++
.../graphx/impl/EdgePartitionBuilder.scala | 39 +++++++++++++++++---
2 files changed, 64 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/2e6b736b/graphx/src/main/scala/org/apache/spark/graphx/Edge.scala
----------------------------------------------------------------------
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/Edge.scala b/graphx/src/main/scala/org/apache/spark/graphx/Edge.scala
index 7e842ec..ecc37dc 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/Edge.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/Edge.scala
@@ -17,6 +17,8 @@
package org.apache.spark.graphx
+import org.apache.spark.util.collection.SortDataFormat
+
/**
* A single directed edge consisting of a source id, target id,
* and the data associated with the edge.
@@ -65,4 +67,32 @@ object Edge {
else 1
}
}
+
+ private[graphx] def edgeArraySortDataFormat[ED] = new SortDataFormat[Edge[ED], Array[Edge[ED]]] {
+ override def getKey(data: Array[Edge[ED]], pos: Int): Edge[ED] = {
+ data(pos)
+ }
+
+ override def swap(data: Array[Edge[ED]], pos0: Int, pos1: Int): Unit = {
+ val tmp = data(pos0)
+ data(pos0) = data(pos1)
+ data(pos1) = tmp
+ }
+
+ override def copyElement(
+ src: Array[Edge[ED]], srcPos: Int,
+ dst: Array[Edge[ED]], dstPos: Int) {
+ dst(dstPos) = src(srcPos)
+ }
+
+ override def copyRange(
+ src: Array[Edge[ED]], srcPos: Int,
+ dst: Array[Edge[ED]], dstPos: Int, length: Int) {
+ System.arraycopy(src, srcPos, dst, dstPos, length)
+ }
+
+ override def allocate(length: Int): Array[Edge[ED]] = {
+ new Array[Edge[ED]](length)
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/spark/blob/2e6b736b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartitionBuilder.scala
----------------------------------------------------------------------
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartitionBuilder.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartitionBuilder.scala
index b0cb0fe..409cf60 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartitionBuilder.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartitionBuilder.scala
@@ -18,12 +18,10 @@
package org.apache.spark.graphx.impl
import scala.reflect.ClassTag
-import scala.util.Sorting
-
-import org.apache.spark.util.collection.{BitSet, OpenHashSet, PrimitiveVector}
import org.apache.spark.graphx._
import org.apache.spark.graphx.util.collection.GraphXPrimitiveKeyOpenHashMap
+import org.apache.spark.util.collection.{SortDataFormat, Sorter, PrimitiveVector}
/** Constructs an EdgePartition from scratch. */
private[graphx]
@@ -38,7 +36,8 @@ class EdgePartitionBuilder[@specialized(Long, Int, Double) ED: ClassTag, VD: Cla
def toEdgePartition: EdgePartition[ED, VD] = {
val edgeArray = edges.trim().array
- Sorting.quickSort(edgeArray)(Edge.lexicographicOrdering)
+ new Sorter(Edge.edgeArraySortDataFormat[ED])
+ .sort(edgeArray, 0, edgeArray.length, Edge.lexicographicOrdering)
val localSrcIds = new Array[Int](edgeArray.size)
val localDstIds = new Array[Int](edgeArray.size)
val data = new Array[ED](edgeArray.size)
@@ -97,7 +96,8 @@ class ExistingEdgePartitionBuilder[
def toEdgePartition: EdgePartition[ED, VD] = {
val edgeArray = edges.trim().array
- Sorting.quickSort(edgeArray)(EdgeWithLocalIds.lexicographicOrdering)
+ new Sorter(EdgeWithLocalIds.edgeArraySortDataFormat[ED])
+ .sort(edgeArray, 0, edgeArray.length, EdgeWithLocalIds.lexicographicOrdering)
val localSrcIds = new Array[Int](edgeArray.size)
val localDstIds = new Array[Int](edgeArray.size)
val data = new Array[ED](edgeArray.size)
@@ -140,4 +140,33 @@ private[impl] object EdgeWithLocalIds {
}
}
+ private[graphx] def edgeArraySortDataFormat[ED]
+ = new SortDataFormat[EdgeWithLocalIds[ED], Array[EdgeWithLocalIds[ED]]] {
+ override def getKey(
+ data: Array[EdgeWithLocalIds[ED]], pos: Int): EdgeWithLocalIds[ED] = {
+ data(pos)
+ }
+
+ override def swap(data: Array[EdgeWithLocalIds[ED]], pos0: Int, pos1: Int): Unit = {
+ val tmp = data(pos0)
+ data(pos0) = data(pos1)
+ data(pos1) = tmp
+ }
+
+ override def copyElement(
+ src: Array[EdgeWithLocalIds[ED]], srcPos: Int,
+ dst: Array[EdgeWithLocalIds[ED]], dstPos: Int) {
+ dst(dstPos) = src(srcPos)
+ }
+
+ override def copyRange(
+ src: Array[EdgeWithLocalIds[ED]], srcPos: Int,
+ dst: Array[EdgeWithLocalIds[ED]], dstPos: Int, length: Int) {
+ System.arraycopy(src, srcPos, dst, dstPos, length)
+ }
+
+ override def allocate(length: Int): Array[EdgeWithLocalIds[ED]] = {
+ new Array[EdgeWithLocalIds[ED]](length)
+ }
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org