You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by rx...@apache.org on 2013/11/18 03:42:27 UTC

[1/5] git commit: Slightly enhanced PrimitiveVector: 1. Added trim() method 2. Added size method. 3. Renamed getUnderlyingArray to array. 4. Minor documentation update.

Updated Branches:
  refs/heads/master 1b5b35830 -> e2ebc3a9d


Slightly enhanced PrimitiveVector:
1. Added trim() method.
2. Added size method.
3. Renamed getUnderlyingArray to array.
4. Minor documentation update.


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/c30979c7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/c30979c7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/c30979c7

Branch: refs/heads/master
Commit: c30979c7d6009936853e731bfde38ec9d04ea347
Parents: 1b5b358
Author: Reynold Xin <rx...@apache.org>
Authored: Sun Nov 17 17:09:40 2013 -0800
Committer: Reynold Xin <rx...@apache.org>
Committed: Sun Nov 17 17:09:40 2013 -0800

----------------------------------------------------------------------
 .../spark/util/collection/PrimitiveVector.scala | 40 +++++++++++++-------
 1 file changed, 26 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/c30979c7/core/src/main/scala/org/apache/spark/util/collection/PrimitiveVector.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/util/collection/PrimitiveVector.scala b/core/src/main/scala/org/apache/spark/util/collection/PrimitiveVector.scala
index 369519c..54a5569 100644
--- a/core/src/main/scala/org/apache/spark/util/collection/PrimitiveVector.scala
+++ b/core/src/main/scala/org/apache/spark/util/collection/PrimitiveVector.scala
@@ -17,35 +17,47 @@
 
 package org.apache.spark.util.collection
 
-/** Provides a simple, non-threadsafe, array-backed vector that can store primitives. */
+/**
+ * An append-only, non-threadsafe, array-backed vector that is optimized for primitive types.
+ */
 private[spark]
 class PrimitiveVector[@specialized(Long, Int, Double) V: ClassManifest](initialSize: Int = 64) {
-  private var numElements = 0
-  private var array: Array[V] = _
+  private var _numElements = 0
+  private var _array: Array[V] = _
 
   // NB: This must be separate from the declaration, otherwise the specialized parent class
-  // will get its own array with the same initial size. TODO: Figure out why...
-  array = new Array[V](initialSize)
+  // will get its own array with the same initial size.
+  _array = new Array[V](initialSize)
 
   def apply(index: Int): V = {
-    require(index < numElements)
-    array(index)
+    require(index < _numElements)
+    _array(index)
   }
 
   def +=(value: V) {
-    if (numElements == array.length) { resize(array.length * 2) }
-    array(numElements) = value
-    numElements += 1
+    if (_numElements == _array.length) {
+      resize(_array.length * 2)
+    }
+    _array(_numElements) = value
+    _numElements += 1
   }
 
-  def length = numElements
+  def capacity: Int = _array.length
+
+  def length: Int = _numElements
+
+  def size: Int = _numElements
+
+  /** Get the underlying array backing this vector. */
+  def array: Array[V] = _array
 
-  def getUnderlyingArray = array
+  /** Trims this vector so that the capacity is equal to the size. */
+  def trim(): Unit = resize(size)
 
   /** Resizes the array, dropping elements if the total length decreases. */
   def resize(newLength: Int) {
     val newArray = new Array[V](newLength)
-    array.copyToArray(newArray)
-    array = newArray
+    _array.copyToArray(newArray)
+    _array = newArray
   }
 }


[3/5] git commit: Add PrimitiveVectorSuite and fix bug in resize()

Posted by rx...@apache.org.
Add PrimitiveVectorSuite and fix bug in resize()


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/85763f49
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/85763f49
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/85763f49

Branch: refs/heads/master
Commit: 85763f4942afc095595dc32c853d077bdbf49644
Parents: 16a2286
Author: Aaron Davidson <aa...@databricks.com>
Authored: Sun Nov 17 17:59:18 2013 -0800
Committer: Aaron Davidson <aa...@databricks.com>
Committed: Sun Nov 17 18:16:51 2013 -0800

----------------------------------------------------------------------
 .../spark/util/collection/PrimitiveVector.scala |   3 +
 .../util/collection/PrimitiveVectorSuite.scala  | 117 +++++++++++++++++++
 2 files changed, 120 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/85763f49/core/src/main/scala/org/apache/spark/util/collection/PrimitiveVector.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/util/collection/PrimitiveVector.scala b/core/src/main/scala/org/apache/spark/util/collection/PrimitiveVector.scala
index b4fcc92..20554f0 100644
--- a/core/src/main/scala/org/apache/spark/util/collection/PrimitiveVector.scala
+++ b/core/src/main/scala/org/apache/spark/util/collection/PrimitiveVector.scala
@@ -59,6 +59,9 @@ class PrimitiveVector[@specialized(Long, Int, Double) V: ClassManifest](initialS
     val newArray = new Array[V](newLength)
     _array.copyToArray(newArray)
     _array = newArray
+    if (newLength < _numElements) {
+      _numElements = newLength
+    }
     this
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/85763f49/core/src/test/scala/org/apache/spark/util/collection/PrimitiveVectorSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/util/collection/PrimitiveVectorSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/PrimitiveVectorSuite.scala
new file mode 100644
index 0000000..970dade
--- /dev/null
+++ b/core/src/test/scala/org/apache/spark/util/collection/PrimitiveVectorSuite.scala
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.util.collection
+
+import org.scalatest.FunSuite
+
+import org.apache.spark.util.SizeEstimator
+
+class PrimitiveVectorSuite extends FunSuite {
+
+  test("primitive value") {
+    val vector = new PrimitiveVector[Int]
+
+    for (i <- 0 until 1000) {
+      vector += i
+      assert(vector(i) === i)
+    }
+
+    assert(vector.size === 1000)
+    assert(vector.size == vector.length)
+    intercept[IllegalArgumentException] {
+      vector(1000)
+    }
+
+    for (i <- 0 until 1000) {
+      assert(vector(i) == i)
+    }
+  }
+
+  test("non-primitive value") {
+    val vector = new PrimitiveVector[String]
+
+    for (i <- 0 until 1000) {
+      vector += i.toString
+      assert(vector(i) === i.toString)
+    }
+
+    assert(vector.size === 1000)
+    assert(vector.size == vector.length)
+    intercept[IllegalArgumentException] {
+      vector(1000)
+    }
+
+    for (i <- 0 until 1000) {
+      assert(vector(i) == i.toString)
+    }
+  }
+
+  test("ideal growth") {
+    val vector = new PrimitiveVector[Long](initialSize = 1)
+    vector += 1
+    for (i <- 1 until 1024) {
+      vector += i
+      assert(vector.size === i + 1)
+      assert(vector.capacity === Integer.highestOneBit(i) * 2)
+    }
+    assert(vector.capacity === 1024)
+    vector += 1024
+    assert(vector.capacity === 2048)
+  }
+
+  test("ideal size") {
+    val vector = new PrimitiveVector[Long](8192)
+    for (i <- 0 until 8192) {
+      vector += i
+    }
+    assert(vector.size === 8192)
+    assert(vector.capacity === 8192)
+    val actualSize = SizeEstimator.estimate(vector)
+    val expectedSize = 8192 * 8
+    // Make sure we are not allocating a significant amount of memory beyond our expected.
+    // Due to specialization wonkiness, we need to ensure we don't have 2 copies of the array.
+    assert(actualSize < expectedSize * 1.1)
+  }
+
+  test("resizing") {
+    val vector = new PrimitiveVector[Long]
+    for (i <- 0 until 4097) {
+      vector += i
+    }
+    assert(vector.size === 4097)
+    assert(vector.capacity === 8192)
+    vector.trim()
+    assert(vector.size === 4097)
+    assert(vector.capacity === 4097)
+    vector.resize(5000)
+    assert(vector.size === 4097)
+    assert(vector.capacity === 5000)
+    vector.resize(4000)
+    assert(vector.size === 4000)
+    assert(vector.capacity === 4000)
+    vector.resize(5000)
+    assert(vector.size === 4000)
+    assert(vector.capacity === 5000)
+    for (i <- 0 until 4000) {
+      assert(vector(i) == i)
+    }
+    intercept[IllegalArgumentException] {
+      vector(4000)
+    }
+  }
+}


[4/5] git commit: Merge pull request #3 from aarondav/pv-test

Posted by rx...@apache.org.
Merge pull request #3 from aarondav/pv-test

Add PrimitiveVectorSuite and fix bug in resize()

Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/26f616d7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/26f616d7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/26f616d7

Branch: refs/heads/master
Commit: 26f616d73a3441cec749335913890b8c721de9b1
Parents: 16a2286 85763f4
Author: Reynold Xin <rx...@apache.org>
Authored: Sun Nov 17 18:18:16 2013 -0800
Committer: Reynold Xin <rx...@apache.org>
Committed: Sun Nov 17 18:18:16 2013 -0800

----------------------------------------------------------------------
 .../spark/util/collection/PrimitiveVector.scala |   3 +
 .../util/collection/PrimitiveVectorSuite.scala  | 117 +++++++++++++++++++
 2 files changed, 120 insertions(+)
----------------------------------------------------------------------



[5/5] git commit: Merge pull request #182 from rxin/vector

Posted by rx...@apache.org.
Merge pull request #182 from rxin/vector

Slightly enhanced PrimitiveVector:

1. Added trim() method.
2. Added size method.
3. Renamed getUnderlyingArray to array.
4. Minor documentation update.


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/e2ebc3a9
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/e2ebc3a9
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/e2ebc3a9

Branch: refs/heads/master
Commit: e2ebc3a9d8bca83bf842b134f2f056c1af0ad2be
Parents: 1b5b358 26f616d
Author: Reynold Xin <rx...@apache.org>
Authored: Sun Nov 17 18:42:18 2013 -0800
Committer: Reynold Xin <rx...@apache.org>
Committed: Sun Nov 17 18:42:18 2013 -0800

----------------------------------------------------------------------
 .../spark/util/collection/PrimitiveVector.scala |  46 +++++---
 .../util/collection/PrimitiveVectorSuite.scala  | 117 +++++++++++++++++++
 2 files changed, 148 insertions(+), 15 deletions(-)
----------------------------------------------------------------------



[2/5] git commit: Return the vector itself for trim and resize method in PrimitiveVector.

Posted by rx...@apache.org.
Return the vector itself for trim and resize method in PrimitiveVector.


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/16a2286d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/16a2286d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/16a2286d

Branch: refs/heads/master
Commit: 16a2286d6d0e692e0d2e2d568a3c72c053f5047a
Parents: c30979c
Author: Reynold Xin <rx...@apache.org>
Authored: Sun Nov 17 17:52:02 2013 -0800
Committer: Reynold Xin <rx...@apache.org>
Committed: Sun Nov 17 17:52:02 2013 -0800

----------------------------------------------------------------------
 .../org/apache/spark/util/collection/PrimitiveVector.scala    | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/16a2286d/core/src/main/scala/org/apache/spark/util/collection/PrimitiveVector.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/util/collection/PrimitiveVector.scala b/core/src/main/scala/org/apache/spark/util/collection/PrimitiveVector.scala
index 54a5569..b4fcc92 100644
--- a/core/src/main/scala/org/apache/spark/util/collection/PrimitiveVector.scala
+++ b/core/src/main/scala/org/apache/spark/util/collection/PrimitiveVector.scala
@@ -48,16 +48,17 @@ class PrimitiveVector[@specialized(Long, Int, Double) V: ClassManifest](initialS
 
   def size: Int = _numElements
 
-  /** Get the underlying array backing this vector. */
+  /** Gets the underlying array backing this vector. */
   def array: Array[V] = _array
 
   /** Trims this vector so that the capacity is equal to the size. */
-  def trim(): Unit = resize(size)
+  def trim(): PrimitiveVector[V] = resize(size)
 
   /** Resizes the array, dropping elements if the total length decreases. */
-  def resize(newLength: Int) {
+  def resize(newLength: Int): PrimitiveVector[V] = {
     val newArray = new Array[V](newLength)
     _array.copyToArray(newArray)
     _array = newArray
+    this
   }
 }