You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by rx...@apache.org on 2013/11/18 03:42:27 UTC
[1/5] git commit: Slightly enhanced PrimitiveVector: 1. Added trim()
method 2. Added size method. 3. Renamed getUnderlyingArray to array. 4. Minor
documentation update.
Updated Branches:
refs/heads/master 1b5b35830 -> e2ebc3a9d
Slightly enhanced PrimitiveVector:
1. Added trim() method
2. Added size method.
3. Renamed getUnderlyingArray to array.
4. Minor documentation update.
Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/c30979c7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/c30979c7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/c30979c7
Branch: refs/heads/master
Commit: c30979c7d6009936853e731bfde38ec9d04ea347
Parents: 1b5b358
Author: Reynold Xin <rx...@apache.org>
Authored: Sun Nov 17 17:09:40 2013 -0800
Committer: Reynold Xin <rx...@apache.org>
Committed: Sun Nov 17 17:09:40 2013 -0800
----------------------------------------------------------------------
.../spark/util/collection/PrimitiveVector.scala | 40 +++++++++++++-------
1 file changed, 26 insertions(+), 14 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/c30979c7/core/src/main/scala/org/apache/spark/util/collection/PrimitiveVector.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/util/collection/PrimitiveVector.scala b/core/src/main/scala/org/apache/spark/util/collection/PrimitiveVector.scala
index 369519c..54a5569 100644
--- a/core/src/main/scala/org/apache/spark/util/collection/PrimitiveVector.scala
+++ b/core/src/main/scala/org/apache/spark/util/collection/PrimitiveVector.scala
@@ -17,35 +17,47 @@
package org.apache.spark.util.collection
-/** Provides a simple, non-threadsafe, array-backed vector that can store primitives. */
+/**
+ * An append-only, non-threadsafe, array-backed vector that is optimized for primitive types.
+ */
private[spark]
class PrimitiveVector[@specialized(Long, Int, Double) V: ClassManifest](initialSize: Int = 64) {
- private var numElements = 0
- private var array: Array[V] = _
+ private var _numElements = 0
+ private var _array: Array[V] = _
// NB: This must be separate from the declaration, otherwise the specialized parent class
- // will get its own array with the same initial size. TODO: Figure out why...
- array = new Array[V](initialSize)
+ // will get its own array with the same initial size.
+ _array = new Array[V](initialSize)
def apply(index: Int): V = {
- require(index < numElements)
- array(index)
+ require(index < _numElements)
+ _array(index)
}
def +=(value: V) {
- if (numElements == array.length) { resize(array.length * 2) }
- array(numElements) = value
- numElements += 1
+ if (_numElements == _array.length) {
+ resize(_array.length * 2)
+ }
+ _array(_numElements) = value
+ _numElements += 1
}
- def length = numElements
+ def capacity: Int = _array.length
+
+ def length: Int = _numElements
+
+ def size: Int = _numElements
+
+ /** Get the underlying array backing this vector. */
+ def array: Array[V] = _array
- def getUnderlyingArray = array
+ /** Trims this vector so that the capacity is equal to the size. */
+ def trim(): Unit = resize(size)
/** Resizes the array, dropping elements if the total length decreases. */
def resize(newLength: Int) {
val newArray = new Array[V](newLength)
- array.copyToArray(newArray)
- array = newArray
+ _array.copyToArray(newArray)
+ _array = newArray
}
}
[3/5] git commit: Add PrimitiveVectorSuite and fix bug in resize()
Posted by rx...@apache.org.
Add PrimitiveVectorSuite and fix bug in resize()
Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/85763f49
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/85763f49
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/85763f49
Branch: refs/heads/master
Commit: 85763f4942afc095595dc32c853d077bdbf49644
Parents: 16a2286
Author: Aaron Davidson <aa...@databricks.com>
Authored: Sun Nov 17 17:59:18 2013 -0800
Committer: Aaron Davidson <aa...@databricks.com>
Committed: Sun Nov 17 18:16:51 2013 -0800
----------------------------------------------------------------------
.../spark/util/collection/PrimitiveVector.scala | 3 +
.../util/collection/PrimitiveVectorSuite.scala | 117 +++++++++++++++++++
2 files changed, 120 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/85763f49/core/src/main/scala/org/apache/spark/util/collection/PrimitiveVector.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/util/collection/PrimitiveVector.scala b/core/src/main/scala/org/apache/spark/util/collection/PrimitiveVector.scala
index b4fcc92..20554f0 100644
--- a/core/src/main/scala/org/apache/spark/util/collection/PrimitiveVector.scala
+++ b/core/src/main/scala/org/apache/spark/util/collection/PrimitiveVector.scala
@@ -59,6 +59,9 @@ class PrimitiveVector[@specialized(Long, Int, Double) V: ClassManifest](initialS
val newArray = new Array[V](newLength)
_array.copyToArray(newArray)
_array = newArray
+ if (newLength < _numElements) {
+ _numElements = newLength
+ }
this
}
}
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/85763f49/core/src/test/scala/org/apache/spark/util/collection/PrimitiveVectorSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/util/collection/PrimitiveVectorSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/PrimitiveVectorSuite.scala
new file mode 100644
index 0000000..970dade
--- /dev/null
+++ b/core/src/test/scala/org/apache/spark/util/collection/PrimitiveVectorSuite.scala
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.util.collection
+
+import org.scalatest.FunSuite
+
+import org.apache.spark.util.SizeEstimator
+
+class PrimitiveVectorSuite extends FunSuite {
+
+ test("primitive value") {
+ val vector = new PrimitiveVector[Int]
+
+ for (i <- 0 until 1000) {
+ vector += i
+ assert(vector(i) === i)
+ }
+
+ assert(vector.size === 1000)
+ assert(vector.size == vector.length)
+ intercept[IllegalArgumentException] {
+ vector(1000)
+ }
+
+ for (i <- 0 until 1000) {
+ assert(vector(i) == i)
+ }
+ }
+
+ test("non-primitive value") {
+ val vector = new PrimitiveVector[String]
+
+ for (i <- 0 until 1000) {
+ vector += i.toString
+ assert(vector(i) === i.toString)
+ }
+
+ assert(vector.size === 1000)
+ assert(vector.size == vector.length)
+ intercept[IllegalArgumentException] {
+ vector(1000)
+ }
+
+ for (i <- 0 until 1000) {
+ assert(vector(i) == i.toString)
+ }
+ }
+
+ test("ideal growth") {
+ val vector = new PrimitiveVector[Long](initialSize = 1)
+ vector += 1
+ for (i <- 1 until 1024) {
+ vector += i
+ assert(vector.size === i + 1)
+ assert(vector.capacity === Integer.highestOneBit(i) * 2)
+ }
+ assert(vector.capacity === 1024)
+ vector += 1024
+ assert(vector.capacity === 2048)
+ }
+
+ test("ideal size") {
+ val vector = new PrimitiveVector[Long](8192)
+ for (i <- 0 until 8192) {
+ vector += i
+ }
+ assert(vector.size === 8192)
+ assert(vector.capacity === 8192)
+ val actualSize = SizeEstimator.estimate(vector)
+ val expectedSize = 8192 * 8
+ // Make sure we are not allocating significantly more memory than we expect.
+ // Due to specialization wonkiness, we need to ensure we don't have 2 copies of the array.
+ assert(actualSize < expectedSize * 1.1)
+ }
+
+ test("resizing") {
+ val vector = new PrimitiveVector[Long]
+ for (i <- 0 until 4097) {
+ vector += i
+ }
+ assert(vector.size === 4097)
+ assert(vector.capacity === 8192)
+ vector.trim()
+ assert(vector.size === 4097)
+ assert(vector.capacity === 4097)
+ vector.resize(5000)
+ assert(vector.size === 4097)
+ assert(vector.capacity === 5000)
+ vector.resize(4000)
+ assert(vector.size === 4000)
+ assert(vector.capacity === 4000)
+ vector.resize(5000)
+ assert(vector.size === 4000)
+ assert(vector.capacity === 5000)
+ for (i <- 0 until 4000) {
+ assert(vector(i) == i)
+ }
+ intercept[IllegalArgumentException] {
+ vector(4000)
+ }
+ }
+}
[4/5] git commit: Merge pull request #3 from aarondav/pv-test
Posted by rx...@apache.org.
Merge pull request #3 from aarondav/pv-test
Add PrimitiveVectorSuite and fix bug in resize()
Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/26f616d7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/26f616d7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/26f616d7
Branch: refs/heads/master
Commit: 26f616d73a3441cec749335913890b8c721de9b1
Parents: 16a2286 85763f4
Author: Reynold Xin <rx...@apache.org>
Authored: Sun Nov 17 18:18:16 2013 -0800
Committer: Reynold Xin <rx...@apache.org>
Committed: Sun Nov 17 18:18:16 2013 -0800
----------------------------------------------------------------------
.../spark/util/collection/PrimitiveVector.scala | 3 +
.../util/collection/PrimitiveVectorSuite.scala | 117 +++++++++++++++++++
2 files changed, 120 insertions(+)
----------------------------------------------------------------------
[5/5] git commit: Merge pull request #182 from rxin/vector
Posted by rx...@apache.org.
Merge pull request #182 from rxin/vector
Slightly enhanced PrimitiveVector:
1. Added trim() method
2. Added size method.
3. Renamed getUnderlyingArray to array.
4. Minor documentation update.
Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/e2ebc3a9
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/e2ebc3a9
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/e2ebc3a9
Branch: refs/heads/master
Commit: e2ebc3a9d8bca83bf842b134f2f056c1af0ad2be
Parents: 1b5b358 26f616d
Author: Reynold Xin <rx...@apache.org>
Authored: Sun Nov 17 18:42:18 2013 -0800
Committer: Reynold Xin <rx...@apache.org>
Committed: Sun Nov 17 18:42:18 2013 -0800
----------------------------------------------------------------------
.../spark/util/collection/PrimitiveVector.scala | 46 +++++---
.../util/collection/PrimitiveVectorSuite.scala | 117 +++++++++++++++++++
2 files changed, 148 insertions(+), 15 deletions(-)
----------------------------------------------------------------------
[2/5] git commit: Return the vector itself for trim and resize method
in PrimitiveVector.
Posted by rx...@apache.org.
Return the vector itself for trim and resize method in PrimitiveVector.
Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/16a2286d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/16a2286d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/16a2286d
Branch: refs/heads/master
Commit: 16a2286d6d0e692e0d2e2d568a3c72c053f5047a
Parents: c30979c
Author: Reynold Xin <rx...@apache.org>
Authored: Sun Nov 17 17:52:02 2013 -0800
Committer: Reynold Xin <rx...@apache.org>
Committed: Sun Nov 17 17:52:02 2013 -0800
----------------------------------------------------------------------
.../org/apache/spark/util/collection/PrimitiveVector.scala | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/16a2286d/core/src/main/scala/org/apache/spark/util/collection/PrimitiveVector.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/util/collection/PrimitiveVector.scala b/core/src/main/scala/org/apache/spark/util/collection/PrimitiveVector.scala
index 54a5569..b4fcc92 100644
--- a/core/src/main/scala/org/apache/spark/util/collection/PrimitiveVector.scala
+++ b/core/src/main/scala/org/apache/spark/util/collection/PrimitiveVector.scala
@@ -48,16 +48,17 @@ class PrimitiveVector[@specialized(Long, Int, Double) V: ClassManifest](initialS
def size: Int = _numElements
- /** Get the underlying array backing this vector. */
+ /** Gets the underlying array backing this vector. */
def array: Array[V] = _array
/** Trims this vector so that the capacity is equal to the size. */
- def trim(): Unit = resize(size)
+ def trim(): PrimitiveVector[V] = resize(size)
/** Resizes the array, dropping elements if the total length decreases. */
- def resize(newLength: Int) {
+ def resize(newLength: Int): PrimitiveVector[V] = {
val newArray = new Array[V](newLength)
_array.copyToArray(newArray)
_array = newArray
+ this
}
}