You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2018/01/17 22:26:47 UTC
spark git commit: [SPARK-23047][PYTHON][SQL] Change MapVector to
NullableMapVector in ArrowColumnVector
Repository: spark
Updated Branches:
refs/heads/master e946c63dd -> 4e6f8fb15
[SPARK-23047][PYTHON][SQL] Change MapVector to NullableMapVector in ArrowColumnVector
## What changes were proposed in this pull request?
This PR changes usages of `MapVector` in the Spark codebase to `NullableMapVector`.
`MapVector` is an internal Arrow class that is not supposed to be used directly. We should use `NullableMapVector` instead.
## How was this patch tested?
Existing test.
Author: Li Jin <ic...@gmail.com>
Closes #20239 from icexelloss/arrow-map-vector.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4e6f8fb1
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4e6f8fb1
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4e6f8fb1
Branch: refs/heads/master
Commit: 4e6f8fb150ae09c7d1de6beecb2b98e5afa5da19
Parents: e946c63
Author: Li Jin <ic...@gmail.com>
Authored: Thu Jan 18 07:26:43 2018 +0900
Committer: hyukjinkwon <gu...@gmail.com>
Committed: Thu Jan 18 07:26:43 2018 +0900
----------------------------------------------------------------------
.../spark/sql/vectorized/ArrowColumnVector.java | 13 +++++--
.../vectorized/ArrowColumnVectorSuite.scala | 36 ++++++++++++++++++++
2 files changed, 46 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/4e6f8fb1/sql/core/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java
----------------------------------------------------------------------
diff --git a/sql/core/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java
index 7083332..eb69001 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java
@@ -247,8 +247,8 @@ public final class ArrowColumnVector extends ColumnVector {
childColumns = new ArrowColumnVector[1];
childColumns[0] = new ArrowColumnVector(listVector.getDataVector());
- } else if (vector instanceof MapVector) {
- MapVector mapVector = (MapVector) vector;
+ } else if (vector instanceof NullableMapVector) {
+ NullableMapVector mapVector = (NullableMapVector) vector;
accessor = new StructAccessor(mapVector);
childColumns = new ArrowColumnVector[mapVector.size()];
@@ -553,9 +553,16 @@ public final class ArrowColumnVector extends ColumnVector {
}
}
+ /**
+ * Any call to "get" method will throw UnsupportedOperationException.
+ *
+ * Accessing struct values in an ArrowColumnVector doesn't use this accessor. Instead, it uses the getStruct() method defined
+ * in the parent class. Any call to a "get" method in this class is a bug in the code.
+ *
+ */
private static class StructAccessor extends ArrowVectorAccessor {
- StructAccessor(MapVector vector) {
+ StructAccessor(NullableMapVector vector) {
super(vector);
}
}
http://git-wip-us.apache.org/repos/asf/spark/blob/4e6f8fb1/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ArrowColumnVectorSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ArrowColumnVectorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ArrowColumnVectorSuite.scala
index 7304803..5343266 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ArrowColumnVectorSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ArrowColumnVectorSuite.scala
@@ -322,6 +322,42 @@ class ArrowColumnVectorSuite extends SparkFunSuite {
allocator.close()
}
+ test("non nullable struct") {
+ val allocator = ArrowUtils.rootAllocator.newChildAllocator("struct", 0, Long.MaxValue)
+ val schema = new StructType().add("int", IntegerType).add("long", LongType)
+ val vector = ArrowUtils.toArrowField("struct", schema, nullable = false, null)
+ .createVector(allocator).asInstanceOf[NullableMapVector]
+
+ vector.allocateNew()
+ val intVector = vector.getChildByOrdinal(0).asInstanceOf[IntVector]
+ val longVector = vector.getChildByOrdinal(1).asInstanceOf[BigIntVector]
+
+ vector.setIndexDefined(0)
+ intVector.setSafe(0, 1)
+ longVector.setSafe(0, 1L)
+
+ vector.setIndexDefined(1)
+ intVector.setSafe(1, 2)
+ longVector.setNull(1)
+
+ vector.setValueCount(2)
+
+ val columnVector = new ArrowColumnVector(vector)
+ assert(columnVector.dataType === schema)
+ assert(columnVector.numNulls === 0)
+
+ val row0 = columnVector.getStruct(0, 2)
+ assert(row0.getInt(0) === 1)
+ assert(row0.getLong(1) === 1L)
+
+ val row1 = columnVector.getStruct(1, 2)
+ assert(row1.getInt(0) === 2)
+ assert(row1.isNullAt(1))
+
+ columnVector.close()
+ allocator.close()
+ }
+
test("struct") {
val allocator = ArrowUtils.rootAllocator.newChildAllocator("struct", 0, Long.MaxValue)
val schema = new StructType().add("int", IntegerType).add("long", LongType)
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org