You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by em...@apache.org on 2019/06/27 07:29:39 UTC
[arrow] branch master updated: ARROW-1279: [Integration] Enable
MapType integration tests
This is an automated email from the ASF dual-hosted git repository.
emkornfield pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 19b9ccc ARROW-1279: [Integration] Enable MapType integration tests
19b9ccc is described below
commit 19b9ccc3add079768e98e84737eefd7357764150
Author: Bryan Cutler <cu...@gmail.com>
AuthorDate: Thu Jun 27 00:26:59 2019 -0700
ARROW-1279: [Integration] Enable MapType integration tests
This PR enables MapType integration tests for C++ and Java, and fixes remaining issues to pass. Previously, C++ created a MapType with a StructType field that is nullable, which is required to be non-nullable from the spec. The struct field has the name "entries" and the struct data pairs are named "key" and "value."
Author: Bryan Cutler <cu...@gmail.com>
Closes #4684 from BryanCutler/map-type-integration-tests-ARROW-1279 and squashes the following commits:
6259a4574 <Bryan Cutler> rename MapType fields for internal test
30777804f <Bryan Cutler> rename MapType struct field to 'entries'
3185af9fa <Bryan Cutler> Fix MapType to create a non-nullable struct field
---
cpp/src/arrow/ipc/json-internal.cc | 4 +--
cpp/src/arrow/ipc/json-test.cc | 2 +-
cpp/src/arrow/ipc/metadata-internal.cc | 4 +--
cpp/src/arrow/ipc/test-common.cc | 2 +-
cpp/src/arrow/type.cc | 7 +++--
integration/integration_test.py | 12 ++++-----
.../org/apache/arrow/vector/complex/MapVector.java | 31 ++++++++++++++++++++--
7 files changed, 44 insertions(+), 18 deletions(-)
diff --git a/cpp/src/arrow/ipc/json-internal.cc b/cpp/src/arrow/ipc/json-internal.cc
index 42663c0..1352965 100644
--- a/cpp/src/arrow/ipc/json-internal.cc
+++ b/cpp/src/arrow/ipc/json-internal.cc
@@ -1274,8 +1274,8 @@ class ArrayReader {
Status Visit(const MapType& type) {
auto list_type = std::make_shared<ListType>(field(
- "item",
- struct_({field("key", type.key_type(), false), field("item", type.item_type())}),
+ "entries",
+ struct_({field("key", type.key_type(), false), field("value", type.item_type())}),
false));
std::shared_ptr<Array> list_array;
RETURN_NOT_OK(CreateList(list_type, &list_array));
diff --git a/cpp/src/arrow/ipc/json-test.cc b/cpp/src/arrow/ipc/json-test.cc
index fb57fa7..338552d 100644
--- a/cpp/src/arrow/ipc/json-test.cc
+++ b/cpp/src/arrow/ipc/json-test.cc
@@ -204,7 +204,7 @@ TEST(TestJsonArrayWriter, NestedTypes) {
TestArrayRoundTrip(list_array);
- // List
+ // Map
auto map_type = map(utf8(), int32());
auto keys_array = ArrayFromJSON(utf8(), R"(["a", "b", "c", "d", "a", "b", "c"])");
diff --git a/cpp/src/arrow/ipc/metadata-internal.cc b/cpp/src/arrow/ipc/metadata-internal.cc
index 4b349cb..4e1a157 100644
--- a/cpp/src/arrow/ipc/metadata-internal.cc
+++ b/cpp/src/arrow/ipc/metadata-internal.cc
@@ -320,9 +320,7 @@ Status ConcreteTypeFromFlatbuffer(flatbuf::Type type, const void* type_data,
if (children.size() != 1) {
return Status::Invalid("Map must have exactly 1 child field");
}
- if ( // FIXME(bkietz) temporarily disabled: this field is sometimes read nullable
- // children[0]->nullable() ||
- children[0]->type()->id() != Type::STRUCT ||
+ if (children[0]->nullable() || children[0]->type()->id() != Type::STRUCT ||
children[0]->type()->num_children() != 2) {
return Status::Invalid("Map's key-item pairs must be non-nullable structs");
}
diff --git a/cpp/src/arrow/ipc/test-common.cc b/cpp/src/arrow/ipc/test-common.cc
index 12adebc..47c3076 100644
--- a/cpp/src/arrow/ipc/test-common.cc
+++ b/cpp/src/arrow/ipc/test-common.cc
@@ -120,7 +120,7 @@ Status MakeRandomMapArray(const std::shared_ptr<Array>& key_array,
bool include_nulls, MemoryPool* pool,
std::shared_ptr<Array>* out) {
auto pair_type = struct_(
- {field("key", key_array->type(), false), field("item", item_array->type())});
+ {field("key", key_array->type(), false), field("value", item_array->type())});
auto pair_array = std::make_shared<StructArray>(pair_type, num_maps,
ArrayVector{key_array, item_array});
diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc
index b215331..b3fcb0c 100644
--- a/cpp/src/arrow/type.cc
+++ b/cpp/src/arrow/type.cc
@@ -153,8 +153,11 @@ std::string ListType::ToString() const {
MapType::MapType(const std::shared_ptr<DataType>& key_type,
const std::shared_ptr<DataType>& item_type, bool keys_sorted)
- : ListType(struct_({std::make_shared<Field>("key", key_type, false),
- std::make_shared<Field>("item", item_type)})),
+ : ListType(std::make_shared<Field>(
+ "entries",
+ struct_({std::make_shared<Field>("key", key_type, false),
+ std::make_shared<Field>("value", item_type)}),
+ false)),
keys_sorted_(keys_sorted) {
id_ = type_id;
}
diff --git a/integration/integration_test.py b/integration/integration_test.py
index aca0574..dbc03c7 100644
--- a/integration/integration_test.py
+++ b/integration/integration_test.py
@@ -714,7 +714,7 @@ class MapType(DataType):
assert not key_type.nullable
self.key_type = key_type
self.item_type = item_type
- self.pair_type = StructType('item', [key_type, item_type], False)
+ self.pair_type = StructType('entries', [key_type, item_type], False)
self.keysSorted = keysSorted
def _get_type(self):
@@ -1060,10 +1060,10 @@ def generate_interval_case():
def generate_map_case():
# TODO(bkietz): separated from nested_case so it can be
- # independently skipped, consolidate after Java supports map
+ # independently skipped, consolidate after JS supports map
fields = [
MapType('map_nullable', get_field('key', 'utf8', False),
- get_field('item', 'int32')),
+ get_field('value', 'int32')),
]
batch_sizes = [7, 10]
@@ -1220,12 +1220,10 @@ class IntegrationRunner(object):
file_id = guid()[:8]
- if (('JS' in (producer.name, consumer.name) or
- 'Java' in (producer.name, consumer.name)) and
+ if ('JS' in (producer.name, consumer.name) and
"map" in test_case.name):
print('TODO(ARROW-1279): Enable map tests ' +
- ' for Java and JS once Java supports them and JS\'' +
- ' are unbroken')
+ ' for JS once they are unbroken')
continue
if ('JS' in (producer.name, consumer.name) and
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java
index 340fb2a..a367150 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java
@@ -25,14 +25,17 @@ import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.vector.AddOrGetResult;
import org.apache.arrow.vector.FieldVector;
import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.ZeroVector;
import org.apache.arrow.vector.complex.impl.UnionMapReader;
import org.apache.arrow.vector.complex.impl.UnionMapWriter;
import org.apache.arrow.vector.types.Types;
import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeID;
import org.apache.arrow.vector.types.pojo.ArrowType.Map;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;
import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.util.SchemaChangeRuntimeException;
/**
* A MapVector is used to store entries of key/value pairs. It is a container vector that is
@@ -46,6 +49,9 @@ public class MapVector extends ListVector {
public static final String KEY_NAME = "key";
public static final String VALUE_NAME = "value";
+ // TODO: this is only used for addOrGetVector because ListVector declares it private
+ protected CallBack callBack;
+
/**
* Construct an empty MapVector with no data. Child vectors must be added subsequently.
*
@@ -68,6 +74,7 @@ public class MapVector extends ListVector {
*/
public MapVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) {
super(name, allocator, fieldType, callBack);
+ this.callBack = callBack;
reader = new UnionMapReader(this);
}
@@ -121,9 +128,29 @@ public class MapVector extends ListVector {
*/
@Override
public <T extends ValueVector> AddOrGetResult<T> addOrGetVector(FieldType fieldType) {
- AddOrGetResult<T> result = super.addOrGetVector(fieldType);
+
+ // TODO: can call super method once DATA_VECTOR_NAME is configurable
+ boolean created = false;
+ if (vector instanceof ZeroVector) {
+ vector = fieldType.createNewSingleVector("entries", allocator, callBack);
+ // returned vector must have the same field
+ created = true;
+ if (callBack != null &&
+ // not a schema change if changing from ZeroVector to ZeroVector
+ (fieldType.getType().getTypeID() != ArrowTypeID.Null)) {
+ callBack.doWork();
+ }
+ }
+
+ if (vector.getField().getType().getTypeID() != fieldType.getType().getTypeID()) {
+ final String msg = String.format("Inner vector type mismatch. Requested type: [%s], actual type: [%s]",
+ fieldType.getType().getTypeID(), vector.getField().getType().getTypeID());
+ throw new SchemaChangeRuntimeException(msg);
+ }
+
reader = new UnionMapReader(this);
- return result;
+
+ return new AddOrGetResult<>((T) vector, created);
}
/**