You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2019/05/17 22:13:19 UTC
[arrow] branch master updated: ARROW-2412: [Integration] Add nested
dictionary test case, skipped for now
This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 2735683 ARROW-2412: [Integration] Add nested dictionary test case, skipped for now
2735683 is described below
commit 2735683be0235f9d9ba9b318acad9b35436faa34
Author: Brian Hulette <hu...@gmail.com>
AuthorDate: Fri May 17 17:13:10 2019 -0500
ARROW-2412: [Integration] Add nested dictionary test case, skipped for now
Adds a test case that contains a dictionary-encoded struct and a dictionary-encoded list, both with dictionary-encoded children. As expected, Java, C++, and JS all complain about the generated JSON.
Author: Brian Hulette <hu...@gmail.com>
Author: Brian Hulette <br...@ccri.com>
Closes #1848 from TheNeuralBit/nested-dictionary-integration and squashes the following commits:
eac016bbe <Brian Hulette> Remove checks from IPC readers preventing nested dictionaries
0118362ce <Brian Hulette> skip nested dictionary test
5e5276334 <Brian Hulette> Add nested dictionary test case to the integration test
---
integration/integration_test.py | 31 +++++++++++++++++++++++++++++++
js/src/ipc/metadata/json.ts | 4 ++--
js/src/ipc/metadata/message.ts | 4 ++--
3 files changed, 35 insertions(+), 4 deletions(-)
diff --git a/integration/integration_test.py b/integration/integration_test.py
index 9aafb6c..b2b1b5e 100644
--- a/integration/integration_test.py
+++ b/integration/integration_test.py
@@ -994,6 +994,35 @@ def generate_dictionary_case():
dictionaries=[dict0, dict1, dict2])
+def generate_nested_dictionary_case():
+ str_type = StringType('str')
+ dict0 = Dictionary(0, str_type, str_type.generate_column(10, name='DICT0'))
+
+ list_type = ListType(
+ 'list',
+ DictionaryType('str_dict', get_field('', 'int8'), dict0))
+ dict1 = Dictionary(1,
+ list_type,
+ list_type.generate_column(30, name='DICT1'))
+
+ struct_type = StructType('struct', [
+ DictionaryType('str_dict_a', get_field('', 'int8'), dict0),
+ DictionaryType('str_dict_b', get_field('', 'int8'), dict0)
+ ])
+ dict2 = Dictionary(2,
+ struct_type,
+ struct_type.generate_column(30, name='DICT2'))
+
+ fields = [
+ DictionaryType('list_dict', get_field('', 'int8'), dict1),
+ DictionaryType('struct_dict', get_field('', 'int8'), dict2)
+ ]
+
+ batch_sizes = [10, 13]
+ return _generate_file("nested_dictionary", fields, batch_sizes,
+ dictionaries=[dict0, dict1, dict2])
+
+
def get_generated_json_files(tempdir=None, flight=False):
tempdir = tempdir or tempfile.mkdtemp()
@@ -1008,6 +1037,8 @@ def get_generated_json_files(tempdir=None, flight=False):
generate_interval_case(),
generate_nested_case(),
generate_dictionary_case().skip_category(SKIP_FLIGHT),
+ generate_nested_dictionary_case().skip_category(SKIP_ARROW)
+ .skip_category(SKIP_FLIGHT),
]
if flight:
diff --git a/js/src/ipc/metadata/json.ts b/js/src/ipc/metadata/json.ts
index fa219b3..0e9ca19 100644
--- a/js/src/ipc/metadata/json.ts
+++ b/js/src/ipc/metadata/json.ts
@@ -103,7 +103,7 @@ export function fieldFromJSON(_field: any, dictionaries?: Map<number, DataType>,
let dictType: Dictionary;
let dictField: Field<Dictionary>;
- // If no dictionary encoding, or in the process of decoding the children of a dictionary-encoded field
+ // If no dictionary encoding
if (!dictionaries || !dictionaryFields || !(dictMeta = _field['dictionary'])) {
type = typeFromJSON(_field, fieldChildrenFromJSON(_field, dictionaries, dictionaryFields));
field = new Field(_field['name'], type, _field['nullable'], customMetadataFromJSON(_field['customMetadata']));
@@ -115,7 +115,7 @@ export function fieldFromJSON(_field: any, dictionaries?: Map<number, DataType>,
else if (!dictionaries.has(id = dictMeta['id'])) {
// a dictionary index defaults to signed 32 bit int if unspecified
keys = (keys = dictMeta['indexType']) ? indexTypeFromJSON(keys) as TKeys : new Int32();
- dictionaries.set(id, type = typeFromJSON(_field, fieldChildrenFromJSON(_field)));
+ dictionaries.set(id, type = typeFromJSON(_field, fieldChildrenFromJSON(_field, dictionaries, dictionaryFields)));
dictType = new Dictionary(type, keys, id, dictMeta['isOrdered']);
dictField = new Field(_field['name'], dictType, _field['nullable'], customMetadataFromJSON(_field['customMetadata']));
dictionaryFields.set(id, [field = dictField]);
diff --git a/js/src/ipc/metadata/message.ts b/js/src/ipc/metadata/message.ts
index d1ab219..4298c28 100644
--- a/js/src/ipc/metadata/message.ts
+++ b/js/src/ipc/metadata/message.ts
@@ -351,7 +351,7 @@ function decodeField(f: _Field, dictionaries?: Map<number, DataType>, dictionary
let dictMeta: _DictionaryEncoding | null;
let dictField: Field<Dictionary>;
- // If no dictionary encoding, or in the process of decoding the children of a dictionary-encoded field
+ // If no dictionary encoding
if (!dictionaries || !dictionaryFields || !(dictMeta = f.dictionary())) {
type = decodeFieldType(f, decodeFieldChildren(f, dictionaries, dictionaryFields));
field = new Field(f.name()!, type, f.nullable(), decodeCustomMetadata(f));
@@ -363,7 +363,7 @@ function decodeField(f: _Field, dictionaries?: Map<number, DataType>, dictionary
else if (!dictionaries.has(id = dictMeta.id().low)) {
// a dictionary index defaults to signed 32 bit int if unspecified
keys = (keys = dictMeta.indexType()) ? decodeIndexType(keys) as TKeys : new Int32();
- dictionaries.set(id, type = decodeFieldType(f, decodeFieldChildren(f)));
+ dictionaries.set(id, type = decodeFieldType(f, decodeFieldChildren(f, dictionaries, dictionaryFields)));
dictType = new Dictionary(type, keys, id, dictMeta.isOrdered());
dictField = new Field(f.name()!, dictType, f.nullable(), decodeCustomMetadata(f));
dictionaryFields.set(id, [field = dictField]);