You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2019/05/17 22:13:19 UTC

[arrow] branch master updated: ARROW-2412: [Integration] Add nested dictionary test case, skipped for now

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 2735683  ARROW-2412: [Integration] Add nested dictionary test case, skipped for now
2735683 is described below

commit 2735683be0235f9d9ba9b318acad9b35436faa34
Author: Brian Hulette <hu...@gmail.com>
AuthorDate: Fri May 17 17:13:10 2019 -0500

    ARROW-2412: [Integration] Add nested dictionary test case, skipped for now
    
    Adds a test case that contains a dictionary-encoded struct and list, both with dictionary-encoded children. As expected, Java, C++, and JS all complain about the generated JSON.
    
    Author: Brian Hulette <hu...@gmail.com>
    Author: Brian Hulette <br...@ccri.com>
    
    Closes #1848 from TheNeuralBit/nested-dictionary-integration and squashes the following commits:
    
    eac016bbe <Brian Hulette> Remove checks from IPC readers preventing nested dictionaries
    0118362ce <Brian Hulette> skip nested dictionary test
    5e5276334 <Brian Hulette> Add nested dictionary test case to the integration test
---
 integration/integration_test.py | 31 +++++++++++++++++++++++++++++++
 js/src/ipc/metadata/json.ts     |  4 ++--
 js/src/ipc/metadata/message.ts  |  4 ++--
 3 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/integration/integration_test.py b/integration/integration_test.py
index 9aafb6c..b2b1b5e 100644
--- a/integration/integration_test.py
+++ b/integration/integration_test.py
@@ -994,6 +994,35 @@ def generate_dictionary_case():
                           dictionaries=[dict0, dict1, dict2])
 
 
+def generate_nested_dictionary_case():
+    str_type = StringType('str')
+    dict0 = Dictionary(0, str_type, str_type.generate_column(10, name='DICT0'))
+
+    list_type = ListType(
+        'list',
+        DictionaryType('str_dict', get_field('', 'int8'), dict0))
+    dict1 = Dictionary(1,
+                       list_type,
+                       list_type.generate_column(30, name='DICT1'))
+
+    struct_type = StructType('struct', [
+            DictionaryType('str_dict_a', get_field('', 'int8'), dict0),
+            DictionaryType('str_dict_b', get_field('', 'int8'), dict0)
+        ])
+    dict2 = Dictionary(2,
+                       struct_type,
+                       struct_type.generate_column(30, name='DICT2'))
+
+    fields = [
+        DictionaryType('list_dict', get_field('', 'int8'), dict1),
+        DictionaryType('struct_dict', get_field('', 'int8'), dict2)
+    ]
+
+    batch_sizes = [10, 13]
+    return _generate_file("nested_dictionary", fields, batch_sizes,
+                          dictionaries=[dict0, dict1, dict2])
+
+
 def get_generated_json_files(tempdir=None, flight=False):
     tempdir = tempdir or tempfile.mkdtemp()
 
@@ -1008,6 +1037,8 @@ def get_generated_json_files(tempdir=None, flight=False):
         generate_interval_case(),
         generate_nested_case(),
         generate_dictionary_case().skip_category(SKIP_FLIGHT),
+        generate_nested_dictionary_case().skip_category(SKIP_ARROW)
+                                         .skip_category(SKIP_FLIGHT),
     ]
 
     if flight:
diff --git a/js/src/ipc/metadata/json.ts b/js/src/ipc/metadata/json.ts
index fa219b3..0e9ca19 100644
--- a/js/src/ipc/metadata/json.ts
+++ b/js/src/ipc/metadata/json.ts
@@ -103,7 +103,7 @@ export function fieldFromJSON(_field: any, dictionaries?: Map<number, DataType>,
     let dictType: Dictionary;
     let dictField: Field<Dictionary>;
 
-    // If no dictionary encoding, or in the process of decoding the children of a dictionary-encoded field
+    // If no dictionary encoding
     if (!dictionaries || !dictionaryFields || !(dictMeta = _field['dictionary'])) {
         type = typeFromJSON(_field, fieldChildrenFromJSON(_field, dictionaries, dictionaryFields));
         field = new Field(_field['name'], type, _field['nullable'], customMetadataFromJSON(_field['customMetadata']));
@@ -115,7 +115,7 @@ export function fieldFromJSON(_field: any, dictionaries?: Map<number, DataType>,
     else if (!dictionaries.has(id = dictMeta['id'])) {
         // a dictionary index defaults to signed 32 bit int if unspecified
         keys = (keys = dictMeta['indexType']) ? indexTypeFromJSON(keys) as TKeys : new Int32();
-        dictionaries.set(id, type = typeFromJSON(_field, fieldChildrenFromJSON(_field)));
+        dictionaries.set(id, type = typeFromJSON(_field, fieldChildrenFromJSON(_field, dictionaries, dictionaryFields)));
         dictType = new Dictionary(type, keys, id, dictMeta['isOrdered']);
         dictField = new Field(_field['name'], dictType, _field['nullable'], customMetadataFromJSON(_field['customMetadata']));
         dictionaryFields.set(id, [field = dictField]);
diff --git a/js/src/ipc/metadata/message.ts b/js/src/ipc/metadata/message.ts
index d1ab219..4298c28 100644
--- a/js/src/ipc/metadata/message.ts
+++ b/js/src/ipc/metadata/message.ts
@@ -351,7 +351,7 @@ function decodeField(f: _Field, dictionaries?: Map<number, DataType>, dictionary
     let dictMeta: _DictionaryEncoding | null;
     let dictField: Field<Dictionary>;
 
-    // If no dictionary encoding, or in the process of decoding the children of a dictionary-encoded field
+    // If no dictionary encoding
     if (!dictionaries || !dictionaryFields || !(dictMeta = f.dictionary())) {
         type = decodeFieldType(f, decodeFieldChildren(f, dictionaries, dictionaryFields));
         field = new Field(f.name()!, type, f.nullable(), decodeCustomMetadata(f));
@@ -363,7 +363,7 @@ function decodeField(f: _Field, dictionaries?: Map<number, DataType>, dictionary
     else if (!dictionaries.has(id = dictMeta.id().low)) {
         // a dictionary index defaults to signed 32 bit int if unspecified
         keys = (keys = dictMeta.indexType()) ? decodeIndexType(keys) as TKeys : new Int32();
-        dictionaries.set(id, type = decodeFieldType(f, decodeFieldChildren(f)));
+        dictionaries.set(id, type = decodeFieldType(f, decodeFieldChildren(f, dictionaries, dictionaryFields)));
         dictType = new Dictionary(type, keys, id, dictMeta.isOrdered());
         dictField = new Field(f.name()!, dictType, f.nullable(), decodeCustomMetadata(f));
         dictionaryFields.set(id, [field = dictField]);