You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by em...@apache.org on 2019/06/14 05:52:53 UTC

[arrow] branch master updated: ARROW-1278: [Integration] Adding integration tests for fixed_size_list

This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new d20963d  ARROW-1278: [Integration] Adding integration tests for fixed_size_list
d20963d is described below

commit d20963def802bb18a7ee3d4f2609b192d4a49760
Author: Benjamin Kietzman <be...@gmail.com>
AuthorDate: Thu Jun 13 22:52:24 2019 -0700

    ARROW-1278: [Integration] Adding integration tests for fixed_size_list
    
    Adds integration tests for fixed_size_list.
    Also adds support for fixed_size_list to RecordBatchSerializer, which was omitted in #4278.
    
    Author: Benjamin Kietzman <be...@gmail.com>
    
    Closes #4309 from bkietz/1278-integration-tests-for-fixed-size-list and squashes the following commits:
    
    8b356f34c <Benjamin Kietzman> revert removal of ninja-build from dockerfile
    e7ed00143 <Benjamin Kietzman> fix flake8 error
    8ab4efcfb <Benjamin Kietzman> Adding integration tests for fixed_size_list
---
 cpp/src/arrow/ipc/writer.cc                        | 10 +++++
 integration/integration_test.py                    | 43 ++++++++++++++++++++++
 .../apache/arrow/vector/ipc/ArrowFileReader.java   |  2 +-
 3 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/cpp/src/arrow/ipc/writer.cc b/cpp/src/arrow/ipc/writer.cc
index 8917410..37927a4 100644
--- a/cpp/src/arrow/ipc/writer.cc
+++ b/cpp/src/arrow/ipc/writer.cc
@@ -346,6 +346,16 @@ class RecordBatchSerializer : public ArrayVisitor {
 
   Status Visit(const MapArray& array) override { return VisitList(array); }
 
+  Status Visit(const FixedSizeListArray& array) override {
+    --max_recursion_depth_;
+    auto size = array.list_type()->list_size();
+    auto values = array.values()->Slice(array.offset() * size, array.length() * size);
+
+    RETURN_NOT_OK(VisitArray(*values));
+    ++max_recursion_depth_;
+    return Status::OK();
+  }
+
   Status Visit(const StructArray& array) override {
     --max_recursion_depth_;
     for (int i = 0; i < array.num_fields(); ++i) {
diff --git a/integration/integration_test.py b/integration/integration_test.py
index 9c6317b..7b8e562 100644
--- a/integration/integration_test.py
+++ b/integration/integration_test.py
@@ -765,6 +765,47 @@ class MapColumn(Column):
         return [self.pairs.get_json()]
 
 
+class FixedSizeListType(DataType):
+
+    def __init__(self, name, value_type, list_size, nullable=True):
+        super(FixedSizeListType, self).__init__(name, nullable=nullable)
+        self.value_type = value_type
+        self.list_size = list_size
+
+    def _get_type(self):
+        return OrderedDict([
+            ('name', 'fixedsizelist'),
+            ('listSize', self.list_size)
+        ])
+
+    def _get_children(self):
+        return [self.value_type.get_json()]
+
+    def generate_column(self, size, name=None):
+        is_valid = self._make_is_valid(size)
+        values = self.value_type.generate_column(size * self.list_size)
+
+        if name is None:
+            name = self.name
+        return FixedSizeListColumn(name, size, is_valid, values)
+
+
+class FixedSizeListColumn(Column):
+
+    def __init__(self, name, count, is_valid, values):
+        super(FixedSizeListColumn, self).__init__(name, count)
+        self.is_valid = is_valid
+        self.values = values
+
+    def _get_buffers(self):
+        return [
+            ('VALIDITY', [int(v) for v in self.is_valid])
+        ]
+
+    def _get_children(self):
+        return [self.values.get_json()]
+
+
 class StructType(DataType):
 
     def __init__(self, name, field_types, nullable=True):
@@ -1032,6 +1073,8 @@ def generate_map_case():
 def generate_nested_case():
     fields = [
         ListType('list_nullable', get_field('item', 'int32')),
+        FixedSizeListType('fixedsizelist_nullable',
+                          get_field('item', 'int32'), 4),
         StructType('struct_nullable', [get_field('f1', 'int32'),
                                        get_field('f2', 'utf8')]),
 
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowFileReader.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowFileReader.java
index aa25abd..e9ffaef 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowFileReader.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowFileReader.java
@@ -142,7 +142,7 @@ public class ArrowFileReader extends ArrowReader {
     ensureInitialized();
     int blockIndex = footer.getRecordBatches().indexOf(block);
     if (blockIndex == -1) {
-      throw new IllegalArgumentException("Arrow bock does not exist in record batches: " + block);
+      throw new IllegalArgumentException("Arrow block does not exist in record batches: " + block);
     }
     currentRecordBatch = blockIndex;
     return loadNextBatch();