You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2016/12/09 17:04:31 UTC

arrow git commit: ARROW-400: set struct length on load

Repository: arrow
Updated Branches:
  refs/heads/master d06c49144 -> 14ed1be2d


ARROW-400: set struct length on load

Adds unit test, closes #233

Author: Julien Le Dem <ju...@dremio.com>
Author: Wes McKinney <we...@twosigma.com>

Closes #234 from wesm/ARROW-400 and squashes the following commits:

f516ba1 [Wes McKinney] Add unit test for ARROW-400
741ff71 [Julien Le Dem] ARROW-400: set struct length on json load


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/14ed1be2
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/14ed1be2
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/14ed1be2

Branch: refs/heads/master
Commit: 14ed1be2d89fedc31f4015456cda28216f926dcc
Parents: d06c491
Author: Julien Le Dem <ju...@dremio.com>
Authored: Fri Dec 9 12:04:25 2016 -0500
Committer: Wes McKinney <we...@twosigma.com>
Committed: Fri Dec 9 12:04:25 2016 -0500

----------------------------------------------------------------------
 integration/data/struct_example.json            | 237 +++++++++++++++++++
 .../apache/arrow/vector/complex/MapVector.java  |   2 +-
 .../arrow/vector/file/json/JsonFileReader.java  |   4 +
 .../arrow/vector/file/json/TestJSONFile.java    |  20 ++
 4 files changed, 262 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/14ed1be2/integration/data/struct_example.json
----------------------------------------------------------------------
diff --git a/integration/data/struct_example.json b/integration/data/struct_example.json
new file mode 100644
index 0000000..3ea062d
--- /dev/null
+++ b/integration/data/struct_example.json
@@ -0,0 +1,237 @@
+{
+  "schema": {
+    "fields": [
+      {
+        "name": "struct_nullable",
+        "type": {
+          "name": "struct"
+        },
+        "nullable": true,
+        "children": [
+          {
+            "name": "f1",
+            "type": {
+              "name": "int",
+              "isSigned": true,
+              "bitWidth": 32
+            },
+            "nullable": true,
+            "children": [],
+            "typeLayout": {
+              "vectors": [
+                {
+                  "type": "VALIDITY",
+                  "typeBitWidth": 1
+                },
+                {
+                  "type": "DATA",
+                  "typeBitWidth": 32
+                }
+              ]
+            }
+          },
+          {
+            "name": "f2",
+            "type": {
+              "name": "utf8"
+            },
+            "nullable": true,
+            "children": [],
+            "typeLayout": {
+              "vectors": [
+                {
+                  "type": "VALIDITY",
+                  "typeBitWidth": 1
+                },
+                {
+                  "type": "OFFSET",
+                  "typeBitWidth": 32
+                },
+                {
+                  "type": "DATA",
+                  "typeBitWidth": 8
+                }
+              ]
+            }
+          }
+        ],
+        "typeLayout": {
+          "vectors": [
+            {
+              "type": "VALIDITY",
+              "typeBitWidth": 1
+            }
+          ]
+        }
+      }
+    ]
+  },
+  "batches": [
+    {
+      "count": 7,
+      "columns": [
+        {
+          "name": "struct_nullable",
+          "count": 7,
+          "VALIDITY": [
+            0,
+            1,
+            1,
+            1,
+            0,
+            1,
+            0
+          ],
+          "children": [
+            {
+              "name": "f1",
+              "count": 7,
+              "VALIDITY": [
+                1,
+                0,
+                1,
+                1,
+                1,
+                0,
+                0
+              ],
+              "DATA": [
+                1402032511,
+                290876774,
+                137773603,
+                410361374,
+                1959836418,
+                1995074679,
+                -163525262
+              ]
+            },
+            {
+              "name": "f2",
+              "count": 7,
+              "VALIDITY": [
+                0,
+                1,
+                1,
+                1,
+                0,
+                1,
+                0
+              ],
+              "OFFSET": [
+                0,
+                0,
+                7,
+                14,
+                21,
+                21,
+                28,
+                28
+              ],
+              "DATA": [
+                "",
+                "MhRNxD4",
+                "3F9HBxK",
+                "aVd88fp",
+                "",
+                "3loZrRf",
+                ""
+              ]
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "count": 10,
+      "columns": [
+        {
+          "name": "struct_nullable",
+          "count": 10,
+          "VALIDITY": [
+            0,
+            1,
+            1,
+            0,
+            1,
+            0,
+            0,
+            1,
+            1,
+            1
+          ],
+          "children": [
+            {
+              "name": "f1",
+              "count": 10,
+              "VALIDITY": [
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                1,
+                0,
+                0,
+                0
+              ],
+              "DATA": [
+                -2041500147,
+                1715692943,
+                -35444996,
+                1425496657,
+                112765084,
+                1760754983,
+                413888857,
+                2039738337,
+                -1924327700,
+                670528518
+              ]
+            },
+            {
+              "name": "f2",
+              "count": 10,
+              "VALIDITY": [
+                1,
+                0,
+                0,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                0
+              ],
+              "OFFSET": [
+                0,
+                7,
+                7,
+                7,
+                14,
+                21,
+                28,
+                35,
+                42,
+                49,
+                49
+              ],
+              "DATA": [
+                "AS5oARE",
+                "",
+                "",
+                "JGdagcX",
+                "78SLiRw",
+                "vbGf7OY",
+                "5uh5fTs",
+                "0ilsf82",
+                "LjS9MbU",
+                ""
+              ]
+            }
+          ]
+        }
+      ]
+    }
+  ]
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/arrow/blob/14ed1be2/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java
index c2f216b..31a1bb7 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java
@@ -50,7 +50,7 @@ public class MapVector extends AbstractMapVector {
   private final SingleMapReaderImpl reader = new SingleMapReaderImpl(this);
   private final Accessor accessor = new Accessor();
   private final Mutator mutator = new Mutator();
-  int valueCount;
+  public int valueCount;
 
   public MapVector(String name, BufferAllocator allocator, CallBack callBack) {
     super(name, allocator, callBack);

http://git-wip-us.apache.org/repos/asf/arrow/blob/14ed1be2/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java b/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java
index 26dd3f6..152867c 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java
@@ -47,6 +47,7 @@ import org.apache.arrow.vector.ValueVector;
 import org.apache.arrow.vector.ValueVector.Mutator;
 import org.apache.arrow.vector.VarCharVector;
 import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.complex.NullableMapVector;
 import org.apache.arrow.vector.schema.ArrowVectorType;
 import org.apache.arrow.vector.types.pojo.Field;
 import org.apache.arrow.vector.types.pojo.Schema;
@@ -153,6 +154,9 @@ public class JsonFileReader implements AutoCloseable {
         }
         readToken(END_ARRAY);
       }
+      if (vector instanceof NullableMapVector) {
+        ((NullableMapVector)vector).valueCount = count;
+      }
     }
     readToken(END_OBJECT);
   }

http://git-wip-us.apache.org/repos/asf/arrow/blob/14ed1be2/java/vector/src/test/java/org/apache/arrow/vector/file/json/TestJSONFile.java
----------------------------------------------------------------------
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/file/json/TestJSONFile.java b/java/vector/src/test/java/org/apache/arrow/vector/file/json/TestJSONFile.java
index 7d25003..3720a13 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/file/json/TestJSONFile.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/file/json/TestJSONFile.java
@@ -21,11 +21,13 @@ import java.io.File;
 import java.io.IOException;
 
 import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.FieldVector;
 import org.apache.arrow.vector.VectorSchemaRoot;
 import org.apache.arrow.vector.complex.MapVector;
 import org.apache.arrow.vector.complex.NullableMapVector;
 import org.apache.arrow.vector.file.BaseFileTest;
 import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.Assert;
 import org.junit.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -117,4 +119,22 @@ public class TestJSONFile extends BaseFileTest {
     }
   }
 
+  @Test
+  public void testSetStructLength() throws IOException {
+    File file = new File("../../integration/data/struct_example.json");
+    try (
+            BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE);
+            // JsonFileReader implements AutoCloseable: declare it as a resource so it is closed before its allocator
+            JsonFileReader reader = new JsonFileReader(file, readerAllocator);
+    ) {
+      Schema schema = reader.start();
+      LOGGER.debug("reading schema: " + schema);
+      // ARROW-400 check: the struct vector must report the 7 rows declared by the first batch of the fixture
+      try (VectorSchemaRoot root = reader.read();) {
+        FieldVector vector = root.getVector("struct_nullable");
+        Assert.assertEquals(7, vector.getAccessor().getValueCount());
+      }
+    }
+  }
+
 }