You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ju...@apache.org on 2016/11/09 16:55:56 UTC
arrow git commit: ARROW-372: json vector serialization format
Repository: arrow
Updated Branches:
refs/heads/master 6996c17f7 -> 4fa7ac4f6
ARROW-372: json vector serialization format
This format serializes the vectors in JSON.
It is not a generic JSON-to-Arrow converter but rather a human-readable version of the vectors to help with tests.
Author: Julien Le Dem <ju...@dremio.com>
Closes #201 from julienledem/json_file and squashes the following commits:
2e63bec [Julien Le Dem] add missing license
5588729 [Julien Le Dem] refactor tests, improve format
5ef5356 [Julien Le Dem] improve format to allow empty column name
746430c [Julien Le Dem] ARROW-372: Create JSON arrow file format for integration tests
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/4fa7ac4f
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/4fa7ac4f
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/4fa7ac4f
Branch: refs/heads/master
Commit: 4fa7ac4f6ca30c34a73fb84d9d56d54aed96491b
Parents: 6996c17
Author: Julien Le Dem <ju...@dremio.com>
Authored: Wed Nov 9 08:55:51 2016 -0800
Committer: Julien Le Dem <ju...@dremio.com>
Committed: Wed Nov 9 08:55:51 2016 -0800
----------------------------------------------------------------------
.../arrow/vector/file/json/JsonFileReader.java | 223 +++++++++++++++++++
.../arrow/vector/file/json/JsonFileWriter.java | 167 ++++++++++++++
.../apache/arrow/vector/file/BaseFileTest.java | 220 ++++++++++++++++++
.../apache/arrow/vector/file/TestArrowFile.java | 200 +----------------
.../arrow/vector/file/json/TestJSONFile.java | 120 ++++++++++
5 files changed, 741 insertions(+), 189 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/4fa7ac4f/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java b/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java
new file mode 100644
index 0000000..859a3a0
--- /dev/null
+++ b/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java
@@ -0,0 +1,223 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+package org.apache.arrow.vector.file.json;
+
+import static com.fasterxml.jackson.core.JsonToken.END_ARRAY;
+import static com.fasterxml.jackson.core.JsonToken.END_OBJECT;
+import static com.fasterxml.jackson.core.JsonToken.START_ARRAY;
+import static com.fasterxml.jackson.core.JsonToken.START_OBJECT;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.BitVector;
+import org.apache.arrow.vector.BufferBacked;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.SmallIntVector;
+import org.apache.arrow.vector.TimeStampVector;
+import org.apache.arrow.vector.TinyIntVector;
+import org.apache.arrow.vector.UInt1Vector;
+import org.apache.arrow.vector.UInt2Vector;
+import org.apache.arrow.vector.UInt4Vector;
+import org.apache.arrow.vector.UInt8Vector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.ValueVector.Mutator;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.schema.ArrowVectorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+
+import com.fasterxml.jackson.core.JsonParseException;
+import com.fasterxml.jackson.core.JsonParser;
+import com.fasterxml.jackson.core.JsonToken;
+import com.fasterxml.jackson.databind.MappingJsonFactory;
+import com.google.common.base.Objects;
+
+/**
+ * Reads vectors back from the human readable JSON file layout: a top level object with a
+ * "schema" field followed by a "batches" array holding one object per record batch.
+ *
+ * <p>Usage: call {@link #start()} once, {@link #read()} once per batch, then {@link #close()}.
+ * Field names and structural tokens are checked strictly and in order; any mismatch raises
+ * an {@link IllegalStateException}.
+ */
+public class JsonFileReader implements AutoCloseable {
+  private final File inputFile;
+  private final JsonParser parser;
+  private final BufferAllocator allocator;
+  /** Schema read by {@link #start()}; null until then. */
+  private Schema schema;
+
+  /**
+   * Opens a JSON parser on the given file.
+   *
+   * @param inputFile the JSON file to read
+   * @param allocator passed to every {@link VectorSchemaRoot} created by {@link #read()}
+   * @throws JsonParseException declared by the underlying parser factory
+   * @throws IOException if the parser can not be created for the file
+   */
+  public JsonFileReader(File inputFile, BufferAllocator allocator) throws JsonParseException, IOException {
+    this.inputFile = inputFile;
+    this.allocator = allocator;
+    MappingJsonFactory jsonFactory = new MappingJsonFactory();
+    this.parser = jsonFactory.createParser(inputFile);
+  }
+
+  /**
+   * Reads the file header: the opening object, the "schema" field and the opening of the
+   * "batches" array. After this call the parser is positioned on the first batch.
+   *
+   * @return the schema found in the file
+   * @throws IllegalStateException if the header does not have the expected structure
+   */
+  public Schema start() throws JsonParseException, IOException {
+    readToken(START_OBJECT);
+    this.schema = readNextField("schema", Schema.class);
+    nextFieldIs("batches");
+    readToken(START_ARRAY);
+    return schema;
+  }
+
+  /**
+   * Reads the next record batch object: a "count" field followed by a "columns" array
+   * containing one entry per top level field of the schema, in schema order.
+   *
+   * @return a newly created root holding the batch; it is not retained by this reader
+   * @throws IllegalStateException if the batch does not have the expected structure
+   */
+  public VectorSchemaRoot read() throws IOException {
+    VectorSchemaRoot recordBatch = new VectorSchemaRoot(schema, allocator);
+    readToken(START_OBJECT);
+    int count = readNextField("count", Integer.class);
+    recordBatch.setRowCount(count);
+    nextFieldIs("columns");
+    readToken(START_ARRAY);
+    for (Field field : schema.getFields()) {
+      FieldVector vector = recordBatch.getVector(field.getName());
+      readVector(field, vector);
+    }
+    readToken(END_ARRAY);
+    readToken(END_OBJECT);
+    return recordBatch;
+  }
+
+  /**
+   * Reads one column object into {@code vector}: "name", "count", then one array per inner
+   * vector (named after its vector type) and, when the field has children, a "children"
+   * array read recursively.
+   *
+   * @param field the field describing the expected column
+   * @param vector the vector to populate
+   * @throws IllegalArgumentException if the column name does not match the field, or if the
+   *     field layout and the vector disagree on the number of inner vectors or children
+   */
+  private void readVector(Field field, FieldVector vector) throws JsonParseException, IOException {
+    List<ArrowVectorType> vectorTypes = field.getTypeLayout().getVectorTypes();
+    List<BufferBacked> fieldInnerVectors = vector.getFieldInnerVectors();
+    if (vectorTypes.size() != fieldInnerVectors.size()) {
+      throw new IllegalArgumentException("vector types and inner vectors are not the same size: " + vectorTypes.size() + " != " + fieldInnerVectors.size());
+    }
+    readToken(START_OBJECT);
+    String name = readNextField("name", String.class);
+    // Objects.equal is null safe on both sides
+    if (!Objects.equal(field.getName(), name)) {
+      throw new IllegalArgumentException("Expected field " + field.getName() + " but got " + name);
+    }
+    int count = readNextField("count", Integer.class);
+    for (int v = 0; v < vectorTypes.size(); v++) {
+      ArrowVectorType vectorType = vectorTypes.get(v);
+      BufferBacked innerVector = fieldInnerVectors.get(v);
+      nextFieldIs(vectorType.getName());
+      readToken(START_ARRAY);
+      ValueVector valueVector = (ValueVector)innerVector;
+      valueVector.allocateNew();
+      Mutator mutator = valueVector.getMutator();
+      mutator.setValueCount(count);
+      // every inner vector array is expected to hold exactly "count" values
+      for (int i = 0; i < count; i++) {
+        parser.nextToken();
+        setValueFromParser(valueVector, i);
+      }
+      readToken(END_ARRAY);
+    }
+    // the "children" array is only expected when the field declares children
+    List<Field> fields = field.getChildren();
+    if (!fields.isEmpty()) {
+      List<FieldVector> vectorChildren = vector.getChildrenFromFields();
+      if (fields.size() != vectorChildren.size()) {
+        throw new IllegalArgumentException("fields and children are not the same size: " + fields.size() + " != " + vectorChildren.size());
+      }
+      nextFieldIs("children");
+      readToken(START_ARRAY);
+      for (int i = 0; i < fields.size(); i++) {
+        Field childField = fields.get(i);
+        FieldVector childVector = vectorChildren.get(i);
+        readVector(childField, childVector);
+      }
+      readToken(END_ARRAY);
+    }
+    readToken(END_OBJECT);
+  }
+
+  /**
+   * Stores the value at the parser's current token into slot {@code i} of the vector,
+   * using the Java type that corresponds to the vector's minor type.
+   *
+   * @param valueVector the inner vector to write to
+   * @param i the index of the value to set
+   * @throws UnsupportedOperationException for minor types that are not handled here
+   */
+  private void setValueFromParser(ValueVector valueVector, int i) throws IOException {
+    switch (valueVector.getMinorType()) {
+    case BIT:
+      ((BitVector)valueVector).getMutator().set(i, parser.readValueAs(Boolean.class) ? 1 : 0);
+      break;
+    case TINYINT:
+      ((TinyIntVector)valueVector).getMutator().set(i, parser.readValueAs(Integer.class));
+      break;
+    case SMALLINT:
+      ((SmallIntVector)valueVector).getMutator().set(i, parser.readValueAs(Integer.class));
+      break;
+    case INT:
+      ((IntVector)valueVector).getMutator().set(i, parser.readValueAs(Integer.class));
+      break;
+    case BIGINT:
+      ((BigIntVector)valueVector).getMutator().set(i, parser.readValueAs(Long.class));
+      break;
+    case UINT1:
+      ((UInt1Vector)valueVector).getMutator().set(i, parser.readValueAs(Integer.class));
+      break;
+    case UINT2:
+      ((UInt2Vector)valueVector).getMutator().set(i, parser.readValueAs(Integer.class));
+      break;
+    case UINT4:
+      ((UInt4Vector)valueVector).getMutator().set(i, parser.readValueAs(Integer.class));
+      break;
+    case UINT8:
+      ((UInt8Vector)valueVector).getMutator().set(i, parser.readValueAs(Long.class));
+      break;
+    case FLOAT4:
+      ((Float4Vector)valueVector).getMutator().set(i, parser.readValueAs(Float.class));
+      break;
+    case FLOAT8:
+      ((Float8Vector)valueVector).getMutator().set(i, parser.readValueAs(Double.class));
+      break;
+    case VARCHAR:
+      ((VarCharVector)valueVector).getMutator().setSafe(i, parser.readValueAs(String.class).getBytes(UTF_8));
+      break;
+    case TIMESTAMP:
+      ((TimeStampVector)valueVector).getMutator().set(i, parser.readValueAs(Long.class));
+      break;
+    default:
+      throw new UnsupportedOperationException("minor type: " + valueVector.getMinorType());
+    }
+  }
+
+  /**
+   * Checks that the "batches" array and the top level object end here, then closes the parser.
+   * The parser is closed even when that check fails.
+   *
+   * @throws IllegalStateException if the next tokens are not END_ARRAY followed by END_OBJECT,
+   *     for instance when some batches have not been read
+   */
+  @Override
+  public void close() throws IOException {
+    try {
+      readToken(END_ARRAY);
+      readToken(END_OBJECT);
+    } finally {
+      // do not leak the underlying parser when the trailing tokens are unexpected
+      parser.close();
+    }
+  }
+
+  /**
+   * Checks that the next field has the expected name and deserializes its value.
+   *
+   * @param expectedFieldName the name the next field must have
+   * @param c the class to deserialize the field value as
+   * @return the deserialized value
+   */
+  private <T> T readNextField(String expectedFieldName, Class<T> c) throws IOException, JsonParseException {
+    nextFieldIs(expectedFieldName);
+    // move from the field name to the first token of its value
+    parser.nextToken();
+    return parser.readValueAs(c);
+  }
+
+  /**
+   * Advances the parser and checks that it now points to a field with the given name.
+   *
+   * @throws IllegalStateException if the next token is not a field with that name
+   */
+  private void nextFieldIs(String expectedFieldName) throws IOException, JsonParseException {
+    String name = parser.nextFieldName();
+    if (name == null || !name.equals(expectedFieldName)) {
+      throw new IllegalStateException("Expected " + expectedFieldName + " but got " + name);
+    }
+  }
+
+  /**
+   * Advances the parser and checks that the token read is the expected one.
+   *
+   * @throws IllegalStateException if another token (or the end of input) is found
+   */
+  private void readToken(JsonToken expected) throws JsonParseException, IOException {
+    JsonToken t = parser.nextToken();
+    if (t != expected) {
+      throw new IllegalStateException("Expected " + expected + " but got " + t);
+    }
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/arrow/blob/4fa7ac4f/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileWriter.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileWriter.java b/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileWriter.java
new file mode 100644
index 0000000..47c1a7d
--- /dev/null
+++ b/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileWriter.java
@@ -0,0 +1,167 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+package org.apache.arrow.vector.file.json;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.arrow.vector.BitVector;
+import org.apache.arrow.vector.BufferBacked;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.TimeStampVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.ValueVector.Accessor;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.schema.ArrowVectorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+
+import com.fasterxml.jackson.core.JsonEncoding;
+import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.core.util.DefaultPrettyPrinter;
+import com.fasterxml.jackson.core.util.DefaultPrettyPrinter.NopIndenter;
+import com.fasterxml.jackson.databind.MappingJsonFactory;
+
+/**
+ * Writes vectors to a human readable JSON file: a top level object with a "schema" field
+ * followed by a "batches" array holding one object per record batch.
+ *
+ * <p>Usage: call {@link #start(Schema)} once, {@link #write(VectorSchemaRoot)} once per
+ * batch, then {@link #close()}.
+ */
+public class JsonFileWriter implements AutoCloseable {
+
+  /**
+   * Immutable writer options. Obtain the defaults from {@link JsonFileWriter#config()} and
+   * derive variations from them, e.g. {@code config().pretty(true)}.
+   */
+  public static final class JSONWriteConfig {
+    private final boolean pretty;
+
+    private JSONWriteConfig(boolean pretty) {
+      this.pretty = pretty;
+    }
+
+    private JSONWriteConfig() {
+      this.pretty = false;
+    }
+
+    /**
+     * @param pretty whether the output should be pretty printed
+     * @return a new configuration with that setting; this instance is left unchanged
+     */
+    public JSONWriteConfig pretty(boolean pretty) {
+      return new JSONWriteConfig(pretty);
+    }
+  }
+
+  /**
+   * @return the default configuration (pretty printing disabled)
+   */
+  public static JSONWriteConfig config() {
+    return new JSONWriteConfig();
+  }
+
+  private final JsonGenerator generator;
+  /** Schema given to {@link #start(Schema)}; null until then. */
+  private Schema schema;
+
+  /**
+   * Creates a writer with the default configuration.
+   *
+   * @param outputFile the file to write to
+   * @throws IOException if the generator can not be created for the file
+   */
+  public JsonFileWriter(File outputFile) throws IOException {
+    this(outputFile, config());
+  }
+
+  /**
+   * Creates a writer producing UTF-8 encoded JSON.
+   *
+   * @param outputFile the file to write to
+   * @param config the writer options
+   * @throws IOException if the generator can not be created for the file
+   */
+  public JsonFileWriter(File outputFile, JSONWriteConfig config) throws IOException {
+    MappingJsonFactory jsonFactory = new MappingJsonFactory();
+    this.generator = jsonFactory.createGenerator(outputFile, JsonEncoding.UTF8);
+    if (config.pretty) {
+      DefaultPrettyPrinter prettyPrinter = new DefaultPrettyPrinter();
+      // arrays get the no-op indenter; objects keep the pretty printer's default
+      prettyPrinter.indentArraysWith(NopIndenter.instance);
+      this.generator.setPrettyPrinter(prettyPrinter);
+    }
+  }
+
+  /**
+   * Writes the file header: the opening object, the "schema" field and the opening of the
+   * "batches" array.
+   *
+   * @param schema the schema every batch written afterwards must have
+   */
+  public void start(Schema schema) throws IOException {
+    this.schema = schema;
+    generator.writeStartObject();
+    generator.writeObjectField("schema", schema);
+    generator.writeArrayFieldStart("batches");
+  }
+
+  /**
+   * Writes one record batch as an object with a "count" field and a "columns" array
+   * containing one entry per top level field of the schema, in schema order.
+   *
+   * @param recordBatch the batch to write
+   * @throws IllegalArgumentException if the batch schema differs from the one given to
+   *     {@link #start(Schema)}
+   */
+  public void write(VectorSchemaRoot recordBatch) throws IOException {
+    if (!recordBatch.getSchema().equals(schema)) {
+      throw new IllegalArgumentException("record batches must have the same schema: " + schema);
+    }
+    generator.writeStartObject();
+    generator.writeObjectField("count", recordBatch.getRowCount());
+    generator.writeArrayFieldStart("columns");
+    for (Field field : schema.getFields()) {
+      FieldVector vector = recordBatch.getVector(field.getName());
+      writeVector(field, vector);
+    }
+    generator.writeEndArray();
+    generator.writeEndObject();
+  }
+
+  /**
+   * Writes one column object: "name", "count", then one array per inner vector (named after
+   * its vector type) and, when the field has children, a "children" array written recursively.
+   *
+   * @param field the field describing the column
+   * @param vector the vector holding the column values
+   * @throws IllegalArgumentException if the field layout and the vector disagree on the
+   *     number of inner vectors or children
+   */
+  private void writeVector(Field field, FieldVector vector) throws IOException {
+    List<ArrowVectorType> vectorTypes = field.getTypeLayout().getVectorTypes();
+    List<BufferBacked> fieldInnerVectors = vector.getFieldInnerVectors();
+    if (vectorTypes.size() != fieldInnerVectors.size()) {
+      throw new IllegalArgumentException("vector types and inner vectors are not the same size: " + vectorTypes.size() + " != " + fieldInnerVectors.size());
+    }
+    generator.writeStartObject();
+    generator.writeObjectField("name", field.getName());
+    int valueCount = vector.getAccessor().getValueCount();
+    generator.writeObjectField("count", valueCount);
+    for (int v = 0; v < vectorTypes.size(); v++) {
+      ArrowVectorType vectorType = vectorTypes.get(v);
+      BufferBacked innerVector = fieldInnerVectors.get(v);
+      generator.writeArrayFieldStart(vectorType.getName());
+      ValueVector valueVector = (ValueVector)innerVector;
+      // every inner vector array receives exactly "count" values
+      for (int i = 0; i < valueCount; i++) {
+        writeValueToGenerator(valueVector, i);
+      }
+      generator.writeEndArray();
+    }
+    List<Field> fields = field.getChildren();
+    List<FieldVector> children = vector.getChildrenFromFields();
+    if (fields.size() != children.size()) {
+      throw new IllegalArgumentException("fields and children are not the same size: " + fields.size() + " != " + children.size());
+    }
+    // the "children" array is only emitted when the field declares children
+    if (fields.size() > 0) {
+      generator.writeArrayFieldStart("children");
+      for (int i = 0; i < fields.size(); i++) {
+        Field childField = fields.get(i);
+        FieldVector childVector = children.get(i);
+        writeVector(childField, childVector);
+      }
+      generator.writeEndArray();
+    }
+    generator.writeEndObject();
+  }
+
+  /**
+   * Writes the value at index {@code i} of the vector as a JSON value.
+   *
+   * @param valueVector the inner vector to read from
+   * @param i the index of the value to write
+   */
+  private void writeValueToGenerator(ValueVector valueVector, int i) throws IOException {
+    switch (valueVector.getMinorType()) {
+    case TIMESTAMP:
+      // written from the raw accessor value rather than from getObject()
+      generator.writeNumber(((TimeStampVector)valueVector).getAccessor().get(i));
+      break;
+    case BIT:
+      generator.writeNumber(((BitVector)valueVector).getAccessor().get(i));
+      break;
+    default:
+      // TODO: each type
+      Accessor accessor = valueVector.getAccessor();
+      Object value = accessor.getObject(i);
+      if (value instanceof Number || value instanceof Boolean) {
+        generator.writeObject(value);
+      } else {
+        // anything else is written as its string representation
+        generator.writeObject(value.toString());
+      }
+      break;
+    }
+  }
+
+  /**
+   * Ends the "batches" array and the top level object, then closes the generator.
+   * The generator is closed even when writing the closing tokens fails.
+   */
+  @Override
+  public void close() throws IOException {
+    try {
+      generator.writeEndArray();
+      generator.writeEndObject();
+    } finally {
+      // do not leak the underlying generator when the closing tokens can not be written
+      generator.close();
+    }
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/arrow/blob/4fa7ac4f/java/vector/src/test/java/org/apache/arrow/vector/file/BaseFileTest.java
----------------------------------------------------------------------
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/file/BaseFileTest.java b/java/vector/src/test/java/org/apache/arrow/vector/file/BaseFileTest.java
new file mode 100644
index 0000000..6e577b5
--- /dev/null
+++ b/java/vector/src/test/java/org/apache/arrow/vector/file/BaseFileTest.java
@@ -0,0 +1,220 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.arrow.vector.file;
+
+import java.util.List;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.ValueVector.Accessor;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.complex.MapVector;
+import org.apache.arrow.vector.complex.NullableMapVector;
+import org.apache.arrow.vector.complex.impl.ComplexWriterImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.complex.writer.BaseWriter.ComplexWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter;
+import org.apache.arrow.vector.complex.writer.BigIntWriter;
+import org.apache.arrow.vector.complex.writer.IntWriter;
+import org.apache.arrow.vector.holders.NullableTimeStampHolder;
+import org.joda.time.DateTimeZone;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import io.netty.buffer.ArrowBuf;
+
+/**
+ * Base class for the file format tests: sets up the allocator and a fixed default time zone
+ * around each test and provides helpers to write sample data into vectors and to validate
+ * what was read back.
+ */
+public class BaseFileTest {
+  private static final Logger LOGGER = LoggerFactory.getLogger(BaseFileTest.class);
+  protected static final int COUNT = 10;
+  protected BufferAllocator allocator;
+
+  /** Default time zone captured when the test instance is created; restored in tearDown. */
+  private DateTimeZone defaultTimezone = DateTimeZone.getDefault();
+
+  /** Forces the default time zone to a +2 hours offset and creates a fresh allocator. */
+  @Before
+  public void init() {
+    DateTimeZone.setDefault(DateTimeZone.forOffsetHours(2));
+    allocator = new RootAllocator(Integer.MAX_VALUE);
+  }
+
+  /** Closes the allocator and restores the default time zone captured at creation. */
+  @After
+  public void tearDown() {
+    try {
+      allocator.close();
+    } finally {
+      // the default time zone is process wide: restore it even if closing the allocator throws
+      DateTimeZone.setDefault(defaultTimezone);
+    }
+  }
+
+  /**
+   * Checks the content written by {@link #writeData(int, MapVector)}: row i holds i in both
+   * the "int" and the "bigInt" vectors.
+   */
+  protected void validateContent(int count, VectorSchemaRoot root) {
+    for (int i = 0; i < count; i++) {
+      Assert.assertEquals(i, root.getVector("int").getAccessor().getObject(i));
+      Assert.assertEquals(Long.valueOf(i), root.getVector("bigInt").getAccessor().getObject(i));
+    }
+  }
+
+  /**
+   * Writes {@code count} rows with four columns: "int" (i, left unset when i % 5 == 3),
+   * "bigInt" (i), "list" (i % 3 varchar values) and "map" (a single "timestamp" entry set to i).
+   */
+  protected void writeComplexData(int count, MapVector parent) {
+    ArrowBuf varchar = newAbcBuffer();
+    try {
+      ComplexWriter writer = new ComplexWriterImpl("root", parent);
+      MapWriter rootWriter = writer.rootAsMap();
+      IntWriter intWriter = rootWriter.integer("int");
+      BigIntWriter bigIntWriter = rootWriter.bigInt("bigInt");
+      ListWriter listWriter = rootWriter.list("list");
+      MapWriter mapWriter = rootWriter.map("map");
+      for (int i = 0; i < count; i++) {
+        // leave some int values unset; validateComplexContent expects null there
+        if (i % 5 != 3) {
+          intWriter.setPosition(i);
+          intWriter.writeInt(i);
+        }
+        bigIntWriter.setPosition(i);
+        bigIntWriter.writeBigInt(i);
+        listWriter.setPosition(i);
+        listWriter.startList();
+        for (int j = 0; j < i % 3; j++) {
+          listWriter.varChar().writeVarChar(0, 3, varchar);
+        }
+        listWriter.endList();
+        mapWriter.setPosition(i);
+        mapWriter.start();
+        mapWriter.timeStamp("timestamp").writeTimeStamp(i);
+        mapWriter.end();
+      }
+      writer.setValueCount(count);
+    } finally {
+      // release the scratch buffer even when a write fails
+      varchar.release();
+    }
+  }
+
+  /** Logs, at debug level, the name of each vector followed by each of its values. */
+  public void printVectors(List<FieldVector> vectors) {
+    for (FieldVector vector : vectors) {
+      LOGGER.debug(vector.getField().getName());
+      Accessor accessor = vector.getAccessor();
+      int valueCount = accessor.getValueCount();
+      for (int i = 0; i < valueCount; i++) {
+        LOGGER.debug(String.valueOf(accessor.getObject(i)));
+      }
+    }
+  }
+
+  /**
+   * Checks the content written by {@link #writeComplexData(int, MapVector)}, including the
+   * row count and the null "int" values of the rows where i % 5 == 3.
+   */
+  protected void validateComplexContent(int count, VectorSchemaRoot root) {
+    Assert.assertEquals(count, root.getRowCount());
+    printVectors(root.getFieldVectors());
+    for (int i = 0; i < count; i++) {
+      Object intVal = root.getVector("int").getAccessor().getObject(i);
+      if (i % 5 != 3) {
+        Assert.assertEquals(i, intVal);
+      } else {
+        Assert.assertNull(intVal);
+      }
+      Assert.assertEquals(Long.valueOf(i), root.getVector("bigInt").getAccessor().getObject(i));
+      Assert.assertEquals(i % 3, ((List<?>)root.getVector("list").getAccessor().getObject(i)).size());
+      NullableTimeStampHolder h = new NullableTimeStampHolder();
+      FieldReader mapReader = root.getVector("map").getReader();
+      mapReader.setPosition(i);
+      mapReader.reader("timestamp").read(h);
+      Assert.assertEquals(i, h.value);
+    }
+  }
+
+  /** Writes {@code count} rows with two columns, "int" and "bigInt", both holding i. */
+  protected void writeData(int count, MapVector parent) {
+    ComplexWriter writer = new ComplexWriterImpl("root", parent);
+    MapWriter rootWriter = writer.rootAsMap();
+    IntWriter intWriter = rootWriter.integer("int");
+    BigIntWriter bigIntWriter = rootWriter.bigInt("bigInt");
+    for (int i = 0; i < count; i++) {
+      intWriter.setPosition(i);
+      intWriter.writeInt(i);
+      bigIntWriter.setPosition(i);
+      bigIntWriter.writeBigInt(i);
+    }
+    writer.setValueCount(count);
+  }
+
+  /**
+   * Checks the content written by {@link #writeUnionData(int, NullableMapVector)}: the kind
+   * of value expected in row i of the "union" vector depends on i % 4.
+   */
+  public void validateUnionData(int count, VectorSchemaRoot root) {
+    FieldReader unionReader = root.getVector("union").getReader();
+    for (int i = 0; i < count; i++) {
+      unionReader.setPosition(i);
+      switch (i % 4) {
+      case 0:
+        Assert.assertEquals(i, unionReader.readInteger().intValue());
+        break;
+      case 1:
+        Assert.assertEquals(i, unionReader.readLong().longValue());
+        break;
+      case 2:
+        Assert.assertEquals(i % 3, unionReader.size());
+        break;
+      case 3:
+        NullableTimeStampHolder h = new NullableTimeStampHolder();
+        unionReader.reader("timestamp").read(h);
+        Assert.assertEquals(i, h.value);
+        break;
+      }
+    }
+  }
+
+  /**
+   * Writes {@code count} rows into a single "union" column, cycling on i % 4 through an int
+   * (i), a bigint (i), a list of i % 3 varchar values and a map with a "timestamp" entry (i).
+   */
+  public void writeUnionData(int count, NullableMapVector parent) {
+    ArrowBuf varchar = newAbcBuffer();
+    try {
+      ComplexWriter writer = new ComplexWriterImpl("root", parent);
+      MapWriter rootWriter = writer.rootAsMap();
+      // four writers registered under the same field name
+      IntWriter intWriter = rootWriter.integer("union");
+      BigIntWriter bigIntWriter = rootWriter.bigInt("union");
+      ListWriter listWriter = rootWriter.list("union");
+      MapWriter mapWriter = rootWriter.map("union");
+      for (int i = 0; i < count; i++) {
+        switch (i % 4) {
+        case 0:
+          intWriter.setPosition(i);
+          intWriter.writeInt(i);
+          break;
+        case 1:
+          bigIntWriter.setPosition(i);
+          bigIntWriter.writeBigInt(i);
+          break;
+        case 2:
+          listWriter.setPosition(i);
+          listWriter.startList();
+          for (int j = 0; j < i % 3; j++) {
+            listWriter.varChar().writeVarChar(0, 3, varchar);
+          }
+          listWriter.endList();
+          break;
+        case 3:
+          mapWriter.setPosition(i);
+          mapWriter.start();
+          mapWriter.timeStamp("timestamp").writeTimeStamp(i);
+          mapWriter.end();
+          break;
+        }
+      }
+      writer.setValueCount(count);
+    } finally {
+      // release the scratch buffer even when a write fails
+      varchar.release();
+    }
+  }
+
+  /**
+   * Allocates a 3 byte buffer containing the bytes 'a', 'b', 'c', used as the varchar value
+   * by the write helpers. The caller must release it.
+   */
+  private ArrowBuf newAbcBuffer() {
+    ArrowBuf varchar = allocator.buffer(3);
+    varchar.readerIndex(0);
+    varchar.setByte(0, 'a');
+    varchar.setByte(1, 'b');
+    varchar.setByte(2, 'c');
+    varchar.writerIndex(3);
+    return varchar;
+  }
+}
http://git-wip-us.apache.org/repos/asf/arrow/blob/4fa7ac4f/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java
----------------------------------------------------------------------
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java b/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java
index e97bc14..c9e60ee 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java
@@ -27,53 +27,22 @@ import java.io.IOException;
import java.util.List;
import org.apache.arrow.memory.BufferAllocator;
-import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.FieldVector;
-import org.apache.arrow.vector.ValueVector.Accessor;
import org.apache.arrow.vector.VectorLoader;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.vector.VectorUnloader;
import org.apache.arrow.vector.complex.MapVector;
import org.apache.arrow.vector.complex.NullableMapVector;
-import org.apache.arrow.vector.complex.impl.ComplexWriterImpl;
-import org.apache.arrow.vector.complex.reader.FieldReader;
-import org.apache.arrow.vector.complex.writer.BaseWriter.ComplexWriter;
-import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter;
-import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter;
-import org.apache.arrow.vector.complex.writer.BigIntWriter;
-import org.apache.arrow.vector.complex.writer.IntWriter;
-import org.apache.arrow.vector.holders.NullableTimeStampHolder;
import org.apache.arrow.vector.schema.ArrowBuffer;
import org.apache.arrow.vector.schema.ArrowRecordBatch;
import org.apache.arrow.vector.types.pojo.Schema;
-import org.joda.time.DateTimeZone;
-import org.junit.After;
import org.junit.Assert;
-import org.junit.Before;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import io.netty.buffer.ArrowBuf;
-
-public class TestArrowFile {
+public class TestArrowFile extends BaseFileTest {
private static final Logger LOGGER = LoggerFactory.getLogger(TestArrowFile.class);
- private static final int COUNT = 10;
- private BufferAllocator allocator;
-
- private DateTimeZone defaultTimezone = DateTimeZone.getDefault();
-
- @Before
- public void init() {
- DateTimeZone.setDefault(DateTimeZone.forOffsetHours(2));
- allocator = new RootAllocator(Integer.MAX_VALUE);
- }
-
- @After
- public void tearDown() {
- allocator.close();
- DateTimeZone.setDefault(defaultTimezone);
- }
@Test
public void testWrite() throws IOException {
@@ -101,54 +70,6 @@ public class TestArrowFile {
}
}
- private void writeComplexData(int count, MapVector parent) {
- ArrowBuf varchar = allocator.buffer(3);
- varchar.readerIndex(0);
- varchar.setByte(0, 'a');
- varchar.setByte(1, 'b');
- varchar.setByte(2, 'c');
- varchar.writerIndex(3);
- ComplexWriter writer = new ComplexWriterImpl("root", parent);
- MapWriter rootWriter = writer.rootAsMap();
- IntWriter intWriter = rootWriter.integer("int");
- BigIntWriter bigIntWriter = rootWriter.bigInt("bigInt");
- ListWriter listWriter = rootWriter.list("list");
- MapWriter mapWriter = rootWriter.map("map");
- for (int i = 0; i < count; i++) {
- intWriter.setPosition(i);
- intWriter.writeInt(i);
- bigIntWriter.setPosition(i);
- bigIntWriter.writeBigInt(i);
- listWriter.setPosition(i);
- listWriter.startList();
- for (int j = 0; j < i % 3; j++) {
- listWriter.varChar().writeVarChar(0, 3, varchar);
- }
- listWriter.endList();
- mapWriter.setPosition(i);
- mapWriter.start();
- mapWriter.timeStamp("timestamp").writeTimeStamp(i);
- mapWriter.end();
- }
- writer.setValueCount(count);
- varchar.release();
- }
-
-
- private void writeData(int count, MapVector parent) {
- ComplexWriter writer = new ComplexWriterImpl("root", parent);
- MapWriter rootWriter = writer.rootAsMap();
- IntWriter intWriter = rootWriter.integer("int");
- BigIntWriter bigIntWriter = rootWriter.bigInt("bigInt");
- for (int i = 0; i < count; i++) {
- intWriter.setPosition(i);
- intWriter.writeInt(i);
- bigIntWriter.setPosition(i);
- bigIntWriter.writeBigInt(i);
- }
- writer.setValueCount(count);
- }
-
@Test
public void testWriteRead() throws IOException {
File file = new File("target/mytest.arrow");
@@ -197,13 +118,6 @@ public class TestArrowFile {
}
}
- private void validateContent(int count, VectorSchemaRoot root) {
- for (int i = 0; i < count; i++) {
- Assert.assertEquals(i, root.getVector("int").getAccessor().getObject(i));
- Assert.assertEquals(Long.valueOf(i), root.getVector("bigInt").getAccessor().getObject(i));
- }
- }
-
@Test
public void testWriteReadComplex() throws IOException {
File file = new File("target/mytest_complex.arrow");
@@ -244,45 +158,6 @@ public class TestArrowFile {
}
}
- public void printVectors(List<FieldVector> vectors) {
- for (FieldVector vector : vectors) {
- LOGGER.debug(vector.getField().getName());
- Accessor accessor = vector.getAccessor();
- int valueCount = accessor.getValueCount();
- for (int i = 0; i < valueCount; i++) {
- LOGGER.debug(String.valueOf(accessor.getObject(i)));
- }
- }
- }
-
- private void validateComplexContent(int count, VectorSchemaRoot root) {
- Assert.assertEquals(count, root.getRowCount());
- printVectors(root.getFieldVectors());
- for (int i = 0; i < count; i++) {
- Assert.assertEquals(i, root.getVector("int").getAccessor().getObject(i));
- Assert.assertEquals(Long.valueOf(i), root.getVector("bigInt").getAccessor().getObject(i));
- Assert.assertEquals(i % 3, ((List<?>)root.getVector("list").getAccessor().getObject(i)).size());
- NullableTimeStampHolder h = new NullableTimeStampHolder();
- FieldReader mapReader = root.getVector("map").getReader();
- mapReader.setPosition(i);
- mapReader.reader("timestamp").read(h);
- Assert.assertEquals(i, h.value);
- }
- }
-
- private void write(FieldVector parent, File file) throws FileNotFoundException, IOException {
- VectorUnloader vectorUnloader = newVectorUnloader(parent);
- Schema schema = vectorUnloader.getSchema();
- LOGGER.debug("writing schema: " + schema);
- try (
- FileOutputStream fileOutputStream = new FileOutputStream(file);
- ArrowWriter arrowWriter = new ArrowWriter(fileOutputStream.getChannel(), schema);
- ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch();
- ) {
- arrowWriter.writeRecordBatch(recordBatch);
- }
- }
-
@Test
public void testWriteReadMultipleRBs() throws IOException {
File file = new File("target/mytest_multiple.arrow");
@@ -381,69 +256,16 @@ public class TestArrowFile {
}
}
- public void validateUnionData(int count, VectorSchemaRoot root) {
- FieldReader unionReader = root.getVector("union").getReader();
- for (int i = 0; i < count; i++) {
- unionReader.setPosition(i);
- switch (i % 4) {
- case 0:
- Assert.assertEquals(i, unionReader.readInteger().intValue());
- break;
- case 1:
- Assert.assertEquals(i, unionReader.readLong().longValue());
- break;
- case 2:
- Assert.assertEquals(i % 3, unionReader.size());
- break;
- case 3:
- NullableTimeStampHolder h = new NullableTimeStampHolder();
- unionReader.reader("timestamp").read(h);
- Assert.assertEquals(i, h.value);
- break;
- }
- }
- }
-
- public void writeUnionData(int count, NullableMapVector parent) {
- ArrowBuf varchar = allocator.buffer(3);
- varchar.readerIndex(0);
- varchar.setByte(0, 'a');
- varchar.setByte(1, 'b');
- varchar.setByte(2, 'c');
- varchar.writerIndex(3);
- ComplexWriter writer = new ComplexWriterImpl("root", parent);
- MapWriter rootWriter = writer.rootAsMap();
- IntWriter intWriter = rootWriter.integer("union");
- BigIntWriter bigIntWriter = rootWriter.bigInt("union");
- ListWriter listWriter = rootWriter.list("union");
- MapWriter mapWriter = rootWriter.map("union");
- for (int i = 0; i < count; i++) {
- switch (i % 4) {
- case 0:
- intWriter.setPosition(i);
- intWriter.writeInt(i);
- break;
- case 1:
- bigIntWriter.setPosition(i);
- bigIntWriter.writeBigInt(i);
- break;
- case 2:
- listWriter.setPosition(i);
- listWriter.startList();
- for (int j = 0; j < i % 3; j++) {
- listWriter.varChar().writeVarChar(0, 3, varchar);
- }
- listWriter.endList();
- break;
- case 3:
- mapWriter.setPosition(i);
- mapWriter.start();
- mapWriter.timeStamp("timestamp").writeTimeStamp(i);
- mapWriter.end();
- break;
- }
+ private void write(FieldVector parent, File file) throws FileNotFoundException, IOException {
+ VectorUnloader vectorUnloader = newVectorUnloader(parent);
+ Schema schema = vectorUnloader.getSchema();
+ LOGGER.debug("writing schema: " + schema);
+ try (
+ FileOutputStream fileOutputStream = new FileOutputStream(file);
+ ArrowWriter arrowWriter = new ArrowWriter(fileOutputStream.getChannel(), schema);
+ ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch();
+ ) {
+ arrowWriter.writeRecordBatch(recordBatch);
}
- writer.setValueCount(count);
- varchar.release();
}
}
http://git-wip-us.apache.org/repos/asf/arrow/blob/4fa7ac4f/java/vector/src/test/java/org/apache/arrow/vector/file/json/TestJSONFile.java
----------------------------------------------------------------------
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/file/json/TestJSONFile.java b/java/vector/src/test/java/org/apache/arrow/vector/file/json/TestJSONFile.java
new file mode 100644
index 0000000..7d25003
--- /dev/null
+++ b/java/vector/src/test/java/org/apache/arrow/vector/file/json/TestJSONFile.java
@@ -0,0 +1,120 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.arrow.vector.file.json;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.complex.MapVector;
+import org.apache.arrow.vector.complex.NullableMapVector;
+import org.apache.arrow.vector.file.BaseFileTest;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Tests for the JSON vector file format. Vectors are written with
+ * {@link JsonFileWriter} and, in the round-trip tests, read back with
+ * {@link JsonFileReader}. The shared allocator, the row count and the data
+ * generation / validation helpers are inherited from {@link BaseFileTest}.
+ */
+public class TestJSONFile extends BaseFileTest {
+  private static final Logger LOGGER = LoggerFactory.getLogger(TestJSONFile.class);
+
+  /**
+   * Writes the complex test data set to a JSON file, reads the file back and
+   * validates the content of the vectors that were read.
+   *
+   * @throws IOException if writing or reading the JSON file fails
+   */
+  @Test
+  public void testWriteReadComplexJSON() throws IOException {
+    File file = new File("target/mytest_complex.json");
+    int count = COUNT;
+
+    // write
+    try (
+        BufferAllocator originalVectorAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE);
+        MapVector parent = new MapVector("parent", originalVectorAllocator, null)) {
+      writeComplexData(count, parent);
+      writeJSON(file, new VectorSchemaRoot(parent.getChild("root")));
+    }
+
+    // read
+    try (BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE)) {
+      JsonFileReader reader = new JsonFileReader(file, readerAllocator);
+      // The reader is closed in a finally block so that a read or validation
+      // failure does not leave it open.
+      try {
+        Schema schema = reader.start();
+        LOGGER.debug("reading schema: {}", schema);
+
+        // initialize vectors
+        try (VectorSchemaRoot root = reader.read()) {
+          validateComplexContent(count, root);
+        }
+      } finally {
+        reader.close();
+      }
+    }
+  }
+
+  /**
+   * Generates the complex test data set, validates the in-memory vectors and
+   * writes them to a JSON file. The file is not read back by this test.
+   *
+   * @throws IOException if writing the JSON file fails
+   */
+  @Test
+  public void testWriteComplexJSON() throws IOException {
+    File file = new File("target/mytest_write_complex.json");
+    int count = COUNT;
+    try (
+        BufferAllocator vectorAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE);
+        NullableMapVector parent = new NullableMapVector("parent", vectorAllocator, null)) {
+      writeComplexData(count, parent);
+      VectorSchemaRoot root = new VectorSchemaRoot(parent.getChild("root"));
+      validateComplexContent(root.getRowCount(), root);
+      writeJSON(file, root);
+    }
+  }
+
+  /**
+   * Writes the schema of {@code root} followed by its content to {@code file},
+   * using a {@link JsonFileWriter} configured with {@code pretty(true)}.
+   *
+   * @param file the JSON file to write
+   * @param root the vectors (and their schema) to serialize
+   * @throws IOException if the writer fails
+   */
+  public void writeJSON(File file, VectorSchemaRoot root) throws IOException {
+    JsonFileWriter writer = new JsonFileWriter(file, JsonFileWriter.config().pretty(true));
+    // NOTE(review): the writer is only closed on the success path. Whether
+    // close() is safe after a failed start()/write() is not visible from this
+    // test, so the call sequence is left unchanged - confirm before moving it
+    // into a finally block.
+    writer.start(root.getSchema());
+    writer.write(root);
+    writer.close();
+  }
+
+  /**
+   * Generates the union test data set, validates it in memory, writes it to a
+   * JSON file, then reads the file back and validates the vectors again.
+   *
+   * @throws IOException if writing or reading the JSON file fails
+   */
+  @Test
+  public void testWriteReadUnionJSON() throws IOException {
+    File file = new File("target/mytest_write_union.json");
+    int count = COUNT;
+
+    // write
+    try (
+        BufferAllocator vectorAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE);
+        NullableMapVector parent = new NullableMapVector("parent", vectorAllocator, null)) {
+
+      writeUnionData(count, parent);
+
+      printVectors(parent.getChildrenFromFields());
+
+      VectorSchemaRoot root = new VectorSchemaRoot(parent.getChild("root"));
+      validateUnionData(count, root);
+
+      writeJSON(file, root);
+    }
+
+    // read
+    try (BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE)) {
+      JsonFileReader reader = new JsonFileReader(file, readerAllocator);
+      // Same close-in-finally handling as testWriteReadComplexJSON: the reader
+      // is released whether or not validation succeeds.
+      try {
+        Schema schema = reader.start();
+        LOGGER.debug("reading schema: {}", schema);
+
+        // initialize vectors
+        try (VectorSchemaRoot root = reader.read()) {
+          validateUnionData(count, root);
+        }
+      } finally {
+        reader.close();
+      }
+    }
+  }
+
+}