You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by pr...@apache.org on 2020/04/17 21:56:31 UTC
[drill] branch master updated: DRILL-7703: Support for 3+D arrays
in EVF JSON loader
This is an automated email from the ASF dual-hosted git repository.
progers pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git
The following commit(s) were added to refs/heads/master by this push:
new 69d397c DRILL-7703: Support for 3+D arrays in EVF JSON loader
69d397c is described below
commit 69d397c053f4afb89371e7926ce75815ac78bebd
Author: Paul Rogers <pa...@yahoo.com>
AuthorDate: Wed Apr 15 11:02:47 2020 -0700
DRILL-7703: Support for 3+D arrays in EVF JSON loader
Revises the EVF-based JSON loader to support nested
repeated lists.
---
.../store/easy/json/loader/JsonLoaderImpl.java | 17 +-
.../store/easy/json/loader/JsonLoaderOptions.java | 2 +
.../json/loader/RepeatedListValueListener.java | 67 ++--
.../exec/store/easy/json/loader/TupleListener.java | 370 ++++++++++++++-------
.../easy/json/loader/UnknownFieldListener.java | 10 +-
.../easy/json/parser/JsonStructureParser.java | 45 +--
.../exec/store/easy/json/parser/TokenIterator.java | 8 +
.../exec/store/easy/json/loader/TestObjects.java | 3 +
.../store/easy/json/loader/TestRepeatedList.java | 165 +++++++--
.../store/easy/json/loader/TestScalarArrays.java | 3 +
.../exec/store/easy/json/loader/TestScalars.java | 3 +
.../exec/store/easy/json/loader/TestUnknowns.java | 27 ++
.../exec/store/easy/json/loader/TestVariant.java | 3 +
.../drill/exec/record/metadata/MetadataUtils.java | 5 +
14 files changed, 529 insertions(+), 199 deletions(-)
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/JsonLoaderImpl.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/JsonLoaderImpl.java
index ecfaf4b..b434750 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/JsonLoaderImpl.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/JsonLoaderImpl.java
@@ -24,6 +24,7 @@ import java.util.ArrayList;
import java.util.List;
import org.apache.drill.common.exceptions.CustomErrorContext;
+import org.apache.drill.common.exceptions.EmptyErrorContext;
import org.apache.drill.common.exceptions.UserException;
import org.apache.drill.exec.physical.resultSet.ResultSetLoader;
import org.apache.drill.exec.physical.resultSet.RowSetLoader;
@@ -183,6 +184,13 @@ public class JsonLoaderImpl implements JsonLoader, ErrorFactory {
}
public JsonLoader build() {
+ // Defaults, primarily for testing.
+ if (options == null) {
+ options = new JsonLoaderOptions();
+ }
+ if (errorContext == null) {
+ errorContext = EmptyErrorContext.INSTANCE;
+ }
return new JsonLoaderImpl(this);
}
}
@@ -313,6 +321,7 @@ public class JsonLoaderImpl implements JsonLoader, ErrorFactory {
public RuntimeException syntaxError(JsonParseException e) {
throw buildError(
UserException.dataReadError(e)
+ .message("Error parsing JSON - %s", e.getMessage())
.addContext("Syntax error"));
}
@@ -378,14 +387,6 @@ public class JsonLoaderImpl implements JsonLoader, ErrorFactory {
.addContext("JSON type", jsonType.toString()));
}
- public UserException unsupportedArrayException(String key, int dims) {
- return buildError(
- UserException.validationError()
- .message("JSON reader does not arrays deeper than two levels")
- .addContext("Field", key)
- .addContext("Array nesting", dims));
- }
-
@Override
public RuntimeException messageParseError(MessageContextException e) {
return buildError(
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/JsonLoaderOptions.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/JsonLoaderOptions.java
index d982e11..a8b221d 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/JsonLoaderOptions.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/JsonLoaderOptions.java
@@ -29,6 +29,7 @@ import org.apache.drill.exec.store.easy.json.parser.JsonStructureOptions;
public class JsonLoaderOptions extends JsonStructureOptions {
public boolean readNumbersAsDouble;
+ public boolean unionEnabled;
/**
* Drill prior to version 1.18 would read a null string
@@ -49,5 +50,6 @@ public class JsonLoaderOptions extends JsonStructureOptions {
public JsonLoaderOptions(OptionSet options) {
super(options);
this.readNumbersAsDouble = options.getBoolean(ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE);
+ this.unionEnabled = options.getBoolean(ExecConstants.ENABLE_UNION_TYPE_KEY);
}
}
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/RepeatedListValueListener.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/RepeatedListValueListener.java
index 22bc68d..ba1647d 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/RepeatedListValueListener.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/RepeatedListValueListener.java
@@ -17,6 +17,8 @@
*/
package org.apache.drill.exec.store.easy.json.loader;
+import java.util.function.Function;
+
import org.apache.drill.exec.record.metadata.ColumnMetadata;
import org.apache.drill.exec.record.metadata.TupleMetadata;
import org.apache.drill.exec.store.easy.json.loader.AbstractArrayListener.ObjectArrayListener;
@@ -48,44 +50,65 @@ public class RepeatedListValueListener extends AbstractValueListener {
private RepeatedListValueListener(JsonLoaderImpl loader, ObjectWriter writer,
ValueListener elementListener) {
+ this(loader, writer,
+ new RepeatedArrayListener(loader, writer.schema(),
+ writer.array(), elementListener));
+ }
+
+ private RepeatedListValueListener(JsonLoaderImpl loader, ObjectWriter writer,
+ RepeatedArrayListener outerArrayListener) {
super(loader);
this.repeatedListWriter = writer;
- this.outerArrayListener = new RepeatedArrayListener(loader, writer.schema(),
- writer.array(), elementListener);
+ this.outerArrayListener = outerArrayListener;
}
/**
* Create a repeated list listener for a scalar value.
*/
- public static ValueListener repeatedListFor(JsonLoaderImpl loader, ObjectWriter writer) {
- ColumnMetadata elementSchema = writer.schema().childSchema();
- return wrapInnerArray(loader, writer,
- new ScalarArrayListener(loader, elementSchema,
- ScalarListener.listenerFor(loader, writer.array().entry())));
+ public static ValueListener multiDimScalarArrayFor(JsonLoaderImpl loader, ObjectWriter writer, int dims) {
+ return buildOuterArrays(loader, writer, dims,
+ innerWriter ->
+ new ScalarArrayListener(loader, innerWriter.schema(),
+ ScalarListener.listenerFor(loader, innerWriter))
+ );
}
/**
* Create a repeated list listener for a Map.
*/
- public static ValueListener repeatedObjectListFor(JsonLoaderImpl loader,
- ObjectWriter writer, TupleMetadata providedSchema) {
- ArrayWriter outerArrayWriter = writer.array();
- ArrayWriter innerArrayWriter = outerArrayWriter.array();
- return wrapInnerArray(loader, writer,
- new ObjectArrayListener(loader, innerArrayWriter,
- new ObjectValueListener(loader, outerArrayWriter.entry().schema(),
- new TupleListener(loader, innerArrayWriter.tuple(), providedSchema))));
+ public static ValueListener multiDimObjectArrayFor(JsonLoaderImpl loader,
+ ObjectWriter writer, int dims, TupleMetadata providedSchema) {
+ return buildOuterArrays(loader, writer, dims,
+ innerWriter ->
+ new ObjectArrayListener(loader, innerWriter.array(),
+ new ObjectValueListener(loader, innerWriter.array().entry().schema(),
+ new TupleListener(loader, innerWriter.array().tuple(), providedSchema))));
}
/**
- * Given the inner array, wrap it to produce the repeated list.
+ * Create layers of repeated list listeners around the type-specific
+ * array. If the JSON has three array levels, the outer two are repeated
+ * lists, the inner is type-specific: say an array of {@code BIGINT} or
+ * a map array.
*/
- private static ValueListener wrapInnerArray(JsonLoaderImpl loader, ObjectWriter writer,
- ArrayListener innerArrayListener) {
- return new RepeatedListValueListener(loader, writer,
- new RepeatedListElementListener(loader,
- writer.schema(), writer.array().array(),
- innerArrayListener));
+ public static ValueListener buildOuterArrays(JsonLoaderImpl loader, ObjectWriter writer, int dims,
+ Function<ObjectWriter, ArrayListener> innerCreator) {
+ ColumnMetadata colSchema = writer.schema();
+ ObjectWriter writers[] = new ObjectWriter[dims];
+ writers[0] = writer;
+ for (int i = 1; i < dims; i++) {
+ writers[i] = writers[i-1].array().entry();
+ }
+ ArrayListener prevArrayListener = innerCreator.apply(writers[dims - 1]);
+ RepeatedArrayListener innerArrayListener = null;
+ for (int i = dims - 2; i >= 0; i--) {
+ innerArrayListener = new RepeatedArrayListener(loader, colSchema,
+ writers[i].array(),
+ new RepeatedListElementListener(loader, colSchema,
+ writers[i+1].array(), prevArrayListener));
+ prevArrayListener = innerArrayListener;
+ }
+ return new RepeatedListValueListener(loader, writer, innerArrayListener);
}
/**
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/TupleListener.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/TupleListener.java
index 53ad5c6..493bea9 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/TupleListener.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/TupleListener.java
@@ -17,11 +17,11 @@
*/
package org.apache.drill.exec.store.easy.json.loader;
+import org.apache.drill.common.types.TypeProtos.DataMode;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.common.types.Types;
import org.apache.drill.exec.record.metadata.ColumnMetadata;
import org.apache.drill.exec.record.metadata.MetadataUtils;
-import org.apache.drill.exec.record.metadata.RepeatedListBuilder;
import org.apache.drill.exec.record.metadata.TupleMetadata;
import org.apache.drill.exec.store.easy.json.loader.AbstractArrayListener.ObjectArrayListener;
import org.apache.drill.exec.store.easy.json.loader.AbstractArrayListener.ScalarArrayListener;
@@ -36,6 +36,7 @@ import org.apache.drill.exec.store.easy.json.parser.ValueListener;
import org.apache.drill.exec.vector.accessor.ArrayWriter;
import org.apache.drill.exec.vector.accessor.ObjectWriter;
import org.apache.drill.exec.vector.accessor.TupleWriter;
+import org.apache.drill.shaded.guava.com.google.common.base.Preconditions;
/**
* Accepts { name : value ... }
@@ -164,11 +165,11 @@ public class TupleListener implements ObjectListener {
*/
@Override
public ValueListener addField(String key, ValueDef valueDef) {
- ColumnMetadata colSchema = providedColumn(key);
- if (colSchema != null) {
- return listenerFor(colSchema);
+ ColumnMetadata providedCol = providedColumn(key);
+ if (providedCol != null) {
+ return listenerForSchema(providedCol);
} else {
- return listenerFor(key, valueDef);
+ return listenerForValue(key, valueDef);
}
}
@@ -177,101 +178,173 @@ public class TupleListener implements ObjectListener {
}
/**
- * Build a column and its listener based on a provided schema.
+ * Build a column and its listener based on a provided schema.
+ * The user is responsible to ensure that the provided schema
+ * accurately reflects the structure of the JSON being parsed.
*/
- private ValueListener listenerFor(ColumnMetadata colSchema) {
- switch (colSchema.structureType()) {
- case PRIMITIVE:
- if (colSchema.isArray()) {
+ private ValueListener listenerForSchema(ColumnMetadata providedCol) {
+ switch (providedCol.structureType()) {
+
+ case PRIMITIVE: {
+ ColumnMetadata colSchema = providedCol.copy();
+ if (providedCol.isArray()) {
return scalarArrayListenerFor(colSchema);
} else {
return scalarListenerFor(colSchema);
}
- case TUPLE:
- if (colSchema.isArray()) {
- return objectArrayListenerFor(colSchema);
+ }
+
+ case TUPLE: {
+ // Propagate the provided map schema into the object
+ // listener as a provided tuple schema.
+ ColumnMetadata colSchema = providedCol.cloneEmpty();
+ TupleMetadata providedSchema = providedCol.tupleSchema();
+ if (providedCol.isArray()) {
+ return objectArrayListenerFor(colSchema, providedSchema);
} else {
- return objectListenerFor(colSchema);
+ return objectListenerFor(colSchema, providedSchema);
}
- case VARIANT:
- if (colSchema.isArray()) {
+ }
+
+ case VARIANT: {
+ // A variant can contain multiple types. The schema does not
+ // declare the types; rather they are discovered by the reader.
+ // That is, there is no VARIANT<INT, DOUBLE>, there is just VARIANT.
+ ColumnMetadata colSchema = providedCol.cloneEmpty();
+ if (providedCol.isArray()) {
return variantArrayListenerFor(colSchema);
} else {
return variantListenerFor(colSchema);
}
+ }
+
case MULTI_ARRAY:
- return repeatedListListenerFor(colSchema);
+ return multiDimArrayListenerForSchema(providedCol);
+
default:
+ throw loader.unsupportedType(providedCol);
}
- throw loader.unsupportedType(colSchema);
}
/**
* Build a column and its listener based on a look-ahead hint.
*/
- protected ValueListener listenerFor(String key, ValueDef valueDef) {
+ protected ValueListener listenerForValue(String key, ValueDef valueDef) {
if (!valueDef.isArray()) {
if (valueDef.type().isUnknown()) {
return unknownListenerFor(key);
} else if (valueDef.type().isObject()) {
- return objectListenerFor(key, null);
+ return objectListenerForValue(key);
} else {
- return scalarListenerFor(key, valueDef.type());
+ return scalarListenerForValue(key, valueDef.type());
}
} else if (valueDef.dimensions() == 1) {
if (valueDef.type().isUnknown()) {
return unknownArrayListenerFor(key, valueDef);
} else if (valueDef.type().isObject()) {
- return objectArrayListenerFor(key, null);
+ return objectArrayListenerForValue(key);
} else {
- return arrayListenerFor(key, valueDef.type());
+ return scalarArrayListenerForValue(key, valueDef.type());
}
- } else if (valueDef.dimensions() == 2) {
+ } else {
if (valueDef.type().isUnknown()) {
return unknownArrayListenerFor(key, valueDef);
} else if (valueDef.type().isObject()) {
- return repeatedListOfObjectsListenerFor(key, null);
+ return multiDimObjectArrayListenerForValue(key, valueDef);
} else {
- return repeatedListListenerFor(key, valueDef);
+ return multiDimScalarArrayListenerForValue(key, valueDef);
}
- } else {
- throw loader.unsupportedArrayException(key, valueDef.dimensions());
}
}
- public ScalarListener scalarListenerFor(String key, JsonType jsonType) {
- ColumnMetadata colSchema = MetadataUtils.newScalar(key,
- Types.optional(scalarTypeFor(key, jsonType)));
- return scalarListenerFor(colSchema);
- }
-
- private ObjectWriter addFieldWriter(ColumnMetadata colSchema) {
- int index = tupleWriter.addColumn(colSchema);
- return tupleWriter.column(index);
+ /**
+ * Create a scalar column and listener given the definition of a JSON
+ * scalar value.
+ */
+ public ScalarListener scalarListenerForValue(String key, JsonType jsonType) {
+ return scalarListenerFor(MetadataUtils.newScalar(key,
+ Types.optional(scalarTypeFor(key, jsonType))));
}
+ /**
+ * Create a scalar column and listener given the column schema.
+ */
public ScalarListener scalarListenerFor(ColumnMetadata colSchema) {
return ScalarListener.listenerFor(loader, addFieldWriter(colSchema));
}
- public ObjectValueListener objectListenerFor(ColumnMetadata providedCol) {
- return objectListenerFor(providedCol.name(), providedCol.tupleSchema());
+ /**
+ * Create a scalar array column and listener given the definition of a JSON
+ * array of scalars.
+ */
+ public ArrayValueListener scalarArrayListenerForValue(String key, JsonType jsonType) {
+ return scalarArrayListenerFor(MetadataUtils.newScalar(key,
+ Types.repeated(scalarTypeFor(key, jsonType))));
+ }
+
+ /**
+ * Create a multi- (2+) dimensional scalar array from a JSON value description.
+ */
+ private ValueListener multiDimScalarArrayListenerForValue(String key, ValueDef valueDef) {
+ return multiDimScalarArrayListenerFor(
+ repeatedListSchemaFor(key, valueDef.dimensions(),
+ MetadataUtils.newScalar(key, scalarTypeFor(key, valueDef.type()), DataMode.REPEATED)),
+ valueDef.dimensions());
+ }
+
+ /**
+ * Create a multi- (2+) dimensional scalar array from a column schema and dimension
+ * count hint.
+ */
+ private ValueListener multiDimScalarArrayListenerFor(ColumnMetadata colSchema, int dims) {
+ return RepeatedListValueListener.multiDimScalarArrayFor(loader,
+ addFieldWriter(colSchema), dims);
+ }
+
+ /**
+ * Create a scalar array column and array listener for the given column
+ * schema.
+ */
+ public ArrayValueListener scalarArrayListenerFor(ColumnMetadata colSchema) {
+ return new ScalarArrayValueListener(loader, colSchema,
+ new ScalarArrayListener(loader, colSchema,
+ scalarListenerFor(colSchema)));
}
- public ObjectValueListener objectListenerFor(String key, TupleMetadata providedSchema) {
+ /**
+ * Create a map column and its associated object value listener for
+ * a JSON object value given the value's key.
+ */
+ public ObjectValueListener objectListenerForValue(String key) {
ColumnMetadata colSchema = MetadataUtils.newMap(key);
+ return objectListenerFor(colSchema, colSchema.tupleSchema());
+ }
+
+ /**
+ * Create a map column and its associated object value listener for the
+ * given key and optional provided schema.
+ */
+ public ObjectValueListener objectListenerFor(ColumnMetadata colSchema, TupleMetadata providedSchema) {
return new ObjectValueListener(loader, colSchema,
new TupleListener(loader, addFieldWriter(colSchema).tuple(),
providedSchema));
}
- public ArrayValueListener objectArrayListenerFor(ColumnMetadata providedCol) {
- return objectArrayListenerFor(providedCol.name(), providedCol.tupleSchema());
+ /**
+ * Create a map array column and its associated object array listener
+ * for the given key.
+ */
+ public ArrayValueListener objectArrayListenerForValue(String key) {
+ ColumnMetadata colSchema = MetadataUtils.newMapArray(key);
+ return objectArrayListenerFor(colSchema, colSchema.tupleSchema());
}
+ /**
+ * Create a map array column and its associated object array listener
+ * for the given column schema and optional provided schema.
+ */
public ArrayValueListener objectArrayListenerFor(
- String key, TupleMetadata providedSchema) {
- ColumnMetadata colSchema = MetadataUtils.newMapArray(key);
+ ColumnMetadata colSchema, TupleMetadata providedSchema) {
ArrayWriter arrayWriter = addFieldWriter(colSchema).array();
return new ObjectArrayValueListener(loader, colSchema,
new ObjectArrayListener(loader, arrayWriter,
@@ -279,10 +352,125 @@ public class TupleListener implements ObjectListener {
new TupleListener(loader, arrayWriter.tuple(), providedSchema))));
}
- public ArrayValueListener arrayListenerFor(String key, JsonType jsonType) {
- ColumnMetadata colSchema = MetadataUtils.newScalar(key,
- Types.repeated(scalarTypeFor(key, jsonType)));
- return scalarArrayListenerFor(colSchema);
+ /**
+ * Create a RepeatedList which contains (empty) Map objects using the provided
+ * schema. That is, create a multi-dimensional array of maps.
+ * The map fields are created on the fly, optionally using the provided schema.
+ */
+ private ValueListener multiDimObjectArrayListenerForValue(String key, ValueDef valueDef) {
+ return multiDimObjectArrayListenerFor(
+ repeatedListSchemaFor(key, valueDef.dimensions(),
+ MetadataUtils.newMapArray(key)),
+ valueDef.dimensions(), null);
+ }
+
+ /**
+ * Create a multi- (2+) dimensional object (map) array from a column schema, dimension
+ * count hint, and optional provided schema.
+ */
+ private ValueListener multiDimObjectArrayListenerFor(ColumnMetadata colSchema,
+ int dims, TupleMetadata providedSchema) {
+ return RepeatedListValueListener.multiDimObjectArrayFor(loader,
+ addFieldWriter(colSchema), dims, providedSchema);
+ }
+
+ /**
+ * Create a variant (UNION) column and its associated listener given
+ * a column schema.
+ */
+ private ValueListener variantListenerFor(ColumnMetadata colSchema) {
+ return new VariantListener(loader, addFieldWriter(colSchema).variant());
+ }
+
+ /**
+ * Create a variant array (LIST) column and its associated listener given
+ * a column schema.
+ */
+ private ValueListener variantArrayListenerFor(ColumnMetadata colSchema) {
+ return new ListListener(loader, addFieldWriter(colSchema));
+ }
+
+ /**
+ * Create a RepeatedList which contains Unions. (Actually, this is an
+ * array of List objects internally.) The variant is variable, it makes no
+ * sense to specify a schema for the variant. Also, omitting the schema
+ * saves a large amount of complexity that will likely never be needed.
+ */
+ @SuppressWarnings("unused")
+ private ValueListener repeatedListOfVariantListenerFor(String key, ValueDef valueDef) {
+ return multiDimVariantArrayListenerFor(
+ MetadataUtils.newVariant(key, DataMode.REPEATED),
+ valueDef.dimensions());
+ }
+
+ /**
+ * Create a multi- (2+) dimensional variant array from a column schema and dimension
+ * count hint. This is actually an (n-1) dimensional array of lists, where a LIST
+ * is a repeated UNION.
+ */
+ private ValueListener multiDimVariantArrayListenerFor(ColumnMetadata colSchema, int dims) {
+ return RepeatedListValueListener.repeatedVariantListFor(loader,
+ addFieldWriter(colSchema));
+ }
+
+ /**
+ * Create a repeated list column and its multiple levels of inner structure
+ * from a provided schema. Repeated lists can nest to any number of levels to
+ * provide any number of dimensions. In general, if an array is <i>n</i>-dimensional,
+ * then there are <i>n</i>-1 repeated lists with some array type as the
+ * innermost dimension.
+ */
+ private ValueListener multiDimArrayListenerForSchema(ColumnMetadata providedSchema) {
+ // Parse the stack of repeated lists to count the "outer" dimensions and
+ // to locate the innermost array (the "list" which is "repeated").
+ int dims = 1; // For inner array
+ ColumnMetadata elementSchema = providedSchema;
+ while (MetadataUtils.isRepeatedList(elementSchema)) {
+ dims++;
+ elementSchema = elementSchema.childSchema();
+ Preconditions.checkArgument(elementSchema != null);
+ }
+
+ ColumnMetadata colSchema = repeatedListSchemaFor(providedSchema.name(), dims,
+ elementSchema.cloneEmpty());
+ switch (elementSchema.structureType()) {
+
+ case PRIMITIVE:
+ return multiDimScalarArrayListenerFor(colSchema, dims);
+
+ case TUPLE:
+ return multiDimObjectArrayListenerFor(colSchema,
+ dims, elementSchema.tupleSchema());
+
+ case VARIANT:
+ return multiDimVariantArrayListenerFor(colSchema, dims);
+
+ default:
+ throw loader.unsupportedType(providedSchema);
+ }
+ }
+
+ /**
+ * Create a listener when we don't have type information. For the case
+ * {@code null} appears before other values.
+ */
+ private ValueListener unknownListenerFor(String key) {
+ return new UnknownFieldListener(this, key);
+ }
+
+ /**
+ * Create a listener when we don't have type information. For the case
+ * {@code []} appears before other values.
+ */
+ private ValueListener unknownArrayListenerFor(String key, ValueDef valueDef) {
+ UnknownFieldListener fieldListener = new UnknownFieldListener(this, key);
+ fieldListener.array(valueDef);
+ return fieldListener;
+ }
+
+ private ObjectWriter addFieldWriter(ColumnMetadata colSchema) {
+ int index = tupleWriter.addColumn(colSchema);
+ return tupleWriter.column(index);
}
/**
@@ -321,85 +509,17 @@ public class TupleListener implements ObjectListener {
}
}
- public ArrayValueListener scalarArrayListenerFor(ColumnMetadata colSchema) {
- return new ScalarArrayValueListener(loader, colSchema,
- new ScalarArrayListener(loader, colSchema,
- scalarListenerFor(colSchema)));
- }
-
/**
- * Create a listener when we don't have type information. For the case
- * {@code null} appears before other values.
+ * Build up a repeated list column definition given the number of
+ * dimensions and the schema of the innermost array. Each dimension beyond
+ * the first wraps the previous level in a repeated list of the same name.
*/
- private ValueListener unknownListenerFor(String key) {
- return new UnknownFieldListener(this, key);
- }
-
- /**
- * Create a listener when we don't have type information. For the case
- * {@code []} appears before other values.
- */
- private ValueListener unknownArrayListenerFor(String key, ValueDef valueDef) {
- UnknownFieldListener fieldListener = new UnknownFieldListener(this, key);
- fieldListener.array(valueDef);
- return fieldListener;
- }
-
- private ValueListener variantListenerFor(ColumnMetadata colSchema) {
- return new VariantListener(loader, addFieldWriter(colSchema).variant());
- }
-
- private ValueListener variantArrayListenerFor(ColumnMetadata colSchema) {
- return new ListListener(loader, addFieldWriter(colSchema));
- }
-
- private ValueListener repeatedListListenerFor(String key, ValueDef valueDef) {
- ColumnMetadata colSchema = new RepeatedListBuilder(key)
- .addArray(scalarTypeFor(key, valueDef.type()))
- .buildColumn();
- return repeatedListListenerFor(colSchema);
- }
-
- /**
- * Create a RepeatedList which contains (empty) Map objects using the provided
- * schema. The map fields are created on the fly from the provided schema.
- */
- private ValueListener repeatedListOfObjectsListenerFor(String key, ColumnMetadata providedCol) {
- ColumnMetadata colSchema = new RepeatedListBuilder(key)
- .addMapArray()
- .resumeList()
- .buildColumn();
- TupleMetadata providedSchema = providedCol == null ? null
- : providedCol.childSchema().tupleSchema();
- return RepeatedListValueListener.repeatedObjectListFor(loader,
- addFieldWriter(colSchema), providedSchema);
- }
-
- /**
- * Create a RepeatedList which contains Unions. (Actually, this is an
- * array of List objects internally.) The variant is variable, it makes no
- * sense to specify a schema for the variant. Also, omitting the schema
- * save a large amount of complexity that will likely never be needed.
- */
- private ValueListener repeatedListOfVariantListenerFor(String key) {
- ColumnMetadata colSchema = new RepeatedListBuilder(key)
- .addList()
- .resumeList()
- .buildColumn();
- return RepeatedListValueListener.repeatedVariantListFor(loader,
- addFieldWriter(colSchema));
- }
-
- private ValueListener repeatedListListenerFor(ColumnMetadata colSchema) {
- ColumnMetadata childSchema = colSchema.childSchema();
- if (childSchema != null) {
- if (childSchema.isMap()) {
- return repeatedListOfObjectsListenerFor(colSchema.name(), colSchema);
- }
- if (childSchema.isVariant()) {
- return repeatedListOfVariantListenerFor(colSchema.name());
- }
+ private ColumnMetadata repeatedListSchemaFor(String key, int dims,
+ ColumnMetadata innerArray) {
+ ColumnMetadata prev = innerArray;
+ for (int i = 1; i < dims; i++) {
+ prev = MetadataUtils.newRepeatedList(key, prev);
}
- return RepeatedListValueListener.repeatedListFor(loader, addFieldWriter(colSchema));
+ return prev;
}
}
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/UnknownFieldListener.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/UnknownFieldListener.java
index 0a2ca34..530342d 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/UnknownFieldListener.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/UnknownFieldListener.java
@@ -105,7 +105,7 @@ public class UnknownFieldListener extends AbstractValueListener implements NullT
@Override
public ObjectListener object() {
- return resolveScalar(JsonType.OBJECT).object();
+ return resolveTo(parentTuple.objectListenerForValue(key)).object();
}
/**
@@ -115,7 +115,7 @@ public class UnknownFieldListener extends AbstractValueListener implements NullT
*/
protected ValueListener resolveScalar(JsonType type) {
if (unknownArray == null) {
- return resolveTo(parentTuple.scalarListenerFor(key, type));
+ return resolveTo(parentTuple.scalarListenerForValue(key, type));
} else {
// Saw {a: []}, {a: 10}. Since we infer that 10 is a
@@ -154,11 +154,11 @@ public class UnknownFieldListener extends AbstractValueListener implements NullT
if (unknownArray == null) {
logger.warn("Ambiguous type! JSON field {}" +
" contains all nulls. Assuming VARCHAR.", key);
- resolveTo(parentTuple.scalarListenerFor(key, JsonType.STRING));
+ resolveTo(parentTuple.scalarListenerForValue(key, JsonType.STRING));
} else {
logger.warn("Ambiguous type! JSON array field {}" +
" contains all empty arrays. Assuming repeated VARCHAR.", key);
- resolveTo(parentTuple.arrayListenerFor(key, JsonType.STRING));
+ resolveTo(parentTuple.scalarArrayListenerForValue(key, JsonType.STRING));
}
}
@@ -168,7 +168,7 @@ public class UnknownFieldListener extends AbstractValueListener implements NullT
" starts with null element. Assuming repeated VARCHAR.", key);
valueDef = new ValueDef(JsonType.STRING, valueDef.dimensions());
}
- return resolveTo(parentTuple.listenerFor(key, valueDef));
+ return resolveTo(parentTuple.listenerForValue(key, valueDef));
}
/**
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/JsonStructureParser.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/JsonStructureParser.java
index 2b814ac..5c4425d 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/JsonStructureParser.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/JsonStructureParser.java
@@ -36,28 +36,33 @@ import com.fasterxml.jackson.core.JsonToken;
import com.fasterxml.jackson.databind.ObjectMapper;
/**
- * Parser for JSON that converts a stream of tokens from the Jackson JSON
- * parser into a set of events on listeners structured to follow the
- * data structure of the incoming data. JSON can assume many forms. This
- * class assumes that the data is in a tree structure that corresponds
- * to the Drill row structure: a series of object with (mostly) the
- * same schema. Members of the top-level object can be Drill types:
- * scalars, arrays, nested objects (Drill "MAP"s), and so on.
+ * Parser for a subset of the <a href="http://jsonlines.org/">jsonlines</a>
+ * format. In particular, supports line-delimited JSON objects, or a single
+ * array which holds a list of JSON objects.
* <p>
- * The structure parser follows the structure of the incoming data,
- * whatever it might be. This class imposes no semantic rules on that
- * data, it just "calls 'em as it sees 'em" as they say. The listeners
- * are responsible for deciding if the data data makes sense, and if
- * so, how it should be handled.
+ * Alternatively, a message parser can provide a path to an array of JSON
+ * objects within a message such as a REST response.
* <p>
- * The root listener will receive an event to fields in the top-level
- * object as those fields first appear. Each field is a value object
- * and can correspond to a scalar, array, another object, etc. The
- * type of the value is declared when known, but sometimes it is not
- * known, such as if the value is {@code null}. And, of course, according
- * to JSON, the value is free to change from one row to the next. The
- * listener decides it if wants to handle such "schema change", and if
- * so, how.
+ * Implemented as a parser which converts a stream of tokens from the Jackson
+ * JSON parser into a set of events on listeners structured to follow the data
+ * structure of the incoming data. JSON can assume many forms. This class
+ * assumes that the data is in a tree structure that corresponds to the Drill
+ * row structure: a series of objects with (mostly) the same schema. Members of
+ * the top-level object can be Drill types: scalars, arrays, nested objects
+ * (Drill "MAP"s), and so on.
+ * <p>
+ * The structure parser follows the structure of the incoming data, whatever it
+ * might be. This class imposes no semantic rules on that data, it just "calls
+ * 'em as it sees 'em" as they say. The listeners are responsible for deciding
+ * if the data makes sense, and if so, how it should be handled.
+ * <p>
+ * The root listener will receive an event to fields in the top-level object as
+ * those fields first appear. Each field is a value object and can correspond to
+ * a scalar, array, another object, etc. The type of the value is declared when
+ * known, but sometimes it is not known, such as if the value is {@code null}.
+ * And, of course, according to JSON, the value is free to change from one row
+ * to the next. The listener decides if it wants to handle such "schema change",
+ * and if so, how.
*/
public class JsonStructureParser {
protected static final Logger logger = LoggerFactory.getLogger(JsonStructureParser.class);
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/TokenIterator.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/TokenIterator.java
index ecd5b29..5fbcc25 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/TokenIterator.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/TokenIterator.java
@@ -128,6 +128,8 @@ public class TokenIterator {
public String textValue() {
try {
return parser.getText();
+ } catch (JsonParseException e) {
+ throw errorFactory.syntaxError(e);
} catch (IOException e) {
throw errorFactory.ioException(e);
}
@@ -136,6 +138,8 @@ public class TokenIterator {
public long longValue() {
try {
return parser.getLongValue();
+ } catch (JsonParseException e) {
+ throw errorFactory.syntaxError(e);
} catch (IOException e) {
throw errorFactory.ioException(e);
} catch (UnsupportedConversionError e) {
@@ -146,6 +150,8 @@ public class TokenIterator {
public String stringValue() {
try {
return parser.getValueAsString();
+ } catch (JsonParseException e) {
+ throw errorFactory.syntaxError(e);
} catch (IOException e) {
throw errorFactory.ioException(e);
} catch (UnsupportedConversionError e) {
@@ -156,6 +162,8 @@ public class TokenIterator {
public double doubleValue() {
try {
return parser.getValueAsDouble();
+ } catch (JsonParseException e) {
+ throw errorFactory.syntaxError(e);
} catch (IOException e) {
throw errorFactory.ioException(e);
} catch (UnsupportedConversionError e) {
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestObjects.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestObjects.java
index 420c2fa..7c6475d 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestObjects.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestObjects.java
@@ -24,6 +24,7 @@ import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
+import org.apache.drill.categories.RowSetTests;
import org.apache.drill.common.exceptions.UserException;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.exec.physical.rowSet.RowSet;
@@ -32,7 +33,9 @@ import org.apache.drill.exec.record.metadata.SchemaBuilder;
import org.apache.drill.exec.record.metadata.TupleMetadata;
import org.apache.drill.test.rowSet.RowSetUtilities;
import org.junit.Test;
+import org.junit.experimental.categories.Category;
+@Category(RowSetTests.class)
public class TestObjects extends BaseJsonLoaderTest {
@Test
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestRepeatedList.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestRepeatedList.java
index 8a54863..76d48f3 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestRepeatedList.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestRepeatedList.java
@@ -24,20 +24,20 @@ import static org.apache.drill.test.rowSet.RowSetUtilities.singleObjArray;
import static org.apache.drill.test.rowSet.RowSetUtilities.strArray;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-import org.apache.drill.common.exceptions.UserException;
+import org.apache.drill.categories.RowSetTests;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.exec.physical.rowSet.RowSet;
import org.apache.drill.exec.record.metadata.SchemaBuilder;
import org.apache.drill.exec.record.metadata.TupleMetadata;
import org.apache.drill.test.rowSet.RowSetUtilities;
import org.junit.Test;
+import org.junit.experimental.categories.Category;
/**
- * Tests repeated lists to form a 2D array of various data types.
+ * Tests repeated lists to form a 2D or 3D array of various data types.
*/
+@Category(RowSetTests.class)
public class TestRepeatedList extends BaseJsonLoaderTest {
@Test
@@ -173,21 +173,6 @@ public class TestRepeatedList extends BaseJsonLoaderTest {
}
@Test
- public void test3DScalars() {
- String json =
- "{a: [[[1, 2]]]]}";
- JsonLoaderFixture loader = new JsonLoaderFixture();
- loader.open(json);
- try {
- loader.next();
- fail();
- } catch (UserException e) {
- assertTrue(e.getMessage().contains("arrays deeper than two levels"));
- }
- loader.close();
- }
-
- @Test
public void test2DObjects() {
String json =
"{a: [[{b: 1}, {b: 2}], [{b: 3}, {b: 4}, {b: 5}]]}\n" +
@@ -298,4 +283,146 @@ public class TestRepeatedList extends BaseJsonLoaderTest {
assertNull(loader.next());
loader.close();
}
+
+ @Test
+ public void test3DScalars() {
+ String json =
+ "{a: [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]}";
+ JsonLoaderFixture loader = new JsonLoaderFixture();
+ loader.open(json);
+ RowSet results = loader.next();
+ assertNotNull(results);
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .addRepeatedList("a")
+ .addDimension()
+ .addArray(MinorType.BIGINT)
+ .resumeList()
+ .resumeSchema()
+ .build();
+ RowSet expected = fixture.rowSetBuilder(expectedSchema)
+ .addSingleCol(objArray(
+ objArray(longArray(1L, 2L), longArray(3L, 4L)),
+ objArray(longArray(5L, 6L), longArray(7L, 8L))))
+ .build();
+ RowSetUtilities.verify(expected, results);
+ assertNull(loader.next());
+ loader.close();
+ }
+
+ @Test
+ public void testNullTo3DScalars() {
+ String json =
+ "{a: null}\n" +
+ "{a: [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]}";
+ JsonLoaderFixture loader = new JsonLoaderFixture();
+ loader.open(json);
+ RowSet results = loader.next();
+ assertNotNull(results);
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .addRepeatedList("a")
+ .addDimension()
+ .addArray(MinorType.BIGINT)
+ .resumeList()
+ .resumeSchema()
+ .build();
+ RowSet expected = fixture.rowSetBuilder(expectedSchema)
+ .addSingleCol(objArray())
+ .addSingleCol(objArray(
+ objArray(longArray(1L, 2L), longArray(3L, 4L)),
+ objArray(longArray(5L, 6L), longArray(7L, 8L))))
+ .build();
+ RowSetUtilities.verify(expected, results);
+ assertNull(loader.next());
+ loader.close();
+ }
+
+ @Test
+ public void testUnknownTo3DScalars() {
+ String json =
+ "{a: []}\n" +
+ "{a: [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]}";
+ JsonLoaderFixture loader = new JsonLoaderFixture();
+ loader.open(json);
+ RowSet results = loader.next();
+ assertNotNull(results);
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .addRepeatedList("a")
+ .addDimension()
+ .addArray(MinorType.BIGINT)
+ .resumeList()
+ .resumeSchema()
+ .build();
+ RowSet expected = fixture.rowSetBuilder(expectedSchema)
+ .addSingleCol(objArray())
+ .addSingleCol(objArray(
+ objArray(longArray(1L, 2L), longArray(3L, 4L)),
+ objArray(longArray(5L, 6L), longArray(7L, 8L))))
+ .build();
+ RowSetUtilities.verify(expected, results);
+ assertNull(loader.next());
+ loader.close();
+ }
+
+ @Test
+ public void test3DObjects() {
+ String json =
+ "{a: [[[{n: 1}, {n: 2}], [{n: 3}, {n: 4}]], " +
+ "[[{n: 5}, {n: 6}], [{n: 7}, {n: 8}]]]}";
+ JsonLoaderFixture loader = new JsonLoaderFixture();
+ loader.open(json);
+ RowSet results = loader.next();
+ assertNotNull(results);
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .addRepeatedList("a")
+ .addDimension()
+ .addMapArray()
+ .addNullable("n", MinorType.BIGINT)
+ .resumeList()
+ .resumeList()
+ .resumeSchema()
+ .build();
+ RowSet expected = fixture.rowSetBuilder(expectedSchema)
+ .addSingleCol(objArray(
+ objArray(objArray(mapValue(1L), mapValue(2L)), objArray(mapValue(3L), mapValue(4L))),
+ objArray(objArray(mapValue(5L), mapValue(6L)), objArray(mapValue(7L), mapValue(8L)))))
+ .build();
+ RowSetUtilities.verify(expected, results);
+ assertNull(loader.next());
+ loader.close();
+ }
+
+ @Test
+ public void testUnknownTo3DObjects() {
+ String json =
+ "{a: []}\n" +
+ "{a: [[[{n: 1}, {n: 2}], [{n: 3}, {n: 4}]], " +
+ "[[{n: 5}, {n: 6}], [{n: 7}, {n: 8}]]]}";
+ JsonLoaderFixture loader = new JsonLoaderFixture();
+ loader.open(json);
+ RowSet results = loader.next();
+ assertNotNull(results);
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .addRepeatedList("a")
+ .addDimension()
+ .addMapArray()
+ .addNullable("n", MinorType.BIGINT)
+ .resumeList()
+ .resumeList()
+ .resumeSchema()
+ .build();
+ RowSet expected = fixture.rowSetBuilder(expectedSchema)
+ .addSingleCol(objArray())
+ .addSingleCol(objArray(
+ objArray(objArray(mapValue(1L), mapValue(2L)), objArray(mapValue(3L), mapValue(4L))),
+ objArray(objArray(mapValue(5L), mapValue(6L)), objArray(mapValue(7L), mapValue(8L)))))
+ .build();
+ RowSetUtilities.verify(expected, results);
+ assertNull(loader.next());
+ loader.close();
+ }
}
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestScalarArrays.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestScalarArrays.java
index 8d82878..ea6b0e9 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestScalarArrays.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestScalarArrays.java
@@ -26,6 +26,7 @@ import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
+import org.apache.drill.categories.RowSetTests;
import org.apache.drill.common.exceptions.UserException;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.exec.physical.rowSet.RowSet;
@@ -33,6 +34,7 @@ import org.apache.drill.exec.record.metadata.SchemaBuilder;
import org.apache.drill.exec.record.metadata.TupleMetadata;
import org.apache.drill.test.rowSet.RowSetUtilities;
import org.junit.Test;
+import org.junit.experimental.categories.Category;
/**
* Test scalar arrays. Without a schema, the first array token
@@ -44,6 +46,7 @@ import org.junit.Test;
* Verifies that null array elements are converted to a default
* value for the type (false, 0 or empty string.)
*/
+@Category(RowSetTests.class)
public class TestScalarArrays extends BaseJsonLoaderTest {
@Test
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestScalars.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestScalars.java
index 0b19366..09ba62f 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestScalars.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestScalars.java
@@ -22,6 +22,7 @@ import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
+import org.apache.drill.categories.RowSetTests;
import org.apache.drill.common.exceptions.UserException;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.exec.physical.resultSet.project.Projections;
@@ -31,6 +32,7 @@ import org.apache.drill.exec.record.metadata.SchemaBuilder;
import org.apache.drill.exec.record.metadata.TupleMetadata;
import org.apache.drill.test.rowSet.RowSetUtilities;
import org.junit.Test;
+import org.junit.experimental.categories.Category;
/**
* Tests JSON scalar handling. Without a schema, the first non-null value
@@ -44,6 +46,7 @@ import org.junit.Test;
* to a few messy rows a billion rows in, or due to the order that the scanners
* see the data.
*/
+@Category(RowSetTests.class)
public class TestScalars extends BaseJsonLoaderTest {
/**
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestUnknowns.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestUnknowns.java
index 0ac5ec6..501c1a7 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestUnknowns.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestUnknowns.java
@@ -24,12 +24,14 @@ import static org.apache.drill.test.rowSet.RowSetUtilities.strArray;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
+import org.apache.drill.categories.RowSetTests;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.exec.physical.rowSet.RowSet;
import org.apache.drill.exec.record.metadata.SchemaBuilder;
import org.apache.drill.exec.record.metadata.TupleMetadata;
import org.apache.drill.test.rowSet.RowSetUtilities;
import org.junit.Test;
+import org.junit.experimental.categories.Category;
/**
* Tests the ability of the JSON reader to "wait out" a set of leading
@@ -37,6 +39,7 @@ import org.junit.Test;
* deciding on the column type. Hitting the end of batch, or an array
* that contains only null values, forces resolution to VARCHAR.
*/
+@Category(RowSetTests.class)
public class TestUnknowns extends BaseJsonLoaderTest {
@Test
@@ -84,6 +87,30 @@ public class TestUnknowns extends BaseJsonLoaderTest {
loader.close();
}
+ @Test
+ public void testNullToObject() {
+ String json =
+ "{a: null} {a: {b: 20, c: 220}}";
+ JsonLoaderFixture loader = new JsonLoaderFixture();
+ loader.open(json);
+ RowSet results = loader.next();
+ assertNotNull(results);
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .addMap("a")
+ .addNullable("b", MinorType.BIGINT)
+ .addNullable("c", MinorType.BIGINT)
+ .resumeSchema()
+ .build();
+ RowSet expected = fixture.rowSetBuilder(expectedSchema)
+ .addSingleCol(mapValue(null, null))
+ .addSingleCol(mapValue(20, 220))
+ .build();
+ RowSetUtilities.verify(expected, results);
+ assertNull(loader.next());
+ loader.close();
+ }
+
/**
* Input contains all nulls. The loader will force resolve to a
* type, and will choose VARCHAR as all scalar types which
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestVariant.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestVariant.java
index 480aab3..66bc4be 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestVariant.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestVariant.java
@@ -22,13 +22,16 @@ import static org.apache.drill.test.rowSet.RowSetUtilities.objArray;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
+import org.apache.drill.categories.RowSetTests;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.exec.physical.rowSet.RowSet;
import org.apache.drill.exec.record.metadata.SchemaBuilder;
import org.apache.drill.exec.record.metadata.TupleMetadata;
import org.apache.drill.test.rowSet.RowSetUtilities;
import org.junit.Test;
+import org.junit.experimental.categories.Category;
+@Category(RowSetTests.class)
public class TestVariant extends BaseJsonLoaderTest {
@Test
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/MetadataUtils.java b/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/MetadataUtils.java
index 0dc469d..c9d2294 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/MetadataUtils.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/MetadataUtils.java
@@ -304,4 +304,9 @@ public class MetadataUtils {
}
return false;
}
+
+ public static boolean isRepeatedList(ColumnMetadata col) {
+ return col.type() == MinorType.LIST &&
+ col.mode() == DataMode.REPEATED;
+ }
}