You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by bo...@apache.org on 2019/09/03 15:06:45 UTC
[drill] branch master updated: DRILL-7096: Develop vector for
canonical Map
This is an automated email from the ASF dual-hosted git repository.
bohdan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git
The following commit(s) were added to refs/heads/master by this push:
new b30830a DRILL-7096: Develop vector for canonical Map<K,V>
b30830a is described below
commit b30830ab165d378ff55a9a215f98183d35b17ccd
Author: Bohdan Kazydub <bo...@gmail.com>
AuthorDate: Mon Mar 25 16:40:32 2019 +0200
DRILL-7096: Develop vector for canonical Map<K,V>
- Added new type DICT;
- Created value vectors for the type in both single and repeated modes;
- Implemented corresponding FieldReaders and FieldWriters;
- Made changes in EvaluationVisitor to be able to read values from the map by key;
- Made changes to DrillParquetGroupConverter to be able to read Parquet's MAP type;
- Added an option `store.parquet.reader.enable_map_support` to disable reading MAP type as DICT from Parquet files;
- Updated AvroRecordReader to use new DICT type for Avro's MAP;
- Added support for the new type to ParquetRecordWriter.
---
.../java/org/apache/drill/common/types/Types.java | 30 +-
.../client/src/include/drill/protobuf/Types.pb.h | 5 +-
contrib/native/client/src/protobuf/Types.pb.cc | 12 +-
.../codegen/templates/AbstractRecordWriter.java | 10 +
.../codegen/templates/EventBasedRecordWriter.java | 9 +
.../templates/ParquetOutputRecordWriter.java | 9 +
.../src/main/codegen/templates/RecordWriter.java | 2 +
.../src/main/codegen/templates/TypeHelper.java | 3 +
.../java/org/apache/drill/exec/ExecConstants.java | 4 +
.../apache/drill/exec/expr/EvaluationVisitor.java | 118 +++-
.../drill/exec/expr/fn/DrillAggFuncHolder.java | 4 +-
.../drill/exec/expr/fn/impl/UnionFunctions.java | 4 +-
.../physical/impl/flatten/FlattenRecordBatch.java | 15 +-
.../exec/physical/impl/join/MergeJoinBatch.java | 24 +-
.../physical/impl/join/NestedLoopJoinBatch.java | 28 +-
.../OrderedPartitionRecordBatch.java | 5 +-
.../impl/statistics/StatisticsAggBatch.java | 29 +-
.../drill/exec/planner/index/SimpleRexRemap.java | 14 +-
.../drill/exec/record/RecordBatchLoader.java | 6 +-
.../apache/drill/exec/record/RecordBatchSizer.java | 26 +-
.../org/apache/drill/exec/record/TypedFieldId.java | 132 +++--
.../apache/drill/exec/record/VectorContainer.java | 4 +-
.../drill/exec/record/VectorInitializer.java | 6 +-
.../exec/resolver/ResolverTypePrecedence.java | 1 +
.../exec/server/options/SystemOptionManager.java | 3 +-
.../drill/exec/store/avro/AvroRecordReader.java | 27 +-
.../store/parquet/BaseParquetMetadataProvider.java | 5 +-
.../exec/store/parquet/ParquetReaderUtility.java | 94 ++++
.../exec/store/parquet/ParquetRecordWriter.java | 117 +++-
.../store/parquet/ParquetTableMetadataUtils.java | 127 +++--
.../drill/exec/store/parquet/ParquetWriter.java | 2 +-
.../parquet/metadata/FileMetadataCollector.java | 38 +-
.../store/parquet/metadata/MetadataVersion.java | 17 +-
.../exec/store/parquet/metadata/Metadata_V4.java | 92 ++-
.../store/parquet2/DrillParquetGroupConverter.java | 102 ++--
.../parquet2/DrillParquetMapGroupConverter.java | 118 ++++
.../exec/store/parquet2/DrillParquetReader.java | 45 +-
.../org/apache/drill/exec/vector/CopyUtil.java | 12 +-
.../drill/exec/vector/complex/FieldIdUtil.java | 110 +++-
.../java-exec/src/main/resources/drill-module.conf | 1 +
.../drill/exec/compile/TestEvaluationVisitor.java | 2 +-
.../org/apache/drill/exec/expr/ExpressionTest.java | 16 +-
.../physical/impl/writer/TestParquetWriter.java | 78 +++
.../record/ExpressionTreeMaterializerTest.java | 20 +-
.../drill/exec/record/vector/TestDictVector.java | 459 +++++++++++++++
.../apache/drill/exec/record/vector/TestLoad.java | 2 +-
.../drill/exec/store/avro/AvroFormatTest.java | 206 ++++++-
.../apache/drill/exec/store/avro/AvroTestUtil.java | 53 ++
.../exec/store/parquet/TestParquetComplex.java | 615 +++++++++++++++++++++
.../store/parquet/TestParquetMetadataCache.java | 30 +
.../java/org/apache/drill/test/TestBuilder.java | 35 ++
.../drill/test/rowSet/test/TestFillEmpties.java | 1 +
.../parquet/complex/map/parquet/000000_0.parquet | Bin 0 -> 3649 bytes
.../store/parquet/complex/simple_map.parquet | Bin 0 -> 437 bytes
.../src/main/codegen/includes/vv_imports.ftl | 1 +
.../codegen/templates/AbstractFieldReader.java | 31 +-
.../codegen/templates/AbstractFieldWriter.java | 34 ++
.../templates/AbstractPromotableFieldWriter.java | 5 +
.../src/main/codegen/templates/BaseReader.java | 71 ++-
.../src/main/codegen/templates/BaseWriter.java | 20 +
.../main/codegen/templates/BasicTypeHelper.java | 41 ++
.../src/main/codegen/templates/ComplexCopier.java | 17 +
.../src/main/codegen/templates/ComplexReaders.java | 12 +
.../src/main/codegen/templates/ListWriters.java | 35 +-
.../src/main/codegen/templates/MapWriters.java | 81 ++-
.../src/main/codegen/templates/NullReader.java | 26 +-
.../main/codegen/templates/RepeatedDictWriter.java | 151 +++++
.../src/main/codegen/templates/UnionReader.java | 17 +-
.../src/main/codegen/templates/UnionVector.java | 8 +
.../src/main/codegen/templates/UnionWriter.java | 7 +
.../holders/DictHolder.java} | 18 +-
.../holders/RepeatedDictHolder.java} | 15 +-
.../exec/expr/holders/RepeatedListHolder.java | 19 +-
.../drill/exec/expr/holders/RepeatedMapHolder.java | 22 +-
.../holders/RepeatedValueHolder.java} | 18 +-
.../record/metadata/AbstractColumnMetadata.java | 5 +
...etadata.java => AbstractMapColumnMetadata.java} | 89 +--
.../drill/exec/record/metadata/ColumnMetadata.java | 1 +
.../exec/record/metadata/DictColumnMetadata.java | 72 +++
.../exec/record/metadata/MapColumnMetadata.java | 82 +--
.../drill/exec/record/metadata/MetadataUtils.java | 10 +
.../drill/exec/record/metadata/TupleSchema.java | 6 +-
.../main/java/org/apache/drill/exec/util/Text.java | 2 +-
.../accessor/reader/ColumnReaderFactory.java | 1 +
.../accessor/writer/ColumnWriterFactory.java | 2 +
.../exec/vector/complex/AbstractMapVector.java | 2 +-
...pVector.java => AbstractRepeatedMapVector.java} | 356 ++++--------
.../vector/complex/BaseRepeatedValueVector.java | 51 +-
.../drill/exec/vector/complex/DictVector.java | 312 +++++++++++
.../exec/vector/complex/RepeatedDictVector.java | 165 ++++++
.../exec/vector/complex/RepeatedListVector.java | 42 +-
.../exec/vector/complex/RepeatedMapVector.java | 511 ++---------------
.../vector/complex/impl/AbstractBaseReader.java | 5 +
...mpl.java => AbstractRepeatedMapReaderImpl.java} | 82 +--
.../vector/complex/impl/MapOrListWriterImpl.java | 11 +
...ReaderImpl.java => RepeatedDictReaderImpl.java} | 79 ++-
.../complex/impl/RepeatedListReaderImpl.java | 4 +-
.../vector/complex/impl/RepeatedMapReaderImpl.java | 116 +---
.../vector/complex/impl/RepeatedMapWriter.java | 58 ++
.../vector/complex/impl/SingleDictReaderImpl.java | 185 +++++++
.../exec/vector/complex/impl/SingleDictWriter.java | 98 ++++
.../impl/SingleLikeRepeatedMapReaderImpl.java | 2 +-
.../exec/vector/complex/reader/FieldReader.java | 14 +-
.../exec/vector/complex/writer/FieldWriter.java | 3 +-
.../apache/drill/common/expression/SchemaPath.java | 21 +-
.../drill/common/expression/SchemaPathTest.java | 5 +
.../drill/metastore/util/SchemaPathUtils.java | 28 +-
.../org/apache/drill/common/types/TypeProtos.java | 17 +-
protocol/src/main/protobuf/Types.proto | 1 +
109 files changed, 4452 insertions(+), 1562 deletions(-)
diff --git a/common/src/main/java/org/apache/drill/common/types/Types.java b/common/src/main/java/org/apache/drill/common/types/Types.java
index 7a31c7a..cd619d1 100644
--- a/common/src/main/java/org/apache/drill/common/types/Types.java
+++ b/common/src/main/java/org/apache/drill/common/types/Types.java
@@ -49,6 +49,7 @@ public class Types {
switch(type.getMinorType()) {
case LIST:
case MAP:
+ case DICT:
return true;
default:
return false;
@@ -187,7 +188,8 @@ public class Types {
// Composite types and other types that are not atomic types (SQL standard
// or not) except ARRAY types (handled above):
- case MAP: return "STRUCT"; // Drill map represents struct and in future will be renamed
+ case MAP: return "STRUCT"; // Drill map represents struct
+ case DICT: return "MAP";
case LATE: return "ANY";
case NULL: return "NULL";
case UNION: return "UNION";
@@ -270,6 +272,7 @@ public class Types {
case "INTERVAL YEAR TO MONTH": return java.sql.Types.OTHER;
case "INTERVAL DAY TO SECOND": return java.sql.Types.OTHER;
case "STRUCT": return java.sql.Types.OTHER; // Drill doesn't support java.sql.Struct
+ case "MAP": return java.sql.Types.OTHER;
case "NATIONAL CHARACTER VARYING": return java.sql.Types.NVARCHAR;
case "NATIONAL CHARACTER": return java.sql.Types.NCHAR;
case "NULL": return java.sql.Types.NULL;
@@ -344,6 +347,7 @@ public class Types {
case LATE:
case LIST:
case MAP:
+ case DICT:
case UNION:
case NULL:
case TIMETZ: // SQL TIME WITH TIME ZONE
@@ -431,6 +435,7 @@ public class Types {
case INTERVAL:
case MAP:
+ case DICT:
case LATE:
case NULL:
case UNION:
@@ -575,7 +580,6 @@ public class Types {
public static MinorType getMinorTypeFromName(String typeName) {
typeName = typeName.toLowerCase();
-
switch (typeName) {
case "bool":
case "boolean":
@@ -728,8 +732,14 @@ public class Types {
* @return true if type can be used in ORDER BY clause
*/
public static boolean isSortable(MinorType type) {
- // Currently only map and list columns are not sortable.
- return type != MinorType.MAP && type != MinorType.LIST;
+ switch (type) {
+ case DICT:
+ case LIST:
+ case MAP:
+ return false;
+ default:
+ return true;
+ }
}
/**
@@ -830,4 +840,16 @@ public class Types {
return 0;
}
}
+
+ public static boolean isNullable(final MajorType type) {
+ switch (type.getMode()) {
+ case REQUIRED:
+ case REPEATED:
+ return false;
+ case OPTIONAL:
+ return !isComplex(type);
+ default:
+ throw new UnsupportedOperationException("Unexpected/unhandled DataMode value " + type.getMode());
+ }
+ }
}
diff --git a/contrib/native/client/src/include/drill/protobuf/Types.pb.h b/contrib/native/client/src/include/drill/protobuf/Types.pb.h
index ed99a61..0ccf40b 100644
--- a/contrib/native/client/src/include/drill/protobuf/Types.pb.h
+++ b/contrib/native/client/src/include/drill/protobuf/Types.pb.h
@@ -97,11 +97,12 @@ enum MinorType {
LIST = 40,
GENERIC_OBJECT = 41,
UNION = 42,
- VARDECIMAL = 43
+ VARDECIMAL = 43,
+ DICT = 44
};
bool MinorType_IsValid(int value);
const MinorType MinorType_MIN = LATE;
-const MinorType MinorType_MAX = VARDECIMAL;
+const MinorType MinorType_MAX = DICT;
const int MinorType_ARRAYSIZE = MinorType_MAX + 1;
const ::google::protobuf::EnumDescriptor* MinorType_descriptor();
diff --git a/contrib/native/client/src/protobuf/Types.pb.cc b/contrib/native/client/src/protobuf/Types.pb.cc
index 267646b..2960b97 100644
--- a/contrib/native/client/src/protobuf/Types.pb.cc
+++ b/contrib/native/client/src/protobuf/Types.pb.cc
@@ -103,7 +103,7 @@ void AddDescriptorsImpl() {
"de\030\002 \001(\0162\020.common.DataMode\022\r\n\005width\030\003 \001("
"\005\022\021\n\tprecision\030\004 \001(\005\022\r\n\005scale\030\005 \001(\005\022\020\n\010t"
"imeZone\030\006 \001(\005\022#\n\010sub_type\030\007 \003(\0162\021.common"
- ".MinorType*\253\004\n\tMinorType\022\010\n\004LATE\020\000\022\007\n\003MA"
+ ".MinorType*\265\004\n\tMinorType\022\010\n\004LATE\020\000\022\007\n\003MA"
"P\020\001\022\013\n\007TINYINT\020\003\022\014\n\010SMALLINT\020\004\022\007\n\003INT\020\005\022"
"\n\n\006BIGINT\020\006\022\014\n\010DECIMAL9\020\007\022\r\n\tDECIMAL18\020\010"
"\022\023\n\017DECIMAL28SPARSE\020\t\022\023\n\017DECIMAL38SPARSE"
@@ -117,12 +117,13 @@ void AddDescriptorsImpl() {
"\022\022\n\016DECIMAL38DENSE\020\"\022\016\n\nDM_UNKNOWN\020%\022\020\n\014"
"INTERVALYEAR\020&\022\017\n\013INTERVALDAY\020\'\022\010\n\004LIST\020"
"(\022\022\n\016GENERIC_OBJECT\020)\022\t\n\005UNION\020*\022\016\n\nVARD"
- "ECIMAL\020+*=\n\010DataMode\022\017\n\013DM_OPTIONAL\020\000\022\017\n"
- "\013DM_REQUIRED\020\001\022\017\n\013DM_REPEATED\020\002B-\n\035org.a"
- "pache.drill.common.typesB\nTypeProtosH\001"
+ "ECIMAL\020+\022\010\n\004DICT\020,*=\n\010DataMode\022\017\n\013DM_OPT"
+ "IONAL\020\000\022\017\n\013DM_REQUIRED\020\001\022\017\n\013DM_REPEATED\020"
+ "\002B-\n\035org.apache.drill.common.typesB\nType"
+ "ProtosH\001"
};
::google::protobuf::DescriptorPool::InternalAddGeneratedFile(
- descriptor, 878);
+ descriptor, 888);
::google::protobuf::MessageFactory::InternalRegisterGeneratedFile(
"Types.proto", &protobuf_RegisterTypes);
}
@@ -184,6 +185,7 @@ bool MinorType_IsValid(int value) {
case 41:
case 42:
case 43:
+ case 44:
return true;
default:
return false;
diff --git a/exec/java-exec/src/main/codegen/templates/AbstractRecordWriter.java b/exec/java-exec/src/main/codegen/templates/AbstractRecordWriter.java
index 2bd5faf..6982c75 100644
--- a/exec/java-exec/src/main/codegen/templates/AbstractRecordWriter.java
+++ b/exec/java-exec/src/main/codegen/templates/AbstractRecordWriter.java
@@ -71,6 +71,16 @@ public abstract class AbstractRecordWriter implements RecordWriter {
throw new UnsupportedOperationException("Doesn't support writing RepeatedList");
}
+ @Override
+ public FieldConverter getNewDictConverter(int fieldId, String fieldName, FieldReader reader) {
+ throw new UnsupportedOperationException("Doesn't support writing Dict");
+ }
+
+ @Override
+ public FieldConverter getNewRepeatedDictConverter(int fieldId, String fieldName, FieldReader reader) {
+ throw new UnsupportedOperationException("Doesn't support writing RepeatedDict");
+ }
+
<#list vv.types as type>
<#list type.minor as minor>
<#list vv.modes as mode>
diff --git a/exec/java-exec/src/main/codegen/templates/EventBasedRecordWriter.java b/exec/java-exec/src/main/codegen/templates/EventBasedRecordWriter.java
index d87eeb3..a541c63 100644
--- a/exec/java-exec/src/main/codegen/templates/EventBasedRecordWriter.java
+++ b/exec/java-exec/src/main/codegen/templates/EventBasedRecordWriter.java
@@ -146,6 +146,15 @@ public class EventBasedRecordWriter {
return recordWriter.getNewRepeatedMapConverter(fieldId, fieldName, reader);
}
+ case DICT:
+ switch (reader.getType().getMode()) {
+ case REQUIRED:
+ case OPTIONAL:
+ return recordWriter.getNewDictConverter(fieldId, fieldName, reader);
+ case REPEATED:
+ return recordWriter.getNewRepeatedDictConverter(fieldId, fieldName, reader);
+ }
+
case LIST:
return recordWriter.getNewRepeatedListConverter(fieldId, fieldName, reader);
diff --git a/exec/java-exec/src/main/codegen/templates/ParquetOutputRecordWriter.java b/exec/java-exec/src/main/codegen/templates/ParquetOutputRecordWriter.java
index 1da206d..1f6e467 100644
--- a/exec/java-exec/src/main/codegen/templates/ParquetOutputRecordWriter.java
+++ b/exec/java-exec/src/main/codegen/templates/ParquetOutputRecordWriter.java
@@ -71,7 +71,16 @@ import java.util.Map;
*/
public abstract class ParquetOutputRecordWriter extends AbstractRecordWriter implements RecordWriter {
+ /**
+ * Name of nested group for Parquet's {@code LIST} type.
+ * @see <a href="https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists">LIST logical type</a>
+ */
protected static final String LIST = "list";
+
+ /**
+ * Name of Parquet's {@code LIST} element type.
+ * @see #LIST
+ */
protected static final String ELEMENT = "element";
protected static final int ZERO_IDX = 0;
diff --git a/exec/java-exec/src/main/codegen/templates/RecordWriter.java b/exec/java-exec/src/main/codegen/templates/RecordWriter.java
index e07ab7c..d11c2a1 100644
--- a/exec/java-exec/src/main/codegen/templates/RecordWriter.java
+++ b/exec/java-exec/src/main/codegen/templates/RecordWriter.java
@@ -69,6 +69,8 @@ public interface RecordWriter {
public FieldConverter getNewUnionConverter(int fieldId, String fieldName, FieldReader reader);
public FieldConverter getNewRepeatedMapConverter(int fieldId, String fieldName, FieldReader reader);
public FieldConverter getNewRepeatedListConverter(int fieldId, String fieldName, FieldReader reader);
+ public FieldConverter getNewDictConverter(int fieldId, String fieldName, FieldReader reader);
+ public FieldConverter getNewRepeatedDictConverter(int fieldId, String fieldName, FieldReader reader);
<#list vv.types as type>
<#list type.minor as minor>
diff --git a/exec/java-exec/src/main/codegen/templates/TypeHelper.java b/exec/java-exec/src/main/codegen/templates/TypeHelper.java
index 8829c79..73152ce 100644
--- a/exec/java-exec/src/main/codegen/templates/TypeHelper.java
+++ b/exec/java-exec/src/main/codegen/templates/TypeHelper.java
@@ -59,6 +59,7 @@ public class TypeHelper extends BasicTypeHelper {
</#list>
</#list>
case MAP:
+ case DICT:
case LIST:
case NULL:
return new GenericAccessor(vector);
@@ -70,6 +71,8 @@ public class TypeHelper extends BasicTypeHelper {
switch (type) {
case UNION:
return model._ref(UnionHolder.class);
+ case DICT:
+ return model._ref(DictHolder.class);
case MAP:
case LIST:
return model._ref(ComplexHolder.class);
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java b/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
index 463b0a0..066d04d 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
@@ -1109,4 +1109,8 @@ public final class ExecConstants {
new OptionDescription("Specifies the number of attempts for retrying query planning after detecting that query metadata is changed. " +
"If the number of retries was exceeded, query will be planned without metadata information from the Metastore. " +
"This option is not active for now. Default is 5. (Drill 1.17+)"));
+
+ public static final String PARQUET_READER_ENABLE_MAP_SUPPORT = "store.parquet.reader.enable_map_support";
+ public static final BooleanValidator PARQUET_READER_ENABLE_MAP_SUPPORT_VALIDATOR = new BooleanValidator(
+ PARQUET_READER_ENABLE_MAP_SUPPORT, new OptionDescription("Enables Drill Parquet reader to read Parquet MAP type correctly. (Drill 1.17+)"));
}
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/EvaluationVisitor.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/EvaluationVisitor.java
index 1f8a779..4d6203f 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/EvaluationVisitor.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/EvaluationVisitor.java
@@ -17,6 +17,7 @@
*/
package org.apache.drill.exec.expr;
+import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
@@ -73,8 +74,6 @@ import org.apache.drill.exec.vector.ValueHolderHelper;
import org.apache.drill.exec.vector.complex.reader.FieldReader;
import org.apache.drill.shaded.guava.com.google.common.base.Function;
-import org.apache.drill.shaded.guava.com.google.common.collect.Lists;
-import org.apache.drill.shaded.guava.com.google.common.collect.Maps;
import com.sun.codemodel.JBlock;
import com.sun.codemodel.JClass;
import com.sun.codemodel.JConditional;
@@ -139,7 +138,7 @@ public class EvaluationVisitor {
}
}
- Map<ExpressionHolder,HoldingContainer> previousExpressions = Maps.newHashMap();
+ Map<ExpressionHolder,HoldingContainer> previousExpressions = new HashMap<>();
Stack<Map<ExpressionHolder,HoldingContainer>> mapStack = new Stack<>();
@@ -220,23 +219,15 @@ public class EvaluationVisitor {
HoldingContainer output = generator.declare(ifExpr.getMajorType());
- JConditional jc = null;
JBlock conditionalBlock = new JBlock(false, false);
IfCondition c = ifExpr.ifCondition;
HoldingContainer holdingContainer = c.condition.accept(this, generator);
- if (jc == null) {
- if (holdingContainer.isOptional()) {
- jc = conditionalBlock._if(holdingContainer.getIsSet().eq(JExpr.lit(1)).cand(holdingContainer.getValue().eq(JExpr.lit(1))));
- } else {
- jc = conditionalBlock._if(holdingContainer.getValue().eq(JExpr.lit(1)));
- }
+ JConditional jc;
+ if (holdingContainer.isOptional()) {
+ jc = conditionalBlock._if(holdingContainer.getIsSet().eq(JExpr.lit(1)).cand(holdingContainer.getValue().eq(JExpr.lit(1))));
} else {
- if (holdingContainer.isOptional()) {
- jc = jc._else()._if(holdingContainer.getIsSet().eq(JExpr.lit(1)).cand(holdingContainer.getValue().eq(JExpr.lit(1))));
- } else {
- jc = jc._else()._if(holdingContainer.getValue().eq(JExpr.lit(1)));
- }
+ jc = conditionalBlock._if(holdingContainer.getValue().eq(JExpr.lit(1)));
}
generator.nestEvalBlock(jc._then());
@@ -493,6 +484,7 @@ public class EvaluationVisitor {
final boolean listVector = e.getTypedFieldId().isListVector();
if (!hasReadPath && !complex) {
+
JBlock eval = new JBlock();
if (repeated) {
@@ -523,14 +515,32 @@ public class EvaluationVisitor {
eval.add(expr.invoke("setPosition").arg(recordIndex));
int listNum = 0;
+ JVar valueIndex = eval.decl(generator.getModel().INT, "valueIndex", JExpr.lit(-1));
+
+ int depth = 0;
+ boolean isDict = e.getFieldId().isDict(depth);
+
while (seg != null) {
if (seg.isArray()) {
- // stop once we get to the last segment and the final type is neither complex nor repeated (map, list, repeated list).
+
+ // stop once we get to the last segment and the final type is neither complex nor repeated (map, dict, list, repeated list).
// In case of non-complex and non-repeated type, we return Holder, in stead of FieldReader.
if (seg.isLastPath() && !complex && !repeated && !listVector) {
break;
}
+ depth++;
+
+ if (isDict) {
+ JExpression keyExpr = JExpr.lit(seg.getArraySegment().getIndex());
+
+ expr = getDictReaderReadByKeyExpression(generator, eval, expr, keyExpr, valueIndex, isNull);
+
+ seg = seg.getChild();
+ isDict = e.getFieldId().isDict(depth);
+ continue;
+ }
+
JVar list = generator.declareClassField("list", generator.getModel()._ref(FieldReader.class));
eval.assign(list, expr);
@@ -557,6 +567,25 @@ public class EvaluationVisitor {
expr = list.invoke("reader");
listNum++;
} else {
+
+ if (e.getFieldId().isDict(depth)) {
+ depth++;
+ JExpression keyExpr = JExpr.lit(seg.getNameSegment().getPath());
+
+ MajorType finalType = e.getFieldId().getFinalType();
+ if (seg.getChild() == null && !(Types.isComplex(finalType) || Types.isRepeated(finalType))) {
+ // This is the last segment:
+ eval.add(expr.invoke("read").arg(keyExpr).arg(out.getHolder()));
+ return out;
+ }
+
+ expr = getDictReaderReadByKeyExpression(generator, eval, expr, keyExpr, valueIndex, isNull);
+
+ seg = seg.getChild();
+ isDict = e.getFieldId().isDict(depth);
+ continue;
+ }
+
JExpression fieldName = JExpr.lit(seg.getNameSegment().getPath());
expr = expr.invoke("reader").arg(fieldName);
}
@@ -564,7 +593,14 @@ public class EvaluationVisitor {
}
if (complex || repeated) {
- // //
+
+ if (isDict) {
+ JVar dictReader = generator.declareClassField("dictReader", generator.getModel()._ref(FieldReader.class));
+ eval.assign(dictReader, expr);
+
+ return new HoldingContainer(e.getMajorType(), dictReader, null, null, false, true);
+ }
+
JVar complexReader = generator.declareClassField("reader", generator.getModel()._ref(FieldReader.class));
if (isNullReaderLikely) {
@@ -574,13 +610,10 @@ public class EvaluationVisitor {
JExpression nullReader;
if (complex) {
nullReader = nrClass.staticRef("EMPTY_MAP_INSTANCE");
- } else if (repeated) {
- nullReader = nrClass.staticRef("EMPTY_LIST_INSTANCE");
} else {
- nullReader = nrClass.staticRef("INSTANCE");
+ nullReader = nrClass.staticRef("EMPTY_LIST_INSTANCE");
}
-
jc._then().assign(complexReader, expr);
jc._else().assign(complexReader, nullReader);
} else {
@@ -591,7 +624,11 @@ public class EvaluationVisitor {
return hc;
} else {
if (seg != null) {
- eval.add(expr.invoke("read").arg(JExpr.lit(seg.getArraySegment().getIndex())).arg(out.getHolder()));
+ JExpression holderExpr = out.getHolder();
+ if (e.getFieldId().isDict(depth)) {
+ holderExpr = JExpr.cast(generator.getModel()._ref(ValueHolder.class), holderExpr);
+ }
+ eval.add(expr.invoke("read").arg(JExpr.lit(seg.getArraySegment().getIndex())).arg(holderExpr));
} else {
eval.add(expr.invoke("read").arg(out.getHolder()));
}
@@ -622,6 +659,39 @@ public class EvaluationVisitor {
return false;
}
+ /**
+ * Adds code to {@code eval} block which reads values by key from {@code expr} which is an instance of
+ * {@link org.apache.drill.exec.vector.complex.reader.BaseReader.DictReader}.
+ *
+ *
+ * @param generator current class generator
+ * @param eval evaluation block the code will be added to
+ * @param expr DICT reader to read values from
+ * @param keyExpr key literal
+ * @param valueIndex current value index (will be reassigned in the method)
+ * @param isNull variable to indicate whether entry with the key exists in the DICT.
+ * Will be set to {@literal 1} if the key is not present
+ * @return expression corresponding to {@link org.apache.drill.exec.vector.complex.DictVector#FIELD_VALUE_NAME}'s
+ * reader with its position set to index corresponding to the key
+ */
+ private JExpression getDictReaderReadByKeyExpression(ClassGenerator generator, JBlock eval, JExpression expr,
+ JExpression keyExpr, JVar valueIndex, JVar isNull) {
+ JVar dictReader = generator.declareClassField("dictReader", generator.getModel()._ref(FieldReader.class));
+ eval.assign(dictReader, expr);
+ eval.assign(valueIndex, expr.invoke("find").arg(keyExpr));
+
+ JConditional conditional = eval._if(valueIndex.gt(JExpr.lit(-1)));
+ JBlock ifFound = conditional._then().block();
+ expr = dictReader.invoke("reader").arg(JExpr.lit("value"));
+ ifFound.add(expr.invoke("setPosition").arg(valueIndex));
+
+ JBlock elseBlock = conditional._else().block();
+ elseBlock.add(dictReader.invoke("setPosition").arg(valueIndex));
+ elseBlock.assign(isNull, JExpr.lit(1));
+
+ return expr;
+ }
+
private HoldingContainer visitReturnValueExpression(ReturnValueExpression e, ClassGenerator<?> generator) {
LogicalExpression child = e.getChild();
// Preconditions.checkArgument(child.getMajorType().equals(Types.REQUIRED_BOOLEAN));
@@ -709,7 +779,7 @@ public class EvaluationVisitor {
throws RuntimeException {
String convertFunctionName = e.getConvertFunction() + e.getEncodingType();
- List<LogicalExpression> newArgs = Lists.newArrayList();
+ List<LogicalExpression> newArgs = new ArrayList<>();
newArgs.add(e.getInput()); // input_expr
FunctionCall fc = new FunctionCall(convertFunctionName, newArgs, e.getPosition());
@@ -720,7 +790,7 @@ public class EvaluationVisitor {
public HoldingContainer visitAnyValueExpression(AnyValueExpression e, ClassGenerator<?> value)
throws RuntimeException {
- List<LogicalExpression> newArgs = Lists.newArrayList();
+ List<LogicalExpression> newArgs = new ArrayList<>();
newArgs.add(e.getInput()); // input_expr
FunctionCall fc = new FunctionCall(AnyValueExpression.ANY_VALUE, newArgs, e.getPosition());
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/DrillAggFuncHolder.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/DrillAggFuncHolder.java
index bb6123a..aac0abb 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/DrillAggFuncHolder.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/DrillAggFuncHolder.java
@@ -170,7 +170,9 @@ class DrillAggFuncHolder extends DrillFuncHolder {
workspaceJVars[i] = g.declareClassField("work", g.getHolderType(getWorkspaceVars()[i].getMajorType()));
//Declare a workspace vector for the workspace var.
- TypedFieldId typedFieldId = new TypedFieldId(getWorkspaceVars()[i].getMajorType(), g.getWorkspaceTypes().size());
+ TypedFieldId typedFieldId = new TypedFieldId.Builder().finalType(getWorkspaceVars()[i].getMajorType())
+ .addId(g.getWorkspaceTypes().size())
+ .build();
JVar vv = g.declareVectorValueSetupAndMember(g.getMappingSet().getWorkspace(), typedFieldId);
g.getWorkspaceTypes().add(typedFieldId);
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/UnionFunctions.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/UnionFunctions.java
index 4efa0c5..1820a2f 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/UnionFunctions.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/UnionFunctions.java
@@ -147,7 +147,7 @@ public class UnionFunctions {
String typeName;
if (input.isSet()) {
- typeName = input.getType().getMinorType().name();
+ typeName = input.getTypeString();
} else {
typeName = org.apache.drill.common.types.TypeProtos.MinorType.NULL.name();
}
@@ -206,7 +206,7 @@ public class UnionFunctions {
@Override
public void eval() {
- String typeName = input.getType().getMinorType().name();
+ String typeName = input.getTypeString();
byte[] type = typeName.getBytes();
buf = buf.reallocIfNeeded(type.length);
buf.setBytes(0, type);
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/flatten/FlattenRecordBatch.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/flatten/FlattenRecordBatch.java
index 58cc31e..7877c6b 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/flatten/FlattenRecordBatch.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/flatten/FlattenRecordBatch.java
@@ -18,6 +18,7 @@
package org.apache.drill.exec.physical.impl.flatten;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.List;
import org.apache.drill.common.exceptions.UserException;
@@ -55,12 +56,12 @@ import org.apache.drill.exec.record.VectorWrapper;
import org.apache.drill.exec.util.record.RecordBatchStats;
import org.apache.drill.exec.util.record.RecordBatchStats.RecordBatchIOType;
import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.complex.AbstractRepeatedMapVector;
import org.apache.drill.exec.vector.complex.RepeatedMapVector;
import org.apache.drill.exec.vector.complex.RepeatedValueVector;
import org.apache.drill.exec.vector.complex.writer.BaseWriter.ComplexWriter;
import com.carrotsearch.hppc.IntHashSet;
-import org.apache.drill.shaded.guava.com.google.common.collect.Lists;
import com.sun.codemodel.JExpr;
// TODO - handle the case where a user tries to flatten a scalar, should just act as a project all of the columns exactly
@@ -364,8 +365,8 @@ public class FlattenRecordBatch extends AbstractSingleRecordBatch<FlattenPOP> {
final ValueVector flattenField = incoming.getValueAccessorById(vectorClass, fieldId.getFieldIds()).getValueVector();
TransferPair tp = null;
- if (flattenField instanceof RepeatedMapVector) {
- tp = ((RepeatedMapVector)flattenField).getTransferPairToSingleMap(reference.getAsNamePart().getName(), oContext.getAllocator());
+ if (flattenField instanceof AbstractRepeatedMapVector) {
+ tp = ((AbstractRepeatedMapVector) flattenField).getTransferPairToSingleMap(reference.getAsNamePart().getName(), oContext.getAllocator());
} else if ( !(flattenField instanceof RepeatedValueVector) ) {
if(incoming.getRecordCount() != 0) {
throw UserException.unsupportedError().message("Flatten does not support inputs of non-list values.").build(logger);
@@ -386,11 +387,11 @@ public class FlattenRecordBatch extends AbstractSingleRecordBatch<FlattenPOP> {
@Override
protected boolean setupNewSchema() throws SchemaChangeException {
- this.allocationVectors = Lists.newArrayList();
+ this.allocationVectors = new ArrayList<>();
container.clear();
final List<NamedExpression> exprs = getExpressionList();
final ErrorCollector collector = new ErrorCollectorImpl();
- final List<TransferPair> transfers = Lists.newArrayList();
+ final List<TransferPair> transfers = new ArrayList<>();
final ClassGenerator<Flattener> cg = CodeGenerator.getRoot(Flattener.TEMPLATE_DEFINITION, context.getOptions());
cg.getCodeGenerator().plainJavaCapable(true);
@@ -443,7 +444,7 @@ public class FlattenRecordBatch extends AbstractSingleRecordBatch<FlattenPOP> {
// Need to process ComplexWriter function evaluation.
// Lazy initialization of the list of complex writers, if not done yet.
if (complexWriters == null) {
- complexWriters = Lists.newArrayList();
+ complexWriters = new ArrayList<>();
}
// The reference name will be passed to ComplexWriter, used as the name of the output vector from the writer.
@@ -491,7 +492,7 @@ public class FlattenRecordBatch extends AbstractSingleRecordBatch<FlattenPOP> {
private List<NamedExpression> getExpressionList() {
- List<NamedExpression> exprs = Lists.newArrayList();
+ List<NamedExpression> exprs = new ArrayList<>();
for (MaterializedField field : incoming.getSchema()) {
String fieldName = field.getName();
if (fieldName.equals(popConfig.getColumn().getRootSegmentPath())) {
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/join/MergeJoinBatch.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/join/MergeJoinBatch.java
index 9f3f1bf..7c59b4d 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/join/MergeJoinBatch.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/join/MergeJoinBatch.java
@@ -383,10 +383,14 @@ public class MergeJoinBatch extends AbstractBinaryRecordBatch<MergeJoinPOP> {
outputType = inputType;
}
// TODO (DRILL-4011): Factor out CopyUtil and use it here.
- JVar vvIn = cg.declareVectorValueSetupAndMember("incomingLeft",
- new TypedFieldId(inputType, vectorId));
- JVar vvOut = cg.declareVectorValueSetupAndMember("outgoing",
- new TypedFieldId(outputType,vectorId));
+ TypedFieldId inTypedFieldId = new TypedFieldId.Builder().finalType(inputType)
+ .addId(vectorId)
+ .build();
+ JVar vvIn = cg.declareVectorValueSetupAndMember("incomingLeft", inTypedFieldId);
+ TypedFieldId outTypedFieldId = new TypedFieldId.Builder().finalType(outputType)
+ .addId(vectorId)
+ .build();
+ JVar vvOut = cg.declareVectorValueSetupAndMember("outgoing", outTypedFieldId);
// todo: check result of copyFromSafe and grow allocation
cg.getEvalBlock().add(vvOut.invoke("copyFromSafe")
.arg(copyLeftMapping.getValueReadIndex())
@@ -412,10 +416,14 @@ public class MergeJoinBatch extends AbstractBinaryRecordBatch<MergeJoinPOP> {
outputType = inputType;
}
// TODO (DRILL-4011): Factor out CopyUtil and use it here.
- JVar vvIn = cg.declareVectorValueSetupAndMember("incomingRight",
- new TypedFieldId(inputType, vectorId - rightVectorBase));
- JVar vvOut = cg.declareVectorValueSetupAndMember("outgoing",
- new TypedFieldId(outputType,vectorId));
+ TypedFieldId inTypedFieldId = new TypedFieldId.Builder().finalType(inputType)
+ .addId(vectorId - rightVectorBase)
+ .build();
+ JVar vvIn = cg.declareVectorValueSetupAndMember("incomingRight", inTypedFieldId);
+ TypedFieldId outTypedFieldId = new TypedFieldId.Builder().finalType(outputType)
+ .addId(vectorId)
+ .build();
+ JVar vvOut = cg.declareVectorValueSetupAndMember("outgoing", outTypedFieldId);
// todo: check result of copyFromSafe and grow allocation
cg.getEvalBlock().add(vvOut.invoke("copyFromSafe")
.arg(copyRightMappping.getValueReadIndex())
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/join/NestedLoopJoinBatch.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/join/NestedLoopJoinBatch.java
index 14786a1..7513ebd 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/join/NestedLoopJoinBatch.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/join/NestedLoopJoinBatch.java
@@ -294,10 +294,16 @@ public class NestedLoopJoinBatch extends AbstractBinaryRecordBatch<NestedLoopJoi
// Add the vector to the output container
container.addOrGet(field);
- JVar inVV = nLJClassGenerator.declareVectorValueSetupAndMember("leftBatch",
- new TypedFieldId(fieldType, false, fieldId));
- JVar outVV = nLJClassGenerator.declareVectorValueSetupAndMember("outgoing",
- new TypedFieldId(fieldType, false, outputFieldId));
+ TypedFieldId inFieldId = new TypedFieldId.Builder().finalType(fieldType)
+ .hyper(false)
+ .addId(fieldId)
+ .build();
+ JVar inVV = nLJClassGenerator.declareVectorValueSetupAndMember("leftBatch", inFieldId);
+ TypedFieldId outFieldId = new TypedFieldId.Builder().finalType(fieldType)
+ .hyper(false)
+ .addId(outputFieldId)
+ .build();
+ JVar outVV = nLJClassGenerator.declareVectorValueSetupAndMember("outgoing", outFieldId);
nLJClassGenerator.getEvalBlock().add(outVV.invoke("copyFromSafe").arg(leftIndex).arg(outIndex).arg(inVV));
nLJClassGenerator.rotateBlock();
@@ -328,10 +334,16 @@ public class NestedLoopJoinBatch extends AbstractBinaryRecordBatch<NestedLoopJoi
MaterializedField newField = MaterializedField.create(field.getName(), outputType);
container.addOrGet(newField);
- JVar inVV = nLJClassGenerator.declareVectorValueSetupAndMember("rightContainer",
- new TypedFieldId(inputType, true, fieldId));
- JVar outVV = nLJClassGenerator.declareVectorValueSetupAndMember("outgoing",
- new TypedFieldId(outputType, false, outputFieldId));
+ TypedFieldId inFieldId = new TypedFieldId.Builder().finalType(inputType)
+ .hyper(true)
+ .addId(fieldId)
+ .build();
+ JVar inVV = nLJClassGenerator.declareVectorValueSetupAndMember("rightContainer", inFieldId);
+ TypedFieldId outFieldId = new TypedFieldId.Builder().finalType(outputType)
+ .hyper(false)
+ .addId(outputFieldId)
+ .build();
+ JVar outVV = nLJClassGenerator.declareVectorValueSetupAndMember("outgoing", outFieldId);
nLJClassGenerator.getEvalBlock().add(outVV.invoke("copyFromSafe")
.arg(recordIndexWithinBatch)
.arg(outIndex)
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/orderedpartitioner/OrderedPartitionRecordBatch.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/orderedpartitioner/OrderedPartitionRecordBatch.java
index 4881d4b..e8b522c 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/orderedpartitioner/OrderedPartitionRecordBatch.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/orderedpartitioner/OrderedPartitionRecordBatch.java
@@ -610,8 +610,11 @@ public class OrderedPartitionRecordBatch extends AbstractRecordBatch<OrderedPart
cg.setMappingSet(incomingMapping);
ClassGenerator.HoldingContainer left = cg.addExpr(expr, ClassGenerator.BlkCreateMode.FALSE);
cg.setMappingSet(partitionMapping);
+ TypedFieldId fieldId = new TypedFieldId.Builder().finalType(expr.getMajorType())
+ .addId(count++)
+ .build();
ClassGenerator.HoldingContainer right = cg.addExpr(
- new ValueVectorReadExpression(new TypedFieldId(expr.getMajorType(), count++)), ClassGenerator.BlkCreateMode.FALSE);
+ new ValueVectorReadExpression(fieldId), ClassGenerator.BlkCreateMode.FALSE);
cg.setMappingSet(mainMapping);
// next we wrap the two comparison sides and add the expression block for the comparison.
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsAggBatch.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsAggBatch.java
index dcb6a2c..c418933 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsAggBatch.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsAggBatch.java
@@ -25,7 +25,6 @@ import org.apache.drill.common.expression.FunctionCallFactory;
import org.apache.drill.common.expression.LogicalExpression;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.common.expression.ValueExpressions;
-import org.apache.drill.common.types.TypeProtos;
import org.apache.drill.exec.exception.ClassTransformationException;
import org.apache.drill.exec.exception.OutOfMemoryException;
import org.apache.drill.exec.exception.SchemaChangeException;
@@ -229,24 +228,16 @@ public class StatisticsAggBatch extends StreamingAggBatch {
}
private boolean isColMinorTypeValid(MaterializedField mf) throws UnsupportedOperationException {
- String mTypeStr = null;
- if (mf.getType().getMinorType() == TypeProtos.MinorType.GENERIC_OBJECT) {
- mTypeStr = "GENERIC OBJECT";
- } else if (mf.getType().getMinorType() == TypeProtos.MinorType.LATE) {
- mTypeStr = "LATE";
- }else if (mf.getType().getMinorType() == TypeProtos.MinorType.LIST) {
- mTypeStr = "LIST";
- } else if (mf.getType().getMinorType() == TypeProtos.MinorType.MAP) {
- mTypeStr = "MAP";
- } else if (mf.getType().getMinorType() == TypeProtos.MinorType.UNION) {
- mTypeStr = "UNION";
- }
- if (mTypeStr != null) {
- return false;
- //throw new UnsupportedOperationException(String.format("Column %s has data-type %s which is not supported",
- // mf.getName(), mTypeStr));
- } else {
- return true;
+ switch (mf.getType().getMinorType()) {
+ case GENERIC_OBJECT:
+ case LATE:
+ case LIST:
+ case MAP:
+ case DICT:
+ case UNION:
+ return false;
+ default:
+ return true;
}
}
}
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/index/SimpleRexRemap.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/index/SimpleRexRemap.java
index 812c5f6..946b118 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/index/SimpleRexRemap.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/index/SimpleRexRemap.java
@@ -17,6 +17,7 @@
*/
package org.apache.drill.exec.planner.index;
+import org.apache.drill.exec.util.Utilities;
import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableMap;
import org.apache.drill.shaded.guava.com.google.common.collect.Maps;
import org.apache.calcite.rel.RelNode;
@@ -166,17 +167,6 @@ public class SimpleRexRemap {
getFullPath(nameSeg.getChild()));
}
- private static PathSegment convertLiteral(RexLiteral literal) {
- switch (literal.getType().getSqlTypeName()) {
- case CHAR:
- return new PathSegment.NameSegment(RexLiteral.stringValue(literal));
- case INTEGER:
- return new PathSegment.ArraySegment(RexLiteral.intValue(literal));
- default:
- return null;
- }
- }
-
/**
* This class go through the RexNode, collect all the fieldNames, mark starting positions(RexNode) of fields
* so this information can be used later e,.g. replaced with a substitute node
@@ -230,7 +220,7 @@ public class SimpleRexRemap {
if (mapOrArray != null) {
if (call.operands.get(1) instanceof RexLiteral) {
PathSegment newFieldPath = newPath(
- mapOrArray.cloneWithNewChild(convertLiteral((RexLiteral) call.operands.get(1))),
+ mapOrArray.cloneWithNewChild(Utilities.convertLiteral((RexLiteral) call.operands.get(1))),
call);
return newFieldPath;
}
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatchLoader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatchLoader.java
index 224ed6f..0b36fac 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatchLoader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatchLoader.java
@@ -114,12 +114,12 @@ public class RecordBatchLoader implements VectorAccessible, Iterable<VectorWrapp
schemaChanged = true;
vector = TypeHelper.getNewVector(fieldDef, allocator);
- // If the field is a map, check if the map schema changed.
+ // If the field is a map or a dict, check if the schema changed.
- } else if (vector.getField().getType().getMinorType() == MinorType.MAP &&
+ } else if ((vector.getField().getType().getMinorType() == MinorType.MAP || vector.getField().getType().getMinorType() == MinorType.DICT) &&
! isSameSchema(vector.getField().getChildren(), field.getChildList())) {
- // The map schema changed. Discard the old map and create a new one.
+ // The schema changed. Discard the old one and create a new one.
schemaChanged = true;
vector.clear();
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatchSizer.java b/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatchSizer.java
index 679f173..3db9814 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatchSizer.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatchSizer.java
@@ -37,8 +37,8 @@ import org.apache.drill.exec.vector.UInt4Vector;
import org.apache.drill.exec.vector.UntypedNullVector;
import org.apache.drill.exec.vector.ValueVector;
import org.apache.drill.exec.vector.complex.AbstractMapVector;
+import org.apache.drill.exec.vector.complex.AbstractRepeatedMapVector;
import org.apache.drill.exec.vector.complex.RepeatedListVector;
-import org.apache.drill.exec.vector.complex.RepeatedMapVector;
import org.apache.drill.exec.vector.complex.RepeatedValueVector;
import org.apache.drill.exec.vector.VariableWidthVector;
@@ -326,9 +326,15 @@ public class RecordBatchSizer {
}
public boolean isComplex() {
- return metadata.getType().getMinorType() == MinorType.MAP ||
- metadata.getType().getMinorType() == MinorType.UNION ||
- metadata.getType().getMinorType() == MinorType.LIST;
+ switch (metadata.getType().getMinorType()) {
+ case LIST:
+ case MAP:
+ case DICT:
+ case UNION:
+ return true;
+ default:
+ return false;
+ }
}
public boolean isRepeatedList() {
@@ -457,8 +463,8 @@ public class RecordBatchSizer {
}
private void allocateMap(AbstractMapVector map, int recordCount) {
- if (map instanceof RepeatedMapVector) {
- ((RepeatedMapVector) map).allocateOffsetsNew(recordCount);
+ if (map instanceof AbstractRepeatedMapVector) {
+ ((AbstractRepeatedMapVector) map).allocateOffsetsNew(recordCount);
recordCount *= getEntryCardinalityForAlloc();
}
@@ -761,9 +767,10 @@ public class RecordBatchSizer {
ColumnSize colSize = new ColumnSize(v, prefix);
switch (v.getField().getType().getMinorType()) {
case MAP:
+ case DICT:
// Maps consume no size themselves. However, their contained
// vectors do consume space, so visit columns recursively.
- expandMap(colSize, (AbstractMapVector) v, prefix + v.getField().getName() + ".");
+ expandMap(colSize, v, prefix + v.getField().getName() + ".");
break;
case LIST:
// complex ListVector cannot be casted to RepeatedListVector.
@@ -783,16 +790,15 @@ public class RecordBatchSizer {
return colSize;
}
- private void expandMap(ColumnSize colSize, AbstractMapVector mapVector, String prefix) {
+ private void expandMap(ColumnSize colSize, ValueVector mapVector, String prefix) {
for (ValueVector vector : mapVector) {
colSize.children.put(vector.getField().getName(), measureColumn(vector, prefix));
}
// For a repeated map, we need the memory for the offset vector (only).
// Map elements are recursively expanded above.
-
if (mapVector.getField().getDataMode() == DataMode.REPEATED) {
- ((RepeatedMapVector) mapVector).getOffsetVector().collectLedgers(ledgers);
+ ((RepeatedValueVector) mapVector).getOffsetVector().collectLedgers(ledgers);
}
}
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/record/TypedFieldId.java b/exec/java-exec/src/main/java/org/apache/drill/exec/record/TypedFieldId.java
index d089115..178ddfa 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/record/TypedFieldId.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/record/TypedFieldId.java
@@ -18,6 +18,7 @@
package org.apache.drill.exec.record;
import java.util.Arrays;
+import java.util.BitSet;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.drill.common.expression.PathSegment;
@@ -35,44 +36,46 @@ import org.apache.drill.shaded.guava.com.google.common.base.Preconditions;
*/
public class TypedFieldId {
- final MajorType finalType;
- final MajorType secondaryFinal;
- final MajorType intermediateType;
- final int[] fieldIds;
- final boolean isHyperReader;
- final boolean isListVector;
- final PathSegment remainder;
-
- public TypedFieldId(MajorType type, int... fieldIds) {
- this(type, type, type, false, null, fieldIds);
- }
-
- public TypedFieldId(MajorType type, IntArrayList breadCrumb, PathSegment remainder) {
- this(type, type, type, false, remainder, breadCrumb.toArray());
- }
-
- public TypedFieldId(MajorType type, boolean isHyper, int... fieldIds) {
- this(type, type, type, isHyper, null, fieldIds);
- }
- public TypedFieldId(MajorType intermediateType, MajorType secondaryFinal, MajorType finalType, boolean isHyper, PathSegment remainder, int... fieldIds) {
- this(intermediateType, secondaryFinal, finalType, isHyper, false, remainder, fieldIds);
- }
+ private final MajorType finalType;
+ private final MajorType secondaryFinal;
+ private final MajorType intermediateType;
+ private final int[] fieldIds;
+ private final boolean isHyperReader;
+ private final boolean isListVector;
+ private final PathSegment remainder;
- public TypedFieldId(MajorType intermediateType, MajorType secondaryFinal, MajorType finalType, boolean isHyper, boolean isListVector, PathSegment remainder, int... fieldIds) {
- super();
- this.intermediateType = intermediateType;
- this.finalType = finalType;
- this.secondaryFinal = secondaryFinal;
- this.fieldIds = fieldIds;
- this.isHyperReader = isHyper;
- this.isListVector = isListVector;
- this.remainder = remainder;
+ /**
+ * Used to determine if a dict is placed at a specific depth
+ */
+ private final BitSet dictBitSet;
+
+ private TypedFieldId(Builder builder) {
+ this.intermediateType = builder.intermediateType;
+ this.finalType = builder.finalType;
+ this.secondaryFinal = builder.secondaryFinal;
+ this.fieldIds = builder.ids.toArray();
+ this.isHyperReader = builder.hyperReader;
+ this.isListVector = builder.isListVector;
+ this.remainder = builder.remainder;
+ this.dictBitSet = builder.dictBitSet;
}
public TypedFieldId cloneWithChild(int id) {
int[] fieldIds = ArrayUtils.add(this.fieldIds, id);
- return new TypedFieldId(intermediateType, secondaryFinal, finalType, isHyperReader, remainder, fieldIds);
+ return getBuilder().clearAndAddIds(fieldIds)
+ .build();
+ }
+
+ private Builder getBuilder() {
+ return new Builder().intermediateType(intermediateType)
+ .finalType(finalType)
+ .secondaryFinal(secondaryFinal)
+ .addIds(fieldIds)
+ .remainder(remainder)
+ .copyDictBitSet(dictBitSet)
+ .hyper(isHyperReader)
+ .listVector(isListVector);
}
public PathSegment getLastSegment() {
@@ -87,7 +90,8 @@ public class TypedFieldId {
}
public TypedFieldId cloneWithRemainder(PathSegment remainder) {
- return new TypedFieldId(intermediateType, secondaryFinal, finalType, isHyperReader, remainder, fieldIds);
+ return getBuilder().remainder(remainder)
+ .build();
}
public boolean hasRemainder() {
@@ -111,6 +115,16 @@ public class TypedFieldId {
}
/**
+ * Check if it is a {@link org.apache.drill.common.types.TypeProtos.MinorType#DICT} type at a given segment's depth
+ *
+ * @param depth depth of interest starting with {@literal 0}
+ * @return {@code true} if it is DICT, {@code false} otherwise
+ */
+ public boolean isDict(int depth) {
+ return dictBitSet.get(depth);
+ }
+
+ /**
* Return the class for the value vector (type, mode).
*
* @return the specific, generated ValueVector subclass that
@@ -118,8 +132,7 @@ public class TypedFieldId {
*/
public Class<? extends ValueVector> getIntermediateClass() {
- return (Class<? extends ValueVector>) BasicTypeHelper.getValueVectorClass(intermediateType.getMinorType(),
- intermediateType.getMode());
+ return BasicTypeHelper.getValueVectorClass(intermediateType.getMinorType(), intermediateType.getMode());
}
public MajorType getFinalType() {
@@ -138,15 +151,16 @@ public class TypedFieldId {
return new Builder();
}
- public static class Builder{
+ public static class Builder {
final IntArrayList ids = new IntArrayList();
MajorType finalType;
MajorType intermediateType;
MajorType secondaryFinal;
PathSegment remainder;
- boolean hyperReader = false;
- boolean withIndex = false;
- boolean isListVector = false;
+ boolean hyperReader;
+ boolean withIndex;
+ boolean isListVector;
+ BitSet dictBitSet = new BitSet();
public Builder addId(int id) {
ids.add(id);
@@ -163,13 +177,13 @@ public class TypedFieldId {
return this;
}
- public Builder hyper() {
- this.hyperReader = true;
+ public Builder hyper(boolean hyper) {
+ this.hyperReader = hyper;
return this;
}
- public Builder listVector() {
- this.isListVector = true;
+ public Builder listVector(boolean listVector) {
+ this.isListVector = listVector;
return this;
}
@@ -188,13 +202,41 @@ public class TypedFieldId {
return this;
}
+ public Builder setDict(int depth) {
+ this.dictBitSet.set(depth, true);
+ return this;
+ }
+
+ public Builder resetDictBitSet() {
+ dictBitSet.clear();
+ return this;
+ }
+
+ private Builder addIds(int... ids) {
+ for (int id : ids) {
+ this.ids.add(id);
+ }
+ return this;
+ }
+
+ private Builder clearAndAddIds(int[] ids) {
+ this.ids.clear();
+ addIds(ids);
+ return this;
+ }
+
+ private Builder copyDictBitSet(BitSet dictBitSet) {
+ this.dictBitSet.or(dictBitSet);
+ return this;
+ }
+
public TypedFieldId build() {
- Preconditions.checkNotNull(intermediateType);
Preconditions.checkNotNull(finalType);
if (intermediateType == null) {
intermediateType = finalType;
}
+
if (secondaryFinal == null) {
secondaryFinal = finalType;
}
@@ -209,7 +251,7 @@ public class TypedFieldId {
// TODO: there is a bug here with some things.
//if(intermediateType != finalType) actualFinalType = finalType.toBuilder().setMode(DataMode.OPTIONAL).build();
- return new TypedFieldId(intermediateType, secondaryFinal, actualFinalType, hyperReader, isListVector, remainder, ids.toArray());
+ return new TypedFieldId(this);
}
}
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/record/VectorContainer.java b/exec/java-exec/src/main/java/org/apache/drill/exec/record/VectorContainer.java
index d7f77bf..c2553c2 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/record/VectorContainer.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/record/VectorContainer.java
@@ -251,7 +251,9 @@ public class VectorContainer implements VectorAccessible {
schema = null;
int i = wrappers.size();
wrappers.add(SimpleVectorWrapper.create(vv));
- return new TypedFieldId(vv.getField().getType(), i);
+ return new TypedFieldId.Builder().finalType(vv.getField().getType())
+ .addId(i)
+ .build();
}
public ValueVector getLast() {
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/record/VectorInitializer.java b/exec/java-exec/src/main/java/org/apache/drill/exec/record/VectorInitializer.java
index 206df8d..83c0142 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/record/VectorInitializer.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/record/VectorInitializer.java
@@ -24,7 +24,7 @@ import java.util.Map.Entry;
import org.apache.drill.exec.vector.AllocationHelper;
import org.apache.drill.exec.vector.ValueVector;
import org.apache.drill.exec.vector.complex.AbstractMapVector;
-import org.apache.drill.exec.vector.complex.RepeatedMapVector;
+import org.apache.drill.exec.vector.complex.AbstractRepeatedMapVector;
import org.apache.drill.common.map.CaseInsensitiveMap;
import org.apache.drill.shaded.guava.com.google.common.annotations.VisibleForTesting;
@@ -135,8 +135,8 @@ public class VectorInitializer {
}
private void allocateMap(AbstractMapVector map, String prefix, int recordCount, AllocationHint hint) {
- if (map instanceof RepeatedMapVector) {
- ((RepeatedMapVector) map).allocateOffsetsNew(recordCount);
+ if (map instanceof AbstractRepeatedMapVector) {
+ ((AbstractRepeatedMapVector) map).allocateOffsetsNew(recordCount);
if (hint == null) {
recordCount *= 10;
} else {
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/resolver/ResolverTypePrecedence.java b/exec/java-exec/src/main/java/org/apache/drill/exec/resolver/ResolverTypePrecedence.java
index 247594f..bf0bb22 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/resolver/ResolverTypePrecedence.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/resolver/ResolverTypePrecedence.java
@@ -78,6 +78,7 @@ public class ResolverTypePrecedence {
precedenceMap.put(MinorType.INTERVALYEAR, i+= 2);
precedenceMap.put(MinorType.INTERVAL, i+= 2);
precedenceMap.put(MinorType.MAP, i += 2);
+ precedenceMap.put(MinorType.DICT, i += 2);
precedenceMap.put(MinorType.LIST, i += 2);
precedenceMap.put(MinorType.UNION, i += 2);
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java b/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
index d4289fb..9438870 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
@@ -294,7 +294,8 @@ public class SystemOptionManager extends BaseOptionManager implements AutoClosea
new OptionDefinition(ExecConstants.METASTORE_USE_STATISTICS_METADATA_VALIDATOR),
new OptionDefinition(ExecConstants.METASTORE_CTAS_AUTO_COLLECT_METADATA_VALIDATOR),
new OptionDefinition(ExecConstants.METASTORE_FALLBACK_TO_FILE_METADATA_VALIDATOR),
- new OptionDefinition(ExecConstants.METASTORE_RETRIVAL_RETRY_ATTEMPTS_VALIDATOR)
+ new OptionDefinition(ExecConstants.METASTORE_RETRIVAL_RETRY_ATTEMPTS_VALIDATOR),
+ new OptionDefinition(ExecConstants.PARQUET_READER_ENABLE_MAP_SUPPORT_VALIDATOR, new OptionMetaData(OptionValue.AccessibleScopes.SYSTEM_AND_SESSION, false, false))
};
CaseInsensitiveMap<OptionDefinition> map = Arrays.stream(definitions)
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/avro/AvroRecordReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/avro/AvroRecordReader.java
index 7b89dae..541ff9c 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/avro/AvroRecordReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/avro/AvroRecordReader.java
@@ -23,8 +23,8 @@ import java.math.BigDecimal;
import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.security.PrivilegedExceptionAction;
-import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.TimeUnit;
@@ -50,9 +50,11 @@ import org.apache.drill.exec.server.options.OptionManager;
import org.apache.drill.exec.store.AbstractRecordReader;
import org.apache.drill.exec.store.parquet.ParquetReaderUtility;
import org.apache.drill.exec.util.ImpersonationUtil;
+import org.apache.drill.exec.vector.complex.DictVector;
import org.apache.drill.exec.vector.complex.fn.FieldSelection;
import org.apache.drill.exec.vector.complex.impl.MapOrListWriterImpl;
import org.apache.drill.exec.vector.complex.impl.VectorContainerWriter;
+import org.apache.drill.exec.vector.complex.writer.BaseWriter;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.security.UserGroupInformation;
@@ -207,8 +209,11 @@ public class AvroRecordReader extends AbstractRecordReader {
final GenericArray<?> array = (GenericArray<?>) value;
Schema elementSchema = array.getSchema().getElementType();
Type elementType = elementSchema.getType();
- if (elementType == Schema.Type.RECORD || elementType == Schema.Type.MAP){
+ if (elementType == Schema.Type.RECORD) {
writer = (MapOrListWriterImpl) writer.list(fieldName).listoftmap(fieldName);
+ } else if (elementType == Schema.Type.MAP) {
+ writer = (MapOrListWriterImpl) writer.list(fieldName);
+ writer.listOfDict();
} else {
writer = (MapOrListWriterImpl) writer.list(fieldName);
}
@@ -227,14 +232,22 @@ public class AvroRecordReader extends AbstractRecordReader {
break;
case MAP:
@SuppressWarnings("unchecked")
- final HashMap<Object, Object> map = (HashMap<Object, Object>) value;
+ Map<Object, Object> map = (Map<Object, Object>) value;
+ // key type in Avro MAP is assumed to be string
+ Schema keySchema = Schema.create(Type.STRING);
Schema valueSchema = schema.getValueType();
- writer = (MapOrListWriterImpl) writer.map(fieldName);
- writer.start();
+
+ writer = (MapOrListWriterImpl) writer.dict(fieldName);
+ BaseWriter.DictWriter dictWriter = (BaseWriter.DictWriter) writer.map;
+
+ dictWriter.start();
for (Entry<Object, Object> entry : map.entrySet()) {
- process(entry.getValue(), valueSchema, entry.getKey().toString(), writer, fieldSelection.getChild(entry.getKey().toString()));
+ dictWriter.startKeyValuePair();
+ processPrimitive(entry.getKey(), keySchema, DictVector.FIELD_KEY_NAME, writer);
+ process(entry.getValue(), valueSchema, DictVector.FIELD_VALUE_NAME, writer, fieldSelection.getChild(entry.getKey().toString()));
+ dictWriter.endKeyValuePair();
}
- writer.end();
+ dictWriter.end();
break;
case FIXED:
case ENUM: // Enum symbols are strings
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/BaseParquetMetadataProvider.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/BaseParquetMetadataProvider.java
index 7e851a4..6da965d 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/BaseParquetMetadataProvider.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/BaseParquetMetadataProvider.java
@@ -209,15 +209,16 @@ public abstract class BaseParquetMetadataProvider implements ParquetMetadataProv
if (tableMetadata == null) {
List<StatisticsHolder> tableStatistics = new ArrayList<>(DrillStatsTable.getEstimatedTableStats(statsTable));
Map<SchemaPath, TypeProtos.MajorType> fields = ParquetTableMetadataUtils.resolveFields(parquetTableMetadata);
+ Map<SchemaPath, TypeProtos.MajorType> intermediateFields = ParquetTableMetadataUtils.resolveIntermediateFields(parquetTableMetadata);
if (this.schema == null) {
schema = new TupleSchema();
- fields.forEach((schemaPath, majorType) -> SchemaPathUtils.addColumnMetadata(schema, schemaPath, majorType));
+ fields.forEach((schemaPath, majorType) -> SchemaPathUtils.addColumnMetadata(schema, schemaPath, majorType, intermediateFields));
} else {
// merges specified schema with schema from table
fields.forEach((schemaPath, majorType) -> {
if (SchemaPathUtils.getColumnMetadata(schemaPath, schema) == null) {
- SchemaPathUtils.addColumnMetadata(schema, schemaPath, majorType);
+ SchemaPathUtils.addColumnMetadata(schema, schemaPath, majorType, intermediateFields);
}
});
}
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetReaderUtility.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetReaderUtility.java
index 709d9d5..26021e3 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetReaderUtility.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetReaderUtility.java
@@ -42,6 +42,7 @@ import org.apache.parquet.hadoop.ParquetFileWriter;
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
import org.apache.parquet.hadoop.metadata.ColumnPath;
import org.apache.parquet.hadoop.metadata.ParquetMetadata;
+import org.apache.parquet.schema.GroupType;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.OriginalType;
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
@@ -52,6 +53,7 @@ import org.apache.parquet.example.data.simple.NanoTime;
import org.apache.parquet.io.api.Binary;
import org.joda.time.DateTimeZone;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
@@ -713,4 +715,96 @@ public class ParquetReaderUtility {
}
return false;
}
+
+ /**
+ * Converts list of {@link OriginalType}s to list of {@link org.apache.drill.common.types.TypeProtos.MajorType}s.
+ * <b>NOTE</b>: current implementation cares about {@link OriginalType#MAP} only
+ * converting it to {@link org.apache.drill.common.types.TypeProtos.MinorType#DICT}.
+ * Other original types are converted to {@code null}, because there is no direct correspondence
+ * between the two (nor is one needed: these types are used only to differentiate between Drill's
+ * MAP and DICT types when constructing {@link org.apache.drill.exec.record.metadata.TupleSchema}).
+ *
+ * @param originalTypes list of Parquet's types
+ * @return list whose elements are either {@code null} or a type with minor
+ * type {@link org.apache.drill.common.types.TypeProtos.MinorType#DICT}
+ */
+ public static List<TypeProtos.MajorType> getComplexTypes(List<OriginalType> originalTypes) {
+ List<TypeProtos.MajorType> result = new ArrayList<>();
+ if (originalTypes == null) {
+ return result;
+ }
+ for (OriginalType type : originalTypes) {
+ if (type == OriginalType.MAP) {
+ TypeProtos.MajorType drillType = TypeProtos.MajorType.newBuilder()
+ .setMinorType(TypeProtos.MinorType.DICT)
+ .setMode(TypeProtos.DataMode.OPTIONAL)
+ .build();
+ result.add(drillType);
+ } else {
+ result.add(null);
+ }
+ }
+
+ return result;
+ }
+
+ /**
+ * Checks whether group field approximately matches pattern for Logical Lists:
+ * <pre>
+ * <list-repetition> group <name> (LIST) {
+ * repeated group list {
+ * <element-repetition> <element-type> element;
+ * }
+ * }
+ * </pre>
+ * (See for more details: https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists)
+ *
+ * Note that the standard field names 'list' and 'element' are intentionally not checked,
+ * because Hive lists use the names 'bag' and 'array_element' instead.
+ *
+ * @param groupType type which may have LIST original type
+ * @return whether the type is LIST and nested field is repeated group
+ * @see <a href="https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists">Parquet List logical type</a>
+ */
+ public static boolean isLogicalListType(GroupType groupType) {
+ if (groupType.getOriginalType() == OriginalType.LIST && groupType.getFieldCount() == 1) {
+ Type nestedField = groupType.getFields().get(0);
+ return nestedField.isRepetition(Type.Repetition.REPEATED)
+ && !nestedField.isPrimitive()
+ && nestedField.getOriginalType() == null
+ && nestedField.asGroupType().getFieldCount() == 1;
+ }
+ return false;
+ }
+
+ /**
+ * Checks whether group field matches pattern for Logical Map type:
+ *
+ * <pre>
+ * <map-repetition> group <name> (MAP) {
+ * repeated group key_value {
+ * required <key-type> key;
+ * <value-repetition> <value-type> value;
+ * }
+ * }
+ * </pre>
+ *
+ * Note that the actual group and field names are intentionally not checked.
+ *
+ * @param groupType parquet type which may be of MAP type
+ * @return whether the type is MAP
+ * @see <a href="https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#maps">Parquet Map logical type</a>
+ */
+ public static boolean isLogicalMapType(GroupType groupType) {
+ OriginalType type = groupType.getOriginalType();
+ // MAP_KEY_VALUE is here for backward-compatibility reasons
+ if ((type == OriginalType.MAP || type == OriginalType.MAP_KEY_VALUE)
+ && groupType.getFieldCount() == 1) {
+ Type nestedField = groupType.getFields().get(0);
+ return nestedField.isRepetition(Type.Repetition.REPEATED)
+ && !nestedField.isPrimitive()
+ && nestedField.asGroupType().getFieldCount() == 2;
+ }
+ return false;
+ }
}
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetRecordWriter.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetRecordWriter.java
index 999fdcf..9541006 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetRecordWriter.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetRecordWriter.java
@@ -22,6 +22,7 @@ import static java.lang.Math.max;
import static java.lang.Math.min;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -78,7 +79,6 @@ import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
import org.apache.parquet.schema.Type;
import org.apache.parquet.schema.Type.Repetition;
-import org.apache.drill.shaded.guava.com.google.common.collect.Lists;
import org.apache.parquet.schema.Types.ListBuilder;
public class ParquetRecordWriter extends ParquetOutputRecordWriter {
@@ -89,6 +89,12 @@ public class ParquetRecordWriter extends ParquetOutputRecordWriter {
private static final int MAXIMUM_RECORD_COUNT_FOR_CHECK = 10000;
private static final int BLOCKSIZE_MULTIPLE = 64 * 1024;
+ /**
+ * Name of nested group for Parquet's {@code MAP} type.
+ * @see <a href="https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#maps">MAP logical type</a>
+ */
+ private static final String GROUP_KEY_VALUE_NAME = "key_value";
+
public static final String DRILL_VERSION_PROPERTY = "drill.version";
public static final String WRITER_VERSION_PROPERTY = "drill-writer.version";
@@ -138,7 +144,7 @@ public class ParquetRecordWriter extends ParquetOutputRecordWriter {
this.extraMetaData.put(DRILL_VERSION_PROPERTY, DrillVersionInfo.getVersion());
this.extraMetaData.put(WRITER_VERSION_PROPERTY, String.valueOf(ParquetWriter.WRITER_VERSION));
this.storageStrategy = writer.getStorageStrategy() == null ? StorageStrategy.DEFAULT : writer.getStorageStrategy();
- this.cleanUpLocations = Lists.newArrayList();
+ this.cleanUpLocations = new ArrayList<>();
this.conf = new Configuration(writer.getFormatPlugin().getFsConf());
}
@@ -200,6 +206,7 @@ public class ParquetRecordWriter extends ParquetOutputRecordWriter {
MinorType type = field.getType().getMinorType();
switch (type) {
case MAP:
+ case DICT:
case LIST:
return true;
default:
@@ -225,7 +232,7 @@ public class ParquetRecordWriter extends ParquetOutputRecordWriter {
}
private void newSchema() throws IOException {
- List<Type> types = Lists.newArrayList();
+ List<Type> types = new ArrayList<>();
for (MaterializedField field : batchSchema) {
if (field.getName().equalsIgnoreCase(WriterPrel.PARTITION_COMPARATOR_FIELD)) {
continue;
@@ -295,6 +302,31 @@ public class ParquetRecordWriter extends ParquetOutputRecordWriter {
case MAP:
List<Type> types = getChildrenTypes(field);
return new GroupType(dataMode == DataMode.REPEATED ? Repetition.REPEATED : Repetition.OPTIONAL, field.getName(), types);
+ case DICT:
+ // RepeatedDictVector has DictVector as data vector hence the need to get the first child
+ // for REPEATED case to be able to access map's key and value fields
+ MaterializedField dictField = dataMode != DataMode.REPEATED
+ ? field : ((List<MaterializedField>) field.getChildren()).get(0);
+ List<Type> keyValueTypes = getChildrenTypes(dictField);
+
+ GroupType keyValueGroup = new GroupType(Repetition.REPEATED, GROUP_KEY_VALUE_NAME, keyValueTypes);
+ if (dataMode == DataMode.REPEATED) {
+ // Parquet's MAP repetition must be either optional or required, so nest it inside Parquet's LIST type
+ GroupType elementType = org.apache.parquet.schema.Types.buildGroup(Repetition.OPTIONAL)
+ .as(OriginalType.MAP)
+ .addField(keyValueGroup)
+ .named(LIST);
+ GroupType listGroup = new GroupType(Repetition.REPEATED, LIST, elementType);
+ return org.apache.parquet.schema.Types.buildGroup(Repetition.OPTIONAL)
+ .as(OriginalType.LIST)
+ .addField(listGroup)
+ .named(field.getName());
+ } else {
+ return org.apache.parquet.schema.Types.buildGroup(Repetition.OPTIONAL)
+ .as(OriginalType.MAP)
+ .addField(keyValueGroup)
+ .named(field.getName());
+ }
case LIST:
MaterializedField elementField = getDataField(field);
ListBuilder<GroupType> listBuilder = org.apache.parquet.schema.Types
@@ -442,7 +474,7 @@ public class ParquetRecordWriter extends ParquetOutputRecordWriter {
}
public class MapParquetConverter extends FieldConverter {
- List<FieldConverter> converters = Lists.newArrayList();
+ List<FieldConverter> converters = new ArrayList<>();
public MapParquetConverter(int fieldId, String fieldName, FieldReader reader) {
super(fieldId, fieldName, reader);
@@ -471,7 +503,7 @@ public class ParquetRecordWriter extends ParquetOutputRecordWriter {
}
public class RepeatedMapParquetConverter extends FieldConverter {
- List<FieldConverter> converters = Lists.newArrayList();
+ List<FieldConverter> converters = new ArrayList<>();
public RepeatedMapParquetConverter(int fieldId, String fieldName, FieldReader reader) {
super(fieldId, fieldName, reader);
@@ -551,6 +583,79 @@ public class ParquetRecordWriter extends ParquetOutputRecordWriter {
}
@Override
+ public FieldConverter getNewDictConverter(int fieldId, String fieldName, FieldReader reader) {
+ return new DictParquetConverter(fieldId, fieldName, reader);
+ }
+
+ public class DictParquetConverter extends FieldConverter {
+ List<FieldConverter> converters = new ArrayList<>();
+
+ public DictParquetConverter(int fieldId, String fieldName, FieldReader reader) {
+ super(fieldId, fieldName, reader);
+ int i = 0;
+ for (String name : reader) {
+ FieldConverter converter = EventBasedRecordWriter.getConverter(
+ ParquetRecordWriter.this, i++, name, reader.reader(name));
+ converters.add(converter);
+ }
+ }
+
+ @Override
+ public void writeField() throws IOException {
+ if (reader.size() == 0) {
+ return;
+ }
+
+ consumer.startField(fieldName, fieldId);
+ consumer.startGroup();
+ consumer.startField(GROUP_KEY_VALUE_NAME, 0);
+ while (reader.next()) {
+ consumer.startGroup();
+ for (FieldConverter converter : converters) {
+ converter.writeField();
+ }
+ consumer.endGroup();
+ }
+ consumer.endField(GROUP_KEY_VALUE_NAME, 0);
+ consumer.endGroup();
+ consumer.endField(fieldName, fieldId);
+ }
+ }
+
+ @Override
+ public FieldConverter getNewRepeatedDictConverter(int fieldId, String fieldName, FieldReader reader) {
+ return new RepeatedDictParquetConverter(fieldId, fieldName, reader);
+ }
+
+ public class RepeatedDictParquetConverter extends FieldConverter {
+ private final FieldConverter dictConverter;
+
+ public RepeatedDictParquetConverter(int fieldId, String fieldName, FieldReader reader) {
+ super(fieldId, fieldName, reader);
+ dictConverter = new DictParquetConverter(0, ELEMENT, reader.reader());
+ }
+
+ @Override
+ public void writeField() throws IOException {
+ if (reader.size() == 0) {
+ return;
+ }
+
+ consumer.startField(fieldName, fieldId);
+ consumer.startGroup();
+ consumer.startField(LIST, 0);
+ while (reader.next()) {
+ consumer.startGroup();
+ dictConverter.writeField();
+ consumer.endGroup();
+ }
+ consumer.endField(LIST, 0);
+ consumer.endGroup();
+ consumer.endField(fieldName, fieldId);
+ }
+ }
+
+ @Override
public void startRecord() throws IOException {
consumer.startMessage();
}
@@ -571,7 +676,7 @@ public class ParquetRecordWriter extends ParquetOutputRecordWriter {
@Override
public void abort() throws IOException {
- List<String> errors = Lists.newArrayList();
+ List<String> errors = new ArrayList<>();
for (Path location : cleanUpLocations) {
try {
if (fs.exists(location)) {
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetTableMetadataUtils.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetTableMetadataUtils.java
index 92ea0c6..c5b6f58 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetTableMetadataUtils.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetTableMetadataUtils.java
@@ -158,9 +158,12 @@ public class ParquetTableMetadataUtils {
rowGroupStatistics.add(new StatisticsHolder<>(rowGroupMetadata.getLength(), new BaseStatisticsKind(ExactStatisticsConstants.LENGTH, true)));
Map<SchemaPath, TypeProtos.MajorType> columns = getRowGroupFields(tableMetadata, rowGroupMetadata);
+ Map<SchemaPath, TypeProtos.MajorType> intermediateColumns = getIntermediateFields(tableMetadata, rowGroupMetadata);
TupleSchema schema = new TupleSchema();
- columns.forEach((schemaPath, majorType) -> SchemaPathUtils.addColumnMetadata(schema, schemaPath, majorType));
+ columns.forEach(
+ (schemaPath, majorType) -> SchemaPathUtils.addColumnMetadata(schema, schemaPath, majorType, intermediateColumns)
+ );
MetadataInfo metadataInfo = MetadataInfo.builder().type(MetadataType.ROW_GROUP).build();
@@ -379,14 +382,8 @@ public class ParquetTableMetadataUtils {
}
private static Integer getInt(Object value) {
- if (value instanceof Integer) {
- return (Integer) value;
- } else if (value instanceof Long) {
- return ((Long) value).intValue();
- } else if (value instanceof Float) {
- return ((Float) value).intValue();
- } else if (value instanceof Double) {
- return ((Double) value).intValue();
+ if (value instanceof Number) {
+ return ((Number) value).intValue();
} else if (value instanceof String) {
return Integer.parseInt(value.toString());
} else if (value instanceof byte[]) {
@@ -398,14 +395,8 @@ public class ParquetTableMetadataUtils {
}
private static Long getLong(Object value) {
- if (value instanceof Integer) {
- return Long.valueOf((Integer) value);
- } else if (value instanceof Long) {
- return (Long) value;
- } else if (value instanceof Float) {
- return ((Float) value).longValue();
- } else if (value instanceof Double) {
- return ((Double) value).longValue();
+ if (value instanceof Number) {
+ return ((Number) value).longValue();
} else if (value instanceof String) {
return Long.parseLong(value.toString());
} else if (value instanceof byte[]) {
@@ -417,14 +408,8 @@ public class ParquetTableMetadataUtils {
}
private static Float getFloat(Object value) {
- if (value instanceof Integer) {
- return Float.valueOf((Integer) value);
- } else if (value instanceof Long) {
- return Float.valueOf((Long) value);
- } else if (value instanceof Float) {
- return (Float) value;
- } else if (value instanceof Double) {
- return ((Double) value).floatValue();
+ if (value instanceof Number) {
+ return ((Number) value).floatValue();
} else if (value instanceof String) {
return Float.parseFloat(value.toString());
}
@@ -438,14 +423,8 @@ public class ParquetTableMetadataUtils {
}
private static Double getDouble(Object value) {
- if (value instanceof Integer) {
- return Double.valueOf((Integer) value);
- } else if (value instanceof Long) {
- return Double.valueOf((Long) value);
- } else if (value instanceof Float) {
- return Double.valueOf((Float) value);
- } else if (value instanceof Double) {
- return (Double) value;
+ if (value instanceof Number) {
+ return ((Number) value).doubleValue();
} else if (value instanceof String) {
return Double.parseDouble(value.toString());
}
@@ -516,14 +495,45 @@ public class ParquetTableMetadataUtils {
.build();
SchemaPath columnPath = SchemaPath.getCompoundPath(column.getName());
- TypeProtos.MajorType majorType = columns.get(columnPath);
- if (majorType == null) {
- columns.put(columnPath, columnType);
- } else {
- TypeProtos.MinorType leastRestrictiveType = TypeCastRules.getLeastRestrictiveType(Arrays.asList(majorType.getMinorType(), columnType.getMinorType()));
- if (leastRestrictiveType != majorType.getMinorType()) {
- columns.put(columnPath, columnType);
- }
+ putType(columns, columnPath, columnType);
+ }
+ return columns;
+ }
+
+ /**
+ * Returns map of column names with their Drill types for every {@code NameSegment} in {@code SchemaPath}
+ * in specified {@code rowGroup}. The type for a {@code SchemaPath} can be {@code null} in case when
+ * it is not possible to determine its type. Actually, as of now this hierarchy is of interest solely
+ * because there is a need to account for {@link org.apache.drill.common.types.TypeProtos.MinorType#DICT}
+ * to make sure filters used on {@code DICT}'s values (get by key) are not pruned out before actual filtering
+ * happens.
+ *
+ * @param parquetTableMetadata the source of column types
+ * @param rowGroup row group whose columns should be discovered
+ * @return map of column names with their drill types
+ */
+ public static Map<SchemaPath, TypeProtos.MajorType> getIntermediateFields(
+ MetadataBase.ParquetTableMetadataBase parquetTableMetadata, MetadataBase.RowGroupMetadata rowGroup) {
+ Map<SchemaPath, TypeProtos.MajorType> columns = new LinkedHashMap<>();
+
+ MetadataVersion metadataVersion = new MetadataVersion(parquetTableMetadata.getMetadataVersion());
+ boolean hasParentTypes = parquetTableMetadata.hasColumnMetadata()
+ && metadataVersion.compareTo(new MetadataVersion(4, 1)) >= 0;
+
+ if (!hasParentTypes) {
+ return Collections.emptyMap();
+ }
+
+ for (MetadataBase.ColumnMetadata column : rowGroup.getColumns()) {
+ Metadata_V4.ColumnTypeMetadata_v4 columnTypeMetadata =
+ ((Metadata_V4.ParquetTableMetadata_v4) parquetTableMetadata).getColumnTypeInfo(column.getName());
+ List<OriginalType> parentTypes = columnTypeMetadata.parentTypes;
+ List<TypeProtos.MajorType> drillTypes = ParquetReaderUtility.getComplexTypes(parentTypes);
+
+ for (int i = 0; i < drillTypes.size(); i++) {
+ SchemaPath columnPath = SchemaPath.getCompoundPath(i + 1, column.getName());
+ TypeProtos.MajorType drillType = drillTypes.get(i);
+ putType(columns, columnPath, drillType);
}
}
return columns;
@@ -569,23 +579,34 @@ public class ParquetTableMetadataUtils {
* @return map of column names with their drill types
*/
static Map<SchemaPath, TypeProtos.MajorType> resolveFields(MetadataBase.ParquetTableMetadataBase parquetTableMetadata) {
- LinkedHashMap<SchemaPath, TypeProtos.MajorType> columns = new LinkedHashMap<>();
+ Map<SchemaPath, TypeProtos.MajorType> columns = new LinkedHashMap<>();
for (MetadataBase.ParquetFileMetadata file : parquetTableMetadata.getFiles()) {
// row groups in the file have the same schema, so using the first one
Map<SchemaPath, TypeProtos.MajorType> fileColumns = getFileFields(parquetTableMetadata, file);
- fileColumns.forEach((columnPath, type) -> {
- TypeProtos.MajorType majorType = columns.get(columnPath);
- if (majorType == null) {
- columns.put(columnPath, type);
- } else {
- TypeProtos.MinorType leastRestrictiveType = TypeCastRules.getLeastRestrictiveType(Arrays.asList(majorType.getMinorType(), type.getMinorType()));
- if (leastRestrictiveType != majorType.getMinorType()) {
- columns.put(columnPath, type);
- }
- }
- });
+ fileColumns.forEach((columnPath, type) -> putType(columns, columnPath, type));
}
return columns;
}
+ static Map<SchemaPath, TypeProtos.MajorType> resolveIntermediateFields(MetadataBase.ParquetTableMetadataBase parquetTableMetadata) {
+ Map<SchemaPath, TypeProtos.MajorType> columns = new LinkedHashMap<>();
+ for (MetadataBase.ParquetFileMetadata file : parquetTableMetadata.getFiles()) {
+ // row groups in the file have the same schema, so using the first one
+ Map<SchemaPath, TypeProtos.MajorType> fileColumns = getIntermediateFields(parquetTableMetadata, file.getRowGroups().iterator().next());
+ fileColumns.forEach((columnPath, type) -> putType(columns, columnPath, type));
+ }
+ return columns;
+ }
+
+ private static void putType(Map<SchemaPath, TypeProtos.MajorType> columns, SchemaPath columnPath, TypeProtos.MajorType type) {
+ TypeProtos.MajorType majorType = columns.get(columnPath);
+ if (majorType == null) {
+ columns.put(columnPath, type);
+ } else if (!majorType.equals(type)) {
+ TypeProtos.MinorType leastRestrictiveType = TypeCastRules.getLeastRestrictiveType(Arrays.asList(majorType.getMinorType(), type.getMinorType()));
+ if (leastRestrictiveType != majorType.getMinorType()) {
+ columns.put(columnPath, type);
+ }
+ }
+ }
}
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetWriter.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetWriter.java
index aea3218..2bad852 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetWriter.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetWriter.java
@@ -50,7 +50,7 @@ public class ParquetWriter extends AbstractWriter {
* Newer readers must be able to read old files. The Writer version tells the Parquet reader how to interpret fields
* or metadata when that data changes format from one writer version to another.
*/
- public static final int WRITER_VERSION = 2;
+ public static final int WRITER_VERSION = 3;
private final String location;
private final List<String> partitionColumns;
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/metadata/FileMetadataCollector.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/metadata/FileMetadataCollector.java
index bc313f9..781a868 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/metadata/FileMetadataCollector.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/metadata/FileMetadataCollector.java
@@ -92,7 +92,7 @@ public class FileMetadataCollector {
this.colTypeInfoMap = new HashMap<>();
for (String[] path : schema.getPaths()) {
- colTypeInfoMap.put(SchemaPath.getCompoundPath(path), ColTypeInfo.of(schema, schema, path, 0));
+ colTypeInfoMap.put(SchemaPath.getCompoundPath(path), ColTypeInfo.of(schema, schema, path, 0, new ArrayList<>()));
}
init();
@@ -173,10 +173,18 @@ public class FileMetadataCollector {
ColTypeInfo colTypeInfo = colTypeInfoMap.get(columnSchemaName);
long totalNullCount = stats.getNumNulls();
- Metadata_V4.ColumnTypeMetadata_v4 columnTypeMetadata = new Metadata_V4.ColumnTypeMetadata_v4(
- columnName, primitiveTypeName,
- colTypeInfo.originalType, colTypeInfo.precision, colTypeInfo.scale,
- colTypeInfo.repetitionLevel, colTypeInfo.definitionLevel, 0, false);
+ Metadata_V4.ColumnTypeMetadata_v4 columnTypeMetadata = new Metadata_V4.ColumnTypeMetadata_v4.Builder()
+ .name(columnName)
+ .primitiveType(primitiveTypeName)
+ .originalType(colTypeInfo.originalType)
+ .precision(colTypeInfo.precision)
+ .scale(colTypeInfo.scale)
+ .repetitionLevel(colTypeInfo.repetitionLevel)
+ .definitionLevel(colTypeInfo.definitionLevel)
+ .totalNullCount(0)
+ .interesting(false)
+ .parentTypes(colTypeInfo.parentTypes)
+ .build();
Metadata_V4.ColumnTypeMetadata_v4.Key columnTypeMetadataKey = new Metadata_V4.ColumnTypeMetadata_v4.Key(columnTypeMetadata.name);
totalNullCountMap.putIfAbsent(columnTypeMetadataKey, Metadata.DEFAULT_NULL_COUNT);
@@ -201,7 +209,7 @@ public class FileMetadataCollector {
}
long numNulls = stats.getNumNulls();
Metadata_V4.ColumnMetadata_v4 columnMetadata = new Metadata_V4.ColumnMetadata_v4(columnTypeMetadata.name,
- primitiveTypeName, minValue, maxValue, numNulls);
+ primitiveTypeName, minValue, maxValue, numNulls);
columnMetadataList.add(columnMetadata);
columnTypeMetadata.isInteresting = true;
}
@@ -240,20 +248,23 @@ public class FileMetadataCollector {
private static class ColTypeInfo {
OriginalType originalType;
+ List<OriginalType> parentTypes;
int precision;
int scale;
int repetitionLevel;
int definitionLevel;
- ColTypeInfo(OriginalType originalType, int precision, int scale, int repetitionLevel, int definitionLevel) {
+ ColTypeInfo(OriginalType originalType, List<OriginalType> parentTypes,
+ int precision, int scale, int repetitionLevel, int definitionLevel) {
this.originalType = originalType;
+ this.parentTypes = parentTypes;
this.precision = precision;
this.scale = scale;
this.repetitionLevel = repetitionLevel;
this.definitionLevel = definitionLevel;
}
- static ColTypeInfo of(MessageType schema, Type type, String[] path, int depth) {
+ static ColTypeInfo of(MessageType schema, Type type, String[] path, int depth, List<OriginalType> parentTypes) {
if (type.isPrimitive()) {
PrimitiveType primitiveType = (PrimitiveType) type;
int precision = 0;
@@ -266,10 +277,17 @@ public class FileMetadataCollector {
int repetitionLevel = schema.getMaxRepetitionLevel(path);
int definitionLevel = schema.getMaxDefinitionLevel(path);
- return new ColTypeInfo(type.getOriginalType(), precision, scale, repetitionLevel, definitionLevel);
+ return new ColTypeInfo(type.getOriginalType(), parentTypes, precision, scale, repetitionLevel, definitionLevel);
}
Type t = ((GroupType) type).getType(path[depth]);
- return of(schema, t, path, depth + 1);
+ if (!t.isPrimitive()) {
+ OriginalType originalType = t.getOriginalType();
+ if (originalType == OriginalType.MAP && !ParquetReaderUtility.isLogicalMapType(t.asGroupType())) {
+ originalType = null;
+ }
+ parentTypes.add(originalType);
+ }
+ return of(schema, t, path, depth + 1, parentTypes);
}
}
}
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/metadata/MetadataVersion.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/metadata/MetadataVersion.java
index 27a6ebc..46e4c57 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/metadata/MetadataVersion.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/metadata/MetadataVersion.java
@@ -145,10 +145,16 @@ public class MetadataVersion implements Comparable<MetadataVersion> {
*/
public static final String V3_3 = "3.3";
- /**
- * Version 4.0: Split the metadata cache file into summary and file metadata
- */
- public static final String V4 = "4.0";
+ /**
+ * Version 4.0: Split the metadata cache file into summary and file metadata
+ */
+ public static final String V4 = "4.0";
+
+ /**
+ * Version 4.1: Added parents' original types in {@link Metadata_V4.ColumnTypeMetadata_v4}
+ * and {@link Metadata_V4.ColumnMetadata_v4}
+ */
+ public static final String V4_1 = "4.1";
/**
* All historical versions of the Drill metadata cache files. In case of introducing a new parquet metadata version
@@ -161,7 +167,8 @@ public class MetadataVersion implements Comparable<MetadataVersion> {
new MetadataVersion(V3_1),
new MetadataVersion(V3_2),
new MetadataVersion(V3_3),
- new MetadataVersion(V4)
+ new MetadataVersion(V4),
+ new MetadataVersion(V4_1)
);
/**
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/metadata/Metadata_V4.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/metadata/Metadata_V4.java
index e909e9a..c331a70 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/metadata/Metadata_V4.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/metadata/Metadata_V4.java
@@ -27,6 +27,7 @@ import org.apache.parquet.schema.OriginalType;
import org.apache.parquet.schema.PrimitiveType;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
@@ -292,6 +293,8 @@ public class Metadata_V4 {
@JsonProperty
public OriginalType originalType;
@JsonProperty
+ public List<OriginalType> parentTypes;
+ @JsonProperty
public int precision;
@JsonProperty
public int scale;
@@ -311,17 +314,18 @@ public class Metadata_V4 {
public ColumnTypeMetadata_v4() {
}
- public ColumnTypeMetadata_v4(String[] name, PrimitiveType.PrimitiveTypeName primitiveType, OriginalType originalType, int precision, int scale, int repetitionLevel, int definitionLevel, long totalNullCount, boolean isInteresting) {
- this.name = name;
- this.primitiveType = primitiveType;
- this.originalType = originalType;
- this.precision = precision;
- this.scale = scale;
- this.repetitionLevel = repetitionLevel;
- this.definitionLevel = definitionLevel;
+ private ColumnTypeMetadata_v4(Builder builder) {
+ this.name = builder.name;
+ this.primitiveType = builder.primitiveType;
+ this.originalType = builder.originalType;
+ this.precision = builder.precision;
+ this.scale = builder.scale;
+ this.repetitionLevel = builder.repetitionLevel;
+ this.definitionLevel = builder.definitionLevel;
this.key = new Key(name);
- this.totalNullCount = totalNullCount;
- this.isInteresting = isInteresting;
+ this.totalNullCount = builder.totalNullCount;
+ this.isInteresting = builder.isInteresting;
+ this.parentTypes = Collections.unmodifiableList(builder.parentTypes);
}
@JsonIgnore
@@ -393,6 +397,74 @@ public class Metadata_V4 {
public String[] getName() {
return name;
}
+
+ public static class Builder {
+
+ private String[] name;
+ private PrimitiveType.PrimitiveTypeName primitiveType;
+ private OriginalType originalType;
+ private List<OriginalType> parentTypes;
+ private int precision;
+ private int scale;
+ private int repetitionLevel;
+ private int definitionLevel;
+ private long totalNullCount;
+ private boolean isInteresting;
+
+ public Builder name(String[] name) {
+ this.name = name;
+ return this;
+ }
+
+ public Builder primitiveType(PrimitiveType.PrimitiveTypeName primitiveType) {
+ this.primitiveType = primitiveType;
+ return this;
+ }
+
+ public Builder originalType(OriginalType originalType) {
+ this.originalType = originalType;
+ return this;
+ }
+
+ public Builder parentTypes(List<OriginalType> parentTypes) {
+ this.parentTypes = parentTypes;
+ return this;
+ }
+
+ public Builder precision(int precision) {
+ this.precision = precision;
+ return this;
+ }
+
+ public Builder scale(int scale) {
+ this.scale = scale;
+ return this;
+ }
+
+ public Builder repetitionLevel(int repetitionLevel) {
+ this.repetitionLevel = repetitionLevel;
+ return this;
+ }
+
+ public Builder definitionLevel(int definitionLevel) {
+ this.definitionLevel = definitionLevel;
+ return this;
+ }
+
+ public Builder totalNullCount(long totalNullCount) {
+ this.totalNullCount = totalNullCount;
+ return this;
+ }
+
+ public Builder interesting(boolean isInteresting) {
+ this.isInteresting = isInteresting;
+ return this;
+ }
+
+ public ColumnTypeMetadata_v4 build() {
+ return new ColumnTypeMetadata_v4(this);
+ }
+ }
}
/**
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet2/DrillParquetGroupConverter.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet2/DrillParquetGroupConverter.java
index 924f611..fbe3ae3 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet2/DrillParquetGroupConverter.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet2/DrillParquetGroupConverter.java
@@ -46,10 +46,12 @@ import org.apache.drill.exec.physical.impl.OutputMutator;
import org.apache.drill.exec.server.options.OptionManager;
import org.apache.drill.exec.store.parquet.ParquetReaderUtility;
import org.apache.drill.exec.store.parquet.columnreaders.ParquetColumnMetadata;
-import org.apache.drill.exec.vector.complex.impl.RepeatedMapWriter;
+import org.apache.drill.exec.vector.complex.impl.AbstractRepeatedMapWriter;
import org.apache.drill.exec.vector.complex.impl.SingleMapWriter;
import org.apache.drill.exec.vector.complex.writer.BaseWriter;
+import org.apache.drill.exec.vector.complex.writer.BaseWriter.ListWriter;
import org.apache.drill.exec.vector.complex.writer.BaseWriter.MapWriter;
+import org.apache.drill.exec.vector.complex.writer.BaseWriter.DictWriter;
import org.apache.drill.exec.vector.complex.writer.BigIntWriter;
import org.apache.drill.exec.vector.complex.writer.BitWriter;
import org.apache.drill.exec.vector.complex.writer.DateWriter;
@@ -69,7 +71,6 @@ import org.apache.parquet.io.api.Converter;
import org.apache.parquet.io.api.GroupConverter;
import org.apache.parquet.io.api.PrimitiveConverter;
import org.apache.parquet.schema.GroupType;
-import org.apache.parquet.schema.OriginalType;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.Type;
import org.apache.parquet.schema.Type.Repetition;
@@ -80,7 +81,8 @@ import static org.apache.drill.exec.store.parquet.ParquetReaderUtility.NanoTimeU
public class DrillParquetGroupConverter extends GroupConverter {
- private final List<Converter> converters;
+ protected final List<Converter> converters;
+
private final BaseWriter baseWriter;
private final OutputMutator mutator;
private final OptionManager options;
@@ -91,7 +93,24 @@ public class DrillParquetGroupConverter extends GroupConverter {
* Debugging information in form of "parent">fieldName[WriterClassName-hashCode()],
* where "parent" is parent converterName.
*/
- private final String converterName;
+ private String converterName;
+
+ /**
+ * Constructor is responsible for creation of converter without creation of child converters.
+ *
+ * @param mutator output mutator, used to share managed buffer with primitive converters
+ * @param baseWriter map or list writer associated with the group converter
+ * @param options option manager used to check enabled option when necessary
+ * @param containsCorruptedDates allows to select strategy for dates handling
+ */
+ protected DrillParquetGroupConverter(OutputMutator mutator, BaseWriter baseWriter, OptionManager options,
+ ParquetReaderUtility.DateCorruptionStatus containsCorruptedDates) {
+ this.mutator = mutator;
+ this.baseWriter = baseWriter;
+ this.options = options;
+ this.containsCorruptedDates = containsCorruptedDates;
+ converters = new ArrayList<>();
+ }
/**
* The constructor is responsible for creation of converters tree and may invoke itself for
@@ -112,12 +131,8 @@ public class DrillParquetGroupConverter extends GroupConverter {
Collection<SchemaPath> columns, OptionManager options,
ParquetReaderUtility.DateCorruptionStatus containsCorruptedDates,
boolean skipRepeated, String parentName) {
+ this(mutator, baseWriter, options, containsCorruptedDates);
this.converterName = String.format("%s>%s[%s-%d]", parentName, schema.getName(), baseWriter.getClass().getSimpleName(), baseWriter.hashCode());
- this.baseWriter = baseWriter;
- this.mutator = mutator;
- this.containsCorruptedDates = containsCorruptedDates;
- this.converters = new ArrayList<>();
- this.options = options;
Iterator<SchemaPath> colIterator = columns.iterator();
@@ -129,9 +144,7 @@ public class DrillParquetGroupConverter extends GroupConverter {
while (colIterator.hasNext()) {
PathSegment colPath = colIterator.next().getRootSegment();
String colPathName;
- if (colPath.isNamed() &&
- !DYNAMIC_STAR.equals(colPathName = colPath.getNameSegment().getPath()) &&
- colPathName.equalsIgnoreCase(name)) {
+ if (colPath.isNamed() && !DYNAMIC_STAR.equals(colPathName = colPath.getNameSegment().getPath()) && colPathName.equalsIgnoreCase(name)) {
name = colPathName;
colNextChild = colPath.getChild();
break;
@@ -158,10 +171,15 @@ public class DrillParquetGroupConverter extends GroupConverter {
BaseWriter writer;
GroupType fieldGroupType = fieldType.asGroupType();
- if (isLogicalListType(fieldGroupType)) {
- writer = getWriter(name, (m, s) -> m.list(s), l -> l.list());
+ if (ParquetReaderUtility.isLogicalListType(fieldGroupType)) {
+ writer = getWriter(name, MapWriter::list, ListWriter::list);
converter = new DrillParquetGroupConverter(mutator, writer, fieldGroupType, columns, options,
containsCorruptedDates, true, converterName);
+ } else if (options.getOption(ExecConstants.PARQUET_READER_ENABLE_MAP_SUPPORT_VALIDATOR)
+ && ParquetReaderUtility.isLogicalMapType(fieldGroupType)) {
+ writer = getWriter(name, MapWriter::dict, ListWriter::dict);
+ converter = new DrillParquetMapGroupConverter(
+ mutator, (DictWriter) writer, fieldGroupType, options, containsCorruptedDates);
} else if (fieldType.isRepetition(Repetition.REPEATED)) {
if (skipRepeated) {
converter = new DrillIntermediateParquetGroupConverter(mutator, baseWriter, fieldGroupType, columns, options,
@@ -172,7 +190,7 @@ public class DrillParquetGroupConverter extends GroupConverter {
containsCorruptedDates, false, converterName);
}
} else {
- writer = getWriter(name, (m, s) -> m.map(s), l -> l.map());
+ writer = getWriter(name, MapWriter::map, ListWriter::map);
converter = new DrillParquetGroupConverter(mutator, writer, fieldGroupType, columns, options,
containsCorruptedDates, false, converterName);
}
@@ -181,33 +199,7 @@ public class DrillParquetGroupConverter extends GroupConverter {
return converter;
}
- /**
- * Checks whether group field approximately matches pattern for Logical Lists:
- * <list-repetition> group <name> (LIST) {
- * repeated group list {
- * <element-repetition> <element-type> element;
- * }
- * }
- * (See for more details: https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists)
- *
- * Note, that standard field names 'list' and 'element' aren't checked intentionally,
- * because Hive lists have 'bag' and 'array_element' names instead.
- *
- * @param groupType type which may have LIST original type
- * @return whether the type is LIST and nested field is repeated group
- */
- private boolean isLogicalListType(GroupType groupType) {
- if (groupType.getOriginalType() == OriginalType.LIST && groupType.getFieldCount() == 1) {
- Type nestedField = groupType.getFields().get(0);
- return nestedField.isRepetition(Repetition.REPEATED)
- && !nestedField.isPrimitive()
- && nestedField.getOriginalType() == null
- && nestedField.asGroupType().getFieldCount() == 1;
- }
- return false;
- }
-
- private PrimitiveConverter getConverterForType(String name, PrimitiveType type) {
+ protected PrimitiveConverter getConverterForType(String name, PrimitiveType type) {
switch(type.getPrimitiveTypeName()) {
case INT32: {
if (type.getOriginalType() == null) {
@@ -321,9 +313,7 @@ public class DrillParquetGroupConverter extends GroupConverter {
return new DrillVarBinaryConverter(writer, mutator.getManagedBuffer());
}
switch(type.getOriginalType()) {
- case UTF8: {
- return getVarCharConverter(name, type);
- }
+ case UTF8:
case ENUM: {
return getVarCharConverter(name, type);
}
@@ -404,19 +394,24 @@ public class DrillParquetGroupConverter extends GroupConverter {
@Override
public void start() {
- if (baseWriter instanceof SingleMapWriter || baseWriter instanceof RepeatedMapWriter) {
+ if (isMapWriter()) {
((MapWriter) baseWriter).start();
} else {
- ((BaseWriter.ListWriter) baseWriter).startList();
+ ((ListWriter) baseWriter).startList();
}
}
+ boolean isMapWriter() {
+ return baseWriter instanceof SingleMapWriter
+ || baseWriter instanceof AbstractRepeatedMapWriter;
+ }
+
@Override
public void end() {
- if (baseWriter instanceof SingleMapWriter || baseWriter instanceof RepeatedMapWriter) {
+ if (isMapWriter()) {
((MapWriter) baseWriter).end();
} else {
- ((BaseWriter.ListWriter) baseWriter).endList();
+ ((ListWriter) baseWriter).endList();
}
}
@@ -426,10 +421,10 @@ public class DrillParquetGroupConverter extends GroupConverter {
}
private <T> T getWriter(String name, BiFunction<MapWriter, String, T> fromMap, Function<BaseWriter.ListWriter, T> fromList) {
- if (baseWriter instanceof SingleMapWriter || baseWriter instanceof RepeatedMapWriter) {
+ if (isMapWriter()) {
return fromMap.apply((MapWriter) baseWriter, name);
- } else if (baseWriter instanceof BaseWriter.ListWriter) {
- return fromList.apply((BaseWriter.ListWriter) baseWriter);
+ } else if (baseWriter instanceof ListWriter) {
+ return fromList.apply((ListWriter) baseWriter);
} else {
throw new IllegalStateException(String.format("Parent writer with type [%s] is unsupported", baseWriter.getClass()));
}
@@ -744,7 +739,10 @@ public class DrillParquetGroupConverter extends GroupConverter {
super(mutator, baseWriter, schema, columns, options, containsCorruptedDates, skipRepeated, parentName);
}
+ @Override
public void start() {}
+
+ @Override
public void end() {}
}
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet2/DrillParquetMapGroupConverter.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet2/DrillParquetMapGroupConverter.java
new file mode 100644
index 0000000..b2d37c2
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet2/DrillParquetMapGroupConverter.java
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.parquet2;
+
+import org.apache.drill.common.exceptions.DrillRuntimeException;
+import org.apache.drill.exec.physical.impl.OutputMutator;
+import org.apache.drill.exec.server.options.OptionManager;
+import org.apache.drill.exec.store.parquet.ParquetReaderUtility;
+import org.apache.drill.exec.vector.complex.DictVector;
+import org.apache.drill.exec.vector.complex.writer.BaseWriter;
+import org.apache.drill.exec.vector.complex.writer.BaseWriter.DictWriter;
+import org.apache.parquet.io.api.Converter;
+import org.apache.parquet.schema.GroupType;
+import org.apache.parquet.schema.Type;
+
+import java.util.Collections;
+
+class DrillParquetMapGroupConverter extends DrillParquetGroupConverter {
+
+ private final DictWriter writer;
+
+ DrillParquetMapGroupConverter(OutputMutator mutator, DictWriter mapWriter, GroupType schema, OptionManager options,
+ ParquetReaderUtility.DateCorruptionStatus containsCorruptedDates) {
+ super(mutator, mapWriter, options, containsCorruptedDates);
+ writer = mapWriter;
+
+ GroupType type = schema.getType(0).asGroupType();
+ Converter innerConverter = new KeyValueGroupConverter(mutator, type, options, containsCorruptedDates);
+ converters.add(innerConverter);
+ }
+
+ @Override
+ public void start() {
+ writer.start();
+ }
+
+ @Override
+ public void end() {
+ writer.end();
+ }
+
+ private class KeyValueGroupConverter extends DrillParquetGroupConverter {
+
+ private static final int INDEX_KEY = 0;
+ private static final int INDEX_VALUE = 1;
+
+ KeyValueGroupConverter(OutputMutator mutator, GroupType schema, OptionManager options,
+ ParquetReaderUtility.DateCorruptionStatus containsCorruptedDates) {
+ super(mutator, writer, options, containsCorruptedDates);
+
+ converters.add(getKeyConverter(schema));
+ converters.add(getValueConverter(schema, mutator, options, containsCorruptedDates));
+ }
+
+ private Converter getKeyConverter(GroupType schema) {
+ Type keyType = schema.getType(INDEX_KEY);
+ if (!keyType.isPrimitive()) {
+ throw new DrillRuntimeException("Dict supports primitive key only. Found: " + keyType);
+ } else {
+ return getConverterForType(DictVector.FIELD_KEY_NAME, keyType.asPrimitiveType());
+ }
+ }
+
+ private Converter getValueConverter(GroupType schema, OutputMutator mutator, OptionManager options,
+ ParquetReaderUtility.DateCorruptionStatus containsCorruptedDates) {
+ Type valueType = schema.getType(INDEX_VALUE);
+ Converter valueConverter;
+ if (!valueType.isPrimitive()) {
+ GroupType groupType = valueType.asGroupType();
+ if (ParquetReaderUtility.isLogicalMapType(groupType)) {
+ DictWriter valueWriter = writer.dict(DictVector.FIELD_VALUE_NAME);
+ valueConverter =
+ new DrillParquetMapGroupConverter(mutator, valueWriter, groupType, options, containsCorruptedDates);
+ } else {
+ boolean isListType = ParquetReaderUtility.isLogicalListType(groupType);
+ BaseWriter valueWriter = isListType
+ ? writer.list(DictVector.FIELD_VALUE_NAME)
+ : writer.map(DictVector.FIELD_VALUE_NAME);
+ valueConverter = new DrillParquetGroupConverter(mutator, valueWriter, groupType, Collections.emptyList(), options,
+ containsCorruptedDates, isListType, "KeyValueGroupConverter");
+ }
+ } else {
+ valueConverter = getConverterForType(DictVector.FIELD_VALUE_NAME, valueType.asPrimitiveType());
+ }
+ return valueConverter;
+ }
+
+ @Override
+ public void start() {
+ writer.startKeyValuePair();
+ }
+
+ @Override
+ public void end() {
+ writer.endKeyValuePair();
+ }
+
+ @Override
+ boolean isMapWriter() {
+ return true;
+ }
+ }
+}
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet2/DrillParquetReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet2/DrillParquetReader.java
index a82563e..b16c55a 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet2/DrillParquetReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet2/DrillParquetReader.java
@@ -48,6 +48,7 @@ import org.apache.parquet.hadoop.metadata.ParquetMetadata;
import org.apache.parquet.io.ColumnIOFactory;
import org.apache.parquet.io.MessageColumnIO;
import org.apache.parquet.io.RecordReader;
+import org.apache.parquet.schema.GroupType;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.Type;
import org.apache.parquet.schema.Types;
@@ -123,14 +124,26 @@ public class DrillParquetReader extends CommonParquetRecordReader {
Set<SchemaPath> selectedSchemaPaths = new LinkedHashSet<>();
// get a list of modified columns which have the array elements removed from the schema path since parquet schema doesn't include array elements
+ // or if field is (Parquet's) MAP then array/name segments are removed from the schema as well as obtaining elements by key is handled in EvaluationVisitor.
List<SchemaPath> modifiedColumns = new LinkedList<>();
for (SchemaPath path : columns) {
List<String> segments = new ArrayList<>();
+ Type segmentType = schema;
for (PathSegment seg = path.getRootSegment(); seg != null; seg = seg.getChild()) {
+
if (seg.isNamed()) {
segments.add(seg.getNameSegment().getPath());
}
+
+ segmentType = getSegmentType(segmentType, seg);
+ boolean isMap = segmentType != null
+ && (!segmentType.isPrimitive() && ParquetReaderUtility.isLogicalMapType(segmentType.asGroupType()));
+ if (isMap) {
+ // stop the loop at a found MAP column to ensure the selection is not discarded
+ // later as values obtained from dict by key differ from the actual column's path
+ break;
+ }
}
modifiedColumns.add(SchemaPath.getCompoundPath(segments.toArray(new String[0])));
@@ -144,7 +157,6 @@ public class DrillParquetReader extends CommonParquetRecordReader {
SchemaPath schemaPath = SchemaPath.getCompoundPath(schemaColDesc);
schemaPaths.add(schemaPath);
}
-
// loop through projection columns and add any columns that are missing from parquet schema to columnsNotFound list
for (SchemaPath columnPath : modifiedColumns) {
boolean notFound = true;
@@ -168,7 +180,7 @@ public class DrillParquetReader extends CommonParquetRecordReader {
} while ((seg = seg.getChild()) != null);
String[] pathSegments = new String[segments.size()];
segments.toArray(pathSegments);
- Type t = getType(pathSegments, 0, schema);
+ Type t = getSegmentType(pathSegments, 0, schema);
if (projection == null) {
projection = new MessageType(messageName, t);
@@ -179,6 +191,31 @@ public class DrillParquetReader extends CommonParquetRecordReader {
return projection;
}
+ /**
+ * Get type from the supplied {@code type} corresponding to given {@code segment}.
+ *
+ * @param parentSegmentType type to extract field corresponding to segment
+ * @param segment segment which type will be returned
+ * @return type corresponding to the {@code segment} or {@code null} if there is no field found in {@code type}.
+ */
+ private static Type getSegmentType(Type parentSegmentType, PathSegment segment) {
+ Type segmentType = null;
+ if (parentSegmentType != null && !parentSegmentType.isPrimitive()) {
+ GroupType groupType = parentSegmentType.asGroupType();
+ if (segment.isNamed()) {
+ String fieldName = segment.getNameSegment().getPath();
+ segmentType = groupType.getFields().stream()
+ .filter(f -> f.getName().equalsIgnoreCase(fieldName))
+ .findAny().map(field -> groupType.getType(field.getName()))
+ .orElse(null);
+ } else if (ParquetReaderUtility.isLogicalListType(parentSegmentType.asGroupType())) { // the segment is array index
+ // get element type of the list
+ segmentType = groupType.getType(0).asGroupType().getType(0);
+ }
+ }
+ return segmentType;
+ }
+
@Override
public void allocate(Map<String, ValueVector> vectorMap) throws OutOfMemoryException {
try {
@@ -263,7 +300,7 @@ public class DrillParquetReader extends CommonParquetRecordReader {
}
}
- private static Type getType(String[] pathSegments, int depth, MessageType schema) {
+ private static Type getSegmentType(String[] pathSegments, int depth, MessageType schema) {
int nextDepth = depth + 1;
Type type = schema.getType(Arrays.copyOfRange(pathSegments, 0, nextDepth));
if (nextDepth == pathSegments.length) {
@@ -272,7 +309,7 @@ public class DrillParquetReader extends CommonParquetRecordReader {
Preconditions.checkState(!type.isPrimitive());
return Types.buildGroup(type.getRepetition())
.as(type.getOriginalType())
- .addField(getType(pathSegments, nextDepth, schema))
+ .addField(getSegmentType(pathSegments, nextDepth, schema))
.named(type.getName());
}
}
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/CopyUtil.java b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/CopyUtil.java
index b62434a..28a98aa 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/CopyUtil.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/CopyUtil.java
@@ -42,8 +42,16 @@ public class CopyUtil {
copyMethod = "copyFrom";
}
g.rotateBlock();
- JVar inVV = g.declareVectorValueSetupAndMember("incoming", new TypedFieldId(vv.getField().getType(), vv.isHyper(), fieldId));
- JVar outVV = g.declareVectorValueSetupAndMember("outgoing", new TypedFieldId(vv.getField().getType(), false, fieldId));
+ TypedFieldId inFieldId = new TypedFieldId.Builder().finalType(vv.getField().getType())
+ .hyper(vv.isHyper())
+ .addId(fieldId)
+ .build();
+ JVar inVV = g.declareVectorValueSetupAndMember("incoming", inFieldId);
+ TypedFieldId outFieldId = new TypedFieldId.Builder().finalType(vv.getField().getType())
+ .hyper(false)
+ .addId(fieldId)
+ .build();
+ JVar outVV = g.declareVectorValueSetupAndMember("outgoing", outFieldId);
if(hyper){
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/FieldIdUtil.java b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/FieldIdUtil.java
index 77b3fde..088bee6 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/FieldIdUtil.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/FieldIdUtil.java
@@ -24,11 +24,13 @@ import org.apache.drill.common.types.TypeProtos.MajorType;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.exec.record.TypedFieldId;
import org.apache.drill.exec.vector.ValueVector;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.util.List;
public class FieldIdUtil {
- static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(FieldIdUtil.class);
+ private static final Logger logger = LoggerFactory.getLogger(FieldIdUtil.class);
public static TypedFieldId getFieldIdIfMatchesUnion(UnionVector unionVector, TypedFieldId.Builder builder, boolean addToBreadCrumb, PathSegment seg) {
if (seg != null) {
@@ -57,9 +59,26 @@ public class FieldIdUtil {
return null;
}
+ /**
+ * Utility method to obtain {@link TypedFieldId}, providing metadata
+ * for specified field given by value vector used in code generation.
+ *
+ * @param vector a value vector the metadata is obtained for
+ * @param builder a builder instance gathering metadata
+ * @param addToBreadCrumb flag to indicate whether to include intermediate type
+ * @param seg path segment corresponding to the vector
+ * @return type metadata for given vector
+ */
+ public static TypedFieldId getFieldIdIfMatches(ValueVector vector, TypedFieldId.Builder builder,
+ boolean addToBreadCrumb, PathSegment seg) {
+ return getFieldIdIfMatches(vector, builder, addToBreadCrumb, seg, 0);
+ }
- public static TypedFieldId getFieldIdIfMatches(ValueVector vector, TypedFieldId.Builder builder, boolean addToBreadCrumb, PathSegment seg) {
- if (vector instanceof RepeatedMapVector && seg != null && seg.isArray() && !seg.isLastPath()) {
+ private static TypedFieldId getFieldIdIfMatches(ValueVector vector, TypedFieldId.Builder builder,
+ boolean addToBreadCrumb, PathSegment seg, int depth) {
+ if (vector instanceof DictVector) {
+ builder.setDict(depth);
+ } else if (vector instanceof RepeatedMapVector && seg != null && seg.isArray() && !seg.isLastPath()) {
if (addToBreadCrumb) {
addToBreadCrumb = false;
builder.remainder(seg);
@@ -88,7 +107,7 @@ public class FieldIdUtil {
type = ((AbstractContainerVector) vector).getLastPathType();
} else if (vector instanceof ListVector) {
type = ((ListVector) vector).getDataVector().getField().getType();
- builder.listVector();
+ builder.listVector(true);
} else {
throw new UnsupportedOperationException("FieldIdUtil does not support vector of type " + vector.getField().getType());
}
@@ -116,8 +135,23 @@ public class FieldIdUtil {
}
ValueVector v;
- if (vector instanceof AbstractContainerVector) {
- VectorWithOrdinal vord = ((AbstractContainerVector) vector).getChildVectorWithOrdinal(seg.isArray() ? null : seg.getNameSegment().getPath());
+ if (vector instanceof DictVector) {
+ v = ((DictVector) vector).getValues();
+ if (addToBreadCrumb) {
+ builder.remainder(seg);
+ builder.intermediateType(vector.getField().getType());
+ addToBreadCrumb = false;
+ // reset bit set and depth as this Dict vector will be the first one in the schema
+ builder.resetDictBitSet();
+ depth = 0;
+ builder.setDict(depth);
+ }
+ } else if (vector instanceof AbstractContainerVector) {
+ String fieldName = null;
+ if (seg.isNamed()) {
+ fieldName = seg.getNameSegment().getPath();
+ }
+ VectorWithOrdinal vord = ((AbstractContainerVector) vector).getChildVectorWithOrdinal(fieldName);
if (vord == null) {
return null;
}
@@ -126,20 +160,15 @@ public class FieldIdUtil {
builder.intermediateType(v.getField().getType());
builder.addId(vord.ordinal);
}
- } else if (vector instanceof ListVector) {
- v = ((ListVector) vector).getDataVector();
+ } else if (vector instanceof ListVector || vector instanceof RepeatedDictVector) {
+ v = ((RepeatedValueVector) vector).getDataVector();
} else {
throw new UnsupportedOperationException("FieldIdUtil does not support vector of type " + vector.getField().getType());
}
- if (v instanceof AbstractContainerVector) {
- // we're looking for a multi path.
- AbstractContainerVector c = (AbstractContainerVector) v;
- return getFieldIdIfMatches(c, builder, addToBreadCrumb, seg.getChild());
- } else if(v instanceof ListVector) {
- ListVector list = (ListVector) v;
- return getFieldIdIfMatches(list, builder, addToBreadCrumb, seg.getChild());
- } else if (v instanceof UnionVector) {
+ if (v instanceof AbstractContainerVector || v instanceof ListVector) {
+ return getFieldIdIfMatches(v, builder, addToBreadCrumb, seg.getChild(), depth + 1);
+ } else if (v instanceof UnionVector) {
return getFieldIdIfMatchesUnion((UnionVector) v, builder, addToBreadCrumb, seg.getChild());
} else {
if (seg.isNamed()) {
@@ -176,10 +205,7 @@ public class FieldIdUtil {
}
PathSegment seg = expectedPath.getRootSegment();
- TypedFieldId.Builder builder = TypedFieldId.newBuilder();
- if (hyper) {
- builder.hyper();
- }
+ TypedFieldId.Builder builder = TypedFieldId.newBuilder().hyper(hyper);
if (vector instanceof UnionVector) {
builder.addId(id).remainder(expectedPath.getRootSegment().getChild());
List<MinorType> minorTypes = ((UnionVector) vector).getSubTypes();
@@ -196,18 +222,47 @@ public class FieldIdUtil {
return getFieldIdIfMatchesUnion((UnionVector) vector, builder, false, seg.getChild());
}
} else if (vector instanceof ListVector) {
- ListVector list = (ListVector) vector;
builder.intermediateType(vector.getField().getType());
builder.addId(id);
- return getFieldIdIfMatches(list, builder, true, expectedPath.getRootSegment().getChild());
- } else
- if (vector instanceof AbstractContainerVector) {
+ return getFieldIdIfMatches(vector, builder, true, expectedPath.getRootSegment().getChild(), 0);
+ } else if (vector instanceof DictVector) {
+ MajorType vectorType = vector.getField().getType();
+ builder.intermediateType(vectorType);
+ builder.addId(id);
+ if (seg.isLastPath()) {
+ builder.finalType(vectorType);
+ return builder.build();
+ } else {
+ PathSegment child = seg.getChild();
+ builder.remainder(child);
+ return getFieldIdIfMatches(vector, builder, false, expectedPath.getRootSegment().getChild(), 0);
+ }
+ } else if (vector instanceof AbstractContainerVector) {
// we're looking for a multi path.
- AbstractContainerVector c = (AbstractContainerVector) vector;
builder.intermediateType(vector.getField().getType());
builder.addId(id);
- return getFieldIdIfMatches(c, builder, true, expectedPath.getRootSegment().getChild());
-
+ return getFieldIdIfMatches(vector, builder, true, expectedPath.getRootSegment().getChild(), 0);
+ } else if (vector instanceof RepeatedDictVector) {
+ MajorType vectorType = vector.getField().getType();
+ builder.intermediateType(vectorType);
+ builder.addId(id);
+ if (seg.isLastPath()) {
+ builder.finalType(vectorType);
+ return builder.build();
+ } else {
+ PathSegment child = seg.getChild();
+ if (!child.isArray()) {
+ return null;
+ } else {
+ builder.remainder(child);
+ builder.withIndex();
+ if (child.isLastPath()) {
+ return builder.finalType(DictVector.TYPE).build();
+ } else {
+ return getFieldIdIfMatches(vector, builder, true, expectedPath.getRootSegment().getChild(), 0);
+ }
+ }
+ }
} else {
builder.intermediateType(vector.getField().getType());
builder.addId(id);
@@ -224,7 +279,6 @@ public class FieldIdUtil {
} else {
return null;
}
-
}
}
}
diff --git a/exec/java-exec/src/main/resources/drill-module.conf b/exec/java-exec/src/main/resources/drill-module.conf
index dd062ea..62af353 100644
--- a/exec/java-exec/src/main/resources/drill-module.conf
+++ b/exec/java-exec/src/main/resources/drill-module.conf
@@ -640,6 +640,7 @@ drill.exec.options: {
store.parquet.reader.pagereader.enforceTotalSize: false,
store.parquet.reader.pagereader.queuesize: 2,
store.parquet.reader.pagereader.usefadvise: false,
+ store.parquet.reader.enable_map_support: true,
store.parquet.use_new_reader: false,
store.parquet.vector_fill_check_threshold: 10,
store.parquet.vector_fill_threshold: 85,
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/compile/TestEvaluationVisitor.java b/exec/java-exec/src/test/java/org/apache/drill/exec/compile/TestEvaluationVisitor.java
index 631f1f5..68d165e 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/compile/TestEvaluationVisitor.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/compile/TestEvaluationVisitor.java
@@ -43,7 +43,7 @@ public class TestEvaluationVisitor {
.remainder(path.getRootSegment())
.intermediateType(Types.optional(MinorType.MAP))
.finalType(Types.repeated(MinorType.MAP))
- .hyper()
+ .hyper(true)
.withIndex()
.build();
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/expr/ExpressionTest.java b/exec/java-exec/src/test/java/org/apache/drill/exec/expr/ExpressionTest.java
index 6f096d3..4c505d2 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/expr/ExpressionTest.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/expr/ExpressionTest.java
@@ -69,7 +69,10 @@ public class ExpressionTest extends ExecTest {
final RecordBatch batch = mock(RecordBatch.class);
final VectorWrapper wrapper = mock(VectorWrapper.class);
final TypeProtos.MajorType type = Types.optional(MinorType.INT);
- final TypedFieldId tfid = new TypedFieldId(type, false, 0);
+ final TypedFieldId tfid = new TypedFieldId.Builder().finalType(type)
+ .hyper(false)
+ .addId(0)
+ .build();
when(wrapper.getValueVector()).thenReturn(new IntVector(MaterializedField.create("result", type), RootAllocatorFactory.newRoot(c)));
@@ -82,8 +85,12 @@ public class ExpressionTest extends ExecTest {
@Test
public void testSchemaExpression() throws Exception {
final RecordBatch batch = mock(RecordBatch.class);
+ TypedFieldId fieldId = new TypedFieldId.Builder().finalType(Types.optional(MinorType.BIGINT))
+ .hyper(false)
+ .addId(0)
+ .build();
when(batch.getValueVectorId(new SchemaPath("alpha", ExpressionPosition.UNKNOWN)))
- .thenReturn(new TypedFieldId(Types.optional(MinorType.BIGINT), false, 0));
+ .thenReturn(fieldId);
getExpressionCode("1 + alpha", batch);
}
@@ -116,7 +123,10 @@ public class ExpressionTest extends ExecTest {
FunctionImplementationRegistry funcReg = new FunctionImplementationRegistry(DrillConfig.create());
final ClassGenerator<Projector> cg = CodeGenerator.get(Projector.TEMPLATE_DEFINITION, null).getRoot();
- cg.addExpr(new ValueVectorWriteExpression(new TypedFieldId(materializedExpr.getMajorType(), -1), materializedExpr));
+ TypedFieldId fieldId = new TypedFieldId.Builder().finalType(materializedExpr.getMajorType())
+ .addId(-1)
+ .build();
+ cg.addExpr(new ValueVectorWriteExpression(fieldId, materializedExpr));
return cg.getCodeGenerator().generateAndGet();
}
}
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java
index 5fa618f..170bf3d 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java
@@ -29,6 +29,7 @@ import org.apache.drill.exec.util.JsonStringArrayList;
import org.apache.drill.shaded.guava.com.google.common.base.Joiner;
import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableList;
import org.apache.drill.test.BaseTestQuery;
+import org.apache.drill.test.TestBuilder;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -1330,6 +1331,83 @@ public class TestParquetWriter extends BaseTestQuery {
}
}
+ @Test
+ public void testCTASWithDictInSelect() throws Exception {
+ String tableName = "table_with_dict";
+ try {
+ test("use dfs.tmp");
+ test("create table %s as select id, mapcol from cp.`store/parquet/complex/map/parquet/000000_0.parquet`", tableName);
+ testBuilder()
+ .sqlQuery("select * from %s", tableName)
+ .unOrdered()
+ .baselineColumns("id", "mapcol")
+ .baselineValues(1, TestBuilder.mapOfObject("b", 6, "c", 7))
+ .baselineValues(3, TestBuilder.mapOfObject("b", null, "c", 8, "d", 9, "e", 10))
+ .baselineValues(5, TestBuilder.mapOfObject("b", 6, "c", 7, "a", 8, "abc4", 9, "bde", 10))
+ .baselineValues(4, TestBuilder.mapOfObject("a", 3, "b", 4, "c", 5))
+ .baselineValues(2, TestBuilder.mapOfObject("a", 1, "b", 2, "c", 3))
+ .go();
+ } finally {
+ test("DROP TABLE IF EXISTS %s", tableName);
+ }
+ }
+
+ @Test
+ public void testCTASWithRepeatedDictInSelect() throws Exception {
+ String tableName = "table_with_dict_array";
+ try {
+ test("use dfs.tmp");
+ test("create table %s as select id, map_array from cp.`store/parquet/complex/map/parquet/000000_0.parquet`", tableName);
+ testBuilder()
+ .sqlQuery("select * from %s", tableName)
+ .unOrdered()
+ .baselineColumns("id", "map_array")
+ .baselineValues(
+ 4,
+ TestBuilder.listOf(
+ TestBuilder.mapOfObject(1L, 2, 10L, 1, 42L, 3, 31L, 4),
+ TestBuilder.mapOfObject(-1L, 2, 3L, 1, 5L, 3, 54L, 4, 55L, 589, -78L, 2),
+ TestBuilder.mapOfObject(1L, 124, 3L, 1, -4L, 2, 19L, 3, 5L, 3, 9L, 1),
+ TestBuilder.mapOfObject(1L, 89, 2L, 1, 3L, 3, 4L, 21, 5L, 12, 6L, 34),
+ TestBuilder.mapOfObject(1L, -25, 3L, 1, 5L, 3, 6L, 2, 9L, 333, 10L, 344),
+ TestBuilder.mapOfObject(3L, 222, 4L, 1, 5L, 3, 6L, 2, 7L, 1, 8L, 3),
+ TestBuilder.mapOfObject(1L, 11, 3L, 12, 5L, 13)
+ )
+ )
+ .baselineValues(
+ 1,
+ TestBuilder.listOf(
+ TestBuilder.mapOfObject(8L, 1, 9L, 2, 523L, 4, 31L, 3),
+ TestBuilder.mapOfObject(1L, 2, 3L, 1, 5L, 3)
+ )
+ )
+ .baselineValues(
+ 3,
+ TestBuilder.listOf(
+ TestBuilder.mapOfObject(3L, 1),
+ TestBuilder.mapOfObject(1L, 2)
+ )
+ )
+ .baselineValues(
+ 2,
+ TestBuilder.listOf(
+ TestBuilder.mapOfObject(1L, 1, 2L, 2)
+ )
+ )
+ .baselineValues(
+ 5,
+ TestBuilder.listOf(
+ TestBuilder.mapOfObject(1L, 1, 2L, 2, 3L, 3, 4L, 4),
+ TestBuilder.mapOfObject(1L, -1, 2L, -2),
+ TestBuilder.mapOfObject(1L, 4, 2L, 5, 3L, 7)
+ )
+ )
+ .go();
+ } finally {
+ test("DROP TABLE IF EXISTS %s", tableName);
+ }
+ }
+
/**
* Checks that specified parquet table contains specified columns with specified types.
*
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/record/ExpressionTreeMaterializerTest.java b/exec/java-exec/src/test/java/org/apache/drill/exec/record/ExpressionTreeMaterializerTest.java
index 038639e..e00fc93 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/record/ExpressionTreeMaterializerTest.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/record/ExpressionTreeMaterializerTest.java
@@ -75,8 +75,11 @@ public class ExpressionTreeMaterializerTest extends ExecTest {
@Test
public void testMaterializingLateboundField() throws SchemaChangeException {
final RecordBatch batch = mock(RecordBatch.class);
+ TypedFieldId fieldId = new TypedFieldId.Builder().finalType(Types.required(MinorType.BIGINT))
+ .addId(-5)
+ .build();
when(batch.getValueVectorId(new SchemaPath("test", ExpressionPosition.UNKNOWN)))
- .thenReturn(new TypedFieldId(Types.required(MinorType.BIGINT), -5));
+ .thenReturn(fieldId);
final SchemaBuilder builder = BatchSchema.newBuilder();
builder.addField(getField("test", bigIntType));
@@ -93,8 +96,14 @@ public class ExpressionTreeMaterializerTest extends ExecTest {
public void testMaterializingLateboundTree() throws SchemaChangeException {
final RecordBatch batch = mock(RecordBatch.class);
- when(batch.getValueVectorId(SchemaPath.getSimplePath("test"))).thenReturn(new TypedFieldId(Types.required(MinorType.BIT), -4));
- when(batch.getValueVectorId(SchemaPath.getSimplePath("test1"))).thenReturn(new TypedFieldId(Types.required(MinorType.BIGINT), -5));
+ TypedFieldId fieldId = new TypedFieldId.Builder().finalType(Types.required(MinorType.BIT))
+ .addId(-4)
+ .build();
+ TypedFieldId fieldId2 = new TypedFieldId.Builder().finalType(Types.required(MinorType.BIGINT))
+ .addId(-5)
+ .build();
+ when(batch.getValueVectorId(SchemaPath.getSimplePath("test"))).thenReturn(fieldId);
+ when(batch.getValueVectorId(SchemaPath.getSimplePath("test1"))).thenReturn(fieldId2);
ErrorCollector ec = new ErrorCollectorImpl();
@@ -121,8 +130,11 @@ public class ExpressionTreeMaterializerTest extends ExecTest {
public void testMaterializingLateboundTreeValidated() throws SchemaChangeException {
final RecordBatch batch = mock(RecordBatch.class);
+ TypedFieldId fieldId = new TypedFieldId.Builder().finalType(Types.required(MinorType.BIGINT))
+ .addId(-5)
+ .build();
when(batch.getValueVectorId(new SchemaPath("test", ExpressionPosition.UNKNOWN)))
- .thenReturn(new TypedFieldId(Types.required(MinorType.BIGINT), -5));
+ .thenReturn(fieldId);
ErrorCollector ec = new ErrorCollector() {
int errorCount = 0;
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/record/vector/TestDictVector.java b/exec/java-exec/src/test/java/org/apache/drill/exec/record/vector/TestDictVector.java
new file mode 100644
index 0000000..07efcd5
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/record/vector/TestDictVector.java
@@ -0,0 +1,459 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.record.vector;
+
+import io.netty.buffer.DrillBuf;
+import org.apache.drill.categories.VectorTest;
+import org.apache.drill.common.config.DrillConfig;
+import org.apache.drill.common.exceptions.DrillRuntimeException;
+import org.apache.drill.exec.ExecTest;
+import org.apache.drill.exec.expr.holders.NullableBigIntHolder;
+import org.apache.drill.exec.memory.BufferAllocator;
+import org.apache.drill.exec.memory.RootAllocatorFactory;
+import org.apache.drill.exec.proto.UserBitShared;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.record.RecordBatchLoader;
+import org.apache.drill.exec.record.TransferPair;
+import org.apache.drill.exec.record.WritableBatch;
+import org.apache.drill.exec.vector.complex.DictVector;
+import org.apache.drill.exec.vector.complex.impl.SingleDictWriter;
+import org.apache.drill.exec.vector.complex.reader.BaseReader;
+import org.apache.drill.exec.vector.complex.writer.BaseWriter;
+import org.apache.drill.test.TestBuilder;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.ExpectedException;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
+import static org.hamcrest.CoreMatchers.containsString;
+import static org.junit.Assert.assertEquals;
+
+@Category(VectorTest.class)
+public class TestDictVector extends ExecTest {
+
+ @Rule
+ public ExpectedException thrown = ExpectedException.none();
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void setUp() {
+ allocator = RootAllocatorFactory.newRoot(DrillConfig.create());
+ }
+
+ @After
+ public void tearDown(){
+ allocator.close();
+ }
+
+ @Test
+ public void testVectorCreation() {
+ MaterializedField field = MaterializedField.create("map", DictVector.TYPE);
+ try (DictVector mapVector = new DictVector(field, allocator, null)) {
+ mapVector.allocateNew();
+
+ List<Map<Object, Object>> maps = Arrays.asList(
+ TestBuilder.mapOfObject(4f, 1L, 5.3f, 2L, 0.3f, 3L, -0.2f, 4L, 102.07f, 5L),
+ TestBuilder.mapOfObject(45f, 6L, 9.2f, 7L),
+ TestBuilder.mapOfObject(4.01f, 8L, 9.2f, 9L, -2.3f, 10L),
+ TestBuilder.mapOfObject(),
+ TestBuilder.mapOfObject(11f, 11L, 9.73f, 12L, 0.03f, 13L)
+ );
+
+ BaseWriter.DictWriter mapWriter = new SingleDictWriter(mapVector, null);
+ int index = 0;
+ for (Map<Object, Object> map : maps) {
+ mapWriter.setPosition(index++);
+ mapWriter.start();
+ for (Map.Entry<Object, Object> entry : map.entrySet()) {
+ mapWriter.startKeyValuePair();
+ mapWriter.float4(DictVector.FIELD_KEY_NAME).writeFloat4((float) entry.getKey());
+ mapWriter.bigInt(DictVector.FIELD_VALUE_NAME).writeBigInt((long) entry.getValue());
+ mapWriter.endKeyValuePair();
+ }
+ mapWriter.end();
+ }
+
+ BaseReader.DictReader mapReader = mapVector.getReader();
+ index = 0;
+ for (Map<Object, Object> map : maps) {
+ mapReader.setPosition(index++);
+ for (Map.Entry<Object, Object> entry : map.entrySet()) {
+ mapReader.next();
+ Float actualKey = mapReader.reader(DictVector.FIELD_KEY_NAME).readFloat();
+ Long actualValue = mapReader.reader(DictVector.FIELD_VALUE_NAME).readLong();
+ assertEquals(entry.getKey(), actualKey);
+ assertEquals(entry.getValue(), actualValue);
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testVectorCreationWithNullValues() {
+ MaterializedField field = MaterializedField.create("map", DictVector.TYPE);
+ try (DictVector mapVector = new DictVector(field, allocator, null)) {
+ mapVector.allocateNew();
+
+ List<Map<Object, Object>> maps = Arrays.asList(
+ TestBuilder.mapOfObject(4f, 1L, 5.3f, 2L, 0.3f, null, -0.2f, 4L, 102.07f, 5L),
+ TestBuilder.mapOfObject(45f, 6L, 9.2f, 7L),
+ TestBuilder.mapOfObject(4.01f, null, 9.2f, 9L, -2.3f, 10L),
+ TestBuilder.mapOfObject(),
+ TestBuilder.mapOfObject(11f, 11L, 9.73f, null, 0.03f, 13L)
+ );
+
+ BaseWriter.DictWriter mapWriter = new SingleDictWriter(mapVector, null);
+ int index = 0;
+ for (Map<Object, Object> map : maps) {
+ mapWriter.setPosition(index++);
+ mapWriter.start();
+ for (Map.Entry<Object, Object> entry : map.entrySet()) {
+ mapWriter.startKeyValuePair();
+ mapWriter.float4(DictVector.FIELD_KEY_NAME).writeFloat4((float) entry.getKey());
+ Long value = (Long) entry.getValue();
+ if (value != null) {
+ mapWriter.bigInt(DictVector.FIELD_VALUE_NAME).writeBigInt(value);
+ } // else skip writing a value. Notice that index was incremented
+ mapWriter.endKeyValuePair();
+ }
+ mapWriter.end();
+ }
+
+ BaseReader.DictReader mapReader = mapVector.getReader();
+ index = 0;
+ for (Map<Object, Object> map : maps) {
+ mapReader.setPosition(index++);
+ assertEquals(map.size(), mapReader.size());
+ for (Map.Entry<Object, Object> entry : map.entrySet()) {
+ mapReader.next();
+ Float actualKey = mapReader.reader(DictVector.FIELD_KEY_NAME).readFloat();
+ Long actualValue = mapReader.reader(DictVector.FIELD_VALUE_NAME).readLong();
+ assertEquals(entry.getKey(), actualKey);
+ assertEquals(entry.getValue(), actualValue);
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testVectorCreationWithNullKeys() {
+ thrown.expect(DrillRuntimeException.class);
+ thrown.expectMessage(containsString("Key in DICT cannot be null. Index: 1"));
+
+ MaterializedField field = MaterializedField.create("map", DictVector.TYPE);
+ try (DictVector mapVector = new DictVector(field, allocator, null)) {
+ mapVector.allocateNew();
+
+ List<Map<Object, Object>> maps = Arrays.asList(
+ TestBuilder.mapOfObject(4f, 1L, 5.3f, 2L, 1.23f, 3L, -0.2f, 4L, 102.07f, 5L),
+ TestBuilder.mapOfObject(45f, 6L, null, 7L), // this map contains a null key, which is not supported
+ TestBuilder.mapOfObject(4.01f, 8L, 9.2f, 9L, -2.3f, 10L),
+ TestBuilder.mapOfObject(),
+ TestBuilder.mapOfObject(11f, 11L, 9.73f, 12L, 0.03f, 13L)
+ );
+
+ BaseWriter.DictWriter mapWriter = new SingleDictWriter(mapVector, null);
+ int index = 0;
+ for (Map<Object, Object> map : maps) {
+ mapWriter.setPosition(index++);
+ mapWriter.start();
+ for (Map.Entry<Object, Object> entry : map.entrySet()) {
+ mapWriter.startKeyValuePair();
+ if (entry.getKey() != null) {
+ mapWriter.float4(DictVector.FIELD_KEY_NAME).writeFloat4((float) entry.getKey());
+ } // else skip writing a key. Notice that index was incremented.
+ // Whether the key is written will be checked in endKeyValuePair()
+ // and an Exception will be thrown in case it was not set (i.e. it is null).
+ mapWriter.bigInt(DictVector.FIELD_VALUE_NAME).writeBigInt((Long) entry.getValue());
+ mapWriter.endKeyValuePair();
+ }
+ mapWriter.end();
+ }
+ }
+ }
+
+ @Test
+ public void testSplitAndTransfer() {
+ MaterializedField field = MaterializedField.create("map", DictVector.TYPE);
+ try (DictVector mapVector = new DictVector(field, allocator, null)) {
+ mapVector.allocateNew();
+
+ List<Map<Object, Object>> maps = Arrays.asList(
+ TestBuilder.mapOfObject(4f, 1L, 5.3f, 2L, 0.3f, 3L, -0.2f, 4L, 102.07f, 5L),
+ TestBuilder.mapOfObject(45f, 6L, 9.2f, 7L),
+ TestBuilder.mapOfObject(4.01f, 8L, 9.2f, 9L, -2.3f, 10L),
+ TestBuilder.mapOfObject(),
+ TestBuilder.mapOfObject(11f, 11L, 9.73f, 12L, 0.03f, 13L)
+ );
+
+ BaseWriter.DictWriter mapWriter = new SingleDictWriter(mapVector, null);
+ int index = 0;
+ for (Map<Object, Object> map : maps) {
+ mapWriter.setPosition(index++);
+ mapWriter.start();
+ for (Map.Entry<Object, Object> entry : map.entrySet()) {
+ mapWriter.startKeyValuePair();
+ mapWriter.float4(DictVector.FIELD_KEY_NAME).writeFloat4((float) entry.getKey());
+ mapWriter.bigInt(DictVector.FIELD_VALUE_NAME).writeBigInt((long) entry.getValue());
+ mapWriter.endKeyValuePair();
+ }
+ mapWriter.end();
+ }
+
+ int start = 1;
+ int length = 2;
+
+ DictVector newMapVector = new DictVector(field, allocator, null);
+ TransferPair transferPair = mapVector.makeTransferPair(newMapVector);
+ transferPair.splitAndTransfer(start, length);
+
+ BaseReader.DictReader mapReader = newMapVector.getReader();
+ index = 0;
+ for (Map<Object, Object> map : maps.subList(start, start + length)) {
+ mapReader.setPosition(index++);
+ for (Map.Entry<Object, Object> entry : map.entrySet()) {
+ mapReader.next();
+ Float actualKey = mapReader.reader(DictVector.FIELD_KEY_NAME).readFloat();
+ Long actualValue = mapReader.reader(DictVector.FIELD_VALUE_NAME).readLong();
+ assertEquals(entry.getKey(), actualKey);
+ assertEquals(entry.getValue(), actualValue);
+ }
+ }
+
+ newMapVector.clear();
+ }
+ }
+
+ @Test
+ public void testLoadValueVector() {
+ MaterializedField field = MaterializedField.create("map", DictVector.TYPE);
+ try (DictVector mapVector = new DictVector(field, allocator, null)) {
+ mapVector.allocateNew();
+
+ List<Map<Object, Object>> maps = Arrays.asList(
+ TestBuilder.mapOfObject(4f, 1L, 5.3f, 2L, 0.3f, 3L, -0.2f, 4L, 102.07f, 5L),
+ TestBuilder.mapOfObject(45f, 6L, 9.2f, 7L),
+ TestBuilder.mapOfObject(4.01f, 8L, 9.2f, 9L, -2.3f, 10L),
+ TestBuilder.mapOfObject(),
+ TestBuilder.mapOfObject(11f, 11L, 9.73f, 12L, 0.03f, 13L)
+ );
+
+ BaseWriter.DictWriter mapWriter = new SingleDictWriter(mapVector, null);
+ int index = 0;
+ for (Map<Object, Object> map : maps) {
+ mapWriter.setPosition(index++);
+ mapWriter.start();
+ for (Map.Entry<Object, Object> entry : map.entrySet()) {
+ mapWriter.startKeyValuePair();
+ mapWriter.float4(DictVector.FIELD_KEY_NAME).writeFloat4((float) entry.getKey());
+ mapWriter.bigInt(DictVector.FIELD_VALUE_NAME).writeBigInt((long) entry.getValue());
+ mapWriter.endKeyValuePair();
+ }
+ mapWriter.end();
+ }
+
+ UserBitShared.SerializedField oldField = mapVector.getMetadata();
+ WritableBatch writableBatch = WritableBatch.getBatchNoHV(oldField.getValueCount(), Collections.singletonList(mapVector), false);
+ DrillBuf byteBuf = TestLoad.serializeBatch(allocator, writableBatch);
+
+ DictVector newMapVector = new DictVector(field.clone(), allocator, null);
+ newMapVector.load(oldField, byteBuf);
+
+ BaseReader.DictReader mapReader = newMapVector.getReader();
+ index = 0;
+ for (Map<Object, Object> map : maps) {
+ mapReader.setPosition(index++);
+ for (Map.Entry<Object, Object> entry : map.entrySet()) {
+ mapReader.next();
+ Float actualKey = mapReader.reader(DictVector.FIELD_KEY_NAME).readFloat();
+ Long actualValue = mapReader.reader(DictVector.FIELD_VALUE_NAME).readLong();
+ assertEquals(entry.getKey(), actualKey);
+ assertEquals(entry.getValue(), actualValue);
+ }
+ }
+
+ newMapVector.clear();
+ byteBuf.release();
+
+ writableBatch.clear();
+ }
+ }
+
+ @Test
+ public void testLoadBatchLoader() throws Exception {
+ MaterializedField field = MaterializedField.create("map", DictVector.TYPE);
+ try (DictVector mapVector = new DictVector(field, allocator, null)) {
+ mapVector.allocateNew();
+
+ List<Map<Object, Object>> maps = Arrays.asList(
+ TestBuilder.mapOfObject(4f, 1L, 5.3f, 2L, 0.3f, 3L, -0.2f, 4L, 102.07f, 5L),
+ TestBuilder.mapOfObject(45f, 6L, 9.2f, 7L),
+ TestBuilder.mapOfObject(4.01f, 8L, 9.2f, 9L, -2.3f, 10L),
+ TestBuilder.mapOfObject(),
+ TestBuilder.mapOfObject(11f, 11L, 9.73f, 12L, 0.03f, 13L)
+ );
+
+ BaseWriter.DictWriter mapWriter = new SingleDictWriter(mapVector, null);
+ int index = 0;
+ for (Map<Object, Object> map : maps) {
+ mapWriter.setPosition(index++);
+ mapWriter.start();
+ for (Map.Entry<Object, Object> entry : map.entrySet()) {
+ mapWriter.startKeyValuePair();
+ mapWriter.float4(DictVector.FIELD_KEY_NAME).writeFloat4((float) entry.getKey());
+ mapWriter.bigInt(DictVector.FIELD_VALUE_NAME).writeBigInt((long) entry.getValue());
+ mapWriter.endKeyValuePair();
+ }
+ mapWriter.end();
+ }
+
+ WritableBatch writableBatch = WritableBatch.getBatchNoHV(maps.size(), Collections.singletonList(mapVector), false);
+ // Serialize the vector
+ DrillBuf byteBuf = TestLoad.serializeBatch(allocator, writableBatch);
+ RecordBatchLoader batchLoader = new RecordBatchLoader(allocator);
+ batchLoader.load(writableBatch.getDef(), byteBuf);
+
+ byteBuf.release();
+
+ assertEquals(maps.size(), batchLoader.getRecordCount());
+
+ writableBatch.clear();
+ batchLoader.clear();
+ }
+ }
+
+ @Test
+ public void testGetByKey() {
+ MaterializedField field = MaterializedField.create("map", DictVector.TYPE);
+ try (DictVector mapVector = new DictVector(field, allocator, null)) {
+ mapVector.allocateNew();
+
+ List<Map<Object, Object>> maps = Arrays.asList(
+ TestBuilder.mapOfObject(4f, 1L, 5.3f, 2L, 0.3f, 3L, -0.2f, 4L, 102.07f, 5L),
+ TestBuilder.mapOfObject(45f, 6L, 9.2f, 7L),
+ TestBuilder.mapOfObject(4.01f, 8L, 4f, 9L, -2.3f, 10L),
+ TestBuilder.mapOfObject(-2.5f, 11L),
+ TestBuilder.mapOfObject(),
+ TestBuilder.mapOfObject(11f, 12L, 9.73f, 13L, 4f, 14L)
+ );
+
+ BaseWriter.DictWriter mapWriter = new SingleDictWriter(mapVector, null);
+ int index = 0;
+ for (Map<Object, Object> map : maps) {
+ mapWriter.setPosition(index++);
+ mapWriter.start();
+ for (Map.Entry<Object, Object> entry : map.entrySet()) {
+ mapWriter.startKeyValuePair();
+ mapWriter.float4(DictVector.FIELD_KEY_NAME).writeFloat4((float) entry.getKey());
+ mapWriter.bigInt(DictVector.FIELD_VALUE_NAME).writeBigInt((long) entry.getValue());
+ mapWriter.endKeyValuePair();
+ }
+ mapWriter.end();
+ }
+
+ BaseReader.DictReader mapReader = mapVector.getReader();
+
+ float key = 4.0f;
+ // Due to limitations of Calcite, we can pass NameSegment and ArraySegment
+ // only as String and int respectively; hence we need to transform the float key
+ // to String to be able to use it with DictReader, which then
+ // will derive the appropriate type internally.
+ String stringKey = String.valueOf(key);
+
+ NullableBigIntHolder valueHolder = new NullableBigIntHolder();
+ index = 0;
+ for (Map<Object, Object> map : maps) {
+ mapReader.setPosition(index++);
+ mapReader.next();
+ mapReader.read(stringKey, valueHolder);
+ assertEquals(map.get(key), valueHolder.isSet == 1 ? valueHolder.value : null);
+ // reset value holder to reuse it for the next row
+ valueHolder.isSet = 0;
+ }
+ }
+ }
+
+ @SuppressWarnings("unchecked")
+ @Test
+ public void testVectorCreationListValue() {
+ MaterializedField field = MaterializedField.create("map", DictVector.TYPE);
+ try (DictVector mapVector = new DictVector(field, allocator, null)) {
+ mapVector.allocateNew();
+
+ List<Map<Object, Object>> maps = Arrays.asList(
+ TestBuilder.mapOfObject(
+ 1, TestBuilder.listOf(1.0, 2.3, 3.1),
+ 2, TestBuilder.listOf(4.9, -5.002)
+ ),
+ TestBuilder.mapOfObject(
+ 3, TestBuilder.listOf(6.901),
+ 4, TestBuilder.listOf(),
+ 5, TestBuilder.listOf(7.03, -8.973)
+ ),
+ TestBuilder.mapOfObject(),
+ TestBuilder.mapOfObject(
+ 6, TestBuilder.listOf(9.0, 10.0, 11.0),
+ 7, TestBuilder.listOf(12.07, 13.01, 14.58, 15.039),
+ 8, TestBuilder.listOf(-16.0, -17.0, 18.0, 19.23, 20.1234)
+ )
+ );
+
+ BaseWriter.DictWriter mapWriter = new SingleDictWriter(mapVector, null);
+ int index = 0;
+ for (Map<Object, Object> map : maps) {
+ mapWriter.setPosition(index++);
+ mapWriter.start();
+ for (Map.Entry<Object, Object> entry : map.entrySet()) {
+ mapWriter.startKeyValuePair();
+ mapWriter.integer(DictVector.FIELD_KEY_NAME).writeInt((int) entry.getKey());
+ BaseWriter.ListWriter valueWriter = mapWriter.list(DictVector.FIELD_VALUE_NAME);
+
+ valueWriter.startList();
+ for (Object element : (List<Object>) entry.getValue()) {
+ valueWriter.float8().writeFloat8((double) element);
+ }
+ valueWriter.endList();
+
+ mapWriter.endKeyValuePair();
+ }
+ mapWriter.end();
+ }
+
+ BaseReader.DictReader mapReader = mapVector.getReader();
+ index = 0;
+ for (Map<Object, Object> map : maps) {
+ mapReader.setPosition(index++);
+ for (Map.Entry<Object, Object> entry : map.entrySet()) {
+ mapReader.next();
+ Integer actualKey = mapReader.reader(DictVector.FIELD_KEY_NAME).readInteger();
+ Object actualValue = mapReader.reader(DictVector.FIELD_VALUE_NAME).readObject();
+ assertEquals(entry.getKey(), actualKey);
+ assertEquals(entry.getValue(), actualValue);
+ }
+ }
+ }
+ }
+}
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/record/vector/TestLoad.java b/exec/java-exec/src/test/java/org/apache/drill/exec/record/vector/TestLoad.java
index 8d62a2b..26a3a83 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/record/vector/TestLoad.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/record/vector/TestLoad.java
@@ -113,7 +113,7 @@ public class TestLoad extends ExecTest {
return vectors;
}
- private static DrillBuf serializeBatch(BufferAllocator allocator, WritableBatch writableBatch) {
+ static DrillBuf serializeBatch(BufferAllocator allocator, WritableBatch writableBatch) {
final ByteBuf[] byteBufs = writableBatch.getBuffers();
int bytes = 0;
for (ByteBuf buf : byteBufs) {
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/avro/AvroFormatTest.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/avro/AvroFormatTest.java
index 8f2339b..dfcb6e5 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/avro/AvroFormatTest.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/avro/AvroFormatTest.java
@@ -17,8 +17,11 @@
*/
package org.apache.drill.exec.store.avro;
+import static org.apache.drill.exec.store.avro.AvroTestUtil.ARRAY_SIZE;
+import static org.apache.drill.exec.store.avro.AvroTestUtil.RECORD_COUNT;
import static org.apache.drill.exec.store.avro.AvroTestUtil.generateDoubleNestedSchema_NoNullValues;
import static org.apache.drill.exec.store.avro.AvroTestUtil.generateLinkedList;
+import static org.apache.drill.exec.store.avro.AvroTestUtil.generateMapSchema;
import static org.apache.drill.exec.store.avro.AvroTestUtil.generateMapSchemaComplex_withNullValues;
import static org.apache.drill.exec.store.avro.AvroTestUtil.generateMapSchema_withNullValues;
import static org.apache.drill.exec.store.avro.AvroTestUtil.generateNestedArraySchema;
@@ -33,6 +36,7 @@ import static org.apache.drill.exec.store.avro.AvroTestUtil.generateUnionSchema_
import static org.apache.drill.exec.store.avro.AvroTestUtil.generateUnionSchema_WithNullValues;
import static org.apache.drill.exec.store.avro.AvroTestUtil.write;
import static org.apache.drill.test.TestBuilder.listOf;
+import static org.apache.drill.test.TestBuilder.mapOfObject;
import java.io.File;
import java.math.BigDecimal;
@@ -55,6 +59,7 @@ import org.apache.drill.exec.work.ExecErrorConstants;
import org.apache.drill.test.BaseTestQuery;
import org.apache.drill.test.TestBuilder;
import org.junit.Assert;
+import org.junit.BeforeClass;
import org.junit.Test;
import org.apache.drill.shaded.guava.com.google.common.collect.Lists;
@@ -64,6 +69,14 @@ import org.apache.drill.shaded.guava.com.google.common.collect.Lists;
*/
public class AvroFormatTest extends BaseTestQuery {
+ private static String mapTableName;
+
+ @BeforeClass
+ public static void init() throws Exception {
+ // Generate a temporary Avro file containing a map, a map array and a map with array values
+ mapTableName = generateMapSchema().getFileName();
+ }
+
// XXX
// 1. Need to test nested field names with same name as top-level names for conflict.
// 2. Avro supports recursive types? Can we test this?
@@ -488,9 +501,9 @@ public class AvroFormatTest extends BaseTestQuery {
.unOrdered()
.baselineColumns("a_string", "array_item");
- for (int i = 0; i < AvroTestUtil.RECORD_COUNT; i++) {
+ for (int i = 0; i < RECORD_COUNT; i++) {
- for (int j = 0; j < AvroTestUtil.ARRAY_SIZE; j++) {
+ for (int j = 0; j < ARRAY_SIZE; j++) {
testBuilder.baselineValues("a_" + i, "c_string_array_" + i + "_" + j);
}
}
@@ -516,8 +529,8 @@ public class AvroFormatTest extends BaseTestQuery {
final String file = generateNestedArraySchema().getFileName();
TestBuilder testBuilder = nestedArrayQueryTestBuilder(file);
- for (int i = 0; i < AvroTestUtil.RECORD_COUNT; i++) {
- for (int j = 0; j < AvroTestUtil.ARRAY_SIZE; j++) {
+ for (int i = 0; i < RECORD_COUNT; i++) {
+ for (int j = 0; j < ARRAY_SIZE; j++) {
testBuilder.baselineValues(i, j);
}
}
@@ -528,7 +541,7 @@ public class AvroFormatTest extends BaseTestQuery {
//DRILL-4574
@Test
public void testFlattenEmptyComplexArrayMustYieldNoResults() throws Exception {
- final String file = generateNestedArraySchema(AvroTestUtil.RECORD_COUNT, 0).getFilePath();
+ final String file = generateNestedArraySchema(RECORD_COUNT, 0).getFilePath();
TestBuilder testBuilder = nestedArrayQueryTestBuilder(file);
testBuilder.expectsEmptyResultSet();
}
@@ -558,11 +571,11 @@ public class AvroFormatTest extends BaseTestQuery {
.baselineColumns("nested_key1", "nested_key2");
final List<Object> expectedList = Lists.newArrayList();
- for (int i = 0; i < AvroTestUtil.ARRAY_SIZE; i++) {
+ for (int i = 0; i < ARRAY_SIZE; i++) {
expectedList.add((double)i);
}
final List<Object> emptyList = listOf();
- for (int i = 0; i < AvroTestUtil.RECORD_COUNT; i += 2) {
+ for (int i = 0; i < RECORD_COUNT; i += 2) {
testBuilder.baselineValues(expectedList, expectedList);
testBuilder.baselineValues(emptyList, emptyList);
}
@@ -589,7 +602,184 @@ public class AvroFormatTest extends BaseTestQuery {
.sqlQuery(sql, file)
.ordered()
.baselineColumns("row_count")
- .baselineValues((long)AvroTestUtil.RECORD_COUNT)
+ .baselineValues((long) RECORD_COUNT)
.go();
}
+
+ @Test
+ public void testMapSchema() throws Exception {
+ String sql = "select map_field from dfs.`%s`";
+
+ TestBuilder testBuilder = testBuilder()
+ .sqlQuery(sql, mapTableName)
+ .unOrdered()
+ .baselineColumns("map_field");
+
+ for (long i = 0; i < RECORD_COUNT; i++) {
+ testBuilder.baselineValues(mapOfObject("key1", i, "key2", i + 1));
+ }
+ testBuilder.go();
+ }
+
+ @Test
+ public void testMapSchemaGetByKey() throws Exception {
+ String sql = "select map_field['key1'] val1, map_field['key2'] val2 from dfs.`%s`";
+
+ TestBuilder testBuilder = testBuilder()
+ .sqlQuery(sql, mapTableName)
+ .unOrdered()
+ .baselineColumns("val1", "val2");
+
+ for (long i = 0; i < RECORD_COUNT; i++) {
+ testBuilder.baselineValues(i, i + 1);
+ }
+ testBuilder.go();
+ }
+
+ @Test
+ public void testMapSchemaGetByKeyUsingDotNotation() throws Exception {
+ String sql = "select t.map_field.key1 val1, t.map_field.key2 val2 from dfs.`%s` t";
+
+ TestBuilder testBuilder = testBuilder()
+ .sqlQuery(sql, mapTableName)
+ .unOrdered()
+ .baselineColumns("val1", "val2");
+
+ for (long i = 0; i < RECORD_COUNT; i++) {
+ testBuilder.baselineValues(i, i + 1);
+ }
+ testBuilder.go();
+ }
+
+ @Test
+ public void testMapArraySchema() throws Exception {
+ String sql = "select map_array from dfs.`%s`";
+
+ TestBuilder testBuilder = testBuilder()
+ .sqlQuery(sql, mapTableName)
+ .unOrdered()
+ .baselineColumns("map_array");
+
+
+ for (int i = 0; i < RECORD_COUNT; i++) {
+ List<Object> array = listOf();
+ for (int j = 0; j < ARRAY_SIZE; j++) {
+ array.add(mapOfObject(
+ "key1", (i + 1) * (j + 50),
+ "key2", (i + 1) * (j + 100)
+ ));
+ }
+ testBuilder.baselineValues(array);
+ }
+ testBuilder.go();
+ }
+
+ @Test
+ public void testArrayMapSchemaGetElementByIndex() throws Exception {
+ int elementIndex = 1;
+ String sql = "select map_array[%d] element from dfs.`%s`";
+
+ TestBuilder testBuilder = testBuilder()
+ .sqlQuery(sql, elementIndex, mapTableName)
+ .unOrdered()
+ .baselineColumns("element");
+
+ for (int i = 0; i < RECORD_COUNT; i++) {
+ testBuilder.baselineValues(mapOfObject(
+ "key1", (i + 1) * (elementIndex + 50),
+ "key2", (i + 1) * (elementIndex + 100)
+ ));
+ }
+ testBuilder.go();
+ }
+
+ @Test
+ public void testArrayMapSchemaElementGetByKey() throws Exception {
+ int elementIndex = 1;
+ String sql = "select map_array[%d]['key2'] val from dfs.`%s`";
+
+ TestBuilder testBuilder = testBuilder()
+ .sqlQuery(sql, elementIndex, mapTableName)
+ .unOrdered()
+ .baselineColumns("val");
+
+ for (int i = 0; i < RECORD_COUNT; i++) {
+ testBuilder.baselineValues((i + 1) * (elementIndex + 100));
+ }
+ testBuilder.go();
+ }
+
+ @Test
+ public void testMapSchemaArrayValue() throws Exception {
+ String sql = "select map_array_value from dfs.`%s`";
+
+ TestBuilder testBuilder = testBuilder()
+ .sqlQuery(sql, mapTableName)
+ .unOrdered()
+ .baselineColumns("map_array_value");
+
+ for (int i = 0; i < RECORD_COUNT; i++) {
+ List<Object> doubleArray = listOf();
+ for (double j = 0; j < ARRAY_SIZE; j++) {
+ doubleArray.add((double) (i + 1) * j);
+ }
+ testBuilder.baselineValues(mapOfObject("key1", doubleArray, "key2", doubleArray));
+ }
+
+ testBuilder.go();
+ }
+
+ @Test
+ public void testMapSchemaArrayValueGetByKey() throws Exception {
+ String sql = "select map_array_value['key1'] element from dfs.`%s`";
+
+ TestBuilder testBuilder = testBuilder()
+ .sqlQuery(sql, generateMapSchema().getFileName())
+ .unOrdered()
+ .baselineColumns("element");
+
+ for (int i = 0; i < RECORD_COUNT; i++) {
+ List<Object> doubleArray = listOf();
+ for (double j = 0; j < ARRAY_SIZE; j++) {
+ doubleArray.add((double) (i + 1) * j);
+ }
+ testBuilder.baselineValues(doubleArray);
+ }
+
+ testBuilder.go();
+ }
+
+ @Test
+ public void testMapSchemaValueInFilter() throws Exception {
+ String sql = "select map_field['key1'] val from dfs.`%s` where map_field['key1'] < %d";
+
+ long filterValue = RECORD_COUNT / 10;
+ TestBuilder testBuilder = testBuilder()
+ .sqlQuery(sql, mapTableName, filterValue)
+ .unOrdered()
+ .baselineColumns("val");
+
+ for (long i = 0; i < filterValue; i++) {
+ testBuilder.baselineValues(i);
+ }
+ testBuilder.go();
+ }
+
+ @Test
+ public void testMapSchemaValueInFilter2() throws Exception {
+ String sql = "select map_array[%d]['key2'] val from dfs.`%s` where map_array[%d]['key2'] > %d";
+
+ int elementIndex = 1;
+ int startRecord = 5001;
+ int filterValue = 5002 * (elementIndex + 100);
+ TestBuilder testBuilder = testBuilder()
+ .sqlQuery(sql, elementIndex, mapTableName, elementIndex, filterValue)
+ .unOrdered()
+ .baselineColumns("val");
+
+ for (int i = startRecord + 1; i < RECORD_COUNT; i++) {
+ testBuilder.baselineValues((i + 1) * (elementIndex + 100));
+ }
+ testBuilder.go();
+ }
}
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/avro/AvroTestUtil.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/avro/AvroTestUtil.java
index c85cdf6..6f3d19d 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/avro/AvroTestUtil.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/avro/AvroTestUtil.java
@@ -742,4 +742,57 @@ public class AvroTestUtil {
return file;
}
+
+ public static AvroTestRecordWriter generateMapSchema() throws Exception {
+ final File file = File.createTempFile("avro-map-test", ".avro", BaseTestQuery.dirTestWatcher.getRootDir());
+ final Schema schema = SchemaBuilder.record("AvroRecordReaderTest")
+ .namespace("org.apache.drill.exec.store.avro")
+ .fields()
+ .name("map_field").type().optional().map().values(Schema.create(Type.LONG))
+ .name("map_array").type().optional().array().items(Schema.createMap(Schema.create(Type.INT)))
+ .name("map_array_value").type().optional().map().values(Schema.createArray(Schema.create(Type.DOUBLE)))
+ .endRecord();
+
+ final Schema mapArraySchema = schema.getField("map_array").schema();
+ final Schema arrayItemSchema = mapArraySchema.getTypes().get(1);
+
+ final AvroTestRecordWriter record = new AvroTestRecordWriter(schema, file);
+ try {
+ for (int i = 0; i < RECORD_COUNT; i++) {
+ record.startRecord();
+
+ // Create map with long values
+ Map<String, Long> map = new HashMap<>();
+ map.put("key1", (long) i);
+ map.put("key2", (long) i + 1);
+ record.put("map_field", map);
+
+ // Create a list of maps with int values
+ GenericArray<Map<String, Integer>> array = new GenericData.Array<>(ARRAY_SIZE, arrayItemSchema);
+ for (int j = 0; j < ARRAY_SIZE; j++) {
+ Map<String, Integer> mapInt = new HashMap<>();
+ mapInt.put("key1", (i + 1) * (j + 50));
+ mapInt.put("key2", (i + 1) * (j + 100));
+ array.add(mapInt);
+ }
+ record.put("map_array", array);
+
+ // Create map with array values. NOTE(review): doubleArray below is constructed with arrayItemSchema (the array-of-maps schema of map_array); the array-of-doubles schema from map_array_value looks like the intended one — confirm.
+ Map<String, GenericArray<Double>> mapArrayValue = new HashMap<>();
+ GenericArray<Double> doubleArray = new GenericData.Array<>(ARRAY_SIZE, arrayItemSchema);
+ for (int j = 0; j < ARRAY_SIZE; j++) {
+ doubleArray.add((double) (i + 1) * j);
+ }
+ mapArrayValue.put("key1", doubleArray);
+ mapArrayValue.put("key2", doubleArray);
+ record.put("map_array_value", mapArrayValue);
+
+ record.endRecord();
+ }
+ } finally {
+ record.close();
+ }
+
+ return record;
+ }
}
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetComplex.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetComplex.java
index 32ffb68..075f644 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetComplex.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetComplex.java
@@ -21,6 +21,7 @@ import java.math.BigDecimal;
import org.apache.drill.exec.util.JsonStringArrayList;
import org.apache.drill.test.BaseTestQuery;
+import org.apache.drill.test.TestBuilder;
import org.junit.Test;
public class TestParquetComplex extends BaseTestQuery {
@@ -220,4 +221,618 @@ public class TestParquetComplex extends BaseTestQuery {
.baselineValues(ints, longs, fixedLen, fixedLen)
.go();
}
+
+ @Test
+ public void selectDictBigIntValue() throws Exception {
+ String query = "select order_items from cp.`store/parquet/complex/simple_map.parquet`";
+ testBuilder()
+ .sqlQuery(query)
+ .unOrdered()
+ .baselineColumns("order_items")
+ .baselineValues(TestBuilder.mapOfObject("Pencils", 1L))
+ .go();
+ }
+
+ @Test
+ public void selectDictStructValue() throws Exception {
+ String query = "select id, mapcol4 from cp.`store/parquet/complex/map/parquet/000000_0.parquet`";
+ testBuilder()
+ .sqlQuery(query)
+ .unOrdered()
+ .baselineColumns("id", "mapcol4")
+ .baselineValues(1,
+ TestBuilder.mapOfObject(
+ 101L,
+ TestBuilder.mapOfObject(false, "item_amount", 1L, "item_type", "pencil"),
+ 102L,
+ TestBuilder.mapOfObject(false, "item_amount", 2L, "item_type", "eraser")
+ )
+ )
+ .baselineValues(2,
+ TestBuilder.mapOfObject(
+ 102L,
+ TestBuilder.mapOfObject(false, "item_amount", 3L, "item_type", "pen"),
+ 103L,
+ TestBuilder.mapOfObject(false, "item_amount", 4L, "item_type", "scissors")
+ )
+ )
+ .baselineValues(3,
+ TestBuilder.mapOfObject(
+ 110L,
+ TestBuilder.mapOfObject(false, "item_amount", 5L, "item_type", "glue"),
+ 113L,
+ TestBuilder.mapOfObject(false, "item_amount", 6L, "item_type", "pencil")
+ )
+ )
+ .baselineValues(4,
+ TestBuilder.mapOfObject(
+ 238L,
+ TestBuilder.mapOfObject(false, "item_amount", 7L, "item_type", "pen"),
+ 239L,
+ TestBuilder.mapOfObject(false, "item_amount", 8L, "item_type", "eraser"),
+ 240L,
+ TestBuilder.mapOfObject(false, "item_amount", 9L, "item_type", "scissors"),
+ 241L,
+ TestBuilder.mapOfObject(false, "item_amount", 10L, "item_type", "glue")
+ )
+ )
+ .baselineValues(5,
+ TestBuilder.mapOfObject(
+ 242L,
+ TestBuilder.mapOfObject(false, "item_amount", 11L, "item_type", "paper"),
+ 243L,
+ TestBuilder.mapOfObject(false, "item_amount", 13L, "item_type", "ink")
+ )
+ )
+ .go();
+ }
+
+ @Test
+ public void selectDictIntArrayValue() throws Exception {
+ String query = "select id, mapcol5 from cp.`store/parquet/complex/map/parquet/000000_0.parquet` order by id asc";
+ testBuilder()
+ .sqlQuery(query)
+ .ordered()
+ .baselineColumns("id", "mapcol5")
+ .baselineValues(
+ 1, TestBuilder.mapOfObject(
+ 3, TestBuilder.listOf(3, 4, 5),
+ 5, TestBuilder.listOf(5, 3)
+ )
+ )
+ .baselineValues(
+ 2, TestBuilder.mapOfObject(
+ 1, TestBuilder.listOf(1, 2, 3, 4, 5)
+ )
+ )
+ .baselineValues(
+ 3, TestBuilder.mapOfObject(
+ 1, TestBuilder.listOf(1, 2, 3, 4, 5),
+ 2, TestBuilder.listOf(2, 3)
+ )
+ )
+ .baselineValues(
+ 4, TestBuilder.mapOfObject(
+ 1, TestBuilder.listOf(3, 4, 5, 10, -2, -4),
+ 5, TestBuilder.listOf(), // this actually contains a null element
+ -2, TestBuilder.listOf(2, 2, 2, 2),
+ 8, TestBuilder.listOf(2, 2, 3, 4)
+ )
+ )
+ .baselineValues(
+ 5, TestBuilder.mapOfObject(
+ 2, TestBuilder.listOf(5),
+ 3, TestBuilder.listOf(8, -5, 3, 4)
+ )
+ )
+ .go();
+ }
+
+ @Test
+ public void selectDictIntArrayValueGetByKey() throws Exception {
+ String query = "select id, mapcol5[1] as val from cp.`store/parquet/complex/map/parquet/000000_0.parquet` order by id asc";
+ testBuilder()
+ .sqlQuery(query)
+ .ordered()
+ .baselineColumns("id", "val")
+ .baselineValues(1, TestBuilder.listOf())
+ .baselineValues(2, TestBuilder.listOf(1, 2, 3, 4, 5))
+ .baselineValues(3, TestBuilder.listOf(1, 2, 3, 4, 5))
+ .baselineValues(4, TestBuilder.listOf(3, 4, 5, 10, -2, -4))
+ .baselineValues(5, TestBuilder.listOf())
+ .go();
+ }
+
+ @Test
+ public void selectDictDictValue() throws Exception {
+ String query = "select id, mapcol3 from cp.`store/parquet/complex/map/parquet/000000_0.parquet` order by id asc";
+ testBuilder()
+ .sqlQuery(query)
+ .ordered()
+ .baselineColumns("id", "mapcol3")
+ .baselineValues(1, TestBuilder.mapOfObject(
+ 3, TestBuilder.mapOfObject("a", 1L, "b", 2L),
+ 4, TestBuilder.mapOfObject("c", 3L),
+ 5, TestBuilder.mapOfObject("d", 4L, "e", 5L)
+ )
+ )
+ .baselineValues(2, TestBuilder.mapOfObject(
+ 1, TestBuilder.mapOfObject("a", 1L, "b", 2L)
+ )
+ )
+ .baselineValues(3, TestBuilder.mapOfObject(
+ 2, TestBuilder.mapOfObject("a", 1L, "b", 2L),
+ 3, TestBuilder.mapOfObject("C", 3L)
+ )
+ )
+ .baselineValues(4, TestBuilder.mapOfObject(
+ 2, TestBuilder.mapOfObject("abc", 1L, "bce", 2L),
+ 4, TestBuilder.mapOfObject("c", 3L, "cf", 6L),
+ 5, TestBuilder.mapOfObject("d", 4L, "eh", 5L),
+ 8, TestBuilder.mapOfObject("d", 32L, "e", -17L)
+ )
+ )
+ .baselineValues(5, TestBuilder.mapOfObject(
+ 1, TestBuilder.mapOfObject("bee", -2L, "awg", 1L),
+ 2, TestBuilder.mapOfObject("cddd", 3L),
+ 4, TestBuilder.mapOfObject("deea", 4L, "eerie", 99L)
+ )
+ )
+ .go();
+ }
+
+ @Test
+ public void selectDictGetByIntKeyComplexValue() throws Exception {
+ String query = "select id, mapcol3[3] as val from cp.`store/parquet/complex/map/parquet/000000_0.parquet`";
+ testBuilder()
+ .sqlQuery(query)
+ .unOrdered()
+ .baselineColumns("id", "val")
+ .baselineValues(4, TestBuilder.mapOfObject())
+ .baselineValues(1, TestBuilder.mapOfObject("a", 1L, "b", 2L))
+ .baselineValues(3, TestBuilder.mapOfObject("C", 3L))
+ .baselineValues(2, TestBuilder.mapOfObject())
+ .baselineValues(5, TestBuilder.mapOfObject())
+ .go();
+ }
+
+ @Test
+ public void selectDictGetByStringKey() throws Exception {
+ String query = "select mapcol['a'] val from cp.`store/parquet/complex/map/parquet/000000_0.parquet` order by id asc";
+ testBuilder()
+ .sqlQuery(query)
+ .ordered()
+ .baselineColumns("val")
+ .baselineValuesForSingleColumn(null, 1, null, 3, 8)
+ .go();
+ }
+
+ @Test
+ public void selectDictGetByStringKey2() throws Exception {
+ String query = "select id, mapcol['b'] val from cp.`store/parquet/complex/map/parquet/000000_0.parquet`";
+ testBuilder()
+ .sqlQuery(query)
+ .unOrdered()
+ .baselineColumns("id", "val")
+ .baselineValues(4, 4)
+ .baselineValues(1, 6)
+ .baselineValues(3, null)
+ .baselineValues(2, 2)
+ .baselineValues(5, 6)
+ .go();
+ }
+
+ @Test
+ public void selectDictByKeyComplexValue2() throws Exception {
+ String query = "select id, mapcol3[4]['c'] val from cp.`store/parquet/complex/map/parquet/000000_0.parquet`";
+ testBuilder()
+ .sqlQuery(query)
+ .unOrdered()
+ .baselineColumns("id", "val")
+ .baselineValues(4, 3L)
+ .baselineValues(1, 3L)
+ .baselineValues(3, null)
+ .baselineValues(2, null)
+ .baselineValues(5, null)
+ .go();
+ }
+
+ @Test
+ public void selectDictGetByKeyComplexValue3() throws Exception {
+ String query = "select id, mapcol3[3]['b'] val from cp.`store/parquet/complex/map/parquet/000000_0.parquet`";
+ testBuilder()
+ .sqlQuery(query)
+ .unOrdered()
+ .baselineColumns("id", "val")
+ .baselineValues(4, null)
+ .baselineValues(1, 2L)
+ .baselineValues(3, null)
+ .baselineValues(2, null)
+ .baselineValues(5, null)
+ .go();
+ }
+
+ @Test
+ public void testDictOrderByAnotherField() throws Exception {
+ String query = "select id, mapcol from cp.`store/parquet/complex/map/parquet/000000_0.parquet` order by id desc";
+ testBuilder()
+ .sqlQuery(query)
+ .ordered()
+ .baselineColumns("id", "mapcol")
+ .baselineValues(5, TestBuilder.mapOfObject("b", 6, "c", 7, "a", 8, "abc4", 9, "bde", 10))
+ .baselineValues(4, TestBuilder.mapOfObject("a", 3, "b", 4, "c", 5))
+ .baselineValues(3, TestBuilder.mapOfObject("b", null, "c", 8, "d", 9, "e", 10))
+ .baselineValues(2, TestBuilder.mapOfObject("a", 1, "b", 2, "c", 3))
+ .baselineValues(1, TestBuilder.mapOfObject("b", 6, "c", 7))
+ .go();
+ }
+
+ @Test
+ public void testDictWithLimitAndOffset() throws Exception {
+ String query = "select id, mapcol from cp.`store/parquet/complex/map/parquet/000000_0.parquet` order by id desc limit 2 offset 2";
+ testBuilder()
+ .sqlQuery(query)
+ .ordered()
+ .baselineColumns("id", "mapcol")
+ .baselineValues(3, TestBuilder.mapOfObject("b", null, "c", 8, "d", 9, "e", 10))
+ .baselineValues(2, TestBuilder.mapOfObject("a", 1, "b", 2, "c", 3))
+ .go();
+ }
+
+ @Test
+ public void testDictDictArrayValue() throws Exception {
+ String query = "select id, map_array from cp.`store/parquet/complex/map/parquet/000000_0.parquet`";
+ testBuilder()
+ .sqlQuery(query)
+ .unOrdered()
+ .baselineColumns("id", "map_array")
+ .baselineValues(
+ 4,
+ TestBuilder.listOf(
+ TestBuilder.mapOfObject(1L, 2, 10L, 1, 42L, 3, 31L, 4),
+ TestBuilder.mapOfObject(-1L, 2, 3L, 1, 5L, 3, 54L, 4, 55L, 589, -78L, 2),
+ TestBuilder.mapOfObject(1L, 124, 3L, 1, -4L, 2, 19L, 3, 5L, 3, 9L, 1),
+ TestBuilder.mapOfObject(1L, 89, 2L, 1, 3L, 3, 4L, 21, 5L, 12, 6L, 34),
+ TestBuilder.mapOfObject(1L, -25, 3L, 1, 5L, 3, 6L, 2, 9L, 333, 10L, 344),
+ TestBuilder.mapOfObject(3L, 222, 4L, 1, 5L, 3, 6L, 2, 7L, 1, 8L, 3),
+ TestBuilder.mapOfObject(1L, 11, 3L, 12, 5L, 13)
+ )
+ )
+ .baselineValues(
+ 1,
+ TestBuilder.listOf(
+ TestBuilder.mapOfObject(8L, 1, 9L, 2, 523L, 4, 31L, 3),
+ TestBuilder.mapOfObject(1L, 2, 3L, 1, 5L, 3)
+ )
+ )
+ .baselineValues(
+ 3,
+ TestBuilder.listOf(
+ TestBuilder.mapOfObject(3L, 1),
+ TestBuilder.mapOfObject(1L, 2)
+ )
+ )
+ .baselineValues(
+ 2,
+ TestBuilder.listOf(
+ TestBuilder.mapOfObject(1L, 1, 2L, 2)
+ )
+ )
+ .baselineValues(
+ 5,
+ TestBuilder.listOf(
+ TestBuilder.mapOfObject(1L, 1, 2L, 2, 3L, 3, 4L, 4),
+ TestBuilder.mapOfObject(1L, -1, 2L, -2),
+ TestBuilder.mapOfObject(1L, 4, 2L, 5, 3L, 7)
+ )
+ )
+ .go();
+ }
+
+ @Test
+ public void testDictArrayGetElementByIndex() throws Exception {
+ String query = "select id, map_array[0] as element from cp.`store/parquet/complex/map/parquet/000000_0.parquet`";
+ testBuilder()
+ .sqlQuery(query)
+ .unOrdered()
+ .baselineColumns("id", "element")
+ .baselineValues(4, TestBuilder.mapOfObject(1L, 2, 10L, 1, 42L, 3, 31L, 4))
+ .baselineValues(1, TestBuilder.mapOfObject(8L, 1, 9L, 2, 523L, 4, 31L, 3))
+ .baselineValues(3, TestBuilder.mapOfObject(3L, 1))
+ .baselineValues(2, TestBuilder.mapOfObject(1L, 1, 2L, 2))
+ .baselineValues(5, TestBuilder.mapOfObject(1L, 1, 2L, 2, 3L, 3, 4L, 4))
+ .go();
+ }
+
+ @Test
+ public void testDictGetByLongKey() throws Exception {
+ String query = "select id, mapcol4[102] as val from cp.`store/parquet/complex/map/parquet/000000_0.parquet`";
+ testBuilder()
+ .sqlQuery(query)
+ .unOrdered()
+ .baselineColumns("id", "val")
+ .baselineValues(1, TestBuilder.mapOfObject(false, "item_amount", 2L, "item_type", "eraser"))
+ .baselineValues(2, TestBuilder.mapOfObject(false, "item_amount", 3L, "item_type", "pen"))
+ .baselineValues(3, TestBuilder.mapOfObject())
+ .baselineValues(4, TestBuilder.mapOfObject())
+ .baselineValues(5, TestBuilder.mapOfObject())
+ .go();
+ }
+
+ @Test
+ public void testSelectDictFloatToFloat() throws Exception {
+ String query = "select id, mapcol2 as val from cp.`store/parquet/complex/map/parquet/000000_0.parquet`";
+ testBuilder()
+ .sqlQuery(query)
+ .unOrdered()
+ .baselineColumns("id", "val")
+ .baselineValues(4, TestBuilder.mapOfObject(-9.01f, 2.0f, 0.43f, 4.3f))
+ .baselineValues(1, TestBuilder.mapOfObject(1.1f, -1.0f, 2.3f, 2.1f, 3.45f, 3.5f, 4.47f, 4.43f))
+ .baselineValues(3, TestBuilder.mapOfObject(7.9f, 0.43f, 3.1f, 21.1f, 1.1f, 3.53f))
+ .baselineValues(2, TestBuilder.mapOfObject(0.9f, 0.43f, 1.1f, 2.1f, 2.0f, 3.3f))
+ .baselineValues(5, TestBuilder.mapOfObject(1.1f, 255.34f, -2.0f, 24.0f, 45.53f, 78.22f))
+ .go();
+ }
+
+ @Test
+ public void testSelectDictGetByFloatKey() throws Exception {
+ String query = "select id, mapcol2['1.1'] as val from cp.`store/parquet/complex/map/parquet/000000_0.parquet`";
+ testBuilder()
+ .sqlQuery(query)
+ .unOrdered()
+ .baselineColumns("id", "val")
+ .baselineValues(4, null)
+ .baselineValues(1, -1.0f)
+ .baselineValues(3, 3.53f)
+ .baselineValues(2, 2.1f)
+ .baselineValues(5, 255.34f)
+ .go();
+ }
+
+ @Test
+ public void testSelectDictGetByNegativeFloatKey() throws Exception {
+ String query = "select id, mapcol2['-9.01'] as val from cp.`store/parquet/complex/map/parquet/000000_0.parquet`";
+ testBuilder()
+ .sqlQuery(query)
+ .unOrdered()
+ .baselineColumns("id", "val")
+ .baselineValues(4, 2.0f)
+ .baselineValues(1, null)
+ .baselineValues(3, null)
+ .baselineValues(2, null)
+ .baselineValues(5, null)
+ .go();
+ }
+
+ @Test
+ public void testDictOrderByValue() throws Exception {
+ String query = "select id, mapcol as val from cp.`store/parquet/complex/map/parquet/000000_0.parquet` order by mapcol['a'] desc";
+ testBuilder()
+ .sqlQuery(query)
+ .ordered()
+ .baselineColumns("id", "val")
+ .baselineValues(1, TestBuilder.mapOfObject("b", 6, "c", 7))
+ .baselineValues(3, TestBuilder.mapOfObject("b", null, "c", 8, "d", 9, "e", 10))
+ .baselineValues(5, TestBuilder.mapOfObject("b", 6, "c", 7, "a", 8, "abc4", 9, "bde", 10))
+ .baselineValues(4, TestBuilder.mapOfObject("a", 3, "b", 4, "c", 5))
+ .baselineValues(2, TestBuilder.mapOfObject("a", 1, "b", 2, "c", 3))
+ .go();
+ }
+
+ @Test
+ public void testDictArrayElementGetByKey() throws Exception {
+ String query = "select map_array[1][5] as val from cp.`store/parquet/complex/map/parquet/000000_0.parquet` order by map_array[1][5] desc";
+ testBuilder()
+ .sqlQuery(query)
+ .ordered()
+ .baselineColumns("val")
+ .baselineValuesForSingleColumn(null, null, null, 3, 3)
+ .go();
+ }
+
+ @Test
+ public void testDictArrayElementGetByStringKey() throws Exception {
+ String query = "select map_array[1]['1'] as val from cp.`store/parquet/complex/map/parquet/000000_0.parquet`";
+ testBuilder()
+ .sqlQuery(query)
+ .unOrdered()
+ .baselineColumns("val")
+ .baselineValuesForSingleColumn(null, 2, 2, null, -1)
+ .go();
+ }
+
+ @Test
+ public void testDictTypeOf() throws Exception {
+ String query = "select typeof(map_array[0]) as type from cp.`store/parquet/complex/map/parquet/000000_0.parquet` limit 1";
+ testBuilder()
+ .sqlQuery(query)
+ .ordered()
+ .baselineColumns("type")
+ .baselineValuesForSingleColumn("DICT<BIGINT,INT>")
+ .go();
+ }
+
+ @Test
+ public void testDictFlatten() throws Exception {
+ String query = "select id, flatten(mapcol) as flat from cp.`store/parquet/complex/map/parquet/000000_0.parquet` order by id";
+ testBuilder()
+ .sqlQuery(query)
+ .unOrdered()
+ .baselineColumns("id", "flat")
+ .baselineValues(1, TestBuilder.mapOfObject(false, "key", "b", "value", 6))
+ .baselineValues(1, TestBuilder.mapOfObject(false, "key", "c", "value", 7))
+ .baselineValues(3, TestBuilder.mapOfObject(false, "key", "b")) // "value" == null
+ .baselineValues(3, TestBuilder.mapOfObject(false, "key", "c", "value", 8))
+ .baselineValues(3, TestBuilder.mapOfObject(false, "key", "d", "value", 9))
+ .baselineValues(3, TestBuilder.mapOfObject(false, "key", "e", "value", 10))
+ .baselineValues(5, TestBuilder.mapOfObject(false, "key", "b", "value", 6))
+ .baselineValues(5, TestBuilder.mapOfObject(false, "key", "c", "value", 7))
+ .baselineValues(5, TestBuilder.mapOfObject(false, "key", "a", "value", 8))
+ .baselineValues(5, TestBuilder.mapOfObject(false, "key", "abc4", "value", 9))
+ .baselineValues(5, TestBuilder.mapOfObject(false, "key", "bde", "value", 10))
+ .baselineValues(4, TestBuilder.mapOfObject(false, "key", "a", "value", 3))
+ .baselineValues(4, TestBuilder.mapOfObject(false, "key", "b", "value", 4))
+ .baselineValues(4, TestBuilder.mapOfObject(false, "key", "c", "value", 5))
+ .baselineValues(2, TestBuilder.mapOfObject(false, "key", "a", "value", 1))
+ .baselineValues(2, TestBuilder.mapOfObject(false, "key", "b", "value", 2))
+ .baselineValues(2, TestBuilder.mapOfObject(false, "key", "c", "value", 3))
+ .go();
+ }
+
+ @Test
+ public void testDictArrayFlatten() throws Exception {
+ String query = "select id, flatten(map_array) flat from cp.`store/parquet/complex/map/parquet/000000_0.parquet`";
+ testBuilder()
+ .sqlQuery(query)
+ .unOrdered()
+ .baselineColumns("id", "flat")
+ .baselineValues(4, TestBuilder.mapOfObject(1L, 2, 10L, 1, 42L, 3, 31L, 4))
+ .baselineValues(4, TestBuilder.mapOfObject(-1L, 2, 3L, 1, 5L, 3, 54L, 4, 55L, 589, -78L, 2))
+ .baselineValues(4, TestBuilder.mapOfObject(1L, 124, 3L, 1, -4L, 2, 19L, 3, 5L, 3, 9L, 1))
+ .baselineValues(4, TestBuilder.mapOfObject(1L, 89, 2L, 1, 3L, 3, 4L, 21, 5L, 12, 6L, 34))
+ .baselineValues(4, TestBuilder.mapOfObject(1L, -25, 3L, 1, 5L, 3, 6L, 2, 9L, 333, 10L, 344))
+ .baselineValues(4, TestBuilder.mapOfObject(3L, 222, 4L, 1, 5L, 3, 6L, 2, 7L, 1, 8L, 3))
+ .baselineValues(4, TestBuilder.mapOfObject(1L, 11, 3L, 12, 5L, 13))
+ .baselineValues(1, TestBuilder.mapOfObject(8L, 1, 9L, 2, 523L, 4, 31L, 3))
+ .baselineValues(1, TestBuilder.mapOfObject(1L, 2, 3L, 1, 5L, 3))
+ .baselineValues(3, TestBuilder.mapOfObject(3L, 1))
+ .baselineValues(3, TestBuilder.mapOfObject(1L, 2))
+ .baselineValues(2, TestBuilder.mapOfObject(1L, 1, 2L, 2))
+ .baselineValues(5, TestBuilder.mapOfObject(1L, 1, 2L, 2, 3L, 3, 4L, 4))
+ .baselineValues(5, TestBuilder.mapOfObject(1L, -1, 2L, -2))
+ .baselineValues(5, TestBuilder.mapOfObject(1L, 4, 2L, 5, 3L, 7))
+ .go();
+ }
+
+ @Test
+ public void testDictArrayAndElementFlatten() throws Exception {
+ String query = "select id, flatten(flatten(map_array)) flat from cp.`store/parquet/complex/map/parquet/000000_0.parquet`";
+ testBuilder()
+ .sqlQuery(query)
+ .unOrdered()
+ .baselineColumns("id", "flat")
+ .baselineValues(4, TestBuilder.mapOf("key", 1L, "value", 2))
+ .baselineValues(4, TestBuilder.mapOf("key", 10L, "value", 1))
+ .baselineValues(4, TestBuilder.mapOf("key", 42L, "value", 3))
+ .baselineValues(4, TestBuilder.mapOf("key", 31L, "value", 4))
+ .baselineValues(4, TestBuilder.mapOf("key", -1L, "value", 2))
+ .baselineValues(4, TestBuilder.mapOf("key", 3L, "value", 1))
+ .baselineValues(4, TestBuilder.mapOf("key", 5L, "value", 3))
+ .baselineValues(4, TestBuilder.mapOf("key", 54L, "value", 4))
+ .baselineValues(4, TestBuilder.mapOf("key", 55L, "value", 589))
+ .baselineValues(4, TestBuilder.mapOf("key", -78L, "value", 2))
+ .baselineValues(4, TestBuilder.mapOf("key", 1L, "value", 124))
+ .baselineValues(4, TestBuilder.mapOf("key", 3L, "value", 1))
+ .baselineValues(4, TestBuilder.mapOf("key", -4L, "value", 2))
+ .baselineValues(4, TestBuilder.mapOf("key", 19L, "value", 3))
+ .baselineValues(4, TestBuilder.mapOf("key", 5L, "value", 3))
+ .baselineValues(4, TestBuilder.mapOf("key", 9L, "value", 1))
+ .baselineValues(4, TestBuilder.mapOf("key", 1L, "value", 89))
+ .baselineValues(4, TestBuilder.mapOf("key", 2L, "value", 1))
+ .baselineValues(4, TestBuilder.mapOf("key", 3L, "value", 3))
+ .baselineValues(4, TestBuilder.mapOf("key", 4L, "value", 21))
+ .baselineValues(4, TestBuilder.mapOf("key", 5L, "value", 12))
+ .baselineValues(4, TestBuilder.mapOf("key", 6L, "value", 34))
+ .baselineValues(4, TestBuilder.mapOf("key", 1L, "value", -25))
+ .baselineValues(4, TestBuilder.mapOf("key", 3L, "value", 1))
+ .baselineValues(4, TestBuilder.mapOf("key", 5L, "value", 3))
+ .baselineValues(4, TestBuilder.mapOf("key", 6L, "value", 2))
+ .baselineValues(4, TestBuilder.mapOf("key", 9L, "value", 333))
+ .baselineValues(4, TestBuilder.mapOf("key", 10L, "value", 344))
+ .baselineValues(4, TestBuilder.mapOf("key", 3L, "value", 222))
+ .baselineValues(4, TestBuilder.mapOf("key", 4L, "value", 1))
+ .baselineValues(4, TestBuilder.mapOf("key", 5L, "value", 3))
+ .baselineValues(4, TestBuilder.mapOf("key", 6L, "value", 2))
+ .baselineValues(4, TestBuilder.mapOf("key", 7L, "value", 1))
+ .baselineValues(4, TestBuilder.mapOf("key", 8L, "value", 3))
+ .baselineValues(4, TestBuilder.mapOf("key", 1L, "value", 11))
+ .baselineValues(4, TestBuilder.mapOf("key", 3L, "value", 12))
+ .baselineValues(4, TestBuilder.mapOf("key", 5L, "value", 13))
+ .baselineValues(1, TestBuilder.mapOf("key", 8L, "value", 1))
+ .baselineValues(1, TestBuilder.mapOf("key", 9L, "value", 2))
+ .baselineValues(1, TestBuilder.mapOf("key", 523L, "value", 4))
+ .baselineValues(1, TestBuilder.mapOf("key", 31L, "value", 3))
+ .baselineValues(1, TestBuilder.mapOf("key", 1L, "value", 2))
+ .baselineValues(1, TestBuilder.mapOf("key", 3L, "value", 1))
+ .baselineValues(1, TestBuilder.mapOf("key", 5L, "value", 3))
+ .baselineValues(3, TestBuilder.mapOf("key", 3L, "value", 1))
+ .baselineValues(3, TestBuilder.mapOf("key", 1L, "value", 2))
+ .baselineValues(2, TestBuilder.mapOf("key", 1L, "value", 1))
+ .baselineValues(2, TestBuilder.mapOf("key", 2L, "value", 2))
+ .baselineValues(5, TestBuilder.mapOf("key", 1L, "value", 1))
+ .baselineValues(5, TestBuilder.mapOf("key", 2L, "value", 2))
+ .baselineValues(5, TestBuilder.mapOf("key", 3L, "value", 3))
+ .baselineValues(5, TestBuilder.mapOf("key", 4L, "value", 4))
+ .baselineValues(5, TestBuilder.mapOf("key", 1L, "value", -1))
+ .baselineValues(5, TestBuilder.mapOf("key", 2L, "value", -2))
+ .baselineValues(5, TestBuilder.mapOf("key", 1L, "value", 4))
+ .baselineValues(5, TestBuilder.mapOf("key", 2L, "value", 5))
+ .baselineValues(5, TestBuilder.mapOf("key", 3L, "value", 7))
+ .go();
+ }
+
+ @Test
+ public void selectDictFlattenListValue() throws Exception {
+ String query = "select id, flatten(mapcol5[1]) as flat from cp.`store/parquet/complex/map/parquet/000000_0.parquet`";
+ testBuilder()
+ .sqlQuery(query)
+ .unOrdered()
+ .baselineColumns("id", "flat")
+ .baselineValues(2, 1)
+ .baselineValues(2, 2)
+ .baselineValues(2, 3)
+ .baselineValues(2, 4)
+ .baselineValues(2, 5)
+ .baselineValues(3, 1)
+ .baselineValues(3, 2)
+ .baselineValues(3, 3)
+ .baselineValues(3, 4)
+ .baselineValues(3, 5)
+ .baselineValues(4, 3)
+ .baselineValues(4, 4)
+ .baselineValues(4, 5)
+ .baselineValues(4, 10)
+ .baselineValues(4, -2)
+ .baselineValues(4, -4)
+ .go();
+ }
+
+ @Test
+ public void testDictValueInFilter() throws Exception {
+ String query = "select id, mapcol from cp.`store/parquet/complex/map/parquet/000000_0.parquet` where mapcol['c'] > 5";
+ testBuilder()
+ .sqlQuery(query)
+ .unOrdered()
+ .baselineColumns("id", "mapcol")
+ .baselineValues(1, TestBuilder.mapOfObject("b", 6, "c", 7))
+ .baselineValues(3, TestBuilder.mapOfObject("b", null, "c", 8, "d", 9, "e", 10))
+ .baselineValues(5, TestBuilder.mapOfObject("b", 6, "c", 7, "a", 8, "abc4", 9, "bde", 10))
+ .go();
+ }
+
+ @Test
+ public void testDictValueInFilter2() throws Exception {
+ String query = "select id, mapcol from cp.`store/parquet/complex/map/parquet/000000_0.parquet` where mapcol['a'] is null";
+ testBuilder()
+ .sqlQuery(query)
+ .unOrdered()
+ .baselineColumns("id", "mapcol")
+ .baselineValues(1, TestBuilder.mapOfObject("b", 6, "c", 7))
+ .baselineValues(3, TestBuilder.mapOfObject("b", null, "c", 8, "d", 9, "e", 10))
+ .go();
+ }
+
+ @Test
+ public void testDictValueInFilter3() throws Exception {
+ String query = "select id, mapcol from cp.`store/parquet/complex/map/parquet/000000_0.parquet` where mapcol['b'] is not null";
+ testBuilder()
+ .sqlQuery(query)
+ .unOrdered()
+ .baselineColumns("id", "mapcol")
+ .baselineValues(1, TestBuilder.mapOfObject("b", 6, "c", 7))
+ .baselineValues(5, TestBuilder.mapOfObject("b", 6, "c", 7, "a", 8, "abc4", 9, "bde", 10))
+ .baselineValues(4, TestBuilder.mapOfObject("a", 3, "b", 4, "c", 5))
+ .baselineValues(2, TestBuilder.mapOfObject("a", 1, "b", 2, "c", 3))
+ .go();
+ }
}
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetMetadataCache.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetMetadataCache.java
index 955a764..5ad5a83 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetMetadataCache.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetMetadataCache.java
@@ -27,6 +27,7 @@ import org.apache.drill.exec.record.BatchSchemaBuilder;
import org.apache.drill.exec.record.metadata.SchemaBuilder;
import org.apache.drill.exec.store.parquet.metadata.Metadata;
import org.apache.drill.exec.store.parquet.metadata.MetadataVersion;
+import org.apache.drill.test.TestBuilder;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Ignore;
@@ -1411,4 +1412,33 @@ public class TestParquetMetadataCache extends PlanTestBase {
.go();
checkForMetadataFile(tableName);
}
+
+ @Test
+ public void testRefreshWithDictColumn() throws Exception {
+ test("use dfs");
+
+ String tableName = "parquet_map_ctas";
+ test("create table `%s` as select * from cp.`store/parquet/complex/map/parquet/000000_0.parquet`", tableName);
+
+ String metadataQuery = String.format("refresh table metadata %s", tableName);
+ testBuilder()
+ .sqlQuery(metadataQuery)
+ .unOrdered()
+ .baselineColumns("ok", "summary")
+ .baselineValues(true, String.format("Successfully updated metadata for table %s.", tableName))
+ .go();
+
+ String query = String.format("select id, mapcol from %s where mapcol['b'] is not null", tableName);
+ testBuilder()
+ .sqlQuery(query)
+ .unOrdered()
+ .baselineColumns("id", "mapcol")
+ .baselineValues(5, TestBuilder.mapOfObject("b", 6, "c", 7, "a", 8, "abc4", 9, "bde", 10))
+ .baselineValues(4, TestBuilder.mapOfObject("a", 3, "b", 4, "c", 5))
+ .baselineValues(2, TestBuilder.mapOfObject("a", 1, "b", 2, "c", 3))
+ .baselineValues(1, TestBuilder.mapOfObject("b", 6, "c", 7))
+ .go();
+
+ PlanTestBase.testPlanMatchingPatterns(query, "numFiles=1", "usedMetadataFile=true");
+ }
}
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/TestBuilder.java b/exec/java-exec/src/test/java/org/apache/drill/test/TestBuilder.java
index 757b619..0b88670 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/test/TestBuilder.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/TestBuilder.java
@@ -692,6 +692,41 @@ public class TestBuilder {
}
/**
+ * Convenience method to create an instance of {@link JsonStringHashMap}{@code <Object, Object>} with the given key-value sequence.
+ *
+ * By default, any {@link String} instance will be wrapped in a {@link Text} instance. To disable this wrapping, pass
+ * {@code false} as the first object in the key-value sequence.
+ *
+ * @param keyValueSequence sequence of key-value pairs with optional boolean
+ * flag which disables wrapping String instances in {@link Text}.
+ * @return map consisting of entries given in the key-value sequence.
+ */
+ public static JsonStringHashMap<Object, Object> mapOfObject(Object... keyValueSequence) {
+ boolean convertStringToText = true;
+ final int startIndex;
+ if (keyValueSequence.length % 2 == 1) {
+ convertStringToText = (boolean) keyValueSequence[0];
+ startIndex = 1;
+ } else {
+ startIndex = 0;
+ }
+
+ final JsonStringHashMap<Object, Object> map = new JsonStringHashMap<>();
+ for (int i = startIndex; i < keyValueSequence.length; i += 2) {
+ Object key = keyValueSequence[i];
+ if (convertStringToText && key instanceof CharSequence) {
+ key = new Text(key.toString());
+ }
+ Object value = keyValueSequence[i + 1];
+ if (value instanceof CharSequence) {
+ value = new Text(value.toString());
+ }
+ map.put(key, value);
+ }
+ return map;
+ }
+
+ /**
* Helper method for the timestamp values that depend on the local timezone
* @param value expected timestamp value in UTC
* @return timestamp value for the local timezone
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestFillEmpties.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestFillEmpties.java
index fa61f87..7b2dbd0 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestFillEmpties.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestFillEmpties.java
@@ -102,6 +102,7 @@ public class TestFillEmpties extends SubOperatorTest {
case LATE:
case LIST:
case MAP:
+ case DICT:
case NULL:
case UNION:
// Writer N/A
diff --git a/exec/java-exec/src/test/resources/store/parquet/complex/map/parquet/000000_0.parquet b/exec/java-exec/src/test/resources/store/parquet/complex/map/parquet/000000_0.parquet
new file mode 100644
index 0000000..5d431e1
Binary files /dev/null and b/exec/java-exec/src/test/resources/store/parquet/complex/map/parquet/000000_0.parquet differ
diff --git a/exec/java-exec/src/test/resources/store/parquet/complex/simple_map.parquet b/exec/java-exec/src/test/resources/store/parquet/complex/simple_map.parquet
new file mode 100644
index 0000000..3b05caf
Binary files /dev/null and b/exec/java-exec/src/test/resources/store/parquet/complex/simple_map.parquet differ
diff --git a/exec/vector/src/main/codegen/includes/vv_imports.ftl b/exec/vector/src/main/codegen/includes/vv_imports.ftl
index 5280dbb..b7bb2b9 100644
--- a/exec/vector/src/main/codegen/includes/vv_imports.ftl
+++ b/exec/vector/src/main/codegen/includes/vv_imports.ftl
@@ -49,6 +49,7 @@ import org.apache.drill.exec.vector.complex.reader.*;
import org.apache.drill.exec.vector.complex.impl.*;
import org.apache.drill.exec.vector.complex.writer.*;
import org.apache.drill.exec.vector.complex.writer.BaseWriter.MapWriter;
+import org.apache.drill.exec.vector.complex.writer.BaseWriter.DictWriter;
import org.apache.drill.exec.vector.complex.writer.BaseWriter.ListWriter;
import org.apache.drill.exec.util.JsonStringArrayList;
import org.apache.drill.exec.memory.AllocationManager.BufferLedger;
diff --git a/exec/vector/src/main/codegen/templates/AbstractFieldReader.java b/exec/vector/src/main/codegen/templates/AbstractFieldReader.java
index b5ea6de..39420d9 100644
--- a/exec/vector/src/main/codegen/templates/AbstractFieldReader.java
+++ b/exec/vector/src/main/codegen/templates/AbstractFieldReader.java
@@ -42,6 +42,10 @@ public abstract class AbstractFieldReader extends AbstractBaseReader implements
return true;
}
+ public void read(ValueHolder holder) {
+ fail("read(ValueHolder holder)");
+ }
+
<#list ["Object", "BigDecimal", "Integer", "Long", "Boolean",
"Character", "LocalDate", "LocalTime", "LocalDateTime", "Period", "Double", "Float",
"Text", "String", "Byte", "Short", "byte[]"] as friendlyType>
@@ -61,8 +65,9 @@ public abstract class AbstractFieldReader extends AbstractBaseReader implements
</#list>
public void copyAsValue(MapWriter writer) {
- fail("CopyAsValue MapWriter");
+ fail("copyAsValue(MapWriter writer)");
}
+
public void copyAsField(String name, MapWriter writer) {
fail("CopyAsField MapWriter");
}
@@ -103,8 +108,12 @@ public abstract class AbstractFieldReader extends AbstractBaseReader implements
}
</#list></#list>
+ public void copyAsValue(DictWriter writer) {
+ fail("CopyAsValue(DictWriter writer)");
+ }
+
public void read(int arrayIndex, UntypedNullHolder holder) {
- fail("UntypedNullHolder");
+ fail("UntypedNullHolder");
}
public FieldReader reader(String name) {
@@ -122,6 +131,24 @@ public abstract class AbstractFieldReader extends AbstractBaseReader implements
return -1;
}
+ public int find(String key) {
+ fail("find(String key)");
+ return -1;
+ }
+
+ public int find(int key) {
+ fail("find(int key)");
+ return -1;
+ }
+
+ public void read(String key, ValueHolder holder) {
+ fail("read(String key, ValueHolder holder)");
+ }
+
+ public void read(int key, ValueHolder holder) {
+ fail("read(int key, ValueHolder holder)");
+ }
+
private void fail(String name) {
throw new IllegalArgumentException(String.format("You tried to read a [%s] type when you are using a field reader of type [%s].", name, this.getClass().getSimpleName()));
}
diff --git a/exec/vector/src/main/codegen/templates/AbstractFieldWriter.java b/exec/vector/src/main/codegen/templates/AbstractFieldWriter.java
index 9a5bd67..0ba0e9a 100644
--- a/exec/vector/src/main/codegen/templates/AbstractFieldWriter.java
+++ b/exec/vector/src/main/codegen/templates/AbstractFieldWriter.java
@@ -95,6 +95,12 @@ abstract class AbstractFieldWriter extends AbstractBaseWriter implements FieldWr
}
@Override
+ public DictWriter dict() {
+ fail("Dict");
+ return null;
+ }
+
+ @Override
public ListWriter list() {
fail("List");
return null;
@@ -107,6 +113,34 @@ abstract class AbstractFieldWriter extends AbstractBaseWriter implements FieldWr
}
@Override
+ public DictWriter dict(String name) {
+ fail("Dict");
+ return null;
+ }
+
+ @Override
+ public FieldWriter getKeyWriter() {
+ fail("KeyWriter");
+ return null;
+ }
+
+ @Override
+ public FieldWriter getValueWriter() {
+ fail("ValueWriter");
+ return null;
+ }
+
+ @Override
+ public void startKeyValuePair() {
+ fail("startKeyValuePair()");
+ }
+
+ @Override
+ public void endKeyValuePair() {
+ fail("endKeyValuePair()");
+ }
+
+ @Override
public ListWriter list(String name) {
fail("List");
return null;
diff --git a/exec/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java b/exec/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java
index e065057..24d13c8 100644
--- a/exec/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java
+++ b/exec/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java
@@ -105,6 +105,11 @@ abstract class AbstractPromotableFieldWriter extends AbstractFieldWriter {
}
@Override
+ public DictWriter dict() {
+ return getWriter(MinorType.LIST).dict();
+ }
+
+ @Override
public ListWriter list() {
return getWriter(MinorType.LIST).list();
}
diff --git a/exec/vector/src/main/codegen/templates/BaseReader.java b/exec/vector/src/main/codegen/templates/BaseReader.java
index 508f06c..d54f191 100644
--- a/exec/vector/src/main/codegen/templates/BaseReader.java
+++ b/exec/vector/src/main/codegen/templates/BaseReader.java
@@ -38,6 +38,7 @@ public interface BaseReader extends Positionable{
void read(int index, UnionHolder holder);
void copyAsValue(UnionWriter writer);
boolean isSet();
+ void read(ValueHolder holder);
public interface MapReader extends BaseReader, Iterable<String>{
FieldReader reader(String name);
@@ -48,6 +49,74 @@ public interface BaseReader extends Positionable{
int size();
void copyAsValue(MapWriter writer);
}
+
+ public interface DictReader extends RepeatedMapReader {
+ void copyAsValue(DictWriter writer);
+
+ /**
+ * Obtains the index for the given key in the current row, used to find the corresponding value.
+ * Used in generated code when retrieving value from Dict with
+ * {@link org.apache.drill.common.expression.PathSegment.NameSegment}
+ * in cases when {@link org.apache.drill.exec.vector.complex.DictVector#getValueType()} is complex.
+ *
+ * <p>Although {@code key} is passed as a {@code String}, the value is converted to
+ * the actual type based on {@link org.apache.drill.exec.vector.complex.DictVector#getKeyType()}.
+ *
+ * @param key literal representing the key value
+ * @return index for the given key
+ * @see org.apache.drill.exec.vector.complex.DictVector
+ */
+ int find(String key);
+
+ /**
+ * Obtains the index for the given key in the current row, used to find the corresponding value.
+ * Used in generated code when retrieving value from Dict with
+ * {@link org.apache.drill.common.expression.PathSegment.ArraySegment}
+ * in cases when {@link org.apache.drill.exec.vector.complex.DictVector#getValueType()} is complex.
+ *
+ * <p>Although {@code key} is passed as an {@code int}, the value is converted to
+ * the actual type based on {@link org.apache.drill.exec.vector.complex.DictVector#getKeyType()}.
+ *
+ * @param key literal representing the key value
+ * @return index for the given key
+ * @see org.apache.drill.exec.vector.complex.DictVector
+ */
+ int find(int key);
+
+ /**
+ * Reads a value corresponding to a {@code key} into the {@code holder}.
+ * If there is no entry in the row with the given {@code key}, value is set to null.
+ *
+ * <p>Used in generated code when retrieving value from Dict with
+ * {@link org.apache.drill.common.expression.PathSegment.NameSegment}
+ * in cases when {@link org.apache.drill.exec.vector.complex.DictVector#getValueType()} is primitive.
+ *
+ * <p>Although {@code key} is passed as a {@code String}, the value is converted to
+ * the actual type based on {@link org.apache.drill.exec.vector.complex.DictVector#getKeyType()}.
+ *
+ * @param key literal representing the key value
+ * @param holder the holder to write the value into
+ * @see org.apache.drill.exec.vector.complex.DictVector
+ */
+ void read(String key, ValueHolder holder);
+
+ /**
+ * Reads a value corresponding to a {@code key} into the {@code holder}.
+ * If there is no entry in the row with the given {@code key}, value is set to null.
+ *
+ * <p>Used in generated code when retrieving value from Dict with
+ * {@link org.apache.drill.common.expression.PathSegment.ArraySegment}
+ * in cases when {@link org.apache.drill.exec.vector.complex.DictVector#getValueType()} is primitive.
+ *
+ * <p>Although {@code key} is passed as an {@code int}, the value is converted to
+ * the actual type based on {@link org.apache.drill.exec.vector.complex.DictVector#getKeyType()}.
+ *
+ * @param key literal representing the key value
+ * @param holder the holder to write the value into
+ * @see org.apache.drill.exec.vector.complex.DictVector
+ */
+ void read(int key, ValueHolder holder);
+ }
public interface ListReader extends BaseReader{
FieldReader reader();
@@ -59,7 +128,7 @@ public interface BaseReader extends Positionable{
void copyAsValue(ListWriter writer);
}
- public interface ScalarReader extends
+ public interface ScalarReader extends
<#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first /> ${name}Reader, </#list></#list>
<#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first /> Repeated${name}Reader, </#list></#list>
BaseReader {}
diff --git a/exec/vector/src/main/codegen/templates/BaseWriter.java b/exec/vector/src/main/codegen/templates/BaseWriter.java
index 5ae2648..f6b5dd1 100644
--- a/exec/vector/src/main/codegen/templates/BaseWriter.java
+++ b/exec/vector/src/main/codegen/templates/BaseWriter.java
@@ -64,12 +64,30 @@ package org.apache.drill.exec.vector.complex.writer;
ListWriter list(String name);
void start();
void end();
+ DictWriter dict(String name);
+ }
+
+ public interface DictWriter extends MapWriter {
+ /**
+ * Prepares key and value writers to write new values.
+ * Must be invoked before writing data to these fields.
+ */
+ void startKeyValuePair();
+
+ /**
+ * Finalizes writing values to key and value writers.
+ * Must be invoked after the values for the fields are written.
+ */
+ void endKeyValuePair();
+ FieldWriter getKeyWriter();
+ FieldWriter getValueWriter();
}
public interface ListWriter extends BaseWriter {
void startList();
void endList();
MapWriter map();
+ DictWriter dict();
ListWriter list();
void copyReader(FieldReader reader);
@@ -105,6 +123,8 @@ package org.apache.drill.exec.vector.complex.writer;
void end();
MapOrListWriter map(String name);
MapOrListWriter listoftmap(String name);
+ MapOrListWriter dict(String name);
+ MapOrListWriter listOfDict();
MapOrListWriter list(String name);
boolean isMapWriter();
boolean isListWriter();
diff --git a/exec/vector/src/main/codegen/templates/BasicTypeHelper.java b/exec/vector/src/main/codegen/templates/BasicTypeHelper.java
index b35e189..0995091 100644
--- a/exec/vector/src/main/codegen/templates/BasicTypeHelper.java
+++ b/exec/vector/src/main/codegen/templates/BasicTypeHelper.java
@@ -15,8 +15,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+import org.apache.drill.common.types.TypeProtos;
import org.apache.drill.exec.vector.UntypedNullHolder;
import org.apache.drill.exec.vector.UntypedNullVector;
+import org.apache.drill.exec.vector.complex.DictVector;
import org.apache.drill.exec.vector.complex.impl.UntypedHolderReaderImpl;
<@pp.dropOutputFile />
@@ -33,10 +35,13 @@ import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.common.types.TypeProtos.MajorType;
import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.exec.vector.complex.RepeatedMapVector;
+import org.apache.drill.exec.vector.complex.DictVector;
import org.apache.drill.exec.util.CallBack;
import org.apache.drill.common.types.Types;
import org.apache.drill.shaded.guava.com.google.common.annotations.VisibleForTesting;
+import java.util.List;
+
/*
* This class is generated using freemarker and the ${.template_name} template.
*/
@@ -86,6 +91,14 @@ public class BasicTypeHelper {
return RepeatedMapVector.class;
}
+ case DICT:
+ switch (mode) {
+ case OPTIONAL:
+ case REQUIRED:
+ return DictVector.class;
+ case REPEATED:
+ return RepeatedDictVector.class;
+ }
case LIST:
switch (mode) {
case REPEATED:
@@ -129,6 +142,17 @@ public class BasicTypeHelper {
case REPEATED:
return RepeatedMapReaderImpl.class;
}
+ case DICT:
+ switch (mode) {
+ case REQUIRED:
+ if (!isSingularRepeated) {
+ return SingleDictReaderImpl.class;
+ } else {
+ throw new UnsupportedOperationException("DictVector required singular repeated reader is not supported yet");
+ }
+ case REPEATED:
+ return RepeatedDictReaderImpl.class;
+ }
case LIST:
switch (mode) {
case REQUIRED:
@@ -160,6 +184,7 @@ public class BasicTypeHelper {
switch (type) {
case UNION: return UnionWriter.class;
case MAP: return MapWriter.class;
+ case DICT: return DictWriter.class;
case LIST: return ListWriter.class;
<#list vv.types as type>
<#list type.minor as minor>
@@ -184,6 +209,14 @@ public class BasicTypeHelper {
case REPEATED:
return RepeatedMapWriter.class;
}
+ case DICT:
+ switch (mode) {
+ case REQUIRED:
+ case OPTIONAL:
+ return SingleDictWriter.class;
+ case REPEATED:
+ return RepeatedDictWriter.class;
+ }
case LIST:
switch (mode) {
case REQUIRED:
@@ -294,6 +327,14 @@ public class BasicTypeHelper {
case REPEATED:
return new RepeatedMapVector(field, allocator, callBack);
}
+ case DICT:
+ switch (type.getMode()) {
+ case REQUIRED:
+ case OPTIONAL:
+ return new DictVector(field, allocator, callBack);
+ case REPEATED:
+ return new RepeatedDictVector(field, allocator, callBack);
+ }
case LIST:
switch (type.getMode()) {
case REPEATED:
diff --git a/exec/vector/src/main/codegen/templates/ComplexCopier.java b/exec/vector/src/main/codegen/templates/ComplexCopier.java
index 256e79e..c61a268 100644
--- a/exec/vector/src/main/codegen/templates/ComplexCopier.java
+++ b/exec/vector/src/main/codegen/templates/ComplexCopier.java
@@ -71,6 +71,21 @@ public class ComplexCopier {
}
writer.end();
break;
+ case DICT:
+ DictWriter wr = (DictWriter) writer;
+ wr.start();
+ if (reader.isSet()) {
+ while (reader.next()) {
+ wr.startKeyValuePair();
+ FieldReader keyReader = reader.reader(DictVector.FIELD_KEY_NAME);
+ FieldReader valueReader = reader.reader(DictVector.FIELD_VALUE_NAME);
+ writeValue(keyReader, getMapWriterForReader(keyReader, writer, DictVector.FIELD_KEY_NAME));
+ writeValue(valueReader, getMapWriterForReader(valueReader, writer, DictVector.FIELD_VALUE_NAME));
+ wr.endKeyValuePair();
+ }
+ }
+ wr.end();
+ break;
<#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first />
<#assign fields = minor.fields!type.fields />
<#assign uncappedName = name?uncap_first/>
@@ -108,6 +123,8 @@ public class ComplexCopier {
</#list></#list>
case MAP:
return (FieldWriter) writer.map(name);
+ case DICT:
+ return (FieldWriter) writer.dict(name);
case LIST:
return (FieldWriter) writer.list(name);
default:
diff --git a/exec/vector/src/main/codegen/templates/ComplexReaders.java b/exec/vector/src/main/codegen/templates/ComplexReaders.java
index 77252ad..4d894b1 100644
--- a/exec/vector/src/main/codegen/templates/ComplexReaders.java
+++ b/exec/vector/src/main/codegen/templates/ComplexReaders.java
@@ -134,6 +134,18 @@ public class ${nullMode}${name}ReaderImpl extends AbstractFieldReader {
public void read(${minor.class?cap_first}Holder h) {
vector.getAccessor().get(idx(), h);
}
+
+ public void read(ValueHolder h) {
+ if (h instanceof ${minor.class?cap_first}Holder) {
+ read((${minor.class?cap_first}Holder) h);
+ } else {
+ read((Nullable${minor.class?cap_first}Holder) h);
+ }
+ }
+ <#else>
+ public void read(ValueHolder h) {
+ read((Nullable${minor.class?cap_first}Holder) h);
+ }
</#if>
public void read(Nullable${minor.class?cap_first}Holder h) {
diff --git a/exec/vector/src/main/codegen/templates/ListWriters.java b/exec/vector/src/main/codegen/templates/ListWriters.java
index 904c6c5..ca5af62 100644
--- a/exec/vector/src/main/codegen/templates/ListWriters.java
+++ b/exec/vector/src/main/codegen/templates/ListWriters.java
@@ -43,7 +43,7 @@ public class ${mode}ListWriter extends AbstractFieldWriter {
private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(${mode}ListWriter.class);
enum Mode {
- INIT, IN_MAP, IN_LIST
+ INIT, IN_MAP, IN_LIST, IN_DICT
<#list vv.types as type><#list type.minor as minor>,
IN_${minor.class?upper_case}</#list></#list> }
@@ -130,6 +130,31 @@ public class ${mode}ListWriter extends AbstractFieldWriter {
}
@Override
+ public DictWriter dict() {
+ switch (mode) {
+ case INIT:
+ final ValueVector oldVector = container.getChild(name);
+ final RepeatedDictVector vector = container.addOrGet(name, RepeatedDictVector.TYPE, RepeatedDictVector.class);
+ innerVector = vector;
+ writer = new RepeatedDictWriter(vector, this);
+ // oldVector will be null if this is the first batch being created; it might also differ
+ // from the newly added vector if the new batch has a schema change
+ if (oldVector == null || oldVector != vector) {
+ writer.allocate();
+ }
+ writer.setPosition(${index});
+ mode = Mode.IN_DICT;
+ return writer;
+ case IN_DICT:
+ return writer;
+ default:
+ throw UserException.unsupportedError()
+ .message(getUnsupportedErrorMsg("DICT", mode.name()))
+ .build(logger);
+ }
+ }
+
+ @Override
public ListWriter list() {
switch (mode) {
case INIT:
@@ -247,12 +272,16 @@ public class ${mode}ListWriter extends AbstractFieldWriter {
@Override
public void startList() {
- // noop
+ if (mode == Mode.IN_DICT) {
+ writer.startList();
+ }
}
@Override
public void endList() {
- // noop
+ if (mode == Mode.IN_DICT) {
+ writer.endList();
+ }
}
</#if>
diff --git a/exec/vector/src/main/codegen/templates/MapWriters.java b/exec/vector/src/main/codegen/templates/MapWriters.java
index f6fda90..c41bdac 100644
--- a/exec/vector/src/main/codegen/templates/MapWriters.java
+++ b/exec/vector/src/main/codegen/templates/MapWriters.java
@@ -17,12 +17,19 @@
*/
<@pp.dropOutputFile />
<#list ["Single", "Repeated"] as mode>
-<@pp.changeOutputFile name="/org/apache/drill/exec/vector/complex/impl/${mode}MapWriter.java" />
+
+<#if mode == "Repeated">
+<#assign className = "AbstractRepeatedMapWriter">
+<#else>
+<#assign className = "${mode}MapWriter">
+</#if>
+
+<@pp.changeOutputFile name="/org/apache/drill/exec/vector/complex/impl/${className}.java" />
<#if mode == "Single">
<#assign containerClass = "MapVector" />
<#assign index = "idx()">
<#else>
-<#assign containerClass = "RepeatedMapVector" />
+<#assign containerClass = "T" />
<#assign index = "currentChildIndex">
</#if>
@@ -32,6 +39,7 @@ package org.apache.drill.exec.vector.complex.impl;
<#include "/@includes/vv_imports.ftl" />
import java.util.Map;
+import java.util.HashMap;
import org.apache.drill.common.types.TypeProtos.DataMode;
import org.apache.drill.exec.expr.holders.RepeatedMapHolder;
@@ -39,27 +47,29 @@ import org.apache.drill.exec.vector.AllocationHelper;
import org.apache.drill.exec.vector.complex.reader.FieldReader;
import org.apache.drill.exec.vector.complex.writer.FieldWriter;
-import org.apache.drill.shaded.guava.com.google.common.collect.Maps;
-
/*
* This class is generated using FreeMarker and the ${.template_name} template.
*/
@SuppressWarnings("unused")
-public class ${mode}MapWriter extends AbstractFieldWriter {
+<#if mode == "Repeated">
+public abstract class ${className}<${containerClass} extends AbstractRepeatedMapVector> extends AbstractFieldWriter {
+<#else>
+public class ${className} extends AbstractFieldWriter {
+</#if>
protected final ${containerClass} container;
- private final Map<String, FieldWriter> fields = Maps.newHashMap();
- <#if mode == "Repeated">private int currentChildIndex = 0;</#if>
+ <#if mode == "Repeated">protected<#else>private</#if> final Map<String, FieldWriter> fields = new HashMap<>();
+ <#if mode == "Repeated">protected int currentChildIndex = 0;</#if>
private final boolean unionEnabled;
- public ${mode}MapWriter(${containerClass} container, FieldWriter parent, boolean unionEnabled) {
+ public ${className}(${containerClass} container, FieldWriter parent, boolean unionEnabled) {
super(parent);
this.container = container;
this.unionEnabled = unionEnabled;
}
- public ${mode}MapWriter(${containerClass} container, FieldWriter parent) {
+ public ${className}(${containerClass} container, FieldWriter parent) {
this(container, parent, false);
}
@@ -70,7 +80,7 @@ public class ${mode}MapWriter extends AbstractFieldWriter {
@Override
public boolean isEmptyMap() {
- return 0 == container.size();
+ return container.size() == 0;
}
@Override
@@ -80,7 +90,7 @@ public class ${mode}MapWriter extends AbstractFieldWriter {
@Override
public MapWriter map(String name) {
- FieldWriter writer = fields.get(name.toLowerCase());
+ FieldWriter writer = fields.get(name.toLowerCase());
if(writer == null){
int vectorCount=container.size();
MapVector vector = container.addOrGet(name, MapVector.TYPE, MapVector.class);
@@ -99,6 +109,24 @@ public class ${mode}MapWriter extends AbstractFieldWriter {
}
@Override
+ public DictWriter dict(String name) {
+ FieldWriter writer = fields.get(name.toLowerCase());
+ if (writer == null) {
+ int vectorCount = container.size();
+
+ DictVector vector = container.addOrGet(name, DictVector.TYPE, DictVector.class);
+ writer = new SingleDictWriter(vector, this);
+
+ fields.put(name.toLowerCase(), writer);
+ if(vectorCount != container.size()) {
+ writer.allocate();
+ }
+ writer.setPosition(${index});
+ }
+ return writer;
+ }
+
+ @Override
public void close() throws Exception {
clear();
container.close();
@@ -138,34 +166,7 @@ public class ${mode}MapWriter extends AbstractFieldWriter {
}
return writer;
}
-
- <#if mode == "Repeated">
- public void start() {
- // update the repeated vector to state that there is current+1 objects.
- final RepeatedMapHolder h = new RepeatedMapHolder();
- final RepeatedMapVector map = (RepeatedMapVector) container;
- final RepeatedMapVector.Mutator mutator = map.getMutator();
-
- // Make sure that the current vector can support the end position of this list.
- if(container.getValueCapacity() <= idx()) {
- mutator.setValueCount(idx()+1);
- }
-
- map.getAccessor().get(idx(), h);
- if (h.start >= h.end) {
- container.getMutator().startNewValue(idx());
- }
- currentChildIndex = container.getMutator().add(idx());
- for(final FieldWriter w : fields.values()) {
- w.setPosition(currentChildIndex);
- }
- }
-
-
- public void end() {
- // noop
- }
- <#else>
+ <#if mode != "Repeated">
public void setValueCount(int count) {
container.getMutator().setValueCount(count);
@@ -186,7 +187,6 @@ public class ${mode}MapWriter extends AbstractFieldWriter {
@Override
public void end() {
}
-
</#if>
<#list vv.types as type><#list type.minor as minor>
@@ -237,6 +237,5 @@ public class ${mode}MapWriter extends AbstractFieldWriter {
}
</#list></#list>
-
}
</#list>
diff --git a/exec/vector/src/main/codegen/templates/NullReader.java b/exec/vector/src/main/codegen/templates/NullReader.java
index f67ff22..1d1b9bd 100644
--- a/exec/vector/src/main/codegen/templates/NullReader.java
+++ b/exec/vector/src/main/codegen/templates/NullReader.java
@@ -50,6 +50,10 @@ public class NullReader extends AbstractBaseReader implements FieldReader {
public MajorType getType() {
return type;
}
+
+ public void read(ValueHolder holder) {
+ throw new UnsupportedOperationException("NullReader cannot write into non-nullable holder");
+ }
public void copyAsValue(MapWriter writer) {}
@@ -136,7 +140,27 @@ public class NullReader extends AbstractBaseReader implements FieldReader {
return null;
}
</#list>
-
+
+ @Override
+ public void copyAsValue(DictWriter writer) {}
+
+ @Override
+ public int find(String key) {
+ return -1;
+ }
+
+ @Override
+ public int find(int key) {
+ return -1;
+ }
+
+ @Override
+ public void read(String key, ValueHolder holder) {
+ }
+
+ @Override
+ public void read(int key, ValueHolder holder) {
+ }
}
diff --git a/exec/vector/src/main/codegen/templates/RepeatedDictWriter.java b/exec/vector/src/main/codegen/templates/RepeatedDictWriter.java
new file mode 100644
index 0000000..d21336e
--- /dev/null
+++ b/exec/vector/src/main/codegen/templates/RepeatedDictWriter.java
@@ -0,0 +1,151 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.drill.exec.vector.complex.writer.FieldWriter;
+
+<@pp.dropOutputFile />
+<@pp.changeOutputFile name="/org/apache/drill/exec/vector/complex/impl/RepeatedDictWriter.java" />
+
+<#include "/@includes/license.ftl" />
+package org.apache.drill.exec.vector.complex.impl;
+
+<#include "/@includes/vv_imports.ftl" />
+
+/*
+ * This class is generated using freemarker and the ${.template_name} template.
+ */
+public class RepeatedDictWriter extends AbstractFieldWriter implements BaseWriter.DictWriter {
+
+ final RepeatedDictVector container;
+
+ private final SingleDictWriter dictWriter;
+ private int currentChildIndex;
+
+ public RepeatedDictWriter(RepeatedDictVector container, FieldWriter parent) {
+ super(parent);
+ this.container = Preconditions.checkNotNull(container, "Container cannot be null!");
+ this.dictWriter = new SingleDictWriter((DictVector) container.getDataVector(), this);
+ }
+
+ @Override
+ public void allocate() {
+ container.allocateNew();
+ }
+
+ @Override
+ public void clear() {
+ container.clear();
+ }
+
+ @Override
+ public void close() {
+ clear();
+ container.close();
+ }
+
+ @Override
+ public int getValueCapacity() {
+ return container.getValueCapacity();
+ }
+
+ public void setValueCount(int count){
+ container.getMutator().setValueCount(count);
+ }
+
+ @Override
+ public void startList() {
+ // make sure that the current vector can support the end position of this list.
+ if (container.getValueCapacity() <= idx()) {
+ container.getMutator().setValueCount(idx() + 1);
+ }
+
+ // update the repeated vector to state that there is current+1 objects.
+ final RepeatedDictHolder h = new RepeatedDictHolder();
+ container.getAccessor().get(idx(), h);
+ if (h.start >= h.end) {
+ container.getMutator().startNewValue(idx());
+ }
+ currentChildIndex = container.getOffsetVector().getAccessor().get(idx());
+ }
+
+ @Override
+ public void endList() {
+ // noop, we initialize state at start rather than end.
+ }
+
+ @Override
+ public MaterializedField getField() {
+ return container.getField();
+ }
+
+ @Override
+ public void start() {
+ currentChildIndex = container.getMutator().add(idx());
+ dictWriter.setPosition(currentChildIndex);
+ dictWriter.start();
+ }
+
+ @Override
+ public void end() {
+ dictWriter.end();
+ }
+
+ @Override
+ public void startKeyValuePair() {
+ dictWriter.startKeyValuePair();
+ }
+
+ @Override
+ public void endKeyValuePair() {
+ dictWriter.endKeyValuePair();
+ }
+
+ @Override
+ public ListWriter list(String name) {
+ return dictWriter.list(name);
+ }
+
+ @Override
+ public MapWriter map(String name) {
+ return dictWriter.map(name);
+ }
+
+ @Override
+ public DictWriter dict(String name) {
+ return dictWriter.dict(name);
+ }
+
+ <#list vv.types as type>
+ <#list type.minor as minor>
+ <#assign lowerName = minor.class?uncap_first />
+ <#if lowerName == "int" ><#assign lowerName = "integer" /></#if>
+
+ @Override
+ public ${minor.class}Writer ${lowerName}(String name) {
+ return (FieldWriter) dictWriter.${lowerName}(name);
+ }
+ <#if minor.class?contains("Decimal") >
+
+ @Override
+ public ${minor.class}Writer ${lowerName}(String name, int scale, int precision) {
+ return (FieldWriter) dictWriter.${lowerName}(name, scale, precision);
+ }
+ </#if>
+ </#list>
+ </#list>
+}
diff --git a/exec/vector/src/main/codegen/templates/UnionReader.java b/exec/vector/src/main/codegen/templates/UnionReader.java
index 84a2327..47957b6 100644
--- a/exec/vector/src/main/codegen/templates/UnionReader.java
+++ b/exec/vector/src/main/codegen/templates/UnionReader.java
@@ -34,14 +34,14 @@ package org.apache.drill.exec.vector.complex.impl;
@SuppressWarnings("unused")
public class UnionReader extends AbstractFieldReader {
- private BaseReader[] readers = new BaseReader[44];
+ private BaseReader[] readers = new BaseReader[45];
public UnionVector data;
public UnionReader(UnionVector data) {
this.data = data;
}
- private static MajorType[] TYPES = new MajorType[44];
+ private static MajorType[] TYPES = new MajorType[45];
static {
for (MinorType minorType : MinorType.values()) {
@@ -77,6 +77,8 @@ public class UnionReader extends AbstractFieldReader {
return NullReader.INSTANCE;
case MinorType.MAP_VALUE:
return (FieldReader) getMap();
+ case MinorType.DICT_VALUE:
+ return (FieldReader) getDict();
case MinorType.LIST_VALUE:
return (FieldReader) getList();
<#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first />
@@ -102,6 +104,17 @@ public class UnionReader extends AbstractFieldReader {
return mapReader;
}
+ private SingleDictReaderImpl dictReader;
+
+ private DictReader getDict() {
+ if (dictReader == null) {
+ dictReader = (SingleDictReaderImpl) data.getDict().getReader();
+ dictReader.setPosition(idx());
+ readers[MinorType.DICT_VALUE] = dictReader;
+ }
+ return dictReader;
+ }
+
private UnionListReader listReader;
private FieldReader getList() {
diff --git a/exec/vector/src/main/codegen/templates/UnionVector.java b/exec/vector/src/main/codegen/templates/UnionVector.java
index 9cf672a..a964c95 100644
--- a/exec/vector/src/main/codegen/templates/UnionVector.java
+++ b/exec/vector/src/main/codegen/templates/UnionVector.java
@@ -214,6 +214,14 @@ public class UnionVector implements ValueVector {
return mapVector;
}
+ public DictVector getDict() {
+ DictVector dictVector = subtype(MinorType.DICT);
+ if (dictVector == null) {
+ dictVector = classicAddType(MinorType.DICT, DictVector.class);
+ }
+ return dictVector;
+ }
+
public ListVector getList() {
ListVector listVector = subtype(MinorType.LIST);
if (listVector == null) {
diff --git a/exec/vector/src/main/codegen/templates/UnionWriter.java b/exec/vector/src/main/codegen/templates/UnionWriter.java
index 3e62415..e543bbc 100644
--- a/exec/vector/src/main/codegen/templates/UnionWriter.java
+++ b/exec/vector/src/main/codegen/templates/UnionWriter.java
@@ -156,6 +156,13 @@ public class UnionWriter extends AbstractFieldWriter implements FieldWriter {
}
@Override
+ public DictWriter dict() {
+ data.getMutator().setType(idx(), MinorType.LIST);
+ getListWriter().setPosition(idx());
+ return getListWriter().dict();
+ }
+
+ @Override
public ListWriter list() {
data.getMutator().setType(idx(), MinorType.LIST);
getListWriter().setPosition(idx());
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/writer/FieldWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/expr/holders/DictHolder.java
similarity index 67%
copy from exec/vector/src/main/java/org/apache/drill/exec/vector/complex/writer/FieldWriter.java
copy to exec/vector/src/main/java/org/apache/drill/exec/expr/holders/DictHolder.java
index 62012d0..b9c9e39 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/writer/FieldWriter.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/expr/holders/DictHolder.java
@@ -15,13 +15,17 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.drill.exec.vector.complex.writer;
+package org.apache.drill.exec.expr.holders;
-import org.apache.drill.exec.vector.complex.writer.BaseWriter.ListWriter;
-import org.apache.drill.exec.vector.complex.writer.BaseWriter.MapWriter;
-import org.apache.drill.exec.vector.complex.writer.BaseWriter.ScalarWriter;
+import org.apache.drill.common.types.TypeProtos;
+import org.apache.drill.exec.vector.complex.DictVector;
-public interface FieldWriter extends MapWriter, ListWriter, ScalarWriter {
- void allocate();
- void clear();
+public final class DictHolder extends RepeatedValueHolder {
+
+ public static final TypeProtos.MajorType TYPE = DictVector.TYPE;
+
+ public int isSet;
+
+ /** The Vector holding the actual values. **/
+ public DictVector vector;
}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/writer/FieldWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/expr/holders/RepeatedDictHolder.java
similarity index 67%
copy from exec/vector/src/main/java/org/apache/drill/exec/vector/complex/writer/FieldWriter.java
copy to exec/vector/src/main/java/org/apache/drill/exec/expr/holders/RepeatedDictHolder.java
index 62012d0..adb604c 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/writer/FieldWriter.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/expr/holders/RepeatedDictHolder.java
@@ -15,13 +15,14 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.drill.exec.vector.complex.writer;
+package org.apache.drill.exec.expr.holders;
-import org.apache.drill.exec.vector.complex.writer.BaseWriter.ListWriter;
-import org.apache.drill.exec.vector.complex.writer.BaseWriter.MapWriter;
-import org.apache.drill.exec.vector.complex.writer.BaseWriter.ScalarWriter;
+import org.apache.drill.common.types.TypeProtos;
+import org.apache.drill.exec.vector.complex.RepeatedDictVector;
-public interface FieldWriter extends MapWriter, ListWriter, ScalarWriter {
- void allocate();
- void clear();
+public final class RepeatedDictHolder extends RepeatedValueHolder {
+
+ public static final TypeProtos.MajorType TYPE = RepeatedDictVector.TYPE;
+
+ public RepeatedDictVector vector;
}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/expr/holders/RepeatedListHolder.java b/exec/vector/src/main/java/org/apache/drill/exec/expr/holders/RepeatedListHolder.java
index ce7e34d..5eeb108 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/expr/holders/RepeatedListHolder.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/expr/holders/RepeatedListHolder.java
@@ -20,22 +20,11 @@ package org.apache.drill.exec.expr.holders;
import org.apache.drill.common.types.TypeProtos;
import org.apache.drill.common.types.Types;
import org.apache.drill.exec.vector.complex.ListVector;
-import org.apache.drill.exec.vector.complex.reader.FieldReader;
-public final class RepeatedListHolder implements ValueHolder{
+public final class RepeatedListHolder extends RepeatedValueHolder {
- public static final TypeProtos.MajorType TYPE = Types.repeated(TypeProtos.MinorType.LIST);
+ public static final TypeProtos.MajorType TYPE = Types.repeated(TypeProtos.MinorType.LIST);
- public TypeProtos.MajorType getType() {return TYPE;}
-
- /** The first index (inclusive) into the Vector. **/
- public int start;
-
- /** The last index (exclusive) into the Vector. **/
- public int end;
-
- /** The Vector holding the actual values. **/
- public ListVector vector;
-
- public FieldReader reader;
+ /** The Vector holding the actual values. **/
+ public ListVector vector;
}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/expr/holders/RepeatedMapHolder.java b/exec/vector/src/main/java/org/apache/drill/exec/expr/holders/RepeatedMapHolder.java
index 516d135..3390846 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/expr/holders/RepeatedMapHolder.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/expr/holders/RepeatedMapHolder.java
@@ -20,25 +20,11 @@ package org.apache.drill.exec.expr.holders;
import org.apache.drill.common.types.TypeProtos;
import org.apache.drill.common.types.Types;
import org.apache.drill.exec.vector.complex.MapVector;
-import org.apache.drill.exec.vector.complex.reader.FieldReader;
-public final class RepeatedMapHolder implements ValueHolder{
+public final class RepeatedMapHolder extends RepeatedValueHolder {
- public static final TypeProtos.MajorType TYPE = Types.repeated(TypeProtos.MinorType.MAP);
-
-// public final LinkedHashSet<ValueHolder> children = null;
-
- public TypeProtos.MajorType getType() {return TYPE;}
-
- /** The first index (inclusive) into the Vector. **/
- public int start;
-
- /** The last index (exclusive) into the Vector. **/
- public int end;
-
- /** The Vector holding the actual values. **/
- public MapVector vector;
-
- public FieldReader reader;
+ public static final TypeProtos.MajorType TYPE = Types.repeated(TypeProtos.MinorType.MAP);
+ /** The Vector holding the actual values. **/
+ public MapVector vector;
}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/writer/FieldWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/expr/holders/RepeatedValueHolder.java
similarity index 67%
copy from exec/vector/src/main/java/org/apache/drill/exec/vector/complex/writer/FieldWriter.java
copy to exec/vector/src/main/java/org/apache/drill/exec/expr/holders/RepeatedValueHolder.java
index 62012d0..f74742f 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/writer/FieldWriter.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/expr/holders/RepeatedValueHolder.java
@@ -15,13 +15,17 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.drill.exec.vector.complex.writer;
+package org.apache.drill.exec.expr.holders;
-import org.apache.drill.exec.vector.complex.writer.BaseWriter.ListWriter;
-import org.apache.drill.exec.vector.complex.writer.BaseWriter.MapWriter;
-import org.apache.drill.exec.vector.complex.writer.BaseWriter.ScalarWriter;
+import org.apache.drill.exec.vector.complex.reader.FieldReader;
-public interface FieldWriter extends MapWriter, ListWriter, ScalarWriter {
- void allocate();
- void clear();
+public abstract class RepeatedValueHolder implements ValueHolder {
+
+ /** The first index (inclusive) into the Vector. **/
+ public int start;
+
+ /** The last index (exclusive) into the Vector. **/
+ public int end;
+
+ public FieldReader reader;
}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/AbstractColumnMetadata.java b/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/AbstractColumnMetadata.java
index 0eaaf3a..69926c4 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/AbstractColumnMetadata.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/AbstractColumnMetadata.java
@@ -147,6 +147,11 @@ public abstract class AbstractColumnMetadata extends AbstractPropertied implemen
public boolean isMultiList() { return false; }
@Override
+ public boolean isDict() {
+ return false;
+ }
+
+ @Override
public TupleMetadata mapSchema() { return null; }
@Override
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/MapColumnMetadata.java b/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/AbstractMapColumnMetadata.java
similarity index 55%
copy from exec/vector/src/main/java/org/apache/drill/exec/record/metadata/MapColumnMetadata.java
copy to exec/vector/src/main/java/org/apache/drill/exec/record/metadata/AbstractMapColumnMetadata.java
index f9c05f4..1ae0007 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/MapColumnMetadata.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/AbstractMapColumnMetadata.java
@@ -25,87 +25,82 @@ import org.apache.drill.exec.record.MaterializedField;
import java.util.stream.Collectors;
/**
- * Describes a map and repeated map. Both are tuples that have a tuple
+ * Describes a base column type for map, dict, repeated map and repeated dict. All are tuples that have a tuple
* schema as part of the column definition.
*/
-public class MapColumnMetadata extends AbstractColumnMetadata {
+public abstract class AbstractMapColumnMetadata extends AbstractColumnMetadata {
- private TupleMetadata parentTuple;
- private final TupleSchema mapSchema;
+ protected TupleMetadata parentTuple;
+ protected final TupleSchema schema;
/**
* Build a new map column from the field provided
*
* @param schema materialized field description of the map
*/
- public MapColumnMetadata(MaterializedField schema) {
+ public AbstractMapColumnMetadata(MaterializedField schema) {
this(schema, null);
}
/**
- * Build a map column metadata by cloning the type information (but not
+ * Build column metadata by cloning the type information (but not
* the children) of the materialized field provided.
*
* @param schema the schema to use
* @param mapSchema parent schema
*/
- MapColumnMetadata(MaterializedField schema, TupleSchema mapSchema) {
+ AbstractMapColumnMetadata(MaterializedField schema, TupleSchema mapSchema) {
super(schema);
if (mapSchema == null) {
- this.mapSchema = new TupleSchema();
+ this.schema = new TupleSchema();
} else {
- this.mapSchema = mapSchema;
+ this.schema = mapSchema;
}
- this.mapSchema.bind(this);
+ this.schema.bind(this);
}
- public MapColumnMetadata(MapColumnMetadata from) {
+ public AbstractMapColumnMetadata(AbstractMapColumnMetadata from) {
super(from);
- mapSchema = (TupleSchema) from.mapSchema.copy();
+ schema = (TupleSchema) from.schema.copy();
}
- public MapColumnMetadata(String name, DataMode mode, TupleSchema mapSchema) {
- super(name, MinorType.MAP, mode);
- if (mapSchema == null) {
- this.mapSchema = new TupleSchema();
+ public AbstractMapColumnMetadata(String name, MinorType type, DataMode mode, TupleSchema schema) {
+ super(name, type, mode);
+ if (schema == null) {
+ this.schema = new TupleSchema();
} else {
- this.mapSchema = mapSchema;
+ this.schema = schema;
}
}
@Override
- public ColumnMetadata copy() {
- return new MapColumnMetadata(this);
- }
-
- @Override
public void bind(TupleMetadata parentTuple) {
this.parentTuple = parentTuple;
}
@Override
- public ColumnMetadata.StructureType structureType() { return ColumnMetadata.StructureType.TUPLE; }
-
- @Override
- public TupleMetadata mapSchema() { return mapSchema; }
+ public ColumnMetadata.StructureType structureType() {
+ return ColumnMetadata.StructureType.TUPLE;
+ }
@Override
- public int expectedWidth() { return 0; }
+ public TupleMetadata mapSchema() {
+ return schema;
+ }
@Override
- public boolean isMap() { return true; }
-
- public TupleMetadata parentTuple() { return parentTuple; }
+ public int expectedWidth() {
+ return 0;
+ }
- @Override
- public ColumnMetadata cloneEmpty() {
- return new MapColumnMetadata(name, mode, new TupleSchema());
+ public TupleMetadata parentTuple() {
+ return parentTuple;
}
@Override
public MaterializedField schema() {
MaterializedField field = emptySchema();
- for (MaterializedField member : mapSchema.toFieldList()) {
+ for (MaterializedField member : schema.toFieldList()) {
field.addChild(member);
}
return field;
@@ -113,11 +108,11 @@ public class MapColumnMetadata extends AbstractColumnMetadata {
@Override
public MaterializedField emptySchema() {
- return MaterializedField.create(name,
+ return MaterializedField.create(name,
MajorType.newBuilder()
- .setMinorType(type)
- .setMode(mode)
- .build());
+ .setMinorType(type)
+ .setMode(mode)
+ .build());
}
@Override
@@ -126,14 +121,22 @@ public class MapColumnMetadata extends AbstractColumnMetadata {
if (isArray()) {
builder.append("ARRAY<");
}
- builder.append("STRUCT<");
- builder.append(mapSchema().toMetadataList().stream()
- .map(ColumnMetadata::columnString)
- .collect(Collectors.joining(", ")));
- builder.append(">");
+ builder.append(getStringType())
+ .append("<").append(
+ mapSchema().toMetadataList().stream()
+ .map(ColumnMetadata::columnString)
+ .collect(Collectors.joining(", "))
+ )
+ .append(">");
if (isArray()) {
builder.append(">");
}
return builder.toString();
}
+
+ /**
+ * Returns the string representation of this column's type, e.g. {@code "STRUCT"} or {@code "MAP"}.
+ * @return column type
+ */
+ protected abstract String getStringType();
}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/ColumnMetadata.java b/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/ColumnMetadata.java
index c72de0b..c8bc0fd 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/ColumnMetadata.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/ColumnMetadata.java
@@ -168,6 +168,7 @@ public interface ColumnMetadata extends Propertied {
boolean isVariableWidth();
boolean isMap();
boolean isVariant();
+ boolean isDict();
/**
* Determine if the schema represents a column with a LIST type with
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/DictColumnMetadata.java b/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/DictColumnMetadata.java
new file mode 100644
index 0000000..967792e
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/DictColumnMetadata.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.record.metadata;
+
+import org.apache.drill.common.types.TypeProtos;
+import org.apache.drill.exec.record.MaterializedField;
+
+public class DictColumnMetadata extends AbstractMapColumnMetadata {
+
+ /**
+ * Build a new dict column from the field provided
+ *
+ * @param schema materialized field description of the dict
+ */
+ public DictColumnMetadata(MaterializedField schema) {
+ this(schema, null);
+ }
+
+ /**
+ * Build a dict column metadata by cloning the type information (but not
+ * the children) of the materialized field provided.
+ *
+ * @param schema the schema to use
+ * @param mapSchema tuple schema holding the dict's member columns
+ */
+ DictColumnMetadata(MaterializedField schema, TupleSchema mapSchema) {
+ super(schema, mapSchema);
+ }
+
+ public DictColumnMetadata(DictColumnMetadata from) {
+ super(from);
+ }
+
+ public DictColumnMetadata(String name, TypeProtos.DataMode mode, TupleSchema mapSchema) {
+ super(name, TypeProtos.MinorType.DICT, mode, mapSchema);
+ }
+
+ @Override
+ public ColumnMetadata copy() {
+ return new DictColumnMetadata(this);
+ }
+
+ @Override
+ public ColumnMetadata cloneEmpty() {
+ return new DictColumnMetadata(name, mode, new TupleSchema());
+ }
+
+ @Override
+ public boolean isDict() {
+ return true;
+ }
+
+ @Override
+ protected String getStringType() {
+ return "MAP";
+ }
+}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/MapColumnMetadata.java b/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/MapColumnMetadata.java
index f9c05f4..1efea91 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/MapColumnMetadata.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/MapColumnMetadata.java
@@ -18,20 +18,14 @@
package org.apache.drill.exec.record.metadata;
import org.apache.drill.common.types.TypeProtos.DataMode;
-import org.apache.drill.common.types.TypeProtos.MajorType;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.exec.record.MaterializedField;
-import java.util.stream.Collectors;
-
/**
* Describes a map and repeated map. Both are tuples that have a tuple
* schema as part of the column definition.
*/
-public class MapColumnMetadata extends AbstractColumnMetadata {
-
- private TupleMetadata parentTuple;
- private final TupleSchema mapSchema;
+public class MapColumnMetadata extends AbstractMapColumnMetadata {
/**
* Build a new map column from the field provided
@@ -47,30 +41,18 @@ public class MapColumnMetadata extends AbstractColumnMetadata {
* the children) of the materialized field provided.
*
* @param schema the schema to use
- * @param mapSchema parent schema
+ * @param tupleSchema tuple schema holding the map's member columns
*/
- MapColumnMetadata(MaterializedField schema, TupleSchema mapSchema) {
- super(schema);
- if (mapSchema == null) {
- this.mapSchema = new TupleSchema();
- } else {
- this.mapSchema = mapSchema;
- }
- this.mapSchema.bind(this);
+ MapColumnMetadata(MaterializedField schema, TupleSchema tupleSchema) {
+ super(schema, tupleSchema);
}
public MapColumnMetadata(MapColumnMetadata from) {
super(from);
- mapSchema = (TupleSchema) from.mapSchema.copy();
}
- public MapColumnMetadata(String name, DataMode mode, TupleSchema mapSchema) {
- super(name, MinorType.MAP, mode);
- if (mapSchema == null) {
- this.mapSchema = new TupleSchema();
- } else {
- this.mapSchema = mapSchema;
- }
+ public MapColumnMetadata(String name, DataMode mode, TupleSchema tupleSchema) {
+ super(name, MinorType.MAP, mode, tupleSchema);
}
@Override
@@ -79,61 +61,17 @@ public class MapColumnMetadata extends AbstractColumnMetadata {
}
@Override
- public void bind(TupleMetadata parentTuple) {
- this.parentTuple = parentTuple;
- }
-
- @Override
- public ColumnMetadata.StructureType structureType() { return ColumnMetadata.StructureType.TUPLE; }
-
- @Override
- public TupleMetadata mapSchema() { return mapSchema; }
-
- @Override
- public int expectedWidth() { return 0; }
-
- @Override
- public boolean isMap() { return true; }
-
- public TupleMetadata parentTuple() { return parentTuple; }
-
- @Override
public ColumnMetadata cloneEmpty() {
return new MapColumnMetadata(name, mode, new TupleSchema());
}
@Override
- public MaterializedField schema() {
- MaterializedField field = emptySchema();
- for (MaterializedField member : mapSchema.toFieldList()) {
- field.addChild(member);
- }
- return field;
- }
-
- @Override
- public MaterializedField emptySchema() {
- return MaterializedField.create(name,
- MajorType.newBuilder()
- .setMinorType(type)
- .setMode(mode)
- .build());
+ public boolean isMap() {
+ return true;
}
@Override
- public String typeString() {
- StringBuilder builder = new StringBuilder();
- if (isArray()) {
- builder.append("ARRAY<");
- }
- builder.append("STRUCT<");
- builder.append(mapSchema().toMetadataList().stream()
- .map(ColumnMetadata::columnString)
- .collect(Collectors.joining(", ")));
- builder.append(">");
- if (isArray()) {
- builder.append(">");
- }
- return builder.toString();
+ protected String getStringType() {
+ return "STRUCT";
}
}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/MetadataUtils.java b/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/MetadataUtils.java
index 0ded113..43b38c6 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/MetadataUtils.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/MetadataUtils.java
@@ -49,6 +49,8 @@ public class MetadataUtils {
MajorType majorType = field.getType();
MinorType type = majorType.getMinorType();
switch (type) {
+ case DICT:
+ return MetadataUtils.newDict(field);
case MAP:
return MetadataUtils.newMap(field);
case UNION:
@@ -132,6 +134,14 @@ public class MetadataUtils {
return new MapColumnMetadata(name, DataMode.REQUIRED, (TupleSchema) schema);
}
+ public static DictColumnMetadata newDict(MaterializedField field) {
+ return new DictColumnMetadata(field, fromFields(field.getChildren()));
+ }
+
+ public static DictColumnMetadata newDict(String name, TupleMetadata schema) {
+ return new DictColumnMetadata(name, DataMode.OPTIONAL, (TupleSchema) schema);
+ }
+
public static VariantColumnMetadata newVariant(MaterializedField field, VariantSchema schema) {
return new VariantColumnMetadata(field, schema);
}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/TupleSchema.java b/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/TupleSchema.java
index b7c1208..97304c5 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/TupleSchema.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/TupleSchema.java
@@ -48,7 +48,7 @@ public class TupleSchema extends AbstractPropertied implements TupleMetadata {
public static final String TYPE = "tuple_schema";
- private MapColumnMetadata parentMap;
+ private AbstractMapColumnMetadata parentMap;
private final TupleNameSpace<ColumnMetadata> nameSpace = new TupleNameSpace<>();
public TupleSchema() { }
@@ -62,7 +62,7 @@ public class TupleSchema extends AbstractPropertied implements TupleMetadata {
setProperties(properties);
}
- public void bind(MapColumnMetadata parentMap) {
+ public void bind(AbstractMapColumnMetadata parentMap) {
this.parentMap = parentMap;
}
@@ -132,7 +132,7 @@ public class TupleSchema extends AbstractPropertied implements TupleMetadata {
}
@Override
- public MapColumnMetadata parent() { return parentMap; }
+ public AbstractMapColumnMetadata parent() { return parentMap; }
@Override
public int size() { return nameSpace.count(); }
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/util/Text.java b/exec/vector/src/main/java/org/apache/drill/exec/util/Text.java
index 03caa09..d8cf69b 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/util/Text.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/util/Text.java
@@ -316,7 +316,7 @@ public class Text {
@Override
public int hashCode() {
- return super.hashCode();
+ return Arrays.hashCode(getBytes());
}
// / STATIC UTILITIES FROM HERE DOWN
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ColumnReaderFactory.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ColumnReaderFactory.java
index ae15e5d..c676c07 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ColumnReaderFactory.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ColumnReaderFactory.java
@@ -47,6 +47,7 @@ public class ColumnReaderFactory {
case NULL:
case LIST:
case MAP:
+ case DICT:
throw new UnsupportedOperationException(type.toString());
default:
return newAccessor(type, requiredReaders);
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ColumnWriterFactory.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ColumnWriterFactory.java
index 557336f..93f82ee 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ColumnWriterFactory.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ColumnWriterFactory.java
@@ -72,6 +72,7 @@ public class ColumnWriterFactory {
case NULL:
case LIST:
case MAP:
+ case DICT:
case UNION:
throw new UnsupportedOperationException(schema.type().toString());
default:
@@ -127,6 +128,7 @@ public class ColumnWriterFactory {
case LATE:
case LIST:
case MAP:
+ case DICT:
case UNION:
throw new UnsupportedOperationException(schema.type().toString());
default:
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/AbstractMapVector.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/AbstractMapVector.java
index 4ddb85a..f5a033c 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/AbstractMapVector.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/AbstractMapVector.java
@@ -37,7 +37,7 @@ import org.apache.drill.shaded.guava.com.google.common.base.Preconditions;
import org.apache.drill.shaded.guava.com.google.common.collect.Lists;
/**
- * Base class for MapVectors. Currently used by RepeatedMapVector and MapVector
+ * Base class for MapVectors. Currently used by AbstractRepeatedMapVector and MapVector
*/
public abstract class AbstractMapVector extends AbstractContainerVector {
private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(AbstractContainerVector.class);
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/RepeatedMapVector.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/AbstractRepeatedMapVector.java
similarity index 58%
copy from exec/vector/src/main/java/org/apache/drill/exec/vector/complex/RepeatedMapVector.java
copy to exec/vector/src/main/java/org/apache/drill/exec/vector/complex/AbstractRepeatedMapVector.java
index 2b00382..9e1da48 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/RepeatedMapVector.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/AbstractRepeatedMapVector.java
@@ -21,53 +21,37 @@ import io.netty.buffer.DrillBuf;
import java.util.Iterator;
import java.util.List;
-import java.util.Map;
import java.util.Set;
import org.apache.commons.lang3.ArrayUtils;
-import org.apache.drill.common.types.TypeProtos.DataMode;
-import org.apache.drill.common.types.TypeProtos.MajorType;
-import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.common.types.TypeProtos;
import org.apache.drill.common.types.Types;
import org.apache.drill.exec.exception.OutOfMemoryException;
import org.apache.drill.exec.expr.BasicTypeHelper;
-import org.apache.drill.exec.expr.holders.ComplexHolder;
-import org.apache.drill.exec.expr.holders.RepeatedMapHolder;
+import org.apache.drill.exec.expr.holders.RepeatedValueHolder;
import org.apache.drill.exec.memory.BufferAllocator;
import org.apache.drill.exec.memory.AllocationManager.BufferLedger;
import org.apache.drill.exec.proto.UserBitShared.SerializedField;
import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.exec.record.TransferPair;
import org.apache.drill.exec.util.CallBack;
-import org.apache.drill.exec.util.JsonStringArrayList;
import org.apache.drill.exec.vector.AddOrGetResult;
import org.apache.drill.exec.vector.AllocationHelper;
+import org.apache.drill.exec.vector.SchemaChangeCallBack;
import org.apache.drill.exec.vector.UInt4Vector;
import org.apache.drill.exec.vector.ValueVector;
import org.apache.drill.exec.vector.VectorDescriptor;
-import org.apache.drill.exec.vector.SchemaChangeCallBack;
-import org.apache.drill.exec.vector.complex.impl.NullReader;
-import org.apache.drill.exec.vector.complex.impl.RepeatedMapReaderImpl;
-import org.apache.drill.exec.vector.complex.reader.FieldReader;
-
-import org.apache.drill.shaded.guava.com.google.common.collect.Maps;
-public class RepeatedMapVector extends AbstractMapVector
- implements RepeatedValueVector {
+public abstract class AbstractRepeatedMapVector extends AbstractMapVector implements RepeatedValueVector {
- public final static MajorType TYPE = MajorType.newBuilder().setMinorType(MinorType.MAP).setMode(DataMode.REPEATED).build();
+ protected final UInt4Vector offsets; // offsets to start of each record (considering record indices are 0-indexed)
+ protected final EmptyValuePopulator emptyPopulator;
- private final UInt4Vector offsets; // offsets to start of each record (considering record indices are 0-indexed)
- private final RepeatedMapReaderImpl reader = new RepeatedMapReaderImpl(RepeatedMapVector.this);
- private final RepeatedMapAccessor accessor = new RepeatedMapAccessor();
- private final Mutator mutator = new Mutator();
- private final EmptyValuePopulator emptyPopulator;
-
- public RepeatedMapVector(MaterializedField field, BufferAllocator allocator, CallBack callBack) {
+ protected AbstractRepeatedMapVector(MaterializedField field, BufferAllocator allocator, CallBack callBack) {
this(field, new UInt4Vector(BaseRepeatedValueVector.OFFSETS_FIELD, allocator), callBack);
}
- public RepeatedMapVector(MaterializedField field, UInt4Vector offsets, CallBack callBack) {
+ protected AbstractRepeatedMapVector(MaterializedField field, UInt4Vector offsets, CallBack callBack) {
super(field, offsets.getAllocator(), callBack);
this.offsets = offsets;
this.emptyPopulator = new EmptyValuePopulator(offsets);
@@ -89,14 +73,11 @@ public class RepeatedMapVector extends AbstractMapVector
@Override
public void setInitialCapacity(int numRecords) {
offsets.setInitialCapacity(numRecords + 1);
- for (final ValueVector v : this) {
+ for (ValueVector v : this) {
v.setInitialCapacity(numRecords * RepeatedValueVector.DEFAULT_REPEAT_PER_RECORD);
}
}
- @Override
- public RepeatedMapReaderImpl getReader() { return reader; }
-
public void allocateNew(int groupCount, int innerValueCount) {
clear();
try {
@@ -104,11 +85,11 @@ public class RepeatedMapVector extends AbstractMapVector
for (ValueVector v : getChildren()) {
AllocationHelper.allocatePrecomputedChildCount(v, groupCount, 50, innerValueCount);
}
- } catch (OutOfMemoryException e){
+ } catch (OutOfMemoryException e) {
clear();
throw e;
}
- mutator.reset();
+ getMutator().reset();
}
public void allocateOffsetsNew(int groupCount) {
@@ -122,7 +103,7 @@ public class RepeatedMapVector extends AbstractMapVector
@Override
public List<ValueVector> getPrimitiveVectors() {
- final List<ValueVector> primitiveVectors = super.getPrimitiveVectors();
+ List<ValueVector> primitiveVectors = super.getPrimitiveVectors();
primitiveVectors.add(offsets);
return primitiveVectors;
}
@@ -141,13 +122,13 @@ public class RepeatedMapVector extends AbstractMapVector
}
@Override
- public int getBufferSizeFor(final int valueCount) {
+ public int getBufferSizeFor(int valueCount) {
if (valueCount == 0) {
return 0;
}
long bufferSize = offsets.getBufferSizeFor(valueCount);
- for (final ValueVector v : this) {
+ for (ValueVector v : this) {
bufferSize += v.getBufferSizeFor(valueCount);
}
@@ -160,61 +141,11 @@ public class RepeatedMapVector extends AbstractMapVector
super.close();
}
- @Override
- public TransferPair getTransferPair(BufferAllocator allocator) {
- return new RepeatedMapTransferPair(this, getField().getName(), allocator);
- }
-
- @Override
- public TransferPair makeTransferPair(ValueVector to) {
- return new RepeatedMapTransferPair(this, (RepeatedMapVector)to);
- }
-
- MapSingleCopier makeSingularCopier(MapVector to) {
- return new MapSingleCopier(this, to);
- }
-
- protected static class MapSingleCopier {
- private final TransferPair[] pairs;
- public final RepeatedMapVector from;
-
- public MapSingleCopier(RepeatedMapVector from, MapVector to) {
- this.from = from;
- this.pairs = new TransferPair[from.size()];
-
- int i = 0;
- ValueVector vector;
- for (final String child:from.getChildFieldNames()) {
- int preSize = to.size();
- vector = from.getChild(child);
- if (vector == null) {
- continue;
- }
- final ValueVector newVector = to.addOrGet(child, vector.getField().getType(), vector.getClass());
- if (to.size() != preSize) {
- newVector.allocateNew();
- }
- pairs[i++] = vector.makeTransferPair(newVector);
- }
- }
-
- public void copySafe(int fromSubIndex, int toIndex) {
- for (TransferPair p : pairs) {
- p.copyValueSafe(fromSubIndex, toIndex);
- }
- }
- }
-
public TransferPair getTransferPairToSingleMap(String reference, BufferAllocator allocator) {
return new SingleMapTransferPair(this, reference, allocator);
}
@Override
- public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
- return new RepeatedMapTransferPair(this, ref, allocator);
- }
-
- @Override
public boolean allocateNewSafe() {
/* boolean to keep track if all the memory allocation were successful
* Used in the case of composite vectors when we need to allocate multiple
@@ -236,47 +167,46 @@ public class RepeatedMapVector extends AbstractMapVector
return success;
}
- protected static class SingleMapTransferPair implements TransferPair {
- private final TransferPair[] pairs;
- private final RepeatedMapVector from;
- private final MapVector to;
- private static final MajorType MAP_TYPE = Types.required(MinorType.MAP);
+ abstract class AbstractRepeatedMapTransferPair<T extends AbstractRepeatedMapVector> implements TransferPair {
- public SingleMapTransferPair(RepeatedMapVector from, String path, BufferAllocator allocator) {
- this(from, new MapVector(MaterializedField.create(path, MAP_TYPE), allocator, new SchemaChangeCallBack()), false);
- }
+ protected final T to;
+ protected final T from;
+ private final TransferPair[] pairs;
- public SingleMapTransferPair(RepeatedMapVector from, MapVector to) {
- this(from, to, true);
+ public AbstractRepeatedMapTransferPair(T to) {
+ this(to, true);
}
- public SingleMapTransferPair(RepeatedMapVector from, MapVector to, boolean allocate) {
- this.from = from;
+ @SuppressWarnings("unchecked")
+ public AbstractRepeatedMapTransferPair(T to, boolean allocate) {
+ this.from = (T) AbstractRepeatedMapVector.this;
this.to = to;
this.pairs = new TransferPair[from.size()];
+
int i = 0;
ValueVector vector;
- for (final String child : from.getChildFieldNames()) {
+ for (String child : from.getChildFieldNames()) {
int preSize = to.size();
vector = from.getChild(child);
if (vector == null) {
continue;
}
- final ValueVector newVector = to.addOrGet(child, vector.getField().getType(), vector.getClass());
+
+ ValueVector newVector = to.addOrGet(child, vector.getField().getType(), vector.getClass());
if (allocate && to.size() != preSize) {
newVector.allocateNew();
}
+
pairs[i++] = vector.makeTransferPair(newVector);
}
}
-
@Override
public void transfer() {
+ from.offsets.transferTo(to.offsets);
for (TransferPair p : pairs) {
p.transfer();
}
- to.getMutator().setValueCount(from.getAccessor().getValueCount());
from.clear();
}
@@ -286,65 +216,84 @@ public class RepeatedMapVector extends AbstractMapVector
}
@Override
- public void copyValueSafe(int from, int to) {
- for (TransferPair p : pairs) {
- p.copyValueSafe(from, to);
+ public void copyValueSafe(int srcIndex, int destIndex) {
+ RepeatedValueHolder holder = getValueHolder();
+ from.getAccessor().get(srcIndex, holder);
+ to.emptyPopulator.populate(destIndex + 1);
+ int newIndex = to.offsets.getAccessor().get(destIndex);
+ for (int i = holder.start; i < holder.end; i++, newIndex++) {
+ for (TransferPair p : pairs) {
+ p.copyValueSafe(i, newIndex);
+ }
}
+ to.offsets.getMutator().setSafe(destIndex + 1, newIndex);
}
@Override
- public void splitAndTransfer(int startIndex, int length) {
+ public void splitAndTransfer(int groupStart, int groups) {
+ UInt4Vector.Accessor a = from.offsets.getAccessor();
+ UInt4Vector.Mutator m = to.offsets.getMutator();
+
+ int startPos = a.get(groupStart);
+ int endPos = a.get(groupStart + groups);
+ int valuesToCopy = endPos - startPos;
+
+ to.offsets.clear();
+ to.offsets.allocateNew(groups + 1);
+
+ int normalizedPos;
+ for (int i = 0; i < groups + 1; i++) {
+ normalizedPos = a.get(groupStart + i) - startPos;
+ m.set(i, normalizedPos);
+ }
+
+ m.setValueCount(groups + 1);
+ to.emptyPopulator.populate(groups);
+
for (TransferPair p : pairs) {
- p.splitAndTransfer(startIndex, length);
+ p.splitAndTransfer(startPos, valuesToCopy);
}
- to.getMutator().setValueCount(length);
}
}
- private static class RepeatedMapTransferPair implements TransferPair{
+ static class SingleMapTransferPair implements TransferPair {
- private final TransferPair[] pairs;
- private final RepeatedMapVector to;
- private final RepeatedMapVector from;
+ private static final TypeProtos.MajorType MAP_TYPE = Types.required(TypeProtos.MinorType.MAP);
- public RepeatedMapTransferPair(RepeatedMapVector from, String path, BufferAllocator allocator) {
- this(from, new RepeatedMapVector(MaterializedField.create(path, TYPE), allocator, new SchemaChangeCallBack()), false);
- }
+ private final TransferPair[] pairs;
+ private final AbstractRepeatedMapVector from;
+ private final MapVector to;
- public RepeatedMapTransferPair(RepeatedMapVector from, RepeatedMapVector to) {
- this(from, to, true);
+ public SingleMapTransferPair(AbstractRepeatedMapVector from, String path, BufferAllocator allocator) {
+ this(from, new MapVector(MaterializedField.create(path, MAP_TYPE), allocator, new SchemaChangeCallBack()), false);
}
- public RepeatedMapTransferPair(RepeatedMapVector from, RepeatedMapVector to, boolean allocate) {
+ public SingleMapTransferPair(AbstractRepeatedMapVector from, MapVector to, boolean allocate) {
this.from = from;
this.to = to;
this.pairs = new TransferPair[from.size()];
- this.to.ephPair = null;
-
int i = 0;
ValueVector vector;
- for (final String child : from.getChildFieldNames()) {
- final int preSize = to.size();
+ for (String child : from.getChildFieldNames()) {
+ int preSize = to.size();
vector = from.getChild(child);
if (vector == null) {
continue;
}
-
- final ValueVector newVector = to.addOrGet(child, vector.getField().getType(), vector.getClass());
- if (to.size() != preSize) {
+ ValueVector newVector = to.addOrGet(child, vector.getField().getType(), vector.getClass());
+ if (allocate && to.size() != preSize) {
newVector.allocateNew();
}
-
pairs[i++] = vector.makeTransferPair(newVector);
}
}
@Override
public void transfer() {
- from.offsets.transferTo(to.offsets);
for (TransferPair p : pairs) {
p.transfer();
}
+ to.getMutator().setValueCount(from.getAccessor().getValueCount());
from.clear();
}
@@ -354,59 +303,33 @@ public class RepeatedMapVector extends AbstractMapVector
}
@Override
- public void copyValueSafe(int srcIndex, int destIndex) {
- RepeatedMapHolder holder = new RepeatedMapHolder();
- from.getAccessor().get(srcIndex, holder);
- to.emptyPopulator.populate(destIndex + 1);
- int newIndex = to.offsets.getAccessor().get(destIndex);
- //todo: make these bulk copies
- for (int i = holder.start; i < holder.end; i++, newIndex++) {
- for (TransferPair p : pairs) {
- p.copyValueSafe(i, newIndex);
- }
+ public void copyValueSafe(int from, int to) {
+ for (TransferPair p : pairs) {
+ p.copyValueSafe(from, to);
}
- to.offsets.getMutator().setSafe(destIndex + 1, newIndex);
}
@Override
- public void splitAndTransfer(final int groupStart, final int groups) {
- final UInt4Vector.Accessor a = from.offsets.getAccessor();
- final UInt4Vector.Mutator m = to.offsets.getMutator();
-
- final int startPos = a.get(groupStart);
- final int endPos = a.get(groupStart + groups);
- final int valuesToCopy = endPos - startPos;
-
- to.offsets.clear();
- to.offsets.allocateNew(groups + 1);
-
- int normalizedPos;
- for (int i = 0; i < groups + 1; i++) {
- normalizedPos = a.get(groupStart + i) - startPos;
- m.set(i, normalizedPos);
- }
-
- m.setValueCount(groups + 1);
- to.emptyPopulator.populate(groups);
-
- for (final TransferPair p : pairs) {
- p.splitAndTransfer(startPos, valuesToCopy);
+ public void splitAndTransfer(int startIndex, int length) {
+ for (TransferPair p : pairs) {
+ p.splitAndTransfer(startIndex, length);
}
+ to.getMutator().setValueCount(length);
}
}
- transient private RepeatedMapTransferPair ephPair;
+ transient private AbstractRepeatedMapTransferPair ephPair;
- public void copyFromSafe(int fromIndex, int thisIndex, RepeatedMapVector from) {
+ public void copyFromSafe(int fromIndex, int thisIndex, AbstractRepeatedMapVector from) {
if (ephPair == null || ephPair.from != from) {
- ephPair = (RepeatedMapTransferPair) from.makeTransferPair(this);
+ ephPair = (AbstractRepeatedMapTransferPair) from.makeTransferPair(this);
}
ephPair.copyValueSafe(fromIndex, thisIndex);
}
@Override
public void copyEntry(int toIndex, ValueVector from, int fromIndex) {
- copyFromSafe(fromIndex, toIndex, (RepeatedMapVector) from);
+ copyFromSafe(fromIndex, toIndex, (AbstractRepeatedMapVector) from);
}
@Override
@@ -415,14 +338,9 @@ public class RepeatedMapVector extends AbstractMapVector
}
@Override
- public RepeatedMapAccessor getAccessor() {
- return accessor;
- }
-
- @Override
public void exchange(ValueVector other) {
super.exchange(other);
- offsets.exchange(((RepeatedMapVector) other).offsets);
+ offsets.exchange(((AbstractRepeatedMapVector) other).offsets);
}
@Override
@@ -432,22 +350,22 @@ public class RepeatedMapVector extends AbstractMapVector
@Override
public void load(SerializedField metadata, DrillBuf buffer) {
- final List<SerializedField> children = metadata.getChildList();
+ List<SerializedField> children = metadata.getChildList();
- final SerializedField offsetField = children.get(0);
+ SerializedField offsetField = children.get(0);
offsets.load(offsetField, buffer);
int bufOffset = offsetField.getBufferLength();
for (int i = 1; i < children.size(); i++) {
- final SerializedField child = children.get(i);
- final MaterializedField fieldDef = MaterializedField.create(child);
+ SerializedField child = children.get(i);
+ MaterializedField fieldDef = MaterializedField.create(child);
ValueVector vector = getChild(fieldDef.getName());
if (vector == null) {
// if we arrive here, we didn't have a matching vector.
vector = BasicTypeHelper.getNewVector(fieldDef, allocator);
putChild(fieldDef.getName(), vector);
}
- final int vectorLength = child.getBufferLength();
+ int vectorLength = child.getBufferLength();
vector.load(child, buffer.slice(bufOffset, vectorLength));
bufOffset += vectorLength;
}
@@ -462,39 +380,23 @@ public class RepeatedMapVector extends AbstractMapVector
.setBufferLength(getBufferSize())
// while we don't need to actually read this on load, we need it to
// make sure we don't skip deserialization of this vector
- .setValueCount(accessor.getValueCount());
+ .setValueCount(getAccessor().getValueCount());
builder.addChild(offsets.getMetadata());
- for (final ValueVector child : getChildren()) {
+ for (ValueVector child : getChildren()) {
builder.addChild(child.getMetadata());
}
return builder.build();
}
- @Override
- public Mutator getMutator() {
- return mutator;
- }
+ public abstract class Accessor implements RepeatedAccessor {
- public class RepeatedMapAccessor implements RepeatedAccessor {
- @Override
- public Object getObject(int index) {
- final List<Object> list = new JsonStringArrayList<>();
- final int end = offsets.getAccessor().get(index+1);
- String fieldName;
- for (int i = offsets.getAccessor().get(index); i < end; i++) {
- final Map<String, Object> vv = Maps.newLinkedHashMap();
- for (final MaterializedField field : getField().getChildren()) {
- if (!field.equals(BaseRepeatedValueVector.OFFSETS_FIELD)) {
- fieldName = field.getName();
- final Object value = getChild(fieldName).getAccessor().getObject(i);
- if (value != null) {
- vv.put(fieldName, value);
- }
- }
- }
- list.add(vv);
- }
- return list;
+ public void get(int index, RepeatedValueHolder holder) {
+ assert index < getValueCapacity() :
+ String.format("Attempted to access index %d when value capacity is %d",
+ index, getValueCapacity());
+ UInt4Vector.Accessor offsetsAccessor = offsets.getAccessor();
+ holder.start = offsetsAccessor.get(index);
+ holder.end = offsetsAccessor.get(index + 1);
}
@Override
@@ -504,7 +406,7 @@ public class RepeatedMapVector extends AbstractMapVector
@Override
public int getInnerValueCount() {
- final int valueCount = getValueCount();
+ int valueCount = getValueCount();
if (valueCount == 0) {
return 0;
}
@@ -525,37 +427,9 @@ public class RepeatedMapVector extends AbstractMapVector
public boolean isNull(int index) {
return false;
}
-
- public void get(int index, RepeatedMapHolder holder) {
- assert index < getValueCapacity() :
- String.format("Attempted to access index %d when value capacity is %d",
- index, getValueCapacity());
- final UInt4Vector.Accessor offsetsAccessor = offsets.getAccessor();
- holder.start = offsetsAccessor.get(index);
- holder.end = offsetsAccessor.get(index + 1);
- }
-
- public void get(int index, ComplexHolder holder) {
- final FieldReader reader = getReader();
- reader.setPosition(index);
- holder.reader = reader;
- }
-
- public void get(int index, int arrayIndex, ComplexHolder holder) {
- final RepeatedMapHolder h = new RepeatedMapHolder();
- get(index, h);
- final int offset = h.start + arrayIndex;
-
- if (offset >= h.end) {
- holder.reader = NullReader.INSTANCE;
- } else {
- reader.setSinglePosition(index, arrayIndex);
- holder.reader = reader;
- }
- }
}
- public class Mutator implements RepeatedMutator {
+ public abstract class Mutator implements RepeatedMutator {
@Override
public void startNewValue(int index) {
emptyPopulator.populate(index + 1);
@@ -567,25 +441,28 @@ public class RepeatedMapVector extends AbstractMapVector
emptyPopulator.populate(topLevelValueCount);
offsets.getMutator().setValueCount(topLevelValueCount == 0 ? 0 : topLevelValueCount + 1);
int childValueCount = offsets.getAccessor().get(topLevelValueCount);
- for (final ValueVector v : getChildren()) {
+ for (ValueVector v : getChildren()) {
v.getMutator().setValueCount(childValueCount);
}
}
@Override
- public void reset() {}
+ public void reset() {
+ }
@Override
- public void generateTestData(int values) {}
+ public void generateTestData(int values) {
+ }
public int add(int index) {
- final int prevEnd = offsets.getAccessor().get(index + 1);
+ int prevEnd = offsets.getAccessor().get(index + 1);
offsets.getMutator().setSafe(index + 1, prevEnd + 1);
return prevEnd;
}
@Override
- public void exchange(ValueVector.Mutator other) { }
+ public void exchange(ValueVector.Mutator other) {
+ }
}
@Override
@@ -593,7 +470,7 @@ public class RepeatedMapVector extends AbstractMapVector
getMutator().reset();
offsets.clear();
- for(final ValueVector vector : getChildren()) {
+ for (ValueVector vector : getChildren()) {
vector.clear();
}
}
@@ -618,10 +495,19 @@ public class RepeatedMapVector extends AbstractMapVector
int entryCount = offsets.getAccessor().get(valueCount);
int count = offsets.getPayloadByteCount(valueCount);
- for (final ValueVector v : getChildren()) {
+ for (ValueVector v : getChildren()) {
count += v.getPayloadByteCount(entryCount);
}
return count;
}
+ @Override
+ public abstract Accessor getAccessor();
+
+ /**
+ * Creates an instance of value holder corresponding to the vector.
+ *
+ * @return value holder for the vector
+ */
+ abstract RepeatedValueHolder getValueHolder();
}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/BaseRepeatedValueVector.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/BaseRepeatedValueVector.java
index c5d2967..f19709d 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/BaseRepeatedValueVector.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/BaseRepeatedValueVector.java
@@ -31,6 +31,7 @@ import org.apache.drill.exec.memory.BufferAllocator;
import org.apache.drill.exec.memory.AllocationManager.BufferLedger;
import org.apache.drill.exec.proto.UserBitShared;
import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.record.TransferPair;
import org.apache.drill.exec.vector.AddOrGetResult;
import org.apache.drill.exec.vector.BaseValueVector;
import org.apache.drill.exec.vector.UInt4Vector;
@@ -177,7 +178,7 @@ public abstract class BaseRepeatedValueVector extends BaseValueVector implements
/**
* Returns 1 if inner vector is explicitly set via #addOrGetVector else 0
*
- * @see {@link org.apache.drill.exec.vector.complex.ContainerVectorLike#size()}
+ * @see org.apache.drill.exec.vector.complex.ContainerVectorLike#size()
*/
@Override
public int size() {
@@ -250,6 +251,54 @@ public abstract class BaseRepeatedValueVector extends BaseValueVector implements
offsets.exchange(target.offsets);
}
+ protected abstract class BaseRepeatedValueVectorTransferPair<T extends BaseRepeatedValueVector> implements TransferPair {
+
+ protected final T target;
+ protected final TransferPair[] children;
+
+ protected BaseRepeatedValueVectorTransferPair(T target) {
+ this.target = Preconditions.checkNotNull(target);
+ if (target.getDataVector() == DEFAULT_DATA_VECTOR) {
+ target.addOrGetVector(VectorDescriptor.create(getDataVector().getField()));
+ target.getDataVector().allocateNew();
+ }
+ this.children = new TransferPair[] {
+ getOffsetVector().makeTransferPair(target.getOffsetVector()),
+ getDataVector().makeTransferPair(target.getDataVector())
+ };
+ }
+
+ @Override
+ public void transfer() {
+ for (TransferPair child : children) {
+ child.transfer();
+ }
+ }
+
+ @Override
+ public ValueVector getTo() {
+ return target;
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ target.allocateNew();
+ for (int i = 0; i < length; i++) {
+ copyValueSafe(startIndex + i, i);
+ }
+ }
+
+ protected void copyValueSafe(int destIndex, int start, int end) {
+ TransferPair vectorTransfer = children[1];
+ int newIndex = target.getOffsetVector().getAccessor().get(destIndex);
+ // TODO: make this a bulk copy.
+ for (int i = start; i < end; i++, newIndex++) {
+ vectorTransfer.copyValueSafe(i, newIndex);
+ }
+ target.getOffsetVector().getMutator().setSafe(destIndex + 1, newIndex);
+ }
+ }
+
public abstract class BaseRepeatedAccessor extends BaseValueVector.BaseAccessor implements RepeatedAccessor {
@Override
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/DictVector.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/DictVector.java
new file mode 100644
index 0000000..c535807
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/DictVector.java
@@ -0,0 +1,312 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.complex;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.drill.common.exceptions.DrillRuntimeException;
+import org.apache.drill.common.types.TypeProtos.MajorType;
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.common.types.Types;
+import org.apache.drill.exec.expr.holders.RepeatedValueHolder;
+import org.apache.drill.exec.expr.holders.DictHolder;
+import org.apache.drill.exec.memory.BufferAllocator;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.record.TransferPair;
+import org.apache.drill.exec.util.CallBack;
+import org.apache.drill.exec.util.JsonStringHashMap;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.complex.impl.SingleDictReaderImpl;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A {@link ValueVector} holding key-value pairs.
+ * <p>This vector is essentially a {@link RepeatedMapVector} but with constraints:
+ * it may have 2 children only, named {@link #FIELD_KEY_NAME} and {@link #FIELD_VALUE_NAME}.
+ * The {@link #FIELD_KEY_NAME} can be of primitive type only and its values should not be {@code null},
+ * while the other, {@link #FIELD_VALUE_NAME}, field can be either of primitive or complex type.
+ *
+ * <p>This vector has its own {@link org.apache.drill.exec.vector.complex.reader.FieldReader} and
+ * {@link org.apache.drill.exec.vector.complex.writer.FieldWriter} to ensure data is read and written correctly.
+ * In addition, the reader is responsible for getting a value for a given key.
+ *
+ * <p>Additionally, {@code Object} representation is changed in {@link Accessor#getObject(int)}
+ * to represent it as {@link JsonStringHashMap} with appropriate {@code key} and {@code value} types.
+ *
+ * <p>(The structure corresponds to Java's notion of {@link Map}).
+ *
+ * @see SingleDictReaderImpl reader corresponding to the vector
+ * @see org.apache.drill.exec.vector.complex.impl.SingleDictWriter writer corresponding to the vector
+ */
+public final class DictVector extends AbstractRepeatedMapVector {
+
+ public final static MajorType TYPE = Types.optional(MinorType.DICT);
+
+ public static final String FIELD_KEY_NAME = "key";
+ public static final String FIELD_VALUE_NAME = "value";
+ public static final List<String> fieldNames = Arrays.asList(FIELD_KEY_NAME, FIELD_VALUE_NAME);
+
+ private static final Logger logger = LoggerFactory.getLogger(DictVector.class);
+
+ private final Accessor accessor = new Accessor();
+ private final Mutator mutator = new Mutator();
+ private final SingleDictReaderImpl reader = new SingleDictReaderImpl(this);
+
+ private MajorType keyType;
+ private MajorType valueType;
+
+ /**
+ * Denotes if the value field is nullable. Initialized lazily on first
+ * invocation of its getter method {@link #isValueNullable()}.
+ */
+ private Boolean valueNullable;
+
+ public DictVector(MaterializedField field, BufferAllocator allocator, CallBack callBack) {
+ super(field.clone(), allocator, callBack);
+ }
+
+ public DictVector(MaterializedField field, BufferAllocator allocator, CallBack callBack, MajorType keyType, MajorType valueType) {
+ this(field, allocator, callBack);
+ setKeyValueTypes(keyType, valueType);
+ }
+
+ @Override
+ public SingleDictReaderImpl getReader() {
+ return reader;
+ }
+
+ @Override
+ protected Collection<String> getChildFieldNames() {
+ return fieldNames;
+ }
+
+ public void transferTo(DictVector target) {
+ makeTransferPair(target);
+ target.setKeyValueTypes(keyType, valueType);
+ }
+
+ public TransferPair makeTransferPair(DictVector to) {
+ return new DictTransferPair(to);
+ }
+
+ @Override
+ public TransferPair getTransferPair(BufferAllocator allocator) {
+ return new DictTransferPair(getField().getName(), allocator);
+ }
+
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new DictTransferPair((DictVector) to);
+ }
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new DictTransferPair(ref, allocator);
+ }
+
+ private class DictTransferPair extends AbstractRepeatedMapTransferPair<DictVector> {
+
+ DictTransferPair(String path, BufferAllocator allocator) {
+ this(new DictVector(MaterializedField.create(path, TYPE), allocator, DictVector.this.callBack), false);
+ }
+
+ DictTransferPair(DictVector to) {
+ this(to, true);
+ }
+
+ DictTransferPair(DictVector to, boolean allocate) {
+ super(to, allocate);
+ to.keyType = from.keyType;
+ to.valueType = from.valueType;
+ }
+ }
+
+ /**
+ * Inserts the vector with the given name if it does not exist, otherwise replaces it with the new value.
+ * If the vector is replaced, old and new type are expected to be equal.
+ * Validates that the {@code name} is either {@link #FIELD_KEY_NAME} or {@link #FIELD_VALUE_NAME} and
+ * that key is of primitive type.
+ *
+ * @param name field name
+ * @param vector vector to be added
+ * @throws DrillRuntimeException if {@code name} is not equal to {@link #FIELD_KEY_NAME} or {@link #FIELD_VALUE_NAME}
+ * or if {@code name.equals(FIELD_KEY_NAME)} and vector is of repeated or complex type.
+ */
+ @Override
+ public void putChild(String name, ValueVector vector) {
+ if (!fieldNames.contains(name)) {
+ throw new DrillRuntimeException(
+ String.format("Unexpected field '%s' added to DictVector: the vector can have '%s' and '%s' children only",
+ name, FIELD_KEY_NAME, FIELD_VALUE_NAME)
+ );
+ }
+ MajorType fieldType = vector.getField().getType();
+ if (name.equals(FIELD_KEY_NAME)) {
+
+ if (Types.isRepeated(fieldType) || Types.isComplex(fieldType)) {
+ throw new DrillRuntimeException("DictVector supports primitive key type only. Found: " + fieldType);
+ }
+
+ checkTypes(keyType, fieldType, FIELD_KEY_NAME);
+ keyType = fieldType;
+ } else {
+ checkTypes(valueType, fieldType, FIELD_VALUE_NAME);
+ valueType = fieldType;
+ }
+ super.putChild(name, vector);
+ }
+
+ private void checkTypes(MajorType type, MajorType newType, String fieldName) {
+ assert type == null || newType.equals(type)
+ : String.format("Type mismatch for %s field in DICT: expected '%s' but found '%s'", fieldName, type, newType);
+ }
+
+ /**
+ * Returns a {@code ValueVector} corresponding to the given field name if exists or null.
+ * Expects either {@link #FIELD_KEY_NAME} or {@link #FIELD_VALUE_NAME}.
+ *
+ * @param name field's name
+ */
+ @Override
+ public ValueVector getChild(String name) {
+ assert fieldNames.contains(name) : String.format(
+ "DictVector has '%s' and '%s' ValueVectors only", FIELD_KEY_NAME, FIELD_VALUE_NAME);
+ return super.getChild(name);
+ }
+
+ public class Accessor extends AbstractRepeatedMapVector.Accessor {
+
+ @Override
+ public Object getObject(int index) {
+ int start = offsets.getAccessor().get(index);
+ int end = offsets.getAccessor().get(index + 1);
+
+ ValueVector keys = getKeys();
+ ValueVector values = getValues();
+
+ Map<Object, Object> result = new JsonStringHashMap<>();
+ for (int i = start; i < end; i++) {
+ Object key = keys.getAccessor().getObject(i);
+ Object value = values.getAccessor().getObject(i);
+ result.put(key, value);
+ }
+ return result;
+ }
+
+ public void get(int index, DictHolder holder) {
+ int valueCapacity = getValueCapacity();
+ assert index < valueCapacity :
+ String.format("Attempted to access index %d when value capacity is %d", index, valueCapacity);
+
+ holder.vector = DictVector.this;
+ holder.reader = reader;
+ holder.start = offsets.getAccessor().get(index);
+ holder.end = offsets.getAccessor().get(index + 1);
+ }
+ }
+
+ public class Mutator extends AbstractRepeatedMapVector.Mutator {
+ }
+
+ @Override
+ public void exchange(ValueVector other) {
+ DictVector map = (DictVector) other;
+ assert this.keyType == null || this.keyType.equals(map.keyType)
+ : "Cannot exchange DictVector with different key types";
+ assert this.valueType == null || this.valueType.equals(map.valueType)
+ : "Cannot exchange DictVector with different value types";
+ super.exchange(other);
+ }
+
+ @Override
+ public VectorWithOrdinal getChildVectorWithOrdinal(String name) {
+ assert fieldNames.contains(name) : String.format(
+ "DictVector has '%s' and '%s' children only", FIELD_KEY_NAME, FIELD_VALUE_NAME);
+ ValueVector vector = getChild(name);
+ switch (name) {
+ case FIELD_KEY_NAME:
+ return new VectorWithOrdinal(vector, 0);
+ case FIELD_VALUE_NAME:
+ return new VectorWithOrdinal(vector, 1);
+ default:
+ logger.warn("Field with name '{}' is not present in map vector.", name);
+ return null;
+ }
+ }
+
+ @Override
+ MajorType getLastPathType() {
+ return valueType;
+ }
+
+ @Override
+ public <T extends ValueVector> T getChild(String name, Class<T> clazz) {
+ assert fieldNames.contains(name) : "No such field in DictVector: " + name;
+ return super.getChild(name, clazz);
+ }
+
+ @Override
+ public Accessor getAccessor() {
+ return accessor;
+ }
+
+ @Override
+ public Mutator getMutator() {
+ return mutator;
+ }
+
+ public ValueVector getKeys() {
+ return getChild(FIELD_KEY_NAME);
+ }
+
+ public ValueVector getValues() {
+ return getChild(FIELD_VALUE_NAME);
+ }
+
+ public MajorType getKeyType() {
+ return keyType;
+ }
+
+ public MajorType getValueType() {
+ if (valueType == null) {
+ valueType = getValues().getField().getType();
+ }
+ return valueType;
+ }
+
+ public boolean isValueNullable() {
+ if (valueNullable == null) {
+ valueNullable = Types.isNullable(getValueType());
+ }
+ return valueNullable;
+ }
+
+ @Override
+ RepeatedValueHolder getValueHolder() {
+ return new DictHolder();
+ }
+
+ private void setKeyValueTypes(MajorType keyType, MajorType valueType) {
+ this.keyType = keyType;
+ this.valueType = valueType;
+ }
+}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/RepeatedDictVector.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/RepeatedDictVector.java
new file mode 100644
index 0000000..af4664c
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/RepeatedDictVector.java
@@ -0,0 +1,165 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.complex;
+
+import org.apache.drill.common.types.TypeProtos;
+import org.apache.drill.common.types.Types;
+import org.apache.drill.exec.exception.OutOfMemoryException;
+import org.apache.drill.exec.expr.holders.RepeatedDictHolder;
+import org.apache.drill.exec.memory.BufferAllocator;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.record.TransferPair;
+import org.apache.drill.exec.util.CallBack;
+import org.apache.drill.exec.util.JsonStringArrayList;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.complex.impl.RepeatedDictReaderImpl;
+import org.apache.drill.exec.vector.complex.reader.FieldReader;
+
+import java.util.List;
+
+public class RepeatedDictVector extends BaseRepeatedValueVector {
+
+ public final static TypeProtos.MajorType TYPE = Types.repeated(TypeProtos.MinorType.DICT);
+
+ private final static String DICT_VECTOR_NAME = "$inner$";
+ private final static MaterializedField DICT_VECTOR_FIELD =
+ MaterializedField.create(DICT_VECTOR_NAME, DictVector.TYPE);
+
+ private final Accessor accessor = new Accessor();
+ private final Mutator mutator = new Mutator();
+ private final FieldReader reader = new RepeatedDictReaderImpl(this);
+ private final EmptyValuePopulator emptyPopulator;
+
+ public RepeatedDictVector(String path, BufferAllocator allocator) {
+ this(MaterializedField.create(path, TYPE), allocator, null);
+ }
+
+ public RepeatedDictVector(MaterializedField field, BufferAllocator allocator, CallBack callback) {
+ super(field, allocator, new DictVector(DICT_VECTOR_FIELD, allocator, callback));
+ emptyPopulator = new EmptyValuePopulator(getOffsetVector());
+ }
+
+ @Override
+ public void allocateNew() throws OutOfMemoryException {
+ if (!allocateNewSafe()) {
+ throw new OutOfMemoryException();
+ }
+ }
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return makeTransferPair(new RepeatedDictVector(ref, allocator));
+ }
+
+ @Override
+ public TransferPair makeTransferPair(ValueVector target) {
+ return new RepeatedDictTransferPair((RepeatedDictVector) target);
+ }
+
+ public class RepeatedDictTransferPair extends BaseRepeatedValueVectorTransferPair<RepeatedDictVector> {
+
+ public RepeatedDictTransferPair(RepeatedDictVector target) {
+ super(target);
+ }
+
+ @Override
+ public void copyValueSafe(int srcIndex, int destIndex) {
+ final RepeatedDictHolder holder = new RepeatedDictHolder();
+ getAccessor().get(srcIndex, holder);
+ target.emptyPopulator.populate(destIndex+1);
+ copyValueSafe(destIndex, holder.start, holder.end);
+ }
+ }
+
+ @Override
+ public MaterializedField getField() {
+ MaterializedField field = this.field.clone();
+ field.addChild(vector.getField());
+ return field;
+ }
+
+ @Override
+ public Accessor getAccessor() {
+ return accessor;
+ }
+
+ @Override
+ public Mutator getMutator() {
+ return mutator;
+ }
+
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ @Override
+ public void copyEntry(int toIndex, ValueVector from, int fromIndex) {
+ RepeatedDictTransferPair pair = (RepeatedDictTransferPair) from.makeTransferPair(this);
+ pair.copyValueSafe(fromIndex, toIndex);
+ }
+
+ public class Accessor extends BaseRepeatedValueVector.BaseRepeatedAccessor {
+
+ @Override
+ public Object getObject(int index) {
+
+ List<Object> list = new JsonStringArrayList<>();
+ int start = offsets.getAccessor().get(index);
+ int end = offsets.getAccessor().get(index + 1);
+ for (int i = start; i < end; i++) {
+ list.add(vector.getAccessor().getObject(i));
+ }
+ return list;
+ }
+
+ public void get(int index, RepeatedDictHolder holder) {
+ int valueCapacity = getValueCapacity();
+ assert index < valueCapacity :
+ String.format("Attempted to access index %d when value capacity is %d", index, valueCapacity);
+
+ holder.vector = RepeatedDictVector.this;
+ holder.reader = reader;
+ holder.start = getOffsetVector().getAccessor().get(index);
+ holder.end = getOffsetVector().getAccessor().get(index + 1);
+ }
+ }
+
+ public class Mutator extends BaseRepeatedValueVector.BaseRepeatedMutator {
+
+ @Override
+ public void startNewValue(int index) {
+ emptyPopulator.populate(index + 1);
+ offsets.getMutator().setSafe(index + 1, offsets.getAccessor().get(index));
+ }
+
+ @Override
+ public void setValueCount(int topLevelValueCount) {
+ emptyPopulator.populate(topLevelValueCount);
+ offsets.getMutator().setValueCount(topLevelValueCount == 0 ? 0 : topLevelValueCount + 1);
+ int childValueCount = offsets.getAccessor().get(topLevelValueCount);
+ vector.getMutator().setValueCount(childValueCount);
+ }
+
+ public int add(int index) {
+ int prevEnd = offsets.getAccessor().get(index + 1);
+ offsets.getMutator().setSafe(index + 1, prevEnd + 1);
+ return prevEnd;
+ }
+ }
+}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/RepeatedListVector.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/RepeatedListVector.java
index 490381b..c0b6158 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/RepeatedListVector.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/RepeatedListVector.java
@@ -116,40 +116,10 @@ public class RepeatedListVector extends AbstractContainerVector
}
}
- public class DelegateTransferPair implements TransferPair {
- private final DelegateRepeatedVector target;
- private final TransferPair[] children;
+ public class DelegateTransferPair extends BaseRepeatedValueVectorTransferPair<DelegateRepeatedVector> {
public DelegateTransferPair(DelegateRepeatedVector target) {
- this.target = Preconditions.checkNotNull(target);
- if (target.getDataVector() == DEFAULT_DATA_VECTOR) {
- target.addOrGetVector(VectorDescriptor.create(getDataVector().getField()));
- target.getDataVector().allocateNew();
- }
- this.children = new TransferPair[] {
- getOffsetVector().makeTransferPair(target.getOffsetVector()),
- getDataVector().makeTransferPair(target.getDataVector())
- };
- }
-
- @Override
- public void transfer() {
- for (TransferPair child:children) {
- child.transfer();
- }
- }
-
- @Override
- public ValueVector getTo() {
- return target;
- }
-
- @Override
- public void splitAndTransfer(int startIndex, int length) {
- target.allocateNew();
- for (int i = 0; i < length; i++) {
- copyValueSafe(startIndex + i, i);
- }
+ super(target);
}
@Override
@@ -157,13 +127,7 @@ public class RepeatedListVector extends AbstractContainerVector
final RepeatedListHolder holder = new RepeatedListHolder();
getAccessor().get(srcIndex, holder);
target.emptyPopulator.populate(destIndex+1);
- final TransferPair vectorTransfer = children[1];
- int newIndex = target.getOffsetVector().getAccessor().get(destIndex);
- //todo: make this a bulk copy.
- for (int i = holder.start; i < holder.end; i++, newIndex++) {
- vectorTransfer.copyValueSafe(i, newIndex);
- }
- target.getOffsetVector().getMutator().setSafe(destIndex + 1, newIndex);
+ copyValueSafe(destIndex, holder.start, holder.end);
}
}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/RepeatedMapVector.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/RepeatedMapVector.java
index 2b00382..311fe92 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/RepeatedMapVector.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/RepeatedMapVector.java
@@ -17,34 +17,22 @@
*/
package org.apache.drill.exec.vector.complex;
-import io.netty.buffer.DrillBuf;
-
-import java.util.Iterator;
import java.util.List;
import java.util.Map;
-import java.util.Set;
-import org.apache.commons.lang3.ArrayUtils;
-import org.apache.drill.common.types.TypeProtos.DataMode;
import org.apache.drill.common.types.TypeProtos.MajorType;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.common.types.Types;
-import org.apache.drill.exec.exception.OutOfMemoryException;
-import org.apache.drill.exec.expr.BasicTypeHelper;
import org.apache.drill.exec.expr.holders.ComplexHolder;
import org.apache.drill.exec.expr.holders.RepeatedMapHolder;
+import org.apache.drill.exec.expr.holders.RepeatedValueHolder;
import org.apache.drill.exec.memory.BufferAllocator;
-import org.apache.drill.exec.memory.AllocationManager.BufferLedger;
-import org.apache.drill.exec.proto.UserBitShared.SerializedField;
import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.exec.record.TransferPair;
import org.apache.drill.exec.util.CallBack;
import org.apache.drill.exec.util.JsonStringArrayList;
-import org.apache.drill.exec.vector.AddOrGetResult;
-import org.apache.drill.exec.vector.AllocationHelper;
import org.apache.drill.exec.vector.UInt4Vector;
import org.apache.drill.exec.vector.ValueVector;
-import org.apache.drill.exec.vector.VectorDescriptor;
import org.apache.drill.exec.vector.SchemaChangeCallBack;
import org.apache.drill.exec.vector.complex.impl.NullReader;
import org.apache.drill.exec.vector.complex.impl.RepeatedMapReaderImpl;
@@ -52,122 +40,33 @@ import org.apache.drill.exec.vector.complex.reader.FieldReader;
import org.apache.drill.shaded.guava.com.google.common.collect.Maps;
-public class RepeatedMapVector extends AbstractMapVector
- implements RepeatedValueVector {
+public class RepeatedMapVector extends AbstractRepeatedMapVector {
- public final static MajorType TYPE = MajorType.newBuilder().setMinorType(MinorType.MAP).setMode(DataMode.REPEATED).build();
+ public final static MajorType TYPE = Types.repeated(MinorType.MAP);
- private final UInt4Vector offsets; // offsets to start of each record (considering record indices are 0-indexed)
- private final RepeatedMapReaderImpl reader = new RepeatedMapReaderImpl(RepeatedMapVector.this);
- private final RepeatedMapAccessor accessor = new RepeatedMapAccessor();
+ private final Accessor accessor = new Accessor();
private final Mutator mutator = new Mutator();
- private final EmptyValuePopulator emptyPopulator;
+ private final RepeatedMapReaderImpl reader = new RepeatedMapReaderImpl(this);
public RepeatedMapVector(MaterializedField field, BufferAllocator allocator, CallBack callBack) {
- this(field, new UInt4Vector(BaseRepeatedValueVector.OFFSETS_FIELD, allocator), callBack);
+ super(field, allocator, callBack);
}
public RepeatedMapVector(MaterializedField field, UInt4Vector offsets, CallBack callBack) {
- super(field, offsets.getAllocator(), callBack);
- this.offsets = offsets;
- this.emptyPopulator = new EmptyValuePopulator(offsets);
- }
-
- @Override
- public UInt4Vector getOffsetVector() { return offsets; }
-
- @Override
- public ValueVector getDataVector() {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public <T extends ValueVector> AddOrGetResult<T> addOrGetVector(VectorDescriptor descriptor) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public void setInitialCapacity(int numRecords) {
- offsets.setInitialCapacity(numRecords + 1);
- for (final ValueVector v : this) {
- v.setInitialCapacity(numRecords * RepeatedValueVector.DEFAULT_REPEAT_PER_RECORD);
- }
+ super(field, offsets, callBack);
}
@Override
public RepeatedMapReaderImpl getReader() { return reader; }
- public void allocateNew(int groupCount, int innerValueCount) {
- clear();
- try {
- allocateOffsetsNew(groupCount);
- for (ValueVector v : getChildren()) {
- AllocationHelper.allocatePrecomputedChildCount(v, groupCount, 50, innerValueCount);
- }
- } catch (OutOfMemoryException e){
- clear();
- throw e;
- }
- mutator.reset();
- }
-
- public void allocateOffsetsNew(int groupCount) {
- offsets.allocateNew(groupCount + 1);
- offsets.zeroVector();
- }
-
- public Iterator<String> fieldNameIterator() {
- return getChildFieldNames().iterator();
- }
-
- @Override
- public List<ValueVector> getPrimitiveVectors() {
- final List<ValueVector> primitiveVectors = super.getPrimitiveVectors();
- primitiveVectors.add(offsets);
- return primitiveVectors;
- }
-
- @Override
- public int getBufferSize() {
- if (getAccessor().getValueCount() == 0) {
- return 0;
- }
- return offsets.getBufferSize() + super.getBufferSize();
- }
-
- @Override
- public int getAllocatedSize() {
- return offsets.getAllocatedSize() + super.getAllocatedSize();
- }
-
- @Override
- public int getBufferSizeFor(final int valueCount) {
- if (valueCount == 0) {
- return 0;
- }
-
- long bufferSize = offsets.getBufferSizeFor(valueCount);
- for (final ValueVector v : this) {
- bufferSize += v.getBufferSizeFor(valueCount);
- }
-
- return (int) bufferSize;
- }
-
- @Override
- public void close() {
- offsets.close();
- super.close();
- }
-
@Override
public TransferPair getTransferPair(BufferAllocator allocator) {
- return new RepeatedMapTransferPair(this, getField().getName(), allocator);
+ return new RepeatedMapTransferPair(getField().getName(), allocator);
}
@Override
public TransferPair makeTransferPair(ValueVector to) {
- return new RepeatedMapTransferPair(this, (RepeatedMapVector)to);
+ return new RepeatedMapTransferPair((RepeatedMapVector) to);
}
MapSingleCopier makeSingularCopier(MapVector to) {
@@ -184,13 +83,13 @@ public class RepeatedMapVector extends AbstractMapVector
int i = 0;
ValueVector vector;
- for (final String child:from.getChildFieldNames()) {
+ for (String child:from.getChildFieldNames()) {
int preSize = to.size();
vector = from.getChild(child);
if (vector == null) {
continue;
}
- final ValueVector newVector = to.addOrGet(child, vector.getField().getType(), vector.getClass());
+ ValueVector newVector = to.addOrGet(child, vector.getField().getType(), vector.getClass());
if (to.size() != preSize) {
newVector.allocateNew();
}
@@ -205,288 +104,38 @@ public class RepeatedMapVector extends AbstractMapVector
}
}
- public TransferPair getTransferPairToSingleMap(String reference, BufferAllocator allocator) {
- return new SingleMapTransferPair(this, reference, allocator);
- }
-
@Override
public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
- return new RepeatedMapTransferPair(this, ref, allocator);
+ return new RepeatedMapTransferPair(ref, allocator);
}
- @Override
- public boolean allocateNewSafe() {
- /* boolean to keep track if all the memory allocation were successful
- * Used in the case of composite vectors when we need to allocate multiple
- * buffers for multiple vectors. If one of the allocations failed we need to
- * clear all the memory that we allocated
- */
- boolean success = false;
- try {
- if (!offsets.allocateNewSafe()) {
- return false;
- }
- success = super.allocateNewSafe();
- } finally {
- if (!success) {
- clear();
- }
- }
- offsets.zeroVector();
- return success;
- }
+ private class RepeatedMapTransferPair extends AbstractRepeatedMapTransferPair<RepeatedMapVector> {
- protected static class SingleMapTransferPair implements TransferPair {
- private final TransferPair[] pairs;
- private final RepeatedMapVector from;
- private final MapVector to;
- private static final MajorType MAP_TYPE = Types.required(MinorType.MAP);
-
- public SingleMapTransferPair(RepeatedMapVector from, String path, BufferAllocator allocator) {
- this(from, new MapVector(MaterializedField.create(path, MAP_TYPE), allocator, new SchemaChangeCallBack()), false);
+ RepeatedMapTransferPair(String path, BufferAllocator allocator) {
+ super(new RepeatedMapVector(MaterializedField.create(path, TYPE), allocator, new SchemaChangeCallBack()), false);
}
- public SingleMapTransferPair(RepeatedMapVector from, MapVector to) {
- this(from, to, true);
+ RepeatedMapTransferPair(RepeatedMapVector to) {
+ super(to);
}
- public SingleMapTransferPair(RepeatedMapVector from, MapVector to, boolean allocate) {
- this.from = from;
- this.to = to;
- this.pairs = new TransferPair[from.size()];
- int i = 0;
- ValueVector vector;
- for (final String child : from.getChildFieldNames()) {
- int preSize = to.size();
- vector = from.getChild(child);
- if (vector == null) {
- continue;
- }
- final ValueVector newVector = to.addOrGet(child, vector.getField().getType(), vector.getClass());
- if (allocate && to.size() != preSize) {
- newVector.allocateNew();
- }
- pairs[i++] = vector.makeTransferPair(newVector);
- }
- }
-
-
- @Override
- public void transfer() {
- for (TransferPair p : pairs) {
- p.transfer();
- }
- to.getMutator().setValueCount(from.getAccessor().getValueCount());
- from.clear();
- }
-
- @Override
- public ValueVector getTo() {
- return to;
- }
-
- @Override
- public void copyValueSafe(int from, int to) {
- for (TransferPair p : pairs) {
- p.copyValueSafe(from, to);
- }
- }
-
- @Override
- public void splitAndTransfer(int startIndex, int length) {
- for (TransferPair p : pairs) {
- p.splitAndTransfer(startIndex, length);
- }
- to.getMutator().setValueCount(length);
+ RepeatedMapTransferPair(RepeatedMapVector to, boolean allocate) {
+ super(to, allocate);
}
}
- private static class RepeatedMapTransferPair implements TransferPair{
-
- private final TransferPair[] pairs;
- private final RepeatedMapVector to;
- private final RepeatedMapVector from;
-
- public RepeatedMapTransferPair(RepeatedMapVector from, String path, BufferAllocator allocator) {
- this(from, new RepeatedMapVector(MaterializedField.create(path, TYPE), allocator, new SchemaChangeCallBack()), false);
- }
-
- public RepeatedMapTransferPair(RepeatedMapVector from, RepeatedMapVector to) {
- this(from, to, true);
- }
-
- public RepeatedMapTransferPair(RepeatedMapVector from, RepeatedMapVector to, boolean allocate) {
- this.from = from;
- this.to = to;
- this.pairs = new TransferPair[from.size()];
- this.to.ephPair = null;
-
- int i = 0;
- ValueVector vector;
- for (final String child : from.getChildFieldNames()) {
- final int preSize = to.size();
- vector = from.getChild(child);
- if (vector == null) {
- continue;
- }
-
- final ValueVector newVector = to.addOrGet(child, vector.getField().getType(), vector.getClass());
- if (to.size() != preSize) {
- newVector.allocateNew();
- }
-
- pairs[i++] = vector.makeTransferPair(newVector);
- }
- }
-
- @Override
- public void transfer() {
- from.offsets.transferTo(to.offsets);
- for (TransferPair p : pairs) {
- p.transfer();
- }
- from.clear();
- }
-
- @Override
- public ValueVector getTo() {
- return to;
- }
-
- @Override
- public void copyValueSafe(int srcIndex, int destIndex) {
- RepeatedMapHolder holder = new RepeatedMapHolder();
- from.getAccessor().get(srcIndex, holder);
- to.emptyPopulator.populate(destIndex + 1);
- int newIndex = to.offsets.getAccessor().get(destIndex);
- //todo: make these bulk copies
- for (int i = holder.start; i < holder.end; i++, newIndex++) {
- for (TransferPair p : pairs) {
- p.copyValueSafe(i, newIndex);
- }
- }
- to.offsets.getMutator().setSafe(destIndex + 1, newIndex);
- }
-
- @Override
- public void splitAndTransfer(final int groupStart, final int groups) {
- final UInt4Vector.Accessor a = from.offsets.getAccessor();
- final UInt4Vector.Mutator m = to.offsets.getMutator();
-
- final int startPos = a.get(groupStart);
- final int endPos = a.get(groupStart + groups);
- final int valuesToCopy = endPos - startPos;
-
- to.offsets.clear();
- to.offsets.allocateNew(groups + 1);
-
- int normalizedPos;
- for (int i = 0; i < groups + 1; i++) {
- normalizedPos = a.get(groupStart + i) - startPos;
- m.set(i, normalizedPos);
- }
-
- m.setValueCount(groups + 1);
- to.emptyPopulator.populate(groups);
-
- for (final TransferPair p : pairs) {
- p.splitAndTransfer(startPos, valuesToCopy);
- }
- }
- }
-
- transient private RepeatedMapTransferPair ephPair;
-
- public void copyFromSafe(int fromIndex, int thisIndex, RepeatedMapVector from) {
- if (ephPair == null || ephPair.from != from) {
- ephPair = (RepeatedMapTransferPair) from.makeTransferPair(this);
- }
- ephPair.copyValueSafe(fromIndex, thisIndex);
- }
-
- @Override
- public void copyEntry(int toIndex, ValueVector from, int fromIndex) {
- copyFromSafe(fromIndex, toIndex, (RepeatedMapVector) from);
- }
-
- @Override
- public int getValueCapacity() {
- return Math.max(offsets.getValueCapacity() - 1, 0);
- }
-
- @Override
- public RepeatedMapAccessor getAccessor() {
- return accessor;
- }
-
- @Override
- public void exchange(ValueVector other) {
- super.exchange(other);
- offsets.exchange(((RepeatedMapVector) other).offsets);
- }
-
- @Override
- public DrillBuf[] getBuffers(boolean clear) {
- return ArrayUtils.addAll(offsets.getBuffers(clear), super.getBuffers(clear));
- }
-
- @Override
- public void load(SerializedField metadata, DrillBuf buffer) {
- final List<SerializedField> children = metadata.getChildList();
-
- final SerializedField offsetField = children.get(0);
- offsets.load(offsetField, buffer);
- int bufOffset = offsetField.getBufferLength();
-
- for (int i = 1; i < children.size(); i++) {
- final SerializedField child = children.get(i);
- final MaterializedField fieldDef = MaterializedField.create(child);
- ValueVector vector = getChild(fieldDef.getName());
- if (vector == null) {
- // if we arrive here, we didn't have a matching vector.
- vector = BasicTypeHelper.getNewVector(fieldDef, allocator);
- putChild(fieldDef.getName(), vector);
- }
- final int vectorLength = child.getBufferLength();
- vector.load(child, buffer.slice(bufOffset, vectorLength));
- bufOffset += vectorLength;
- }
-
- assert bufOffset == buffer.writerIndex();
- }
-
- @Override
- public SerializedField getMetadata() {
- SerializedField.Builder builder = getField()
- .getAsBuilder()
- .setBufferLength(getBufferSize())
- // while we don't need to actually read this on load, we need it to
- // make sure we don't skip deserialization of this vector
- .setValueCount(accessor.getValueCount());
- builder.addChild(offsets.getMetadata());
- for (final ValueVector child : getChildren()) {
- builder.addChild(child.getMetadata());
- }
- return builder.build();
- }
-
- @Override
- public Mutator getMutator() {
- return mutator;
- }
-
- public class RepeatedMapAccessor implements RepeatedAccessor {
+ public class Accessor extends AbstractRepeatedMapVector.Accessor {
@Override
public Object getObject(int index) {
- final List<Object> list = new JsonStringArrayList<>();
- final int end = offsets.getAccessor().get(index+1);
+ List<Object> list = new JsonStringArrayList<>();
+ int end = offsets.getAccessor().get(index+1);
String fieldName;
for (int i = offsets.getAccessor().get(index); i < end; i++) {
- final Map<String, Object> vv = Maps.newLinkedHashMap();
- for (final MaterializedField field : getField().getChildren()) {
+ Map<String, Object> vv = Maps.newLinkedHashMap();
+ for (MaterializedField field : getField().getChildren()) {
if (!field.equals(BaseRepeatedValueVector.OFFSETS_FIELD)) {
fieldName = field.getName();
- final Object value = getChild(fieldName).getAccessor().getObject(i);
+ Object value = getChild(fieldName).getAccessor().getObject(i);
if (value != null) {
vv.put(fieldName, value);
}
@@ -497,54 +146,16 @@ public class RepeatedMapVector extends AbstractMapVector
return list;
}
- @Override
- public int getValueCount() {
- return Math.max(offsets.getAccessor().getValueCount() - 1, 0);
- }
-
- @Override
- public int getInnerValueCount() {
- final int valueCount = getValueCount();
- if (valueCount == 0) {
- return 0;
- }
- return offsets.getAccessor().get(valueCount);
- }
-
- @Override
- public int getInnerValueCountAt(int index) {
- return offsets.getAccessor().get(index+1) - offsets.getAccessor().get(index);
- }
-
- @Override
- public boolean isEmpty(int index) {
- return false;
- }
-
- @Override
- public boolean isNull(int index) {
- return false;
- }
-
- public void get(int index, RepeatedMapHolder holder) {
- assert index < getValueCapacity() :
- String.format("Attempted to access index %d when value capacity is %d",
- index, getValueCapacity());
- final UInt4Vector.Accessor offsetsAccessor = offsets.getAccessor();
- holder.start = offsetsAccessor.get(index);
- holder.end = offsetsAccessor.get(index + 1);
- }
-
public void get(int index, ComplexHolder holder) {
- final FieldReader reader = getReader();
+ FieldReader reader = getReader();
reader.setPosition(index);
holder.reader = reader;
}
public void get(int index, int arrayIndex, ComplexHolder holder) {
- final RepeatedMapHolder h = new RepeatedMapHolder();
+ RepeatedMapHolder h = new RepeatedMapHolder();
get(index, h);
- final int offset = h.start + arrayIndex;
+ int offset = h.start + arrayIndex;
if (offset >= h.end) {
holder.reader = NullReader.INSTANCE;
@@ -555,73 +166,21 @@ public class RepeatedMapVector extends AbstractMapVector
}
}
- public class Mutator implements RepeatedMutator {
- @Override
- public void startNewValue(int index) {
- emptyPopulator.populate(index + 1);
- offsets.getMutator().setSafe(index + 1, offsets.getAccessor().get(index));
- }
-
- @Override
- public void setValueCount(int topLevelValueCount) {
- emptyPopulator.populate(topLevelValueCount);
- offsets.getMutator().setValueCount(topLevelValueCount == 0 ? 0 : topLevelValueCount + 1);
- int childValueCount = offsets.getAccessor().get(topLevelValueCount);
- for (final ValueVector v : getChildren()) {
- v.getMutator().setValueCount(childValueCount);
- }
- }
-
- @Override
- public void reset() {}
-
- @Override
- public void generateTestData(int values) {}
-
- public int add(int index) {
- final int prevEnd = offsets.getAccessor().get(index + 1);
- offsets.getMutator().setSafe(index + 1, prevEnd + 1);
- return prevEnd;
- }
-
- @Override
- public void exchange(ValueVector.Mutator other) { }
+ public class Mutator extends AbstractRepeatedMapVector.Mutator {
}
@Override
- public void clear() {
- getMutator().reset();
-
- offsets.clear();
- for(final ValueVector vector : getChildren()) {
- vector.clear();
- }
- }
-
- @Override
- public void collectLedgers(Set<BufferLedger> ledgers) {
- super.collectLedgers(ledgers);
- offsets.collectLedgers(ledgers);
+ public Accessor getAccessor() {
+ return accessor;
}
@Override
- public void toNullable(ValueVector nullableVector) {
- throw new UnsupportedOperationException();
+ public Mutator getMutator() {
+ return mutator;
}
@Override
- public int getPayloadByteCount(int valueCount) {
- if (valueCount == 0) {
- return 0;
- }
-
- int entryCount = offsets.getAccessor().get(valueCount);
- int count = offsets.getPayloadByteCount(valueCount);
-
- for (final ValueVector v : getChildren()) {
- count += v.getPayloadByteCount(entryCount);
- }
- return count;
+ RepeatedValueHolder getValueHolder() {
+ return new RepeatedMapHolder();
}
-
}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/AbstractBaseReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/AbstractBaseReader.java
index 668a332..b050005 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/AbstractBaseReader.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/AbstractBaseReader.java
@@ -100,4 +100,9 @@ abstract class AbstractBaseReader implements FieldReader{
public void copyAsValue(ListWriter writer) {
ComplexCopier.copy(this, (FieldWriter)writer);
}
+
+ @Override
+ public String getTypeString() {
+ return getType().getMinorType().name();
+ }
}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/RepeatedMapReaderImpl.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/AbstractRepeatedMapReaderImpl.java
similarity index 58%
copy from exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/RepeatedMapReaderImpl.java
copy to exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/AbstractRepeatedMapReaderImpl.java
index 58cb4f3..e8d8276 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/RepeatedMapReaderImpl.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/AbstractRepeatedMapReaderImpl.java
@@ -17,27 +17,27 @@
*/
package org.apache.drill.exec.vector.complex.impl;
+import java.util.HashMap;
+import java.util.Iterator;
import java.util.Map;
import org.apache.drill.common.types.TypeProtos.MajorType;
import org.apache.drill.exec.expr.holders.RepeatedMapHolder;
+import org.apache.drill.exec.vector.UInt4Vector;
import org.apache.drill.exec.vector.ValueVector;
-import org.apache.drill.exec.vector.complex.RepeatedMapVector;
+import org.apache.drill.exec.vector.complex.AbstractRepeatedMapVector;
import org.apache.drill.exec.vector.complex.reader.FieldReader;
-import org.apache.drill.exec.vector.complex.writer.BaseWriter.MapWriter;
-
-import org.apache.drill.shaded.guava.com.google.common.collect.Maps;
@SuppressWarnings("unused")
-public class RepeatedMapReaderImpl extends AbstractFieldReader{
- private static final int NO_VALUES = Integer.MAX_VALUE - 1;
+public abstract class AbstractRepeatedMapReaderImpl<V extends AbstractRepeatedMapVector> extends AbstractFieldReader {
+ protected static final int NO_VALUES = Integer.MAX_VALUE - 1;
- private final RepeatedMapVector vector;
- private final Map<String, FieldReader> fields = Maps.newHashMap();
- private int currentOffset;
- private int maxOffset;
+ protected final V vector;
+ protected final Map<String, FieldReader> fields = new HashMap<>();
+ protected int currentOffset;
+ protected int maxOffset;
- public RepeatedMapReaderImpl(RepeatedMapVector vector) {
+ public AbstractRepeatedMapReaderImpl(V vector) {
this.vector = vector;
}
@@ -58,16 +58,6 @@ public class RepeatedMapReaderImpl extends AbstractFieldReader{
}
@Override
- public FieldReader reader() {
- if (isNull()) {
- return NullReader.INSTANCE;
- }
-
- setChildrenPosition(currentOffset);
- return new SingleLikeRepeatedMapReaderImpl(vector, this);
- }
-
- @Override
public void reset() {
super.reset();
currentOffset = 0;
@@ -80,7 +70,8 @@ public class RepeatedMapReaderImpl extends AbstractFieldReader{
@Override
public int size() {
- return isNull() ? 0 : maxOffset - currentOffset;
+ UInt4Vector.Accessor offsetsAccessor = vector.getOffsetVector().getAccessor();
+ return isEmpty() ? 0 : offsetsAccessor.get(idx() + 1) - offsetsAccessor.get(idx());
}
@Override
@@ -102,21 +93,6 @@ public class RepeatedMapReaderImpl extends AbstractFieldReader{
}
}
- public void setSinglePosition(int index, int childIndex) {
- super.setPosition(index);
- RepeatedMapHolder h = new RepeatedMapHolder();
- vector.getAccessor().get(index, h);
- if (h.start == h.end) {
- currentOffset = NO_VALUES;
- } else {
- int singleOffset = h.start + childIndex;
- assert singleOffset < h.end;
- currentOffset = singleOffset;
- maxOffset = singleOffset + 1;
- setChildrenPosition(singleOffset);
- }
- }
-
@Override
public boolean next() {
if (currentOffset < maxOffset) {
@@ -128,7 +104,7 @@ public class RepeatedMapReaderImpl extends AbstractFieldReader{
}
}
- public boolean isNull() {
+ public boolean isEmpty() {
return currentOffset == NO_VALUES;
}
@@ -143,7 +119,7 @@ public class RepeatedMapReaderImpl extends AbstractFieldReader{
}
@Override
- public java.util.Iterator<String> iterator() {
+ public Iterator<String> iterator() {
return vector.fieldNameIterator();
}
@@ -152,33 +128,7 @@ public class RepeatedMapReaderImpl extends AbstractFieldReader{
return true;
}
- @Override
- public void copyAsValue(MapWriter writer) {
- if (isNull()) {
- return;
- }
- RepeatedMapWriter impl = (RepeatedMapWriter) writer;
- impl.container.copyFromSafe(idx(), impl.idx(), vector);
- }
-
- public void copyAsValueSingle(MapWriter writer) {
- if (isNull()) {
- return;
- }
- SingleMapWriter impl = (SingleMapWriter) writer;
- impl.container.copyFromSafe(currentOffset, impl.idx(), vector);
- }
-
- @Override
- public void copyAsField(String name, MapWriter writer) {
- if (isNull()) {
- return;
- }
- RepeatedMapWriter impl = (RepeatedMapWriter) writer.map(name);
- impl.container.copyFromSafe(idx(), impl.idx(), vector);
- }
-
- private void setChildrenPosition(int index) {
+ void setChildrenPosition(int index) {
for (FieldReader r : fields.values()) {
r.setPosition(index);
}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/MapOrListWriterImpl.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/MapOrListWriterImpl.java
index 2cdaead..00b2595 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/MapOrListWriterImpl.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/MapOrListWriterImpl.java
@@ -88,6 +88,17 @@ public class MapOrListWriterImpl implements MapOrListWriter {
return new MapOrListWriterImpl(list.map());
}
+ @Override
+ public MapOrListWriter dict(String name) {
+ return new MapOrListWriterImpl(map != null ? map.dict(name) : list.dict());
+ }
+
+ @Override
+ public MapOrListWriter listOfDict() {
+ assert list != null;
+ return new MapOrListWriterImpl(list.dict());
+ }
+
public MapOrListWriter list(final String name) {
assert map != null;
return new MapOrListWriterImpl(map.list(name));
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/RepeatedListReaderImpl.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/RepeatedDictReaderImpl.java
similarity index 64%
copy from exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/RepeatedListReaderImpl.java
copy to exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/RepeatedDictReaderImpl.java
index a98132a..7ebac29 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/RepeatedListReaderImpl.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/RepeatedDictReaderImpl.java
@@ -17,53 +17,31 @@
*/
package org.apache.drill.exec.vector.complex.impl;
-
-import org.apache.drill.common.types.TypeProtos.MajorType;
-import org.apache.drill.common.types.TypeProtos.MinorType;
-import org.apache.drill.common.types.Types;
-import org.apache.drill.exec.expr.holders.RepeatedListHolder;
+import org.apache.drill.common.types.TypeProtos;
+import org.apache.drill.exec.expr.holders.RepeatedDictHolder;
import org.apache.drill.exec.vector.ValueVector;
-import org.apache.drill.exec.vector.complex.RepeatedListVector;
+import org.apache.drill.exec.vector.complex.RepeatedDictVector;
import org.apache.drill.exec.vector.complex.reader.FieldReader;
-import org.apache.drill.exec.vector.complex.writer.BaseWriter.ListWriter;
-import org.apache.drill.exec.vector.complex.writer.BaseWriter.MapWriter;
-
-public class RepeatedListReaderImpl extends AbstractFieldReader{
- private static final int NO_VALUES = Integer.MAX_VALUE - 1;
- private static final MajorType TYPE = Types.repeated(MinorType.LIST);
- private final String name;
- private final RepeatedListVector container;
+import org.apache.drill.exec.vector.complex.writer.BaseWriter;
+
+public class RepeatedDictReaderImpl extends AbstractFieldReader {
+
+ private static final int NO_VALUES = Integer.MIN_VALUE;
+
+ private final RepeatedDictVector container;
+
private FieldReader reader;
private int currentOffset;
private int maxOffset;
- public RepeatedListReaderImpl(String name, RepeatedListVector container) {
+ public RepeatedDictReaderImpl(RepeatedDictVector container) {
super();
- this.name = name;
this.container = container;
}
@Override
- public MajorType getType() {
- return TYPE;
- }
-
- @Override
- public void copyAsValue(ListWriter writer) {
- if (isEmpty()) {
- return;
- }
- RepeatedListWriter impl = (RepeatedListWriter) writer;
- impl.container.copyFromSafe(idx(), impl.idx(), container);
- }
-
- @Override
- public void copyAsField(String name, MapWriter writer) {
- if (isEmpty()) {
- return;
- }
- RepeatedListWriter impl = (RepeatedListWriter) writer.list(name);
- impl.container.copyFromSafe(idx(), impl.idx(), container);
+ public TypeProtos.MajorType getType() {
+ return RepeatedDictVector.TYPE;
}
@Override
@@ -84,13 +62,13 @@ public class RepeatedListReaderImpl extends AbstractFieldReader{
@Override
public void setPosition(int index) {
- if (index < 0 || index == NO_VALUES) {
+ if (index < 0) {
currentOffset = NO_VALUES;
return;
}
super.setPosition(index);
- RepeatedListHolder h = new RepeatedListHolder();
+ RepeatedDictHolder h = new RepeatedDictHolder();
container.getAccessor().get(index, h);
if (h.start == h.end) {
currentOffset = NO_VALUES;
@@ -125,7 +103,7 @@ public class RepeatedListReaderImpl extends AbstractFieldReader{
@Override
public FieldReader reader() {
if (reader == null) {
- ValueVector child = container.getChild(name);
+ ValueVector child = container.getDataVector();
if (child == null) {
reader = NullReader.INSTANCE;
} else {
@@ -140,7 +118,26 @@ public class RepeatedListReaderImpl extends AbstractFieldReader{
return currentOffset == NO_VALUES;
}
- public boolean isSet() {
- return true;
+ @Override
+ public void copyAsValue(BaseWriter.DictWriter writer) {
+ if (isEmpty()) {
+ return;
+ }
+
+ ValueVector vector;
+ int srcId;
+ if (writer instanceof RepeatedDictWriter) {
+ vector = ((RepeatedDictWriter) writer).container;
+ srcId = ((RepeatedDictWriter) writer).idx();
+ } else {
+ vector = ((SingleDictWriter) writer).container;
+ srcId = ((SingleDictWriter) writer).idx();
+ }
+ vector.copyEntry(srcId, container, idx());
+ }
+
+ @Override
+ public String getTypeString() {
+ return "ARRAY<" + reader.getTypeString() + '>';
}
}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/RepeatedListReaderImpl.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/RepeatedListReaderImpl.java
index a98132a..fded215 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/RepeatedListReaderImpl.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/RepeatedListReaderImpl.java
@@ -22,6 +22,7 @@ import org.apache.drill.common.types.TypeProtos.MajorType;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.common.types.Types;
import org.apache.drill.exec.expr.holders.RepeatedListHolder;
+import org.apache.drill.exec.vector.UInt4Vector;
import org.apache.drill.exec.vector.ValueVector;
import org.apache.drill.exec.vector.complex.RepeatedListVector;
import org.apache.drill.exec.vector.complex.reader.FieldReader;
@@ -79,7 +80,8 @@ public class RepeatedListReaderImpl extends AbstractFieldReader{
@Override
public int size() {
- return isEmpty() ? 0 : maxOffset - currentOffset;
+ UInt4Vector.Accessor offsetsAccessor = container.getOffsetVector().getAccessor();
+ return isEmpty() ? 0 : offsetsAccessor.get(idx() + 1) - offsetsAccessor.get(idx());
}
@Override
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/RepeatedMapReaderImpl.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/RepeatedMapReaderImpl.java
index 58cb4f3..5695198 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/RepeatedMapReaderImpl.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/RepeatedMapReaderImpl.java
@@ -17,49 +17,21 @@
*/
package org.apache.drill.exec.vector.complex.impl;
-import java.util.Map;
-
-import org.apache.drill.common.types.TypeProtos.MajorType;
import org.apache.drill.exec.expr.holders.RepeatedMapHolder;
-import org.apache.drill.exec.vector.ValueVector;
import org.apache.drill.exec.vector.complex.RepeatedMapVector;
import org.apache.drill.exec.vector.complex.reader.FieldReader;
import org.apache.drill.exec.vector.complex.writer.BaseWriter.MapWriter;
-import org.apache.drill.shaded.guava.com.google.common.collect.Maps;
-
@SuppressWarnings("unused")
-public class RepeatedMapReaderImpl extends AbstractFieldReader{
- private static final int NO_VALUES = Integer.MAX_VALUE - 1;
-
- private final RepeatedMapVector vector;
- private final Map<String, FieldReader> fields = Maps.newHashMap();
- private int currentOffset;
- private int maxOffset;
+public class RepeatedMapReaderImpl extends AbstractRepeatedMapReaderImpl<RepeatedMapVector> {
public RepeatedMapReaderImpl(RepeatedMapVector vector) {
- this.vector = vector;
- }
-
- @Override
- public FieldReader reader(String name) {
- FieldReader reader = fields.get(name);
- if (reader == null) {
- ValueVector child = vector.getChild(name);
- if (child == null) {
- reader = NullReader.INSTANCE;
- } else {
- reader = child.getReader();
- }
- fields.put(name, reader);
- reader.setPosition(currentOffset);
- }
- return reader;
+ super(vector);
}
@Override
public FieldReader reader() {
- if (isNull()) {
+ if (isEmpty()) {
return NullReader.INSTANCE;
}
@@ -67,41 +39,6 @@ public class RepeatedMapReaderImpl extends AbstractFieldReader{
return new SingleLikeRepeatedMapReaderImpl(vector, this);
}
- @Override
- public void reset() {
- super.reset();
- currentOffset = 0;
- maxOffset = 0;
- for (FieldReader reader:fields.values()) {
- reader.reset();
- }
- fields.clear();
- }
-
- @Override
- public int size() {
- return isNull() ? 0 : maxOffset - currentOffset;
- }
-
- @Override
- public void setPosition(int index) {
- if (index < 0 || index == NO_VALUES) {
- currentOffset = NO_VALUES;
- return;
- }
-
- super.setPosition(index);
- RepeatedMapHolder h = new RepeatedMapHolder();
- vector.getAccessor().get(index, h);
- if (h.start == h.end) {
- currentOffset = NO_VALUES;
- } else {
- currentOffset = h.start - 1;
- maxOffset = h.end - 1;
- setChildrenPosition(currentOffset);
- }
- }
-
public void setSinglePosition(int index, int childIndex) {
super.setPosition(index);
RepeatedMapHolder h = new RepeatedMapHolder();
@@ -118,43 +55,8 @@ public class RepeatedMapReaderImpl extends AbstractFieldReader{
}
@Override
- public boolean next() {
- if (currentOffset < maxOffset) {
- setChildrenPosition(++currentOffset);
- return true;
- } else {
- currentOffset = NO_VALUES;
- return false;
- }
- }
-
- public boolean isNull() {
- return currentOffset == NO_VALUES;
- }
-
- @Override
- public Object readObject() {
- return vector.getAccessor().getObject(idx());
- }
-
- @Override
- public MajorType getType() {
- return vector.getField().getType();
- }
-
- @Override
- public java.util.Iterator<String> iterator() {
- return vector.fieldNameIterator();
- }
-
- @Override
- public boolean isSet() {
- return true;
- }
-
- @Override
public void copyAsValue(MapWriter writer) {
- if (isNull()) {
+ if (isEmpty()) {
return;
}
RepeatedMapWriter impl = (RepeatedMapWriter) writer;
@@ -162,7 +64,7 @@ public class RepeatedMapReaderImpl extends AbstractFieldReader{
}
public void copyAsValueSingle(MapWriter writer) {
- if (isNull()) {
+ if (isEmpty()) {
return;
}
SingleMapWriter impl = (SingleMapWriter) writer;
@@ -171,16 +73,10 @@ public class RepeatedMapReaderImpl extends AbstractFieldReader{
@Override
public void copyAsField(String name, MapWriter writer) {
- if (isNull()) {
+ if (isEmpty()) {
return;
}
RepeatedMapWriter impl = (RepeatedMapWriter) writer.map(name);
impl.container.copyFromSafe(idx(), impl.idx(), vector);
}
-
- private void setChildrenPosition(int index) {
- for (FieldReader r : fields.values()) {
- r.setPosition(index);
- }
- }
}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/RepeatedMapWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/RepeatedMapWriter.java
new file mode 100644
index 0000000..91cec9c
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/RepeatedMapWriter.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.complex.impl;
+
+import org.apache.drill.exec.expr.holders.RepeatedMapHolder;
+import org.apache.drill.exec.vector.complex.RepeatedMapVector;
+import org.apache.drill.exec.vector.complex.writer.FieldWriter;
+
+public class RepeatedMapWriter extends AbstractRepeatedMapWriter<RepeatedMapVector> {
+
+ public RepeatedMapWriter(RepeatedMapVector container, FieldWriter parent, boolean unionEnabled) {
+ super(container, parent, unionEnabled);
+ }
+
+ public RepeatedMapWriter(RepeatedMapVector container, FieldWriter parent) {
+ this(container, parent, false);
+ }
+
+ @Override
+ public void start() {
+ // update the repeated vector to state that there are (current + 1) objects.
+
+ // Make sure that the current vector can support the end position of this list.
+ if (container.getValueCapacity() <= idx()) {
+ container.getMutator().setValueCount(idx() + 1);
+ }
+
+ RepeatedMapHolder h = new RepeatedMapHolder();
+ container.getAccessor().get(idx(), h);
+ if (h.start >= h.end) {
+ container.getMutator().startNewValue(idx());
+ }
+ currentChildIndex = container.getMutator().add(idx());
+ for (FieldWriter w : fields.values()) {
+ w.setPosition(currentChildIndex);
+ }
+ }
+
+ @Override
+ public void end() {
+ // noop
+ }
+}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/SingleDictReaderImpl.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/SingleDictReaderImpl.java
new file mode 100644
index 0000000..45fa420
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/SingleDictReaderImpl.java
@@ -0,0 +1,185 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.complex.impl;
+
+import org.apache.drill.common.types.TypeProtos;
+import org.apache.drill.exec.expr.holders.ValueHolder;
+import org.apache.drill.exec.util.Text;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.complex.DictVector;
+import org.apache.drill.exec.vector.complex.reader.BaseReader.DictReader;
+import org.apache.drill.exec.vector.complex.reader.FieldReader;
+import org.apache.drill.exec.vector.complex.writer.BaseWriter.ListWriter;
+import org.apache.drill.exec.vector.complex.writer.BaseWriter.DictWriter;
+import org.apache.drill.exec.vector.complex.writer.FieldWriter;
+
+import java.math.BigDecimal;
+
+public class SingleDictReaderImpl extends AbstractRepeatedMapReaderImpl<DictVector> implements DictReader {
+
+ private static final int NOT_FOUND = -1;
+
+ public SingleDictReaderImpl(DictVector vector) {
+ super(vector);
+ }
+
+ @Override
+ public FieldReader reader(String name){
+ assert DictVector.fieldNames.contains(name);
+ return super.reader(name);
+ }
+
+ @Override
+ public int find(String key) {
+ Object typifiedKey = getAppropriateKey(key);
+ return find(typifiedKey);
+ }
+
+ @Override
+ public int find(int key) {
+ Object typifiedKey = getAppropriateKey(key);
+ return find(typifiedKey);
+ }
+
+ private int find(Object key) {
+ int start = vector.getOffsetVector().getAccessor().get(idx());
+ int end = vector.getOffsetVector().getAccessor().get(idx() + 1);
+ int index = NOT_FOUND;
+ ValueVector keys = vector.getKeys();
+
+ // start from the end to ensure the most recent value for a key is found (in case the key is not unique)
+ for (int i = end - 1; i >= start; i--) {
+ Object keyValue = keys.getAccessor().getObject(i);
+ if (keyValue.equals(key)) {
+ index = i;
+ break;
+ }
+ }
+
+ return index;
+ }
+
+ private Object getAppropriateKey(int key) {
+ TypeProtos.MajorType keyType = vector.getKeyType();
+ switch (keyType.getMinorType()) {
+ case SMALLINT:
+ return (short) key;
+ case INT:
+ return key;
+ case BIGINT:
+ return (long) key;
+ case FLOAT4:
+ return (float) key;
+ case FLOAT8:
+ return (double) key;
+ case VARDECIMAL:
+ return BigDecimal.valueOf(key);
+ case BIT:
+ return key != 0;
+ default:
+ String message = String.format("Unknown value %d for key of type %s", key, keyType.getMinorType().toString());
+ throw new IllegalArgumentException(message);
+ }
+ }
+
+ private Object getAppropriateKey(String key) {
+ TypeProtos.MajorType keyType = vector.getKeyType();
+ switch (keyType.getMinorType()) {
+ case VARCHAR:
+ case VARBINARY:
+ return new Text(key);
+ case BIT:
+ return Boolean.valueOf(key);
+ case SMALLINT:
+ return Short.valueOf(key);
+ case INT:
+ return Integer.valueOf(key);
+ case BIGINT:
+ return Long.valueOf(key);
+ case FLOAT4:
+ return Float.valueOf(key);
+ case FLOAT8:
+ return Double.valueOf(key);
+ default:
+ String message = String.format("Unknown value %s for key of type %s", key, keyType.getMinorType().toString());
+ throw new IllegalArgumentException(message);
+ }
+ }
+
+ @Override
+ public void read(String key, ValueHolder holder) {
+ Object typifiedKey = getAppropriateKey(key);
+ read(typifiedKey, holder);
+ }
+
+ @Override
+ public void read(int key, ValueHolder holder) {
+ Object typifiedKey = getAppropriateKey(key);
+ read(typifiedKey, holder);
+ }
+
+ private void read(Object key, ValueHolder holder) {
+ if (isEmpty()) {
+ return;
+ }
+
+ int index = find(key);
+ FieldReader valueReader = reader(DictVector.FIELD_VALUE_NAME);
+ valueReader.setPosition(index);
+ if (index != NOT_FOUND) {
+ valueReader.read(holder);
+ }
+ }
+
+ @Override
+ public void setPosition(int index) {
+ if (index == NOT_FOUND) {
+ for (FieldReader reader : fields.values()) {
+ reader.setPosition(index);
+ }
+ }
+ super.setPosition(index);
+ }
+
+ @Override
+ public void copyAsValue(DictWriter writer) {
+ if (isEmpty()) {
+ return;
+ }
+ ComplexCopier.copy(this, (FieldWriter) writer);
+ }
+
+ @Override
+ public void copyAsValue(ListWriter writer) {
+ ComplexCopier.copy(this, (FieldWriter) writer.dict());
+ }
+
+ @Override
+ public String getTypeString() {
+ StringBuilder sb = new StringBuilder(super.getTypeString());
+ // child readers may be empty so vector is used instead to get key and value type
+ if (vector.getKeyType() != null && vector.getValueType() != null) {
+ sb.append('<')
+ .append(vector.getKeyType().getMinorType().name())
+ .append(',')
+ .append(vector.getValueType().getMinorType().name())
+ .append('>');
+ }
+ return sb.toString();
+ }
+}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/SingleDictWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/SingleDictWriter.java
new file mode 100644
index 0000000..6ef2972
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/SingleDictWriter.java
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.complex.impl;
+
+import org.apache.drill.common.exceptions.DrillRuntimeException;
+import org.apache.drill.exec.expr.holders.DictHolder;
+import org.apache.drill.exec.vector.complex.DictVector;
+import org.apache.drill.exec.vector.complex.writer.BaseWriter;
+import org.apache.drill.exec.vector.complex.writer.FieldWriter;
+
+public class SingleDictWriter extends AbstractRepeatedMapWriter<DictVector> implements BaseWriter.DictWriter {
+
+ private boolean mapStarted;
+
+ public SingleDictWriter(DictVector container, FieldWriter parent, boolean unionEnabled) {
+ super(container, parent, unionEnabled);
+ }
+
+ public SingleDictWriter(DictVector container, FieldWriter parent) {
+ this(container, parent, false);
+ }
+
+ @Override
+ public void start() {
+ assert !mapStarted : "Map should not be started";
+
+ // Make sure that the current vector can support the end position of this list.
+ if (container.getValueCapacity() <= idx()) {
+ container.getMutator().setValueCount(idx() + 1);
+ }
+
+ DictHolder h = new DictHolder();
+ container.getAccessor().get(idx(), h);
+ if (h.start >= h.end) {
+ container.getMutator().startNewValue(idx());
+ }
+
+ mapStarted = true;
+ }
+
+ @Override
+ public void end() {
+ checkStarted();
+ mapStarted = false;
+ }
+
+ @Override
+ public void startKeyValuePair() {
+ checkStarted();
+ currentChildIndex = container.getMutator().add(idx());
+ for (FieldWriter w : fields.values()) {
+ w.setPosition(currentChildIndex);
+ }
+ }
+
+ @Override
+ public void endKeyValuePair() {
+ checkStarted();
+ // Check whether key was written
+ if (container.getKeys().getAccessor().getValueCount() == currentChildIndex) {
+ throw new DrillRuntimeException("Key in DICT cannot be null. Index: " + idx());
+ }
+ // If value was not written, write it as null explicitly if supported
+ if (container.getValues() != null && container.isValueNullable()
+ && container.getValues().getAccessor().getValueCount() == currentChildIndex) {
+ ((AbstractFieldWriter) getValueWriter()).writeNull();
+ }
+ }
+
+ @Override
+ public FieldWriter getKeyWriter() {
+ return fields.get(DictVector.FIELD_KEY_NAME);
+ }
+
+ @Override
+ public FieldWriter getValueWriter() {
+ return fields.get(DictVector.FIELD_VALUE_NAME);
+ }
+
+ private void checkStarted() {
+ assert mapStarted : "Must start map (startRow()) before";
+ }
+}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/SingleLikeRepeatedMapReaderImpl.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/SingleLikeRepeatedMapReaderImpl.java
index 9b3cf27..df8ee58 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/SingleLikeRepeatedMapReaderImpl.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/SingleLikeRepeatedMapReaderImpl.java
@@ -81,7 +81,7 @@ public class SingleLikeRepeatedMapReaderImpl extends AbstractFieldReader{
@Override
public boolean isSet() {
- return ! delegate.isNull();
+ return !delegate.isEmpty();
}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/reader/FieldReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/reader/FieldReader.java
index de165d0..4dacb99 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/reader/FieldReader.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/reader/FieldReader.java
@@ -22,7 +22,19 @@ import org.apache.drill.exec.vector.complex.reader.BaseReader.ListReader;
import org.apache.drill.exec.vector.complex.reader.BaseReader.MapReader;
import org.apache.drill.exec.vector.complex.reader.BaseReader.RepeatedListReader;
import org.apache.drill.exec.vector.complex.reader.BaseReader.RepeatedMapReader;
+import org.apache.drill.exec.vector.complex.reader.BaseReader.DictReader;
import org.apache.drill.exec.vector.complex.reader.BaseReader.ScalarReader;
-public interface FieldReader extends MapReader, ListReader, ScalarReader, RepeatedMapReader, RepeatedListReader, UntypedReader {
+public interface FieldReader extends MapReader, DictReader, ListReader, ScalarReader, RepeatedMapReader, RepeatedListReader, UntypedReader {
+
+ /**
+ * Returns {@code String} representation of the reader's type. In case if {@link #getType()} is primitive,
+ * the method is equivalent to {@link #getType().getMinorType().name()}. If the reader has minor type equal to
+ * {@link org.apache.drill.common.types.TypeProtos.MinorType#DICT}, {@code DICT<keyMinorType,valueMinorType>},
+ * with {@code keyMinorType} and {@code valueMinorType} being key's and value's minor types respectively,
+ * will be returned. Used in {@code typeOf} UDF.
+ *
+ * @return {@code String} representation of reader's type.
+ */
+ String getTypeString();
}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/writer/FieldWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/writer/FieldWriter.java
index 62012d0..b2e771d 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/writer/FieldWriter.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/writer/FieldWriter.java
@@ -20,8 +20,9 @@ package org.apache.drill.exec.vector.complex.writer;
import org.apache.drill.exec.vector.complex.writer.BaseWriter.ListWriter;
import org.apache.drill.exec.vector.complex.writer.BaseWriter.MapWriter;
import org.apache.drill.exec.vector.complex.writer.BaseWriter.ScalarWriter;
+import org.apache.drill.exec.vector.complex.writer.BaseWriter.DictWriter;
-public interface FieldWriter extends MapWriter, ListWriter, ScalarWriter {
+public interface FieldWriter extends MapWriter, ListWriter, ScalarWriter, DictWriter {
void allocate();
void clear();
}
diff --git a/logical/src/main/java/org/apache/drill/common/expression/SchemaPath.java b/logical/src/main/java/org/apache/drill/common/expression/SchemaPath.java
index 573a492..4943b58 100644
--- a/logical/src/main/java/org/apache/drill/common/expression/SchemaPath.java
+++ b/logical/src/main/java/org/apache/drill/common/expression/SchemaPath.java
@@ -65,11 +65,26 @@ public class SchemaPath extends LogicalExpressionBase {
return getCompoundPath(name);
}
- public static SchemaPath getCompoundPath(String... strings) {
+ public static SchemaPath getCompoundPath(String... path) {
+ return getCompoundPath(path.length, path);
+ }
+
+ /**
+ * Constructs {@code SchemaPath} based on given {@code path} array up to the {@literal n}th element (inclusive).
+ *
+ * Example: for case when {@code n = 2} and {@code path = {"a", "b", "c", "d", "e", ...}}
+ * the method returns {@code a.b}
+ *
+ * @param n number of elements in {@literal path} array to take when constructing {@code SchemaPath}
+ * @param path column path used to construct schema path
+ * @return schema path containing {@literal n - 1} children
+ */
+ public static SchemaPath getCompoundPath(int n, String... path) {
+ Preconditions.checkArgument(n > 0);
NameSegment s = null;
// loop through strings in reverse order
- for (int i = strings.length - 1; i >= 0; i--) {
- s = new NameSegment(strings[i], s);
+ for (int i = n - 1; i >= 0; i--) {
+ s = new NameSegment(path[i], s);
}
return new SchemaPath(s);
}
diff --git a/logical/src/test/java/org/apache/drill/common/expression/SchemaPathTest.java b/logical/src/test/java/org/apache/drill/common/expression/SchemaPathTest.java
index 8d78ff8..6a820ef 100644
--- a/logical/src/test/java/org/apache/drill/common/expression/SchemaPathTest.java
+++ b/logical/src/test/java/org/apache/drill/common/expression/SchemaPathTest.java
@@ -50,5 +50,10 @@ public class SchemaPathTest {
assertEquals("Schema path should match", SchemaPath.parseFromString("`a`.`b`.`c`.`d`"), schemaPath.getUnIndexed());
}
+ @Test
+ public void testCompoundPathN() {
+ SchemaPath schemaPath = SchemaPath.getCompoundPath(3, "a", "b", "c", "d", "e");
+ assertEquals("Schema path should match", SchemaPath.getCompoundPath("a", "b", "c"), schemaPath);
+ }
}
diff --git a/metastore/metastore-api/src/main/java/org/apache/drill/metastore/util/SchemaPathUtils.java b/metastore/metastore-api/src/main/java/org/apache/drill/metastore/util/SchemaPathUtils.java
index e0dc922..3f3e40b 100644
--- a/metastore/metastore-api/src/main/java/org/apache/drill/metastore/util/SchemaPathUtils.java
+++ b/metastore/metastore-api/src/main/java/org/apache/drill/metastore/util/SchemaPathUtils.java
@@ -27,6 +27,10 @@ import org.apache.drill.exec.record.metadata.MetadataUtils;
import org.apache.drill.exec.record.metadata.PrimitiveColumnMetadata;
import org.apache.drill.exec.record.metadata.TupleMetadata;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
public class SchemaPathUtils {
private SchemaPathUtils() {
@@ -44,6 +48,11 @@ public class SchemaPathUtils {
PathSegment.NameSegment colPath = schemaPath.getUnIndexed().getRootSegment();
ColumnMetadata colMetadata = schema.metadata(colPath.getPath());
while (!colPath.isLastPath() && colMetadata != null) {
+ if (colMetadata.isDict()) {
+ // get dict's value field metadata
+ colMetadata = colMetadata.mapSchema().metadata(0).mapSchema().metadata(1);
+ break;
+ }
if (!colMetadata.isMap()) {
colMetadata = null;
break;
@@ -54,7 +63,6 @@ public class SchemaPathUtils {
return colMetadata;
}
-
/**
* Adds column with specified schema path and type into specified {@code TupleMetadata schema}.
* For the case when specified {@link SchemaPath} has children, corresponding maps will be created
@@ -63,19 +71,27 @@ public class SchemaPathUtils {
* @param schema tuple schema where column should be added
* @param schemaPath schema path of the column which should be added
* @param type type of the column which should be added
+ * @param types list of column's parent types
*/
- public static void addColumnMetadata(TupleMetadata schema, SchemaPath schemaPath, TypeProtos.MajorType type) {
+ public static void addColumnMetadata(TupleMetadata schema, SchemaPath schemaPath, TypeProtos.MajorType type, Map<SchemaPath, TypeProtos.MajorType> types) {
PathSegment.NameSegment colPath = schemaPath.getUnIndexed().getRootSegment();
+ List<String> names = new ArrayList<>(types.size());
ColumnMetadata colMetadata;
-
while (!colPath.isLastPath()) {
+ names.add(colPath.getPath());
colMetadata = schema.metadata(colPath.getPath());
+ TypeProtos.MajorType pathType = types.get(SchemaPath.getCompoundPath(names.toArray(new String[0])));
if (colMetadata == null) {
- colMetadata = MetadataUtils.newMap(colPath.getPath(), null);
+ if (pathType != null && pathType.getMinorType() == TypeProtos.MinorType.DICT) {
+ colMetadata = MetadataUtils.newDict(colPath.getPath(), null);
+ } else {
+ colMetadata = MetadataUtils.newMap(colPath.getPath(), null);
+ }
schema.addColumn(colMetadata);
}
- if (!colMetadata.isMap()) {
- throw new DrillRuntimeException(String.format("Expected map, but was %s", colMetadata.majorType()));
+
+ if (!colMetadata.isMap() && !colMetadata.isDict()) {
+ throw new DrillRuntimeException(String.format("Expected map or dict, but was %s", colMetadata.majorType()));
}
schema = colMetadata.mapSchema();
diff --git a/protocol/src/main/java/org/apache/drill/common/types/TypeProtos.java b/protocol/src/main/java/org/apache/drill/common/types/TypeProtos.java
index 8bbe28b..73c622a 100644
--- a/protocol/src/main/java/org/apache/drill/common/types/TypeProtos.java
+++ b/protocol/src/main/java/org/apache/drill/common/types/TypeProtos.java
@@ -335,6 +335,10 @@ public final class TypeProtos {
* <code>VARDECIMAL = 43;</code>
*/
VARDECIMAL(43),
+ /**
+ * <code>DICT = 44;</code>
+ */
+ DICT(44),
;
/**
@@ -636,6 +640,10 @@ public final class TypeProtos {
* <code>VARDECIMAL = 43;</code>
*/
public static final int VARDECIMAL_VALUE = 43;
+ /**
+ * <code>DICT = 44;</code>
+ */
+ public static final int DICT_VALUE = 44;
public final int getNumber() {
@@ -691,6 +699,7 @@ public final class TypeProtos {
case 41: return GENERIC_OBJECT;
case 42: return UNION;
case 43: return VARDECIMAL;
+ case 44: return DICT;
default: return null;
}
}
@@ -2188,7 +2197,7 @@ public final class TypeProtos {
"de\030\002 \001(\0162\020.common.DataMode\022\r\n\005width\030\003 \001(" +
"\005\022\021\n\tprecision\030\004 \001(\005\022\r\n\005scale\030\005 \001(\005\022\020\n\010t" +
"imeZone\030\006 \001(\005\022#\n\010sub_type\030\007 \003(\0162\021.common" +
- ".MinorType*\245\004\n\tMinorType\022\010\n\004LATE\020\000\022\007\n\003MA" +
+ ".MinorType*\257\004\n\tMinorType\022\010\n\004LATE\020\000\022\007\n\003MA" +
"P\020\001\022\013\n\007TINYINT\020\003\022\014\n\010SMALLINT\020\004\022\007\n\003INT\020\005\022" +
"\n\n\006BIGINT\020\006\022\014\n\010DECIMAL9\020\007\022\r\n\tDECIMAL18\020\010" +
"\022\023\n\017DECIMAL28SPARSE\020\t\022\023\n\017DECIMAL38SPARSE" +
@@ -2202,9 +2211,9 @@ public final class TypeProtos {
"\022\022\n\016DECIMAL38DENSE\020\"\022\010\n\004NULL\020%\022\020\n\014INTERV" +
"ALYEAR\020&\022\017\n\013INTERVALDAY\020\'\022\010\n\004LIST\020(\022\022\n\016G" +
"ENERIC_OBJECT\020)\022\t\n\005UNION\020*\022\016\n\nVARDECIMAL" +
- "\020+*4\n\010DataMode\022\014\n\010OPTIONAL\020\000\022\014\n\010REQUIRED" +
- "\020\001\022\014\n\010REPEATED\020\002B-\n\035org.apache.drill.com" +
- "mon.typesB\nTypeProtosH\001"
+ "\020+\022\010\n\004DICT\020,*4\n\010DataMode\022\014\n\010OPTIONAL\020\000\022\014" +
+ "\n\010REQUIRED\020\001\022\014\n\010REPEATED\020\002B-\n\035org.apache" +
+ ".drill.common.typesB\nTypeProtosH\001"
};
com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner =
new com.google.protobuf.Descriptors.FileDescriptor. InternalDescriptorAssigner() {
diff --git a/protocol/src/main/protobuf/Types.proto b/protocol/src/main/protobuf/Types.proto
index 6e4b66e..5e819c1 100644
--- a/protocol/src/main/protobuf/Types.proto
+++ b/protocol/src/main/protobuf/Types.proto
@@ -66,6 +66,7 @@ enum MinorType {
GENERIC_OBJECT = 41;
UNION = 42;
VARDECIMAL = 43; // variable width decimal (arbitrary precision)
+ DICT = 44;
}
message MajorType {