Posted to commits@hive.apache.org by mm...@apache.org on 2018/08/03 04:04:48 UTC
[1/2] hive git commit: HIVE-20294: Vectorization: Fix NULL / Wrong Results issues in COALESCE / ELT (Matt McCline, reviewed by Teddy Choi)
Repository: hive
Updated Branches:
refs/heads/master e9896bfd7 -> 66ea3266c
http://git-wip-us.apache.org/repos/asf/hive/blob/66ea3266/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCoalesceElt.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCoalesceElt.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCoalesceElt.java
new file mode 100644
index 0000000..0bca490
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCoalesceElt.java
@@ -0,0 +1,502 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Random;
+import java.util.Set;
+import java.util.stream.IntStream;
+
+import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow;
+import org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource;
+import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource;
+import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.SupportedTypes;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
+import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.GenerationSpec;
+import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCoalesce;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFElt;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+
+import junit.framework.Assert;
+
+import org.junit.Ignore;
+import org.junit.Test;
+
+public class TestVectorCoalesceElt {
+
+ @Test
+ public void testCoalesce() throws Exception {
+ Random random = new Random(5371);
+
+    doCoalesceElt(random, /* isCoalesce */ true, /* isEltIndexConst */ false);
+ }
+
+ @Test
+ public void testElt() throws Exception {
+ Random random = new Random(5371);
+
+ // Grind through a few more index values...
+ for (int i = 0; i < 4; i++) {
+      doCoalesceElt(random, /* isCoalesce */ false, /* isEltIndexConst */ false);
+      doCoalesceElt(random, /* isCoalesce */ false, /* isEltIndexConst */ true);
+ }
+ }
+
+ public enum CoalesceEltTestMode {
+ ROW_MODE,
+ ADAPTOR,
+ VECTOR_EXPRESSION;
+
+ static final int count = values().length;
+ }
+
+ private void doCoalesceElt(Random random, boolean isCoalesce, boolean isEltIndexConst)
+ throws Exception {
+
+ doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 2,
+ /* constantColumns */ null, /* nullConstantColumns */ null, /* allowNulls */ true);
+ doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 2,
+ /* constantColumns */ null, /* nullConstantColumns */ null, /* allowNulls */ false);
+
+ doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 3,
+ /* constantColumns */ null, /* nullConstantColumns */ null, /* allowNulls */ true);
+ doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 3,
+ new int[] { 0 }, /* nullConstantColumns */ null, /* allowNulls */ true);
+ doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 3,
+ new int[] { 0 }, /* nullConstantColumns */ new int[] { 0 }, /* allowNulls */ true);
+ doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 3,
+ new int[] { 1 }, /* nullConstantColumns */ null, /* allowNulls */ true);
+ doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 3,
+ new int[] { 1 }, /* nullConstantColumns */ new int[] { 1 }, /* allowNulls */ true);
+ doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 3,
+ new int[] { 0, 2 }, /* nullConstantColumns */ null, /* allowNulls */ true);
+ doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 3,
+ new int[] { 0, 2 }, /* nullConstantColumns */ new int[] { 0 }, /* allowNulls */ true);
+ doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 3,
+ new int[] { 0, 2 }, /* nullConstantColumns */ new int[] { 0, 2 }, /* allowNulls */ false);
+
+ doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 4,
+ /* constantColumns */ null, /* nullConstantColumns */ null, /* allowNulls */ true);
+ doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 4,
+ /* constantColumns */ null, /* nullConstantColumns */ null, /* allowNulls */ false);
+ doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 4,
+ new int[] { 0, 1, 2 }, /* nullConstantColumns */ new int[] { 0, 1, 2 }, /* allowNulls */ true);
+ doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 4,
+ new int[] { 0, 1, 2 }, /* nullConstantColumns */ new int[] { 0, 1, 2 }, /* allowNulls */ false);
+ }
+
+ private boolean contains(int[] columns, int column) {
+ if (columns == null) {
+ return false;
+ }
+ for (int i = 0; i < columns.length; i++) {
+ if (columns[i] == column) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ private boolean doCoalesceOnRandomDataType(Random random,
+ boolean isCoalesce, boolean isEltIndexConst, int columnCount,
+ int[] constantColumns, int[] nullConstantColumns, boolean allowNulls)
+ throws Exception {
+
+ String typeName;
+ if (isCoalesce) {
+ typeName =
+ VectorRandomRowSource.getRandomTypeName(
+ random, SupportedTypes.PRIMITIVES, /* allowedTypeNameSet */ null);
+ typeName =
+ VectorRandomRowSource.getDecoratedTypeName(
+ random, typeName, SupportedTypes.PRIMITIVES, /* allowedTypeNameSet */ null,
+ /* depth */ 0, /* maxDepth */ 2);
+ } else {
+    // ELT only chooses between STRINGs.
+ typeName = "string";
+ }
+
+ TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName);
+
+ //----------------------------------------------------------------------------------------------
+
+ final TypeInfo intTypeInfo;
+ ObjectInspector intObjectInspector;
+ if (isCoalesce) {
+ intTypeInfo = null;
+ intObjectInspector = null;
+ } else {
+ intTypeInfo = TypeInfoFactory.intTypeInfo;
+ intObjectInspector =
+ TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(
+ intTypeInfo);
+ }
+
+ ObjectInspector objectInspector =
+ TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(
+ typeInfo);
+
+ //----------------------------------------------------------------------------------------------
+
+ List<GenerationSpec> generationSpecList = new ArrayList<GenerationSpec>();
+ List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList =
+ new ArrayList<DataTypePhysicalVariation>();
+ List<String> columns = new ArrayList<String>();
+ List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
+
+ int columnNum = 1;
+ if (!isCoalesce) {
+
+ List<Object> intValueList = new ArrayList<Object>();
+ for (int i = -1; i < columnCount + 2; i++) {
+ intValueList.add(i);
+ }
+ final int intValueListCount = intValueList.size();
+ ExprNodeDesc colExpr;
+ if (!isEltIndexConst) {
+ generationSpecList.add(
+ GenerationSpec.createValueList(intTypeInfo, intValueList));
+ explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE);
+ String columnName = "col" + columnNum++;
+ columns.add(columnName);
+ colExpr = new ExprNodeColumnDesc(intTypeInfo, columnName, "table", false);
+ } else {
+ final Object scalarObject;
+ if (random.nextInt(10) != 0) {
+ scalarObject = intValueList.get(random.nextInt(intValueListCount));
+ } else {
+ scalarObject = null;
+ }
+        colExpr = new ExprNodeConstantDesc(intTypeInfo, scalarObject);
+      }
+      children.add(colExpr);
+    }
+ for (int c = 0; c < columnCount; c++) {
+ ExprNodeDesc colExpr;
+ if (!contains(constantColumns, c)) {
+
+ generationSpecList.add(
+ GenerationSpec.createSameType(typeInfo));
+ explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE);
+ String columnName = "col" + columnNum++;
+ columns.add(columnName);
+ colExpr = new ExprNodeColumnDesc(typeInfo, columnName, "table", false);
+ } else {
+ final Object scalarObject;
+ if (!contains(nullConstantColumns, c)) {
+ scalarObject =
+ VectorRandomRowSource.randomPrimitiveObject(
+ random, (PrimitiveTypeInfo) typeInfo);
+ } else {
+ scalarObject = null;
+ }
+ colExpr = new ExprNodeConstantDesc(typeInfo, scalarObject);
+ }
+ children.add(colExpr);
+ }
+
+ VectorRandomRowSource rowSource = new VectorRandomRowSource();
+
+ rowSource.initGenerationSpecSchema(
+ random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ allowNulls,
+ explicitDataTypePhysicalVariationList);
+
+ String[] columnNames = columns.toArray(new String[0]);
+
+ Object[][] randomRows = rowSource.randomRows(100000);
+
+ VectorRandomBatchSource batchSource =
+ VectorRandomBatchSource.createInterestingBatches(
+ random,
+ rowSource,
+ randomRows,
+ null);
+
+ final GenericUDF udf =
+ (isCoalesce ? new GenericUDFCoalesce() : new GenericUDFElt());
+
+ final int start = isCoalesce ? 0 : 1;
+ final int end = start + columnCount;
+ ObjectInspector[] argumentOIs =
+ new ObjectInspector[end];
+ if (!isCoalesce) {
+ argumentOIs[0] = intObjectInspector;
+ }
+ for (int i = start; i < end; i++) {
+ argumentOIs[i] = objectInspector;
+ }
+ final ObjectInspector outputObjectInspector = udf.initialize(argumentOIs);
+
+ TypeInfo outputTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(outputObjectInspector);
+
+ ExprNodeGenericFuncDesc exprDesc =
+ new ExprNodeGenericFuncDesc(typeInfo, udf, children);
+
+ final int rowCount = randomRows.length;
+ Object[][] resultObjectsArray = new Object[CoalesceEltTestMode.count][];
+ for (int i = 0; i < CoalesceEltTestMode.count; i++) {
+
+ Object[] resultObjects = new Object[rowCount];
+ resultObjectsArray[i] = resultObjects;
+
+ CoalesceEltTestMode coalesceEltTestMode = CoalesceEltTestMode.values()[i];
+ switch (coalesceEltTestMode) {
+ case ROW_MODE:
+ if (!doRowCastTest(
+ typeInfo,
+ columns,
+ children,
+ udf, exprDesc,
+ randomRows,
+ rowSource.rowStructObjectInspector(),
+ exprDesc.getWritableObjectInspector(),
+ resultObjects)) {
+ return false;
+ }
+ break;
+ case ADAPTOR:
+ case VECTOR_EXPRESSION:
+ if (!doVectorCastTest(
+ typeInfo,
+ columns,
+ columnNames,
+ rowSource.typeInfos(),
+ rowSource.dataTypePhysicalVariations(),
+ children,
+ udf, exprDesc,
+ coalesceEltTestMode,
+ batchSource,
+ exprDesc.getWritableObjectInspector(),
+ outputTypeInfo,
+ resultObjects)) {
+ return false;
+ }
+ break;
+ default:
+        throw new RuntimeException("Unexpected COALESCE/ELT test mode " + coalesceEltTestMode);
+ }
+ }
+
+ for (int i = 0; i < rowCount; i++) {
+ // Row-mode is the expected value.
+ Object expectedResult = resultObjectsArray[0][i];
+
+ for (int v = 1; v < CoalesceEltTestMode.count; v++) {
+ Object vectorResult = resultObjectsArray[v][i];
+ CoalesceEltTestMode coalesceEltTestMode = CoalesceEltTestMode.values()[v];
+ if (expectedResult == null || vectorResult == null) {
+ if (expectedResult != null || vectorResult != null) {
+ Assert.fail(
+ "Row " + i +
+ " sourceTypeName " + typeName +
+ " " + coalesceEltTestMode +
+ " result is NULL " + (vectorResult == null ? "YES" : "NO result " + vectorResult.toString()) +
+ " does not match row-mode expected result is NULL " +
+ (expectedResult == null ? "YES" : "NO result " + expectedResult.toString()) +
+ " row values " + Arrays.toString(randomRows[i]) +
+ " exprDesc " + exprDesc.toString());
+ }
+ } else {
+
+ if (!expectedResult.equals(vectorResult)) {
+ Assert.fail(
+ "Row " + i +
+ " sourceTypeName " + typeName +
+ " " + coalesceEltTestMode +
+ " result " + vectorResult.toString() +
+ " (" + vectorResult.getClass().getSimpleName() + ")" +
+ " does not match row-mode expected result " + expectedResult.toString() +
+ " (" + expectedResult.getClass().getSimpleName() + ")" +
+ " row values " + Arrays.toString(randomRows[i]) +
+ " exprDesc " + exprDesc.toString());
+ }
+ }
+ }
+ }
+ return true;
+ }
+
+ private boolean doRowCastTest(TypeInfo typeInfo,
+ List<String> columns, List<ExprNodeDesc> children,
+ GenericUDF udf, ExprNodeGenericFuncDesc exprDesc,
+ Object[][] randomRows,
+ ObjectInspector rowInspector,
+ ObjectInspector objectInspector,
+ Object[] resultObjects)
+ throws Exception {
+
+ /*
+ System.out.println(
+ "*DEBUG* typeInfo " + typeInfo.toString() +
+ " targetTypeInfo " + targetTypeInfo +
+ " coalesceEltTestMode ROW_MODE" +
+ " exprDesc " + exprDesc.toString());
+ */
+
+ HiveConf hiveConf = new HiveConf();
+ ExprNodeEvaluator evaluator =
+ ExprNodeEvaluatorFactory.get(exprDesc, hiveConf);
+ try {
+ evaluator.initialize(rowInspector);
+ } catch (HiveException e) {
+ return false;
+ }
+
+ final int rowCount = randomRows.length;
+ for (int i = 0; i < rowCount; i++) {
+ Object[] row = randomRows[i];
+ Object result = evaluator.evaluate(row);
+ Object copyResult =
+ ObjectInspectorUtils.copyToStandardObject(
+ result, objectInspector,
+ ObjectInspectorCopyOption.WRITABLE);
+ resultObjects[i] = copyResult;
+ }
+
+ return true;
+ }
+
+ private void extractResultObjects(VectorizedRowBatch batch, int rowIndex,
+      VectorExtractRow resultVectorExtractRow, Object[] scratchRow,
+ ObjectInspector objectInspector, Object[] resultObjects) {
+
+ boolean selectedInUse = batch.selectedInUse;
+ int[] selected = batch.selected;
+ for (int logicalIndex = 0; logicalIndex < batch.size; logicalIndex++) {
+ final int batchIndex = (selectedInUse ? selected[logicalIndex] : logicalIndex);
+      resultVectorExtractRow.extractRow(batch, batchIndex, scratchRow);
+
+ Object copyResult =
+ ObjectInspectorUtils.copyToStandardObject(
+              scratchRow[0], objectInspector, ObjectInspectorCopyOption.WRITABLE);
+ resultObjects[rowIndex++] = copyResult;
+ }
+ }
+
+ private boolean doVectorCastTest(TypeInfo typeInfo,
+ List<String> columns, String[] columnNames,
+ TypeInfo[] typeInfos, DataTypePhysicalVariation[] dataTypePhysicalVariations,
+ List<ExprNodeDesc> children,
+ GenericUDF udf, ExprNodeGenericFuncDesc exprDesc,
+ CoalesceEltTestMode coalesceEltTestMode,
+ VectorRandomBatchSource batchSource,
+ ObjectInspector objectInspector,
+ TypeInfo outputTypeInfo, Object[] resultObjects)
+ throws Exception {
+
+ HiveConf hiveConf = new HiveConf();
+ if (coalesceEltTestMode == CoalesceEltTestMode.ADAPTOR) {
+ hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_TEST_VECTOR_ADAPTOR_OVERRIDE, true);
+ }
+
+ VectorizationContext vectorizationContext =
+ new VectorizationContext(
+ "name",
+ columns,
+ Arrays.asList(typeInfos),
+ Arrays.asList(dataTypePhysicalVariations),
+ hiveConf);
+ VectorExpression vectorExpression =
+ vectorizationContext.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.PROJECTION);
+ vectorExpression.transientInit();
+
+ if (coalesceEltTestMode == CoalesceEltTestMode.VECTOR_EXPRESSION &&
+ vectorExpression instanceof VectorUDFAdaptor) {
+ System.out.println(
+ "*NO NATIVE VECTOR EXPRESSION* typeInfo " + typeInfo.toString() +
+ " coalesceEltTestMode " + coalesceEltTestMode +
+ " vectorExpression " + vectorExpression.toString());
+ }
+
+ System.out.println("*VECTOR EXPRESSION* " + vectorExpression.getClass().getSimpleName());
+
+ /*
+ System.out.println(
+ "*DEBUG* typeInfo " + typeInfo.toString() +
+ " coalesceEltTestMode " + coalesceEltTestMode +
+ " vectorExpression " + vectorExpression.toString());
+ */
+
+ VectorRandomRowSource rowSource = batchSource.getRowSource();
+ VectorizedRowBatchCtx batchContext =
+ new VectorizedRowBatchCtx(
+ columnNames,
+ rowSource.typeInfos(),
+ rowSource.dataTypePhysicalVariations(),
+ /* dataColumnNums */ null,
+ /* partitionColumnCount */ 0,
+ /* virtualColumnCount */ 0,
+ /* neededVirtualColumns */ null,
+ vectorizationContext.getScratchColumnTypeNames(),
+ vectorizationContext.getScratchDataTypePhysicalVariations());
+
+ VectorizedRowBatch batch = batchContext.createVectorizedRowBatch();
+
+ VectorExtractRow resultVectorExtractRow = new VectorExtractRow();
+ resultVectorExtractRow.init(
+ new TypeInfo[] { outputTypeInfo }, new int[] { vectorExpression.getOutputColumnNum() });
+    Object[] scratchRow = new Object[1];
+
+ batchSource.resetBatchIteration();
+ int rowIndex = 0;
+ while (true) {
+ if (!batchSource.fillNextBatch(batch)) {
+ break;
+ }
+ vectorExpression.evaluate(batch);
+      extractResultObjects(batch, rowIndex, resultVectorExtractRow, scratchRow,
+ objectInspector, resultObjects);
+ rowIndex += batch.size;
+ }
+
+ return true;
+ }
+}
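For readers skimming the patch: this test pins vectorized COALESCE / ELT to row-mode semantics, where COALESCE returns its first non-NULL argument and ELT(n, str1, str2, ...) returns the n-th string argument (1-based) or NULL when the index is NULL or out of range, which is why the index value list above runs from -1 through columnCount + 1. A minimal sketch of those reference semantics, using hypothetical helper names that are not part of the patch:

    // Hypothetical reference helpers, for illustration only.
    static Object coalesceReference(Object... args) {
      for (Object arg : args) {
        if (arg != null) {
          return arg;   // first non-NULL argument wins
        }
      }
      return null;      // all-NULL input yields NULL
    }

    static String eltReference(Integer n, String... strings) {
      if (n == null || n < 1 || n > strings.length) {
        return null;    // NULL or out-of-range index yields NULL, not an error
      }
      return strings[n - 1];
    }
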
http://git-wip-us.apache.org/repos/asf/hive/blob/66ea3266/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorIndex.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorIndex.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorIndex.java
new file mode 100644
index 0000000..648feb0
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorIndex.java
@@ -0,0 +1,575 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Random;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.stream.IntStream;
+
+import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation;
+import org.apache.hadoop.hive.common.type.HiveChar;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow;
+import org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource;
+import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource;
+import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.SupportedTypes;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
+import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.GenerationSpec;
+import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIndex;
+import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.io.WritableComparator;
+import org.apache.hadoop.io.WritableComparable;
+
+import junit.framework.Assert;
+
+import org.junit.Ignore;
+import org.junit.Test;
+
+public class TestVectorIndex {
+
+ @Test
+ public void testListIndex() throws Exception {
+ Random random = new Random(241);
+
+ doIndex(random, /* isList */ true, null, /* isFullElementTypeGamut */ true);
+ }
+
+ private static TypeInfo[] decimalTypeInfos = new TypeInfo[] {
+ new DecimalTypeInfo(38, 18),
+ new DecimalTypeInfo(25, 2),
+ new DecimalTypeInfo(19, 4),
+ new DecimalTypeInfo(18, 10),
+ new DecimalTypeInfo(17, 3),
+ new DecimalTypeInfo(12, 2),
+ new DecimalTypeInfo(7, 1)
+ };
+
+ @Test
+ public void testMapIndex() throws Exception {
+ Random random = new Random(233);
+
+ doIndex(random, /* isList */ false, "int", /* isFullElementTypeGamut */ true);
+ doIndex(random, /* isList */ false, "bigint", /* isFullElementTypeGamut */ false);
+ doIndex(random, /* isList */ false, "double", /* isFullElementTypeGamut */ false);
+ doIndex(random, /* isList */ false, "string", /* isFullElementTypeGamut */ false);
+ for (TypeInfo typeInfo : decimalTypeInfos) {
+ doIndex(
+ random, /* isList */ false, typeInfo.getTypeName(), /* isFullElementTypeGamut */ false);
+ }
+ }
+
+ public enum IndexTestMode {
+ ROW_MODE,
+ ADAPTOR,
+ VECTOR_EXPRESSION;
+
+ static final int count = values().length;
+ }
+
+ private void doIndex(Random random, boolean isList, String keyTypeName,
+ boolean isFullElementTypeGamut)
+ throws Exception {
+
+ String oneElementRootTypeName = "bigint";
+ doIndexOnRandomDataType(random, isList, keyTypeName, oneElementRootTypeName,
+ /* allowNulls */ true, /* isScalarIndex */ false);
+
+ doIndexOnRandomDataType(random, isList, keyTypeName, oneElementRootTypeName,
+ /* allowNulls */ true, /* isScalarIndex */ true);
+
+ doIndexOnRandomDataType(random, isList, keyTypeName, oneElementRootTypeName,
+ /* allowNulls */ false, /* isScalarIndex */ false);
+ doIndexOnRandomDataType(random, isList, keyTypeName, oneElementRootTypeName,
+ /* allowNulls */ false, /* isScalarIndex */ true);
+
+ if (!isFullElementTypeGamut) {
+ return;
+ }
+
+ List<String> elementRootTypeNameList = new ArrayList<String>();
+ elementRootTypeNameList.add("int");
+ elementRootTypeNameList.add("bigint");
+ elementRootTypeNameList.add("double");
+ elementRootTypeNameList.add("string");
+ elementRootTypeNameList.add("char");
+ elementRootTypeNameList.add("varchar");
+ elementRootTypeNameList.add("date");
+ elementRootTypeNameList.add("timestamp");
+ elementRootTypeNameList.add("binary");
+ elementRootTypeNameList.add("decimal");
+ elementRootTypeNameList.add("interval_day_time");
+
+ for (String elementRootTypeName : elementRootTypeNameList) {
+ doIndexOnRandomDataType(random, isList, keyTypeName, elementRootTypeName,
+ /* allowNulls */ true, /* isScalarIndex */ false);
+ }
+ }
+
+ private boolean doIndexOnRandomDataType(Random random,
+ boolean isList, String keyTypeName, String elementRootTypeName,
+ boolean allowNulls, boolean isScalarIndex)
+ throws Exception {
+
+ String elementTypeName =
+ VectorRandomRowSource.getDecoratedTypeName(
+ random, elementRootTypeName, SupportedTypes.ALL, /* allowedTypeNameSet */ null,
+ /* depth */ 0, /* maxDepth */ 3);
+
+ TypeInfo elementTypeInfo = TypeInfoUtils.getTypeInfoFromTypeString(elementTypeName);
+
+ ObjectInspector elementObjectInspector =
+ TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(
+ elementTypeInfo);
+
+ //----------------------------------------------------------------------------------------------
+
+ final TypeInfo keyTypeInfo;
+ if (isList) {
+ keyTypeInfo = TypeInfoFactory.intTypeInfo;
+ } else {
+ keyTypeInfo =
+ TypeInfoUtils.getTypeInfoFromTypeString(keyTypeName);
+ }
+ final ObjectInspector keyObjectInspector =
+ TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(
+ keyTypeInfo);
+
+ Object exampleObject =
+ (isList ?
+ ((WritableIntObjectInspector) keyObjectInspector).create(0) :
+ VectorRandomRowSource.randomWritable(
+ random, keyTypeInfo, keyObjectInspector, DataTypePhysicalVariation.NONE,
+ /* allowNull */ false));
+ WritableComparator writableComparator =
+ WritableComparator.get((Class<? extends WritableComparable>) exampleObject.getClass());
+
+ final int allKeyCount = 10 + random.nextInt(10);
+ final int keyCount = 5 + random.nextInt(allKeyCount / 2);
+ List<Object> allKeyList = new ArrayList<Object>(allKeyCount);
+
+ Set<Object> allKeyTreeSet = new TreeSet<Object>(writableComparator);
+
+ int fillAllKeyCount = 0;
+ while (fillAllKeyCount < allKeyCount) {
+ Object object;
+ if (isList) {
+ WritableIntObjectInspector writableOI = (WritableIntObjectInspector) keyObjectInspector;
+ int index = random.nextInt(keyCount);
+ object = writableOI.create(index);
+ while (allKeyTreeSet.contains(object)) {
+ index =
+ (random.nextBoolean() ?
+ random.nextInt() :
+ (random.nextBoolean() ? -1 : keyCount));
+ object = writableOI.create(index);
+ }
+ } else {
+ do {
+ object =
+ VectorRandomRowSource.randomWritable(
+ random, keyTypeInfo, keyObjectInspector, DataTypePhysicalVariation.NONE,
+ /* allowNull */ false);
+ } while (allKeyTreeSet.contains(object));
+ }
+ allKeyList.add(object);
+ allKeyTreeSet.add(object);
+ fillAllKeyCount++;
+ }
+
+ List<Object> keyList = new ArrayList<Object>();
+
+ Set<Object> keyTreeSet = new TreeSet<Object>(writableComparator);
+
+ int fillKeyCount = 0;
+ while (fillKeyCount < keyCount) {
+ Object newKey = allKeyList.get(random.nextInt(allKeyCount));
+ if (keyTreeSet.contains(newKey)) {
+ continue;
+ }
+ keyList.add(newKey);
+ keyTreeSet.add(newKey);
+ fillKeyCount++;
+ }
+
+ //----------------------------------------------------------------------------------------------
+
+ final TypeInfo typeInfo;
+ if (isList) {
+ ListTypeInfo listTypeInfo = new ListTypeInfo();
+ listTypeInfo.setListElementTypeInfo(elementTypeInfo);
+ typeInfo = listTypeInfo;
+ } else {
+ MapTypeInfo mapTypeInfo = new MapTypeInfo();
+ mapTypeInfo.setMapKeyTypeInfo(keyTypeInfo);
+ mapTypeInfo.setMapValueTypeInfo(elementTypeInfo);
+ typeInfo = mapTypeInfo;
+ }
+
+ final String typeName = typeInfo.getTypeName();
+
+ final ObjectInspector objectInspector =
+ TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(
+ typeInfo);
+
+ //----------------------------------------------------------------------------------------------
+
+ List<GenerationSpec> generationSpecList = new ArrayList<GenerationSpec>();
+ List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList =
+ new ArrayList<DataTypePhysicalVariation>();
+ List<String> columns = new ArrayList<String>();
+ List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
+
+ int columnNum = 1;
+
+ ExprNodeDesc keyColExpr;
+
+ if (!isScalarIndex) {
+ generationSpecList.add(
+ GenerationSpec.createValueList(keyTypeInfo, keyList));
+ explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE);
+ String columnName = "col" + columnNum++;
+ columns.add(columnName);
+ keyColExpr = new ExprNodeColumnDesc(keyTypeInfo, columnName, "table", false);
+ } else {
+ Object scalarWritable = keyList.get(random.nextInt(keyCount));
+ final Object scalarObject =
+ VectorRandomRowSource.getNonWritableObject(
+ scalarWritable, keyTypeInfo, keyObjectInspector);
+ keyColExpr = new ExprNodeConstantDesc(keyTypeInfo, scalarObject);
+ }
+
+ /*
+ System.out.println("*DEBUG* typeName " + typeName);
+ System.out.println("*DEBUG* keyColExpr " + keyColExpr.toString());
+ System.out.println("*DEBUG* keyList " + keyList.toString());
+ System.out.println("*DEBUG* allKeyList " + allKeyList.toString());
+ */
+
+ generationSpecList.add(
+ GenerationSpec.createValueList(typeInfo, keyList));
+ explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE);
+ String columnName = "col" + columnNum++;
+ columns.add(columnName);
+
+ ExprNodeDesc listOrMapColExpr;
+ listOrMapColExpr = new ExprNodeColumnDesc(typeInfo, columnName, "table", false);
+
+ children.add(listOrMapColExpr);
+ children.add(keyColExpr);
+
+ VectorRandomRowSource rowSource = new VectorRandomRowSource();
+
+ rowSource.initGenerationSpecSchema(
+ random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ allowNulls,
+ explicitDataTypePhysicalVariationList);
+
+ String[] columnNames = columns.toArray(new String[0]);
+
+ Object[][] randomRows = rowSource.randomRows(100000);
+
+ VectorRandomBatchSource batchSource =
+ VectorRandomBatchSource.createInterestingBatches(
+ random,
+ rowSource,
+ randomRows,
+ null);
+
+ final GenericUDF udf = new GenericUDFIndex();
+
+ ObjectInspector[] argumentOIs = new ObjectInspector[2];
+ argumentOIs[0] = objectInspector;
+ argumentOIs[1] = keyObjectInspector;
+
+ final ObjectInspector outputObjectInspector = udf.initialize(argumentOIs);
+
+ TypeInfo outputTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(outputObjectInspector);
+
+ ExprNodeGenericFuncDesc exprDesc =
+ new ExprNodeGenericFuncDesc(elementTypeInfo, udf, children);
+
+ final int rowCount = randomRows.length;
+ Object[][] resultObjectsArray = new Object[IndexTestMode.count][];
+ for (int i = 0; i < IndexTestMode.count; i++) {
+
+ Object[] resultObjects = new Object[rowCount];
+ resultObjectsArray[i] = resultObjects;
+
+ IndexTestMode indexTestMode = IndexTestMode.values()[i];
+ switch (indexTestMode) {
+ case ROW_MODE:
+ if (!doRowCastTest(
+ typeInfo,
+ columns,
+ children,
+ udf, exprDesc,
+ randomRows,
+ rowSource.rowStructObjectInspector(),
+ elementObjectInspector,
+ outputTypeInfo,
+ resultObjects)) {
+ return false;
+ }
+ break;
+ case ADAPTOR:
+ case VECTOR_EXPRESSION:
+ if (!doVectorCastTest(
+ typeInfo,
+ columns,
+ columnNames,
+ rowSource.typeInfos(),
+ rowSource.dataTypePhysicalVariations(),
+ children,
+ udf, exprDesc,
+ indexTestMode,
+ batchSource,
+ exprDesc.getWritableObjectInspector(),
+ outputTypeInfo,
+ resultObjects)) {
+ return false;
+ }
+ break;
+ default:
+        throw new RuntimeException("Unexpected INDEX test mode " + indexTestMode);
+ }
+ }
+
+ for (int i = 0; i < rowCount; i++) {
+ // Row-mode is the expected value.
+ Object expectedResult = resultObjectsArray[0][i];
+
+ for (int v = 1; v < IndexTestMode.count; v++) {
+ Object vectorResult = resultObjectsArray[v][i];
+ IndexTestMode indexTestMode = IndexTestMode.values()[v];
+ if (expectedResult == null || vectorResult == null) {
+ if (expectedResult != null || vectorResult != null) {
+ Assert.fail(
+ "Row " + i +
+ " sourceTypeName " + typeName +
+ " " + indexTestMode +
+ " result is NULL " + (vectorResult == null ? "YES" : "NO result " + vectorResult.toString()) +
+ " does not match row-mode expected result is NULL " +
+ (expectedResult == null ? "YES" : "NO result " + expectedResult.toString()) +
+ " row values " + Arrays.toString(randomRows[i]) +
+ " exprDesc " + exprDesc.toString());
+ }
+ } else {
+
+ if (!expectedResult.equals(vectorResult)) {
+ Assert.fail(
+ "Row " + i +
+ " sourceTypeName " + typeName +
+ " " + indexTestMode +
+ " result " + vectorResult.toString() +
+ " (" + vectorResult.getClass().getSimpleName() + ")" +
+ " does not match row-mode expected result " + expectedResult.toString() +
+ " (" + expectedResult.getClass().getSimpleName() + ")" +
+ " row values " + Arrays.toString(randomRows[i]) +
+ " exprDesc " + exprDesc.toString());
+ }
+ }
+ }
+ }
+
+ return true;
+ }
+
+ private boolean doRowCastTest(TypeInfo typeInfo,
+ List<String> columns, List<ExprNodeDesc> children,
+ GenericUDF udf, ExprNodeGenericFuncDesc exprDesc,
+ Object[][] randomRows,
+ ObjectInspector rowInspector,
+ ObjectInspector elementObjectInspector,
+ TypeInfo outputTypeInfo,
+ Object[] resultObjects)
+ throws Exception {
+
+ /*
+ System.out.println(
+ "*DEBUG* typeInfo " + typeInfo.toString() +
+ " outputTypeInfo " + outputTypeInfo.toString() +
+ " indexTestMode ROW_MODE" +
+ " exprDesc " + exprDesc.toString());
+ */
+
+ HiveConf hiveConf = new HiveConf();
+ ExprNodeEvaluator evaluator =
+ ExprNodeEvaluatorFactory.get(exprDesc, hiveConf);
+ try {
+ evaluator.initialize(rowInspector);
+ } catch (HiveException e) {
+ return false;
+ }
+
+ final int rowCount = randomRows.length;
+ for (int i = 0; i < rowCount; i++) {
+ Object[] row = randomRows[i];
+ Object result = evaluator.evaluate(row);
+      Object copyResult =
+          ObjectInspectorUtils.copyToStandardObject(
+              result, elementObjectInspector,
+              ObjectInspectorCopyOption.WRITABLE);
+      resultObjects[i] = copyResult;
+ }
+
+ return true;
+ }
+
+ private void extractResultObjects(VectorizedRowBatch batch, int rowIndex,
+      VectorExtractRow resultVectorExtractRow, Object[] scratchRow,
+ ObjectInspector objectInspector, Object[] resultObjects) {
+
+ boolean selectedInUse = batch.selectedInUse;
+ int[] selected = batch.selected;
+ for (int logicalIndex = 0; logicalIndex < batch.size; logicalIndex++) {
+ final int batchIndex = (selectedInUse ? selected[logicalIndex] : logicalIndex);
+      resultVectorExtractRow.extractRow(batch, batchIndex, scratchRow);
+
+ Object copyResult =
+ ObjectInspectorUtils.copyToStandardObject(
+            scratchRow[0], objectInspector, ObjectInspectorCopyOption.WRITABLE);
+ resultObjects[rowIndex++] = copyResult;
+ }
+ }
+
+ private boolean doVectorCastTest(TypeInfo typeInfo,
+ List<String> columns, String[] columnNames,
+ TypeInfo[] typeInfos, DataTypePhysicalVariation[] dataTypePhysicalVariations,
+ List<ExprNodeDesc> children,
+ GenericUDF udf, ExprNodeGenericFuncDesc exprDesc,
+ IndexTestMode indexTestMode,
+ VectorRandomBatchSource batchSource,
+ ObjectInspector objectInspector,
+ TypeInfo outputTypeInfo, Object[] resultObjects)
+ throws Exception {
+
+ HiveConf hiveConf = new HiveConf();
+ if (indexTestMode == IndexTestMode.ADAPTOR) {
+ hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_TEST_VECTOR_ADAPTOR_OVERRIDE, true);
+ }
+
+ VectorizationContext vectorizationContext =
+ new VectorizationContext(
+ "name",
+ columns,
+ Arrays.asList(typeInfos),
+ Arrays.asList(dataTypePhysicalVariations),
+ hiveConf);
+ VectorExpression vectorExpression =
+ vectorizationContext.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.PROJECTION);
+ vectorExpression.transientInit();
+
+ if (indexTestMode == IndexTestMode.VECTOR_EXPRESSION &&
+ vectorExpression instanceof VectorUDFAdaptor) {
+ System.out.println(
+ "*NO NATIVE VECTOR EXPRESSION* typeInfo " + typeInfo.toString() +
+ " indexTestMode " + indexTestMode +
+ " vectorExpression " + vectorExpression.toString());
+ }
+
+ System.out.println("*VECTOR EXPRESSION* " + vectorExpression.getClass().getSimpleName());
+
+ /*
+ System.out.println(
+ "*DEBUG* typeInfo " + typeInfo.toString() +
+ " indexTestMode " + indexTestMode +
+ " vectorExpression " + vectorExpression.toString());
+ */
+
+ VectorRandomRowSource rowSource = batchSource.getRowSource();
+ VectorizedRowBatchCtx batchContext =
+ new VectorizedRowBatchCtx(
+ columnNames,
+ rowSource.typeInfos(),
+ rowSource.dataTypePhysicalVariations(),
+ /* dataColumnNums */ null,
+ /* partitionColumnCount */ 0,
+ /* virtualColumnCount */ 0,
+ /* neededVirtualColumns */ null,
+ vectorizationContext.getScratchColumnTypeNames(),
+ vectorizationContext.getScratchDataTypePhysicalVariations());
+
+ VectorizedRowBatch batch = batchContext.createVectorizedRowBatch();
+
+ VectorExtractRow resultVectorExtractRow = new VectorExtractRow();
+ resultVectorExtractRow.init(
+ new TypeInfo[] { outputTypeInfo }, new int[] { vectorExpression.getOutputColumnNum() });
+    Object[] scratchRow = new Object[1];
+
+ batchSource.resetBatchIteration();
+ int rowIndex = 0;
+ while (true) {
+ if (!batchSource.fillNextBatch(batch)) {
+ break;
+ }
+ vectorExpression.evaluate(batch);
+      extractResultObjects(batch, rowIndex, resultVectorExtractRow, scratchRow,
+ objectInspector, resultObjects);
+ rowIndex += batch.size;
+ }
+
+ return true;
+ }
+}
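The semantics exercised here match the rewritten ListIndexColColumn further below: list indexing is 0-based, and a NULL list, NULL index, negative index, or index at or past the list length yields NULL rather than an error; map indexing yields NULL for a NULL or absent key. A small sketch of that reference behavior, with hypothetical helper names that are not part of the patch:

    import java.util.List;
    import java.util.Map;

    // Hypothetical reference helpers, for illustration only.
    class IndexReferenceSketch {
      static Object listIndex(List<Object> list, Integer index) {
        if (list == null || index == null || index < 0 || index >= list.size()) {
          return null;   // NULL input or out-of-range index yields NULL
        }
        return list.get(index);
      }

      static Object mapIndex(Map<Object, Object> map, Object key) {
        if (map == null || key == null) {
          return null;
        }
        return map.get(key);   // absent key yields NULL
      }
    }
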
http://git-wip-us.apache.org/repos/asf/hive/blob/66ea3266/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorNull.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorNull.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorNull.java
index a4fc0d57..9b0a2ae 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorNull.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorNull.java
@@ -49,8 +49,6 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNot;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull;
@@ -74,8 +72,6 @@ import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.WritableComparator;
-import org.apache.hadoop.io.WritableComparable;
import junit.framework.Assert;
@@ -407,7 +403,7 @@ public class TestVectorNull {
" vectorExpression " + vectorExpression.toString());
}
- System.out.println("*VECTOR EXPRESSION* " + vectorExpression.getClass().getSimpleName());
+ // System.out.println("*VECTOR EXPRESSION* " + vectorExpression.getClass().getSimpleName());
/*
System.out.println(
http://git-wip-us.apache.org/repos/asf/hive/blob/66ea3266/ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out b/ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out
index b278ecc..b036cdd 100644
--- a/ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out
+++ b/ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out
@@ -127,7 +127,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [1, 2, 3, 8, 9, 10, 11, 12, 13]
- selectExpressions: VectorUDFMapIndexStringScalar(col 1:map<string,string>, key: k2) -> 8:string, VectorUDFMapIndexLongScalar(col 2:map<int,int>, key: 456) -> 9:int, VectorUDFMapIndexDoubleScalar(col 3:map<double,double>, key: 123.123) -> 10:double, VectorUDFMapIndexStringCol(col 1:map<string,string>, key: col 4:string) -> 11:string, VectorUDFMapIndexLongCol(col 2:map<int,int>, key: col 5:int) -> 12:int, VectorUDFMapIndexDoubleCol(col 3:map<double,double>, key: col 6:double) -> 13:double
+ selectExpressions: VectorUDFMapIndexStringScalar(col 1:map<string,string>, key: k2) -> 8:string, VectorUDFMapIndexLongScalar(col 2:map<int,int>, key: 456) -> 9:int, VectorUDFMapIndexDecimalScalar(col 3:map<double,double>, key: 123.123) -> 10:double, VectorUDFMapIndexStringCol(col 1:map<string,string>, key: col 4:string) -> 11:string, VectorUDFMapIndexLongCol(col 2:map<int,int>, key: col 5:int) -> 12:int, VectorUDFMapIndexDoubleCol(col 3:map<double,double>, key: col 6:double) -> 13:double
Statistics: Num rows: 1023 Data size: 2183412 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 10
@@ -228,7 +228,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [8, 9, 10]
- selectExpressions: VectorUDFMapIndexStringScalar(col 1:map<string,string>, key: k1) -> 8:string, VectorUDFMapIndexLongScalar(col 2:map<int,int>, key: 123) -> 9:int, VectorUDFMapIndexDoubleScalar(col 3:map<double,double>, key: 123.123) -> 10:double
+ selectExpressions: VectorUDFMapIndexStringScalar(col 1:map<string,string>, key: k1) -> 8:string, VectorUDFMapIndexLongScalar(col 2:map<int,int>, key: 123) -> 9:int, VectorUDFMapIndexDecimalScalar(col 3:map<double,double>, key: 123.123) -> 10:double
Statistics: Num rows: 511 Data size: 995378 Basic stats: COMPLETE Column stats: NONE
Top N Key Operator
sort order: +
http://git-wip-us.apache.org/repos/asf/hive/blob/66ea3266/ql/src/test/results/clientpositive/parquet_map_type_vectorization.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/parquet_map_type_vectorization.q.out b/ql/src/test/results/clientpositive/parquet_map_type_vectorization.q.out
index 49cf3a2..b4c75d4 100644
--- a/ql/src/test/results/clientpositive/parquet_map_type_vectorization.q.out
+++ b/ql/src/test/results/clientpositive/parquet_map_type_vectorization.q.out
@@ -124,7 +124,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [1, 2, 3, 8, 9, 10, 11, 12, 13]
- selectExpressions: VectorUDFMapIndexStringScalar(col 1:map<string,string>, key: k2) -> 8:string, VectorUDFMapIndexLongScalar(col 2:map<int,int>, key: 456) -> 9:int, VectorUDFMapIndexDoubleScalar(col 3:map<double,double>, key: 123.123) -> 10:double, VectorUDFMapIndexStringCol(col 1:map<string,string>, key: col 4:string) -> 11:string, VectorUDFMapIndexLongCol(col 2:map<int,int>, key: col 5:int) -> 12:int, VectorUDFMapIndexDoubleCol(col 3:map<double,double>, key: col 6:double) -> 13:double
+ selectExpressions: VectorUDFMapIndexStringScalar(col 1:map<string,string>, key: k2) -> 8:string, VectorUDFMapIndexLongScalar(col 2:map<int,int>, key: 456) -> 9:int, VectorUDFMapIndexDecimalScalar(col 3:map<double,double>, key: 123.123) -> 10:double, VectorUDFMapIndexStringCol(col 1:map<string,string>, key: col 4:string) -> 11:string, VectorUDFMapIndexLongCol(col 2:map<int,int>, key: col 5:int) -> 12:int, VectorUDFMapIndexDoubleCol(col 3:map<double,double>, key: col 6:double) -> 13:double
Statistics: Num rows: 1023 Data size: 7161 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 10
@@ -218,7 +218,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [8, 9, 10]
- selectExpressions: VectorUDFMapIndexStringScalar(col 1:map<string,string>, key: k1) -> 8:string, VectorUDFMapIndexLongScalar(col 2:map<int,int>, key: 123) -> 9:int, VectorUDFMapIndexDoubleScalar(col 3:map<double,double>, key: 123.123) -> 10:double
+ selectExpressions: VectorUDFMapIndexStringScalar(col 1:map<string,string>, key: k1) -> 8:string, VectorUDFMapIndexLongScalar(col 2:map<int,int>, key: 123) -> 9:int, VectorUDFMapIndexDecimalScalar(col 3:map<double,double>, key: 123.123) -> 10:double
Statistics: Num rows: 511 Data size: 3577 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(_col1), sum(_col2)
[2/2] hive git commit: HIVE-20294: Vectorization: Fix NULL / Wrong Results issues in COALESCE / ELT (Matt McCline, reviewed by Teddy Choi)
Posted by mm...@apache.org.
HIVE-20294: Vectorization: Fix NULL / Wrong Results issues in COALESCE / ELT (Matt McCline, reviewed by Teddy Choi)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/66ea3266
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/66ea3266
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/66ea3266
Branch: refs/heads/master
Commit: 66ea3266cb9209f8a201f169be626ba878d5b763
Parents: e9896bf
Author: Matt McCline <mm...@hortonworks.com>
Authored: Thu Aug 2 23:04:34 2018 -0500
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Thu Aug 2 23:04:34 2018 -0500
----------------------------------------------------------------------
.../ql/exec/vector/expressions/DecimalUtil.java | 2 +-
.../vector/expressions/ListIndexColColumn.java | 472 ++++++++++++++-
.../vector/expressions/ListIndexColScalar.java | 128 ++++-
.../expressions/VectorUDFMapIndexBase.java | 89 ---
.../expressions/VectorUDFMapIndexBaseCol.java | 402 +++++++++++--
.../VectorUDFMapIndexBaseScalar.java | 138 ++++-
.../VectorUDFMapIndexDecimalCol.java | 76 +++
.../VectorUDFMapIndexDecimalScalar.java | 96 ++++
.../expressions/VectorUDFMapIndexDoubleCol.java | 15 +-
.../VectorUDFMapIndexDoubleScalar.java | 32 +-
.../expressions/VectorUDFMapIndexLongCol.java | 15 +-
.../VectorUDFMapIndexLongScalar.java | 21 +-
.../expressions/VectorUDFMapIndexStringCol.java | 32 +-
.../VectorUDFMapIndexStringScalar.java | 34 +-
.../hive/ql/udf/generic/GenericUDFIndex.java | 7 +-
.../ql/exec/vector/VectorRandomRowSource.java | 57 +-
.../vector/expressions/TestVectorBetweenIn.java | 4 +-
.../expressions/TestVectorCoalesceElt.java | 502 ++++++++++++++++
.../vector/expressions/TestVectorIndex.java | 575 +++++++++++++++++++
.../exec/vector/expressions/TestVectorNull.java | 6 +-
.../llap/parquet_map_type_vectorization.q.out | 4 +-
.../parquet_map_type_vectorization.q.out | 4 +-
22 files changed, 2452 insertions(+), 259 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/66ea3266/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalUtil.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalUtil.java
index db040f1..70a9a9c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalUtil.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalUtil.java
@@ -30,7 +30,7 @@ import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
public class DecimalUtil {
public static int compare(HiveDecimalWritable writableLeft, HiveDecimal right) {
- return writableLeft.getHiveDecimal().compareTo(right);
+ return writableLeft.compareTo(right);
}
public static int compare(HiveDecimal left, HiveDecimalWritable writableRight) {
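The DecimalUtil change above compares against the writable directly instead of materializing a HiveDecimal via getHiveDecimal() on every call; the new line implies HiveDecimalWritable exposes a compareTo(HiveDecimal) overload. A minimal before/after sketch, assuming Hive's storage-api classes are on the classpath:

    import org.apache.hadoop.hive.common.type.HiveDecimal;
    import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;

    public class DecimalCompareSketch {
      public static void main(String[] args) {
        HiveDecimalWritable writableLeft = new HiveDecimalWritable("123.45");
        HiveDecimal right = HiveDecimal.create("123.46");

        // Old path: allocates a HiveDecimal before comparing.
        int before = writableLeft.getHiveDecimal().compareTo(right);

        // New path: compares directly on the writable.
        int after = writableLeft.compareTo(right);

        System.out.println(before + " " + after);   // both report left < right
      }
    }
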
http://git-wip-us.apache.org/repos/asf/hive/blob/66ea3266/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColColumn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColColumn.java
index 55417cf..2992bff 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColColumn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColColumn.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
@@ -47,6 +49,13 @@ public class ListIndexColColumn extends VectorExpression {
@Override
public void evaluate(VectorizedRowBatch batch) throws HiveException {
+
+ // return immediately if batch is empty
+ final int n = batch.size;
+ if (n == 0) {
+ return;
+ }
+
if (childExpressions != null) {
super.evaluateChildren(batch);
}
@@ -56,48 +65,459 @@ public class ListIndexColColumn extends VectorExpression {
ColumnVector childV = listV.child;
LongColumnVector indexColumnVector = (LongColumnVector) batch.cols[indexColumnNum];
long[] indexV = indexColumnVector.vector;
+ int[] sel = batch.selected;
+ boolean[] indexIsNull = indexColumnVector.isNull;
+ boolean[] listIsNull = listV.isNull;
+ boolean[] outputIsNull = outV.isNull;
// We do not need to do a column reset since we are carefully changing the output.
outV.isRepeating = false;
+ /*
+ * List indices are 0-based.
+ *
+ * Do careful maintenance of the outputColVector.noNulls flag since the index may be
+ * out-of-bounds.
+ */
+
+ if (indexColumnVector.isRepeating) {
+
+ /*
+ * Repeated index or repeated NULL index.
+ */
+ if (indexColumnVector.noNulls || !indexIsNull[0]) {
+ final long repeatedLongIndex = indexV[0];
+ if (repeatedLongIndex < 0) {
+
+ // Invalid index for entire batch.
+ outputIsNull[0] = true;
+ outV.noNulls = false;
+ outV.isRepeating = true;
+ return;
+ }
+
+ /*
+ * Same INDEX for entire batch. Still need to validate the LIST upper limit.
+ */
+ if (listV.isRepeating) {
+ if (listV.noNulls || !listIsNull[0]) {
+ final long repeatedLongListLength = listV.lengths[0];
+ if (repeatedLongIndex >= repeatedLongListLength) {
+ outV.isNull[0] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[0] = false;
+ outV.setElement(0, (int) (listV.offsets[0] + repeatedLongIndex), childV);
+ }
+ } else {
+ outputIsNull[0] = true;
+ outV.noNulls = false;
+ }
+ outV.isRepeating = true;
+ return;
+ }
+
+ /*
+ * Individual row processing for LIST vector with *repeated* INDEX instance.
+ */
+ if (listV.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outV.noNulls) {
+ for (int j = 0; j < n; j++) {
+ final int i = sel[j];
+ final long longListLength = listV.lengths[i];
+ if (repeatedLongIndex >= longListLength) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, (int) (listV.offsets[i] + repeatedLongIndex), childV);
+ }
+ }
+ } else {
+ for (int j = 0; j < n; j++) {
+ final int i = sel[j];
+ final long longListLength = listV.lengths[i];
+ if (repeatedLongIndex >= longListLength) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.setElement(i, (int) (listV.offsets[i] + repeatedLongIndex), childV);
+ }
+ }
+ }
+ } else {
+ if (!outV.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outV.noNulls = true;
+ }
+ for (int i = 0; i < n; i++) {
+ final long longListLength = listV.lengths[i];
+ if (repeatedLongIndex >= longListLength) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.setElement(i, (int) (listV.offsets[i] + repeatedLongIndex), childV);
+ }
+ }
+ }
+ } else /* there are NULLs in the LIST */ {
+
+ if (batch.selectedInUse) {
+ for (int j=0; j != n; j++) {
+ int i = sel[j];
+ if (!listIsNull[i]) {
+ final long longListLength = listV.lengths[i];
+ if (repeatedLongIndex >= longListLength) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, (int) (listV.offsets[i] + repeatedLongIndex), childV);
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
+ }
+ } else {
+ for (int i = 0; i != n; i++) {
+ if (!listIsNull[i]) {
+ final long longListLength = listV.lengths[i];
+ if (repeatedLongIndex >= longListLength) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, (int) (listV.offsets[i] + repeatedLongIndex), childV);
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
+ }
+ }
+ }
+ } else {
+ outputIsNull[0] = true;
+ outV.noNulls = false;
+ outV.isRepeating = true;
+ }
+ return;
+ }
+
+ // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ /*
+     * Same LIST for entire batch. Still need to validate the LIST upper limit against a
+     * varying INDEX.
+ *
+ * (Repeated INDEX case handled above).
+ */
+
if (listV.isRepeating) {
- if (listV.isNull[0]) {
- outV.isNull[0] = true;
+ if (listV.noNulls || !listIsNull[0]) {
+
+ /*
+ * Individual row processing for INDEX vector with *repeated* LIST value.
+ */
+ final long repeatedLongListOffset = listV.offsets[0];
+ final long repeatedLongListLength = listV.lengths[0];
+
+ if (indexColumnVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outV.noNulls) {
+ for (int j = 0; j != n; j++) {
+ final int i = sel[j];
+ final long longIndex = indexV[i];
+ if (longIndex < 0) {
+
+                // Invalid index for this row.
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ if (longIndex >= repeatedLongListLength) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, (int) (repeatedLongListOffset + longIndex), childV);
+ }
+ }
+ }
+ } else {
+ for (int j = 0; j != n; j++) {
+ final int i = sel[j];
+ final long longIndex = indexV[i];
+ if (longIndex < 0) {
+
+                // Invalid index for this row.
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ if (longIndex >= repeatedLongListLength) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.setElement(i, (int) (repeatedLongListOffset + longIndex), childV);
+ }
+ }
+ }
+ }
+ } else {
+ if (!outV.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outV.noNulls = true;
+ }
+ for (int i = 0; i != n; i++) {
+ final long longIndex = indexV[i];
+ if (longIndex < 0) {
+
+              // Invalid index for this row.
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ if (longIndex >= repeatedLongListLength) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.setElement(i, (int) (repeatedLongListOffset + longIndex), childV);
+ }
+ }
+ }
+ }
+      } else /* there are NULLs in the INDEX */ {
+
+ /*
+ * Do careful maintenance of the outV.noNulls flag.
+ */
+
+ if (batch.selectedInUse) {
+ for(int j=0; j != n; j++) {
+ int i = sel[j];
+ if (!indexIsNull[i]) {
+ final long longIndex = indexV[i];
+ if (longIndex < 0) {
+
+              // Invalid index for this row.
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ if (longIndex >= repeatedLongListLength) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, (int) (repeatedLongListOffset + longIndex), childV);
+ }
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
+ }
+ } else {
+ for (int i = 0; i != n; i++) {
+ if (!indexIsNull[i]) {
+ final long longIndex = indexV[i];
+ if (longIndex < 0) {
+
+ // Invalid index for this row.
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ if (longIndex >= repeatedLongListLength) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, (int) (repeatedLongListOffset + longIndex), childV);
+ }
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
+ }
+ }
+ }
+ } else {
+ outputIsNull[0] = true;
outV.noNulls = false;
outV.isRepeating = true;
+ }
+ return;
+ }
+
+ // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ /*
+ * Individual row processing for INDEX vectors and LIST vectors.
+ */
+ final boolean listNoNulls = listV.noNulls;
+
+ if (indexColumnVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outV.noNulls) {
+ for (int j = 0; j != n; j++) {
+ final int i = sel[j];
+ final long longIndex = indexV[i];
+ if (longIndex < 0) {
+
+ // Invalid index for this row.
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ if (listNoNulls || !listIsNull[i]) {
+ final long longListLength = listV.lengths[i];
+ if (longIndex >= longListLength) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, (int) (listV.offsets[i] + longIndex), childV);
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
+ }
+ }
+ } else {
+ for (int j = 0; j != n; j++) {
+ final int i = sel[j];
+ final long longIndex = indexV[i];
+ if (longIndex < 0) {
+
+ // Invalid index for this row.
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ if (listNoNulls || !listIsNull[i]) {
+ final long longListLength = listV.lengths[i];
+ if (longIndex >= longListLength) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.setElement(i, (int) (listV.offsets[i] + longIndex), childV);
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
+ }
+ }
+ }
} else {
- if (indexColumnVector.isRepeating) {
- if (indexV[0] >= listV.lengths[0]) {
- outV.isNull[0] = true;
+ if (!outV.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outV.noNulls = true;
+ }
+ for (int i = 0; i != n; i++) {
+ final long longIndex = indexV[i];
+ if (longIndex < 0) {
+
+ // Invalid index for this row.
+ outputIsNull[i] = true;
outV.noNulls = false;
} else {
- outV.isNull[0] = false;
- outV.setElement(0, (int) (listV.offsets[0] + indexV[0]), childV);
+ if (listNoNulls || !listIsNull[i]) {
+ final long longListLength = listV.lengths[i];
+ if (longIndex >= longListLength) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.setElement(i, (int) (listV.offsets[i] + longIndex), childV);
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
}
- outV.isRepeating = true;
- } else {
- for (int i = 0; i < batch.size; i++) {
- int j = (batch.selectedInUse) ? batch.selected[i] : i;
- if (indexV[j] >= listV.lengths[0]) {
- outV.isNull[j] = true;
+ }
+ }
+ } else /* there are NULLs in the INDEX vector */ {
+
+ /*
+ * Do careful maintenance of the outV.noNulls flag.
+ */
+
+ if (batch.selectedInUse) {
+ for (int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (!indexIsNull[i]) {
+ final long longIndex = indexV[i];
+ if (longIndex < 0) {
+
+ // Invalid index for this row.
+ outputIsNull[i] = true;
outV.noNulls = false;
} else {
- outV.isNull[j] = false;
- outV.setElement(j, (int) (listV.offsets[0] + indexV[j]), childV);
-
+ if (listNoNulls || !listIsNull[i]) {
+ final long longListLength = listV.lengths[i];
+ if (longIndex >= longListLength) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, (int) (listV.offsets[i] + longIndex), childV);
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
}
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
}
}
- }
- } else {
- for (int i = 0; i < batch.size; i++) {
- int j = (batch.selectedInUse) ? batch.selected[i] : i;
- if (listV.isNull[j] || indexV[j] >= listV.lengths[j]) {
- outV.isNull[j] = true;
- outV.noNulls = false;
- } else {
- outV.isNull[j] = false;
- outV.setElement(j, (int) (listV.offsets[j] + indexV[j]), childV);
+ } else {
+ for (int i = 0; i != n; i++) {
+ if (!indexIsNull[i]) {
+ final long longIndex = indexV[i];
+ if (longIndex < 0) {
+
+ // Invalid index for this row.
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ if (listNoNulls || !listIsNull[i]) {
+ final long longListLength = listV.lengths[i];
+ if (longIndex >= longListLength) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, (int) (listV.offsets[i] + longIndex), childV);
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
}
}
}
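
The unrolled loops above all maintain the same ColumnVector null contract: isRepeating == true
means only entry 0 is meaningful, noNulls == true promises every isNull[i] is false, and any row
marked null must also clear noNulls. A minimal sketch of that contract, using a hypothetical
identity kernel over LongColumnVector (illustrative only, not code from this patch; the real
kernels also branch on the input's isRepeating case first, which this sketch assumes is false):

    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

    public class NullContractSketch {

      // Copies inCol to outCol while keeping isNull/noNulls consistent.
      static void copyWithNulls(VectorizedRowBatch batch, int inCol, int outCol) {
        LongColumnVector in = (LongColumnVector) batch.cols[inCol];
        LongColumnVector out = (LongColumnVector) batch.cols[outCol];
        out.isRepeating = false;
        for (int j = 0; j < batch.size; j++) {
          final int i = batch.selectedInUse ? batch.selected[j] : j;
          if (in.noNulls || !in.isNull[i]) {
            out.isNull[i] = false;      // overwrite stale flags left by an earlier batch
            out.vector[i] = in.vector[i];
          } else {
            out.isNull[i] = true;
            out.noNulls = false;        // the output now contains at least one NULL
          }
        }
      }
    }
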
http://git-wip-us.apache.org/repos/asf/hive/blob/66ea3266/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColScalar.java
index 808e9fb..bb01c1c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColScalar.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColScalar.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -46,6 +48,13 @@ public class ListIndexColScalar extends VectorExpression {
@Override
public void evaluate(VectorizedRowBatch batch) throws HiveException {
+
+ // return immediately if batch is empty
+ final int n = batch.size;
+ if (n == 0) {
+ return;
+ }
+
if (childExpressions != null) {
super.evaluateChildren(batch);
}
@@ -53,37 +62,128 @@ public class ListIndexColScalar extends VectorExpression {
ColumnVector outV = batch.cols[outputColumnNum];
ListColumnVector listV = (ListColumnVector) batch.cols[listColumnNum];
ColumnVector childV = listV.child;
+ int[] sel = batch.selected;
+ boolean[] listIsNull = listV.isNull;
+ boolean[] outputIsNull = outV.isNull;
+
+ if (index < 0) {
+ outV.isNull[0] = true;
+ outV.noNulls = false;
+ outV.isRepeating = true;
+ return;
+ }
+
+ // We do not need to do a column reset since we are carefully changing the output.
+ outV.isRepeating = false;
/*
* Do careful maintenance of the outputColVector.noNulls flag.
*/
if (listV.isRepeating) {
- if (listV.isNull[0]) {
- outV.isNull[0] = true;
- outV.noNulls = false;
- } else {
- if (index >= listV.lengths[0]) {
+ if (listV.noNulls || !listIsNull[0]) {
+ final long repeatedLongListLength = listV.lengths[0];
+ if (index >= repeatedLongListLength) {
outV.isNull[0] = true;
outV.noNulls = false;
} else {
outV.isNull[0] = false;
outV.setElement(0, (int) (listV.offsets[0] + index), childV);
}
+ } else {
+ outV.isNull[0] = true;
+ outV.noNulls = false;
}
outV.isRepeating = true;
- } else {
- for (int i = 0; i < batch.size; i++) {
- int j = (batch.selectedInUse) ? batch.selected[i] : i;
- if (listV.isNull[j] || index >= listV.lengths[j]) {
- outV.isNull[j] = true;
- outV.noNulls = false;
+ return;
+ }
+
+ /*
+ * Individual row processing for LIST vector with scalar constant INDEX value.
+ */
+ if (listV.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outV.noNulls) {
+ for (int j = 0; j < n; j++) {
+ final int i = sel[j];
+ final long longListLength = listV.lengths[i];
+ if (index >= longListLength) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, (int) (listV.offsets[i] + index), childV);
+ }
+ }
} else {
- outV.isNull[j] = false;
- outV.setElement(j, (int) (listV.offsets[j] + index), childV);
+ for (int j = 0; j < n; j++) {
+ final int i = sel[j];
+ final long longListLength = listV.lengths[i];
+ if (index >= longListLength) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.setElement(i, (int) (listV.offsets[i] + index), childV);
+ }
+ }
+ }
+ } else {
+ if (!outV.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outV.isNull, false);
+ outV.noNulls = true;
+ }
+ for (int i = 0; i < n; i++) {
+ final long longListLength = listV.lengths[i];
+ if (index >= longListLength) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.setElement(i, (int) (listV.offsets[i] + index), childV);
+ }
+ }
+ }
+ } else /* there are NULLs in the LIST */ {
+
+ if (batch.selectedInUse) {
+ for (int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (!listIsNull[i]) {
+ final long longListLength = listV.lengths[i];
+ if (index >= longListLength) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, (int) (listV.offsets[i] + index), childV);
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
+ }
+ } else {
+ for (int i = 0; i != n; i++) {
+ if (!listIsNull[i]) {
+ final long longListLength = listV.lengths[i];
+ if (index >= longListLength) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, (int) (listV.offsets[i] + index), childV);
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
}
}
- outV.isRepeating = false;
}
}
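
Every bounds check in these kernels leans on the ListColumnVector encoding: the lists of all
rows are packed end-to-end into the child vector, and offsets[r]/lengths[r] locate row r's
slice. A small sketch with illustrative values (not from the patch) showing how list[r][index]
resolves:

    import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;

    public class ListLayoutSketch {
      public static void main(String[] args) {
        LongColumnVector child = new LongColumnVector(5);
        ListColumnVector listV = new ListColumnVector(2, child);
        // Row 0 holds [10, 20, 30]; row 1 holds [40, 50].
        child.vector[0] = 10; child.vector[1] = 20; child.vector[2] = 30;
        child.vector[3] = 40; child.vector[4] = 50;
        listV.offsets[0] = 0; listV.lengths[0] = 3;
        listV.offsets[1] = 3; listV.lengths[1] = 2;

        final int index = 1;  // scalar index, as in ListIndexColScalar
        // Same address arithmetic as the kernel: child element = offsets[row] + index.
        long row1Value = child.vector[(int) (listV.offsets[1] + index)];  // 50
        // index >= lengths[row] is the out-of-bounds case that produces a NULL output.
        boolean row1WouldBeNull = index >= listV.lengths[1];              // false
        System.out.println(row1Value + " " + row1WouldBeNull);
      }
    }
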
http://git-wip-us.apache.org/repos/asf/hive/blob/66ea3266/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBase.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBase.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBase.java
deleted file mode 100644
index 3df4bce..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBase.java
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.exec.vector.expressions;
-
-import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-
-/**
- * Superclass to support vectorized functions that take a parameter as key of Map
- * and return the value of Map.
- */
-public abstract class VectorUDFMapIndexBase extends VectorExpression {
-
- private static final long serialVersionUID = 1L;
-
- public VectorUDFMapIndexBase() {
- super();
- }
-
- public VectorUDFMapIndexBase(int outputColumnNum) {
- super(outputColumnNum);
- }
-
- /**
- * The index array of MapColumnVector is used to get the value from MapColumnVector based on the
- * index, the following are the steps to get it:
- * 1. Get the current key which is a scalar or from a ColumnVector.
- * 2. Compare the current key and the key from MapColumnVector.
- * 3. Set the index of MapColumnVector to the result array if the keys are same.
- */
- protected int[] getMapValueIndex(MapColumnVector mapV, VectorizedRowBatch batch) {
- int[] indexArray = new int[VectorizedRowBatch.DEFAULT_SIZE];
- for (int i = 0; i < batch.size; i++) {
- boolean findKey = false;
- int offset = (batch.selectedInUse) ? batch.selected[i] : i;
- Object columnKey = getCurrentKey(offset);
- for (int j = 0; j < mapV.lengths[offset]; j++) {
- int index = (int)(mapV.offsets[offset] + j);
- Object tempKey = getKeyByIndex(mapV.keys, index);
- if (compareKey(columnKey, tempKey)) {
- indexArray[offset] = j;
- findKey = true;
- break;
- }
- }
- if (!findKey) {
- indexArray[offset] = -1;
- }
- if (mapV.isRepeating) {
- break;
- }
- }
- return indexArray;
- }
-
- protected boolean compareKey(Object columnKey, Object otherKey) {
- if (columnKey == null && otherKey == null) {
- return true;
- } else if (columnKey != null && otherKey != null) {
- return compareKeyInternal(columnKey, otherKey);
- } else {
- return false;
- }
- }
-
- protected boolean compareKeyInternal(Object columnKey, Object otherKey) {
- return columnKey.equals(otherKey);
- }
-
- abstract Object getKeyByIndex(ColumnVector cv, int index);
-
- abstract Object getCurrentKey(int index);
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/66ea3266/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseCol.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseCol.java
index 157154a..67f4d55 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseCol.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseCol.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
@@ -27,7 +29,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException;
* Superclass to support vectorized functions that take a column value as key of Map
* and return the value of Map.
*/
-public abstract class VectorUDFMapIndexBaseCol extends VectorUDFMapIndexBase {
+public abstract class VectorUDFMapIndexBaseCol extends VectorExpression {
private static final long serialVersionUID = 1L;
@@ -47,6 +49,13 @@ public abstract class VectorUDFMapIndexBaseCol extends VectorUDFMapIndexBase {
@Override
public void evaluate(VectorizedRowBatch batch) throws HiveException {
+
+ // return immediately if batch is empty
+ final int n = batch.size;
+ if (n == 0) {
+ return;
+ }
+
if (childExpressions != null) {
super.evaluateChildren(batch);
}
@@ -55,61 +64,380 @@ public abstract class VectorUDFMapIndexBaseCol extends VectorUDFMapIndexBase {
MapColumnVector mapV = (MapColumnVector) batch.cols[mapColumnNum];
// indexColumnVector includes the keys of Map
indexColumnVector = batch.cols[indexColumnNum];
+ ColumnVector valuesV = mapV.values;
+
+ int[] sel = batch.selected;
+ boolean[] indexIsNull = indexColumnVector.isNull;
+ boolean[] mapIsNull = mapV.isNull;
+ boolean[] outputIsNull = outV.isNull;
+
+ // We do not need to do a column reset since we are carefully changing the output.
+ outV.isRepeating = false;
/*
* Do careful maintenance of the outputColVector.noNulls flag.
*/
- int[] mapValueIndex;
+ if (indexColumnVector.isRepeating) {
+
+ /*
+ * Repeated index or repeated NULL index.
+ */
+ if (indexColumnVector.noNulls || !indexIsNull[0]) {
+
+ /*
+ * Same INDEX for entire batch.
+ */
+ if (mapV.isRepeating) {
+ if (mapV.noNulls || !mapIsNull[0]) {
+ final int repeatedMapIndex = findInMap(indexColumnVector, 0, mapV, 0);
+ if (repeatedMapIndex == -1) {
+ outV.isNull[0] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[0] = false;
+ outV.setElement(0, repeatedMapIndex, valuesV);
+ }
+ } else {
+ outputIsNull[0] = true;
+ outV.noNulls = false;
+ }
+ outV.isRepeating = true;
+ return;
+ }
+
+ /*
+ * Individual row processing for MAP vector with *repeated* INDEX value.
+ */
+ if (mapV.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outV.noNulls) {
+ for (int j = 0; j < n; j++) {
+ final int i = sel[j];
+ final int mapIndex = findInMap(indexColumnVector, 0, mapV, i);
+ if (mapIndex == -1) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, mapIndex, valuesV);
+ }
+ }
+ } else {
+ for (int j = 0; j < n; j++) {
+ final int i = sel[j];
+ final int mapIndex = findInMap(indexColumnVector, 0, mapV, i);
+ if (mapIndex == -1) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.setElement(i, mapIndex, valuesV);
+ }
+ }
+ }
+ } else {
+ if (!outV.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outV.noNulls = true;
+ }
+ for (int i = 0; i < n; i++) {
+ final int mapIndex = findInMap(indexColumnVector, 0, mapV, i);
+ if (mapIndex == -1) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.setElement(i, mapIndex, valuesV);
+ }
+ }
+ }
+ } else /* there are NULLs in the MAP */ {
+
+ if (batch.selectedInUse) {
+ for (int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (!mapIsNull[i]) {
+ final int mapIndex = findInMap(indexColumnVector, 0, mapV, i);
+ if (mapIndex == -1) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, mapIndex, valuesV);
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
+ }
+ } else {
+ for (int i = 0; i != n; i++) {
+ if (!mapIsNull[i]) {
+ final int mapIndex = findInMap(indexColumnVector, 0, mapV, i);
+ if (mapIndex == -1) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, mapIndex, valuesV);
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
+ }
+ }
+ }
+ } else {
+ outputIsNull[0] = true;
+ outV.noNulls = false;
+ outV.isRepeating = true;
+ }
+ return;
+ }
+
+ // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ /*
+ * Same MAP instance for entire batch.
+ *
+ * (Repeated INDEX case handled above).
+ */
+
if (mapV.isRepeating) {
- if (mapV.isNull[0]) {
- outV.isNull[0] = true;
+ if (mapV.noNulls || !mapIsNull[0]) {
+
+ /*
+ * Individual row processing for INDEX vector with *repeated* MAP instance.
+ */
+
+ if (indexColumnVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outV.noNulls) {
+ for (int j = 0; j != n; j++) {
+ final int i = sel[j];
+ final int mapIndex = findInMap(indexColumnVector, i, mapV, 0);
+ if (mapIndex == -1) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, mapIndex, valuesV);
+ }
+ }
+ } else {
+ for (int j = 0; j != n; j++) {
+ final int i = sel[j];
+ final int mapIndex = findInMap(indexColumnVector, i, mapV, 0);
+ if (mapIndex == -1) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.setElement(i, mapIndex, valuesV);
+ }
+ }
+ }
+ } else {
+ if (!outV.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outV.noNulls = true;
+ }
+ for (int i = 0; i != n; i++) {
+ final int mapIndex = findInMap(indexColumnVector, i, mapV, 0);
+ if (mapIndex == -1) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.setElement(i, mapIndex, valuesV);
+ }
+ }
+ }
+ } else /* there are NULLs in the INDEX vector */ {
+
+ /*
+ * Do careful maintenance of the outV.noNulls flag.
+ */
+
+ if (batch.selectedInUse) {
+ for (int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (!indexIsNull[i]) {
+ final int mapIndex = findInMap(indexColumnVector, i, mapV, 0);
+ if (mapIndex == -1) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, mapIndex, valuesV);
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
+ }
+ } else {
+ for (int i = 0; i != n; i++) {
+ if (!indexIsNull[i]) {
+ final int mapIndex = findInMap(indexColumnVector, i, mapV, 0);
+ if (mapIndex == -1) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, mapIndex, valuesV);
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
+ }
+ }
+ }
+ } else {
+ outputIsNull[0] = true;
outV.noNulls = false;
outV.isRepeating = true;
+ }
+ return;
+ }
+
+ // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ /*
+ * Individual row processing for INDEX vectors and MAP vectors.
+ */
+ final boolean mapNoNulls = mapV.noNulls;
+
+ if (indexColumnVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outV.noNulls) {
+ for (int j = 0; j != n; j++) {
+ final int i = sel[j];
+
+ if (mapNoNulls || !mapIsNull[i]) {
+ final int mapIndex = findInMap(indexColumnVector, i, mapV, i);
+ if (mapIndex == -1) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, mapIndex, valuesV);
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
+ }
+ } else {
+ for (int j = 0; j != n; j++) {
+ final int i = sel[j];
+ if (mapNoNulls || !mapIsNull[i]) {
+ final int mapIndex = findInMap(indexColumnVector, i, mapV, i);
+ if (mapIndex == -1) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.setElement(i, mapIndex, valuesV);
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
+ }
+ }
} else {
- mapValueIndex = getMapValueIndex(mapV, batch);
- if (indexColumnVector.isRepeating) {
- // the key is not found in MapColumnVector, set the output as null ColumnVector
- if (mapValueIndex[0] == -1) {
- outV.isNull[0] = true;
- outV.noNulls = false;
+ if (!outV.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outV.noNulls = true;
+ }
+ for (int i = 0; i != n; i++) {
+ if (mapNoNulls || !mapIsNull[i]) {
+ final int mapIndex = findInMap(indexColumnVector, i, mapV, i);
+ if (mapIndex == -1) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.setElement(i, mapIndex, valuesV);
+ }
} else {
- // the key is found in MapColumnVector, set the value
- outV.isNull[0] = false;
- outV.setElement(0, (int) (mapV.offsets[0] + mapValueIndex[0]), mapV.values);
+ outputIsNull[i] = true;
+ outV.noNulls = false;
}
- outV.isRepeating = true;
- } else {
- setUnRepeatingOutVector(batch, mapV, outV, mapValueIndex);
}
}
- } else {
- mapValueIndex = getMapValueIndex(mapV, batch);
- setUnRepeatingOutVector(batch, mapV, outV, mapValueIndex);
- }
- }
+ } else /* there are NULLs in the INDEX vector */ {
- /**
- * Set the output based on the index array of MapColumnVector.
- */
- private void setUnRepeatingOutVector(VectorizedRowBatch batch, MapColumnVector mapV,
- ColumnVector outV, int[] mapValueIndex) {
- for (int i = 0; i < batch.size; i++) {
- int j = (batch.selectedInUse) ? batch.selected[i] : i;
- if (mapV.isNull[j] || mapValueIndex[j] == -1) {
- outV.isNull[j] = true;
- outV.noNulls = false;
+ /*
+ * Do careful maintenance of the outV.noNulls flag.
+ */
+
+ if (batch.selectedInUse) {
+ for (int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (!indexIsNull[i]) {
+ if (mapNoNulls || !mapIsNull[i]) {
+ final int mapIndex = findInMap(indexColumnVector, i, mapV, i);
+ if (mapIndex == -1) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, mapIndex, valuesV);
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
+ }
} else {
- outV.isNull[j] = false;
- outV.setElement(j, (int) (mapV.offsets[j] + mapValueIndex[j]), mapV.values);
+ for (int i = 0; i != n; i++) {
+ if (!indexIsNull[i]) {
+ if (mapNoNulls || !mapIsNull[i]) {
+ final int mapIndex = findInMap(indexColumnVector, i, mapV, i);
+ if (mapIndex == -1) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, mapIndex, valuesV);
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
+ }
}
}
- outV.isRepeating = false;
}
- @Override
- protected Object getCurrentKey(int index) {
- return getKeyByIndex(indexColumnVector, index);
+ public int findInMap(ColumnVector indexColumnVector, int indexBatchIndex,
+ MapColumnVector mapColumnVector, int mapBatchIndex) {
+ throw new RuntimeException("Not implemented");
}
public int getMapColumnNum() {
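
The unrolled branches above only vary which batch position is pinned to 0: a repeating index
always reads row 0 of the index vector, and a repeating map always reads row 0 of the map,
with findInMap returning -1 when the key is absent so the output row becomes NULL. As a
hypothetical condensation (a helper that is not in the patch, which unrolls the cases for
speed), the per-row rule inside VectorUDFMapIndexBaseCol would be:

    // Hypothetical condensed form of the dispatch; illustrative only.
    private int resolveRow(ColumnVector idx, MapColumnVector map, int i) {
      final int idxRow = idx.isRepeating ? 0 : i;  // repeating vectors only populate entry 0
      final int mapRow = map.isRepeating ? 0 : i;
      return findInMap(idx, idxRow, map, mapRow);  // -1 means key not present -> NULL output
    }
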
http://git-wip-us.apache.org/repos/asf/hive/blob/66ea3266/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseScalar.java
index 72662e0..e7bb4d1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseScalar.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseScalar.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
@@ -27,7 +29,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException;
* Superclass to support vectorized functions that take a scalar as key of Map
* and return the value of Map.
*/
-public abstract class VectorUDFMapIndexBaseScalar extends VectorUDFMapIndexBase {
+public abstract class VectorUDFMapIndexBaseScalar extends VectorExpression {
private static final long serialVersionUID = 1L;
@@ -44,50 +46,144 @@ public abstract class VectorUDFMapIndexBaseScalar extends VectorUDFMapIndexBase
@Override
public void evaluate(VectorizedRowBatch batch) throws HiveException {
+
+ // return immediately if batch is empty
+ final int n = batch.size;
+ if (n == 0) {
+ return;
+ }
+
if (childExpressions != null) {
super.evaluateChildren(batch);
}
ColumnVector outV = batch.cols[outputColumnNum];
MapColumnVector mapV = (MapColumnVector) batch.cols[mapColumnNum];
+ ColumnVector valuesV = mapV.values;
+
+ int[] sel = batch.selected;
+ boolean[] mapIsNull = mapV.isNull;
+ boolean[] outputIsNull = outV.isNull;
+
+ // We do not need to do a column reset since we are carefully changing the output.
+ outV.isRepeating = false;
/*
* Do careful maintenance of the outputColVector.noNulls flag.
*/
- int[] mapValueIndex;
if (mapV.isRepeating) {
- if (mapV.isNull[0]) {
- outV.isNull[0] = true;
- outV.noNulls = false;
- } else {
- mapValueIndex = getMapValueIndex(mapV, batch);
- if (mapValueIndex[0] == -1) {
- // the key is not found in MapColumnVector, set the output as null ColumnVector
+ if (mapV.noNulls || !mapIsNull[0]) {
+ final int repeatedMapIndex = findScalarInMap(mapV, 0);
+ if (repeatedMapIndex == -1) {
outV.isNull[0] = true;
outV.noNulls = false;
} else {
- // the key is found in MapColumnVector, set the value
- outV.setElement(0, (int) (mapV.offsets[0] + mapValueIndex[0]), mapV.values);
+ outV.isNull[0] = false;
+ outV.setElement(0, repeatedMapIndex, valuesV);
}
+ } else {
+ outV.isNull[0] = true;
+ outV.noNulls = false;
}
outV.isRepeating = true;
- } else {
- mapValueIndex = getMapValueIndex(mapV, batch);
- for (int i = 0; i < batch.size; i++) {
- int j = (batch.selectedInUse) ? batch.selected[i] : i;
- if (mapV.isNull[j] || mapValueIndex[j] == -1) {
- outV.isNull[j] = true;
- outV.noNulls = false;
+ return;
+ }
+
+ /*
+ * Individual row processing for MAP vector with scalar constant INDEX value.
+ */
+ if (mapV.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outV.noNulls) {
+ for (int j = 0; j < n; j++) {
+ final int i = sel[j];
+ final int mapIndex = findScalarInMap(mapV, i);
+ if (mapIndex == -1) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, mapIndex, valuesV);
+ }
+ }
} else {
- outV.isNull[j] = false;
- outV.setElement(j, (int) (mapV.offsets[j] + mapValueIndex[j]), mapV.values);
+ for (int j = 0; j < n; j++) {
+ final int i = sel[j];
+ final int mapIndex = findScalarInMap(mapV, i);
+ if (mapIndex == -1) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.setElement(i, mapIndex, valuesV);
+ }
+ }
+ }
+ } else {
+ if (!outV.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outV.isNull, false);
+ outV.noNulls = true;
+ }
+ for (int i = 0; i < n; i++) {
+ final int mapIndex = findScalarInMap(mapV, i);
+ if (mapIndex == -1) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.setElement(i, mapIndex, valuesV);
+ }
+ }
+ }
+ } else /* there are NULLs in the MAP */ {
+
+ if (batch.selectedInUse) {
+ for (int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (!mapIsNull[i]) {
+ final int mapIndex = findScalarInMap(mapV, i);
+ if (mapIndex == -1) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, mapIndex, valuesV);
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
+ }
+ } else {
+ for (int i = 0; i != n; i++) {
+ if (!mapIsNull[i]) {
+ final int mapIndex = findScalarInMap(mapV, i);
+ if (mapIndex == -1) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, mapIndex, valuesV);
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
}
}
- outV.isRepeating = false;
}
}
+ public int findScalarInMap(MapColumnVector mapColumnVector, int mapBatchIndex) {
+ throw new RuntimeException("Not implemented");
+ }
+
public int getMapColumnNum() {
return mapColumnNum;
}
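
MapColumnVector uses the same offsets/lengths packing as lists, but over parallel keys and
values child vectors, which is why findScalarInMap can return an absolute child position
(offset + i) that indexes the values vector directly. A sketch with illustrative long keys
(not from the patch):

    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;

    public class MapLayoutSketch {
      public static void main(String[] args) {
        LongColumnVector keys = new LongColumnVector(3);
        LongColumnVector values = new LongColumnVector(3);
        MapColumnVector mapV = new MapColumnVector(2, keys, values);
        // Row 0 holds {1: 100, 2: 200}; row 1 holds {7: 700}.
        keys.vector[0] = 1; values.vector[0] = 100;
        keys.vector[1] = 2; values.vector[1] = 200;
        keys.vector[2] = 7; values.vector[2] = 700;
        mapV.offsets[0] = 0; mapV.lengths[0] = 2;
        mapV.offsets[1] = 2; mapV.lengths[1] = 1;

        // Linear scan of row 0's key slice, as the findScalarInMap overrides do.
        final long wanted = 2;
        int found = -1;
        final int offset = (int) mapV.offsets[0];
        final int count = (int) mapV.lengths[0];
        for (int i = 0; i < count; i++) {
          if (keys.vector[offset + i] == wanted) {
            found = offset + i;  // absolute position; feeds setElement(i, found, values)
            break;
          }
        }
        System.out.println(found == -1 ? "NULL" : Long.toString(values.vector[found]));  // 200
      }
    }
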
http://git-wip-us.apache.org/repos/asf/hive/blob/66ea3266/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDecimalCol.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDecimalCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDecimalCol.java
new file mode 100644
index 0000000..d700799
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDecimalCol.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+
+/**
+ * Returns value of Map.
+ * Extends {@link VectorUDFMapIndexBaseCol}
+ */
+public class VectorUDFMapIndexDecimalCol extends VectorUDFMapIndexBaseCol {
+
+ public VectorUDFMapIndexDecimalCol() {
+ super();
+ }
+
+ public VectorUDFMapIndexDecimalCol(int mapColumnNum, int indexColumnNum, int outputColumnNum) {
+ super(mapColumnNum, indexColumnNum, outputColumnNum);
+ }
+
+ @Override
+ public String vectorExpressionParameters() {
+ return getColumnParamString(0, getMapColumnNum()) + ", key: "
+ + getColumnParamString(1, getIndexColumnNum());
+ }
+
+ @Override
+ public VectorExpressionDescriptor.Descriptor getDescriptor() {
+ return (new VectorExpressionDescriptor.Builder())
+ .setMode(
+ VectorExpressionDescriptor.Mode.PROJECTION)
+ .setNumArguments(2)
+ .setArgumentTypes(
+ VectorExpressionDescriptor.ArgumentType.MAP,
+ VectorExpressionDescriptor.ArgumentType.DECIMAL)
+ .setInputExpressionTypes(
+ VectorExpressionDescriptor.InputExpressionType.COLUMN,
+ VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
+ }
+
+ @Override
+ public int findInMap(ColumnVector indexColumnVector, int indexBatchIndex,
+ MapColumnVector mapColumnVector, int mapBatchIndex) {
+ final int offset = (int) mapColumnVector.offsets[mapBatchIndex];
+ final int count = (int) mapColumnVector.lengths[mapBatchIndex];
+ HiveDecimalWritable[] keys = ((DecimalColumnVector) mapColumnVector.keys).vector;
+ final HiveDecimalWritable index =
+ ((DecimalColumnVector) indexColumnVector).vector[indexBatchIndex];
+ for (int i = 0; i < count; i++) {
+ if (index.compareTo(keys[offset + i]) == 0) {
+ return offset + i;
+ }
+ }
+ return -1;
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/66ea3266/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDecimalScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDecimalScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDecimalScalar.java
new file mode 100644
index 0000000..7bdc555
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDecimalScalar.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+
+/**
+ * Returns value of Map.
+ * Extends {@link VectorUDFMapIndexBaseScalar}
+ */
+public class VectorUDFMapIndexDecimalScalar extends VectorUDFMapIndexBaseScalar {
+
+ private static final long serialVersionUID = 1L;
+
+ private HiveDecimal key;
+ private double doubleKey;
+
+ public VectorUDFMapIndexDecimalScalar() {
+ super();
+ }
+
+ public VectorUDFMapIndexDecimalScalar(int mapColumnNum, HiveDecimal key, int outputColumnNum) {
+ super(mapColumnNum, outputColumnNum);
+ this.key = key;
+ doubleKey = key.doubleValue();
+ }
+
+ @Override
+ public String vectorExpressionParameters() {
+ return getColumnParamString(0, getMapColumnNum()) + ", key: " + key;
+ }
+
+ @Override
+ public VectorExpressionDescriptor.Descriptor getDescriptor() {
+ return (new VectorExpressionDescriptor.Builder())
+ .setMode(
+ VectorExpressionDescriptor.Mode.PROJECTION)
+ .setNumArguments(2)
+ .setArgumentTypes(
+ VectorExpressionDescriptor.ArgumentType.MAP,
+ VectorExpressionDescriptor.ArgumentType.DECIMAL)
+ .setInputExpressionTypes(
+ VectorExpressionDescriptor.InputExpressionType.COLUMN,
+ VectorExpressionDescriptor.InputExpressionType.SCALAR).build();
+ }
+
+ @Override
+ public int findScalarInMap(MapColumnVector mapColumnVector, int mapBatchIndex) {
+ final int offset = (int) mapColumnVector.offsets[mapBatchIndex];
+ final int count = (int) mapColumnVector.lengths[mapBatchIndex];
+
+ ColumnVector keys = mapColumnVector.keys;
+ if (keys instanceof DecimalColumnVector) {
+ HiveDecimalWritable[] decimalKeyVector = ((DecimalColumnVector) keys).vector;
+ for (int i = 0; i < count; i++) {
+ if (decimalKeyVector[offset + i].compareTo(key) == 0) {
+ return offset + i;
+ }
+ }
+ } else {
+
+ // For some strange reason we receive a double column vector...
+ // The way we do VectorExpressionDescriptor may be inadequate in this case...
+ double[] doubleKeyVector = ((DoubleColumnVector) keys).vector;
+ for (int i = 0; i < count; i++) {
+ if (doubleKeyVector[offset + i] == doubleKey) {
+ return offset + i;
+ }
+ }
+ }
+ return -1;
+ }
+
+}
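
Decimal key matching is scale-insensitive under HiveDecimalWritable.compareTo, while the
fallback path compares converted double values; precomputing doubleKey in the constructor keeps
that fallback loop free of per-row conversions, at the cost of double precision for very wide
decimals. A small sketch of the two comparison semantics (illustrative, not from the patch):

    import org.apache.hadoop.hive.common.type.HiveDecimal;
    import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;

    public class DecimalKeySketch {
      public static void main(String[] args) {
        HiveDecimal key = HiveDecimal.create("1.50");
        HiveDecimalWritable stored = new HiveDecimalWritable(HiveDecimal.create("1.5"));
        // Decimal comparison matches regardless of how the value was written.
        boolean decimalMatch = stored.compareTo(key) == 0;  // true
        // The fallback compares doubles; exact here, lossy for very wide decimals.
        boolean doubleMatch =
            stored.getHiveDecimal().doubleValue() == key.doubleValue();  // true here
        System.out.println(decimalMatch + " " + doubleMatch);
      }
    }
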
http://git-wip-us.apache.org/repos/asf/hive/blob/66ea3266/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDoubleCol.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDoubleCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDoubleCol.java
index 48c64a7..ca05269 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDoubleCol.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDoubleCol.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions;
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
/**
@@ -57,7 +58,17 @@ public class VectorUDFMapIndexDoubleCol extends VectorUDFMapIndexBaseCol {
}
@Override
- protected Object getKeyByIndex(ColumnVector cv, int index) {
- return ((DoubleColumnVector) cv).vector[index];
+ public int findInMap(ColumnVector indexColumnVector, int indexBatchIndex,
+ MapColumnVector mapColumnVector, int mapBatchIndex) {
+ final int offset = (int) mapColumnVector.offsets[mapBatchIndex];
+ final int count = (int) mapColumnVector.lengths[mapBatchIndex];
+ double[] keys = ((DoubleColumnVector) mapColumnVector.keys).vector;
+ final double index = ((DoubleColumnVector) indexColumnVector).vector[indexBatchIndex];
+ for (int i = 0; i < count; i++) {
+ if (index == keys[offset + i]) {
+ return offset + i;
+ }
+ }
+ return -1;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/66ea3266/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDoubleScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDoubleScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDoubleScalar.java
index d624176..5c064f6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDoubleScalar.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDoubleScalar.java
@@ -18,9 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
/**
@@ -29,13 +28,15 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
*/
public class VectorUDFMapIndexDoubleScalar extends VectorUDFMapIndexBaseScalar {
- private HiveDecimal key;
+ private static final long serialVersionUID = 1L;
+
+ private double key;
public VectorUDFMapIndexDoubleScalar() {
super();
}
- public VectorUDFMapIndexDoubleScalar(int mapColumnNum, HiveDecimal key, int outputColumnNum) {
+ public VectorUDFMapIndexDoubleScalar(int mapColumnNum, double key, int outputColumnNum) {
super(mapColumnNum, outputColumnNum);
this.key = key;
}
@@ -53,24 +54,23 @@ public class VectorUDFMapIndexDoubleScalar extends VectorUDFMapIndexBaseScalar {
.setNumArguments(2)
.setArgumentTypes(
VectorExpressionDescriptor.ArgumentType.MAP,
- VectorExpressionDescriptor.ArgumentType.DECIMAL)
+ VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY)
.setInputExpressionTypes(
VectorExpressionDescriptor.InputExpressionType.COLUMN,
VectorExpressionDescriptor.InputExpressionType.SCALAR).build();
}
@Override
- protected Object getKeyByIndex(ColumnVector cv, int index) {
- return ((DoubleColumnVector) cv).vector[index];
- }
-
- @Override
- public Object getCurrentKey(int index) {
- return key;
+ public int findScalarInMap(MapColumnVector mapColumnVector, int mapBatchIndex) {
+ final int offset = (int) mapColumnVector.offsets[mapBatchIndex];
+ final int count = (int) mapColumnVector.lengths[mapBatchIndex];
+ double[] keys = ((DoubleColumnVector) mapColumnVector.keys).vector;
+ for (int i = 0; i < count; i++) {
+ if (key == keys[offset + i]) {
+ return offset + i;
+ }
+ }
+ return -1;
}
- @Override
- protected boolean compareKeyInternal(Object columnKey, Object otherKey) {
- return otherKey.equals(((HiveDecimal) columnKey).doubleValue());
- }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/66ea3266/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexLongCol.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexLongCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexLongCol.java
index 5094d0b..482d83f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexLongCol.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexLongCol.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions;
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
/**
@@ -57,7 +58,17 @@ public class VectorUDFMapIndexLongCol extends VectorUDFMapIndexBaseCol {
}
@Override
- protected Object getKeyByIndex(ColumnVector cv, int index) {
- return ((LongColumnVector) cv).vector[index];
+ public int findInMap(ColumnVector indexColumnVector, int indexBatchIndex,
+ MapColumnVector mapColumnVector, int mapBatchIndex) {
+ final int offset = (int) mapColumnVector.offsets[mapBatchIndex];
+ final int count = (int) mapColumnVector.lengths[mapBatchIndex];
+ long[] keys = ((LongColumnVector) mapColumnVector.keys).vector;
+ final long index = ((LongColumnVector) indexColumnVector).vector[indexBatchIndex];
+ for (int i = 0; i < count; i++) {
+ if (index == keys[offset + i]) {
+ return offset + i;
+ }
+ }
+ return -1;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/66ea3266/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexLongScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexLongScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexLongScalar.java
index f7433e6..e604503 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexLongScalar.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexLongScalar.java
@@ -18,8 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
-import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
/**
@@ -28,6 +28,8 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
*/
public class VectorUDFMapIndexLongScalar extends VectorUDFMapIndexBaseScalar {
+ private static final long serialVersionUID = 1L;
+
private long key;
public VectorUDFMapIndexLongScalar() {
@@ -59,12 +61,15 @@ public class VectorUDFMapIndexLongScalar extends VectorUDFMapIndexBaseScalar {
}
@Override
- protected Object getKeyByIndex(ColumnVector cv, int index) {
- return ((LongColumnVector) cv).vector[index];
- }
-
- @Override
- public Object getCurrentKey(int index) {
- return key;
+ public int findScalarInMap(MapColumnVector mapColumnVector, int mapBatchIndex) {
+ final int offset = (int) mapColumnVector.offsets[mapBatchIndex];
+ final int count = (int) mapColumnVector.lengths[mapBatchIndex];
+ long[] keys = ((LongColumnVector) mapColumnVector.keys).vector;
+ for (int i = 0; i < count; i++) {
+ if (key == keys[offset + i]) {
+ return offset + i;
+ }
+ }
+ return -1;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/66ea3266/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexStringCol.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexStringCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexStringCol.java
index 4eefc6f..905d879 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexStringCol.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexStringCol.java
@@ -18,12 +18,11 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
-import org.apache.commons.lang.ArrayUtils;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
-import java.util.Arrays;
/**
* Returns value of Map.
@@ -60,14 +59,25 @@ public class VectorUDFMapIndexStringCol extends VectorUDFMapIndexBaseCol {
}
@Override
- protected Object getKeyByIndex(ColumnVector cv, int index) {
- BytesColumnVector bytesCV = (BytesColumnVector) cv;
- return ArrayUtils.subarray(bytesCV.vector[index], bytesCV.start[index],
- bytesCV.start[index] + bytesCV.length[index]);
- }
-
- @Override
- protected boolean compareKeyInternal(Object columnKey, Object otherKey) {
- return Arrays.equals((byte[])columnKey, (byte[]) otherKey);
+ public int findInMap(ColumnVector indexColumnVector, int indexBatchIndex,
+ MapColumnVector mapColumnVector, int mapBatchIndex) {
+ final int offset = (int) mapColumnVector.offsets[mapBatchIndex];
+ final int count = (int) mapColumnVector.lengths[mapBatchIndex];
+ BytesColumnVector keyColVector = (BytesColumnVector) mapColumnVector.keys;
+ byte[][] keyVector = keyColVector.vector;
+ int[] keyStart = keyColVector.start;
+ int[] keyLength = keyColVector.length;
+ BytesColumnVector indexColVector = (BytesColumnVector) indexColumnVector;
+ byte[] indexBytes = indexColVector.vector[indexBatchIndex];
+ int indexStart = indexColVector.start[indexBatchIndex];
+ int indexLength = indexColVector.length[indexBatchIndex];
+ for (int i = 0; i < count; i++) {
+ final int keyOffset = offset + i;
+ if (StringExpr.equal(indexBytes, indexStart, indexLength,
+ keyVector[keyOffset], keyStart[keyOffset], keyLength[keyOffset])) {
+ return offset + i;
+ }
+ }
+ return -1;
}
}
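
StringExpr.equal compares the probe bytes against each key's (start, length) range in place,
which is why the rewrite above can drop the per-key ArrayUtils.subarray copies and
Arrays.equals calls of the old implementation. A minimal sketch:

    import java.nio.charset.StandardCharsets;
    import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;

    public class StringEqualSketch {
      public static void main(String[] args) {
        byte[] packedKeys = "applebanana".getBytes(StandardCharsets.UTF_8);
        byte[] probe = "banana".getBytes(StandardCharsets.UTF_8);
        // Compare probe against packedKeys[5..11) without allocating a sub-array.
        boolean same = StringExpr.equal(probe, 0, probe.length, packedKeys, 5, 6);
        System.out.println(same);  // true
      }
    }
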
http://git-wip-us.apache.org/repos/asf/hive/blob/66ea3266/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexStringScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexStringScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexStringScalar.java
index b08cd3a..0d9b5ba 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexStringScalar.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexStringScalar.java
@@ -18,13 +18,10 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
-import org.apache.commons.lang.ArrayUtils;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
-import java.util.Arrays;
-
/**
* Returns value of Map.
* Extends {@link VectorUDFMapIndexBaseScalar}
@@ -62,19 +59,20 @@ public class VectorUDFMapIndexStringScalar extends VectorUDFMapIndexBaseScalar {
}
@Override
- protected Object getKeyByIndex(ColumnVector cv, int index) {
- BytesColumnVector bytesCV = (BytesColumnVector) cv;
- return ArrayUtils.subarray(bytesCV.vector[index], bytesCV.start[index],
- bytesCV.start[index] + bytesCV.length[index]);
- }
-
- @Override
- public Object getCurrentKey(int index) {
- return key;
- }
-
- @Override
- protected boolean compareKeyInternal(Object columnKey, Object otherKey) {
- return Arrays.equals((byte[])columnKey, (byte[]) otherKey);
+ public int findScalarInMap(MapColumnVector mapColumnVector, int mapBatchIndex) {
+ final int offset = (int) mapColumnVector.offsets[mapBatchIndex];
+ final int count = (int) mapColumnVector.lengths[mapBatchIndex];
+ BytesColumnVector keyColVector = (BytesColumnVector) mapColumnVector.keys;
+ byte[][] keyVector = keyColVector.vector;
+ int[] keyStart = keyColVector.start;
+ int[] keyLength = keyColVector.length;
+ for (int i = 0; i < count; i++) {
+ final int keyOffset = offset + i;
+ if (StringExpr.equal(key, 0, key.length,
+ keyVector[keyOffset], keyStart[keyOffset], keyLength[keyOffset])) {
+ return offset + i;
+ }
+ }
+ return -1;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/66ea3266/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIndex.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIndex.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIndex.java
index 4c0cb2b..13cc284 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIndex.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIndex.java
@@ -25,6 +25,8 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
import org.apache.hadoop.hive.ql.exec.vector.expressions.ListIndexColColumn;
import org.apache.hadoop.hive.ql.exec.vector.expressions.ListIndexColScalar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMapIndexDecimalCol;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMapIndexDecimalScalar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMapIndexDoubleCol;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMapIndexDoubleScalar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMapIndexLongCol;
@@ -48,8 +50,9 @@ import org.apache.hadoop.io.IntWritable;
@Description(name = "index", value = "_FUNC_(a, n) - Returns the n-th element of a ")
@VectorizedExpressions({ListIndexColScalar.class, ListIndexColColumn.class,
VectorUDFMapIndexStringScalar.class, VectorUDFMapIndexLongScalar.class,
- VectorUDFMapIndexDoubleScalar.class, VectorUDFMapIndexStringCol.class,
- VectorUDFMapIndexLongCol.class, VectorUDFMapIndexDoubleCol.class})
+ VectorUDFMapIndexDoubleScalar.class, VectorUDFMapIndexDecimalScalar.class,
+ VectorUDFMapIndexStringCol.class, VectorUDFMapIndexLongCol.class,
+ VectorUDFMapIndexDoubleCol.class, VectorUDFMapIndexDecimalCol.class})
public class GenericUDFIndex extends GenericUDF {
private transient MapObjectInspector mapOI;
http://git-wip-us.apache.org/repos/asf/hive/blob/66ea3266/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java
index dfbf9d4..af73ee6 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java
@@ -22,8 +22,10 @@ import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.text.ParseException;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
+import java.util.Map;
import java.util.Random;
import java.util.Set;
@@ -925,7 +927,60 @@ public class VectorRandomRowSource {
{
List<Object> valueList = generationSpec.getValueList();
final int valueCount = valueList.size();
- object = valueList.get(r.nextInt(valueCount));
+
+ TypeInfo typeInfo = generationSpec.getTypeInfo();
+ Category category = typeInfo.getCategory();
+ switch (category) {
+ case PRIMITIVE:
+ case STRUCT:
+ object = valueList.get(r.nextInt(valueCount));
+ break;
+ case LIST:
+ {
+ final int elementCount = r.nextInt(valueCount);
+
+ ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
+ TypeInfo elementTypeInfo = listTypeInfo.getListElementTypeInfo();
+ final ObjectInspector elementObjectInspector =
+ TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(
+ elementTypeInfo);
+ List<Object> list = new ArrayList<Object>(elementCount);
+ for (int i = 0; i < elementCount; i++) {
+ Object elementWritable =
+ randomWritable(elementTypeInfo, elementObjectInspector,
+ allowNull);
+ list.add(elementWritable);
+ }
+ object = list;
+ }
+ break;
+ case MAP:
+ {
+ final int elementCount = r.nextInt(valueCount);
+
+ MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
+ TypeInfo valueTypeInfo = mapTypeInfo.getMapValueTypeInfo();
+ final ObjectInspector valueObjectInspector =
+ TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(
+ valueTypeInfo);
+ Map<Object,Object> map = new HashMap<Object,Object>(elementCount);
+ for (int i = 0; i < elementCount; i++) {
+ Object key = valueList.get(r.nextInt(valueCount));
+ Object valueWritable =
+ randomWritable(valueTypeInfo, valueObjectInspector,
+ allowNull);
+ if (!map.containsKey(key)) {
+ map.put(
+ key,
+ valueWritable);
+ }
+ }
+ object = map;
+ }
+ break;
+ default:
+ throw new RuntimeException("Unexpected category " + category);
+ }
}
break;
default:
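
Because the MAP branch above draws keys from valueList with replacement and the containsKey
guard keeps only the first value for a duplicate key, the generated map can legitimately come
out smaller than elementCount. A tiny illustration of that first-value-wins behavior
(illustrative only, not code from the patch):

    import java.util.HashMap;
    import java.util.Map;

    public class RandomMapSketch {
      public static void main(String[] args) {
        Map<String, Integer> map = new HashMap<>();
        String[] drawnKeys = { "a", "b", "a" };  // "a" drawn twice
        for (int i = 0; i < drawnKeys.length; i++) {
          if (!map.containsKey(drawnKeys[i])) {
            map.put(drawnKeys[i], i);
          }
        }
        System.out.println(map.size());    // 2, not 3
        System.out.println(map.get("a"));  // 0: the first value wins
      }
    }
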
http://git-wip-us.apache.org/repos/asf/hive/blob/66ea3266/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorBetweenIn.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorBetweenIn.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorBetweenIn.java
index 3f1a137..5b69bdf 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorBetweenIn.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorBetweenIn.java
@@ -342,9 +342,9 @@ public class TestVectorBetweenIn {
List<Object> sortedList = new ArrayList<Object>(valueCount);
sortedList.addAll(valueList);
- Object object = valueList.get(0);
+ Object exampleObject = valueList.get(0);
WritableComparator writableComparator =
- WritableComparator.get((Class<? extends WritableComparable>) object.getClass());
+ WritableComparator.get((Class<? extends WritableComparable>) exampleObject.getClass());
sortedList.sort(writableComparator);
final boolean isInvert;