You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2018/06/15 05:58:17 UTC
[1/2] hive git commit: HIVE-19565: Vectorization: Fix NULL / Wrong
Results issues in STRING Functions (Matt McCline, reviewed by Teddy Choi)
Repository: hive
Updated Branches:
refs/heads/master 368d9cffe -> ce778adbc
http://git-wip-us.apache.org/repos/asf/hive/blob/ce778adb/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringUnary.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringUnary.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringUnary.java
new file mode 100644
index 0000000..90f7992
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringUnary.java
@@ -0,0 +1,368 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
+import org.apache.hadoop.hive.ql.exec.FunctionInfo;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow;
+import org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource;
+import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
+import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.GenerationSpec;
+import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.StringGenerationOption;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.TestVectorTimestampExtract.TimestampExtractTestMode;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIf;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+
+import junit.framework.Assert;
+
+import org.junit.Test;
+
+public class TestVectorStringUnary {
+
+ /** Runs the unary string function comparisons for the STRING type with a fixed seed. */
+ @Test
+ public void testString() throws Exception {
+   final Random seededRandom = new Random(83221);
+   doTests(seededRandom, "string");
+ }
+
+ /** Runs the unary string function comparisons for the CHAR(10) type with a fixed seed. */
+ @Test
+ public void testChar() throws Exception {
+   final Random seededRandom = new Random(12882);
+   doTests(seededRandom, "char(10)");
+ }
+
+ /** Runs the unary string function comparisons for the VARCHAR(15) type with a fixed seed. */
+ @Test
+ public void testVarchar() throws Exception {
+   final Random seededRandom = new Random(12882);
+   doTests(seededRandom, "varchar(15)");
+ }
+
+ /**
+  * The evaluation modes whose results are compared by this test.
+  * {@code count} caches the number of declared modes.
+  */
+ public enum StringUnaryTestMode {
+   ROW_MODE, ADAPTOR, VECTOR_EXPRESSION;
+
+   static final int count = StringUnaryTestMode.values().length;
+ }
+
+ private void doTests(Random random, String typeName)
+ throws Exception {
+
+ if (typeName.equals("string")) {
+
+ // These functions only work on the STRING type.
+ doTests(random, typeName, "ltrim");
+ doTests(random, typeName, "rtrim");
+ doTests(random, typeName, "trim");
+
+ doTests(random, typeName, "initcap");
+
+ doTests(random, typeName, "hex");
+ }
+
+ doTests(random, typeName, "lower");
+ doTests(random, typeName, "upper");
+
+ doTests(random, typeName, "char_length");
+ doTests(random, typeName, "length");
+ doTests(random, typeName, "octet_length");
+ }
+
+ /**
+  * Compares one unary function across all {@link StringUnaryTestMode} values.
+  *
+  * Builds a single input column of type {@code typeName}, generates 100000 random rows,
+  * evaluates {@code functionName} once per test mode, and fails the test on the first row
+  * where a vectorized result differs from the row-mode result.
+  *
+  * @param random       random source used for the schema, the rows and the batches
+  * @param typeName     type name of the input column
+  * @param functionName name of the function looked up in the FunctionRegistry
+  */
+ private void doTests(Random random, String typeName, String functionName)
+ throws Exception {
+
+ TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName);
+
+ // The length functions produce an int; every other function returns the input type.
+ TypeInfo targetTypeInfo;
+ if (functionName.equals("char_length") ||
+ functionName.equals("length") ||
+ functionName.equals("octet_length")) {
+ targetTypeInfo = TypeInfoFactory.intTypeInfo;
+ } else {
+ targetTypeInfo = typeInfo;
+ }
+
+ List<GenerationSpec> generationSpecList = new ArrayList<GenerationSpec>();
+ List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList =
+ new ArrayList<DataTypePhysicalVariation>();
+
+ // Describe the single input column ("col0") and the column expression that reads it.
+ List<String> columns = new ArrayList<String>();
+ int columnNum = 0;
+ ExprNodeDesc col1Expr;
+ StringGenerationOption stringGenerationOption =
+ new StringGenerationOption(true, true);
+ generationSpecList.add(
+ GenerationSpec.createStringFamily(
+ typeInfo, stringGenerationOption));
+ explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE);
+
+ String columnName = "col" + (columnNum++);
+ col1Expr = new ExprNodeColumnDesc(typeInfo, columnName, "table", false);
+ columns.add(columnName);
+
+ VectorRandomRowSource rowSource = new VectorRandomRowSource();
+
+ rowSource.initGenerationSpecSchema(
+ random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true,
+ explicitDataTypePhysicalVariationList);
+
+ List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
+ children.add(col1Expr);
+
+ //----------------------------------------------------------------------------------------------
+
+ String[] columnNames = columns.toArray(new String[0]);
+
+ // One scratch column of the result type is declared for the function output.
+ String[] outputScratchTypeNames = new String[] { targetTypeInfo.getTypeName() };
+ DataTypePhysicalVariation[] outputDataTypePhysicalVariations =
+ new DataTypePhysicalVariation[] { DataTypePhysicalVariation.NONE };
+
+ VectorizedRowBatchCtx batchContext =
+ new VectorizedRowBatchCtx(
+ columnNames,
+ rowSource.typeInfos(),
+ rowSource.dataTypePhysicalVariations(),
+ /* dataColumnNums */ null,
+ /* partitionColumnCount */ 0,
+ /* virtualColumnCount */ 0,
+ /* neededVirtualColumns */ null,
+ outputScratchTypeNames,
+ outputDataTypePhysicalVariations);
+
+ Object[][] randomRows = rowSource.randomRows(100000);
+
+ VectorRandomBatchSource batchSource =
+ VectorRandomBatchSource.createInterestingBatches(
+ random,
+ rowSource,
+ randomRows,
+ null);
+
+ // Resolve the function by name; a SemanticException during lookup fails the test.
+ GenericUDF genericUdf;
+ FunctionInfo funcInfo = null;
+ try {
+ funcInfo = FunctionRegistry.getFunctionInfo(functionName);
+ } catch (SemanticException e) {
+ Assert.fail("Failed to load " + functionName + " " + e);
+ }
+ genericUdf = funcInfo.getGenericUDF();
+
+ // Evaluate the same rows once per test mode; results are stored by mode ordinal.
+ final int rowCount = randomRows.length;
+ Object[][] resultObjectsArray = new Object[StringUnaryTestMode.count][];
+ for (int i = 0; i < StringUnaryTestMode.count; i++) {
+
+ Object[] resultObjects = new Object[rowCount];
+ resultObjectsArray[i] = resultObjects;
+
+ StringUnaryTestMode stringUnaryTestMode = StringUnaryTestMode.values()[i];
+ switch (stringUnaryTestMode) {
+ case ROW_MODE:
+ doRowIfTest(
+ typeInfo, targetTypeInfo,
+ columns, children, randomRows, rowSource.rowStructObjectInspector(),
+ genericUdf, resultObjects);
+ break;
+ case ADAPTOR:
+ case VECTOR_EXPRESSION:
+ doVectorIfTest(
+ typeInfo,
+ targetTypeInfo,
+ columns,
+ rowSource.typeInfos(),
+ rowSource.dataTypePhysicalVariations(),
+ children,
+ stringUnaryTestMode,
+ batchSource,
+ batchContext,
+ genericUdf,
+ resultObjects);
+ break;
+ default:
+ throw new RuntimeException("Unexpected STRING Unary test mode " + stringUnaryTestMode);
+ }
+ }
+
+ for (int i = 0; i < rowCount; i++) {
+ // Row-mode is the expected value.
+ Object expectedResult = resultObjectsArray[0][i];
+
+ // Index 0 is ROW_MODE (the first enum constant), so comparison starts at index 1.
+ for (int v = 1; v < StringUnaryTestMode.count; v++) {
+ Object vectorResult = resultObjectsArray[v][i];
+ if (expectedResult == null || vectorResult == null) {
+ // Exactly one of the two results being NULL is a mismatch.
+ if (expectedResult != null || vectorResult != null) {
+ Assert.fail(
+ "Row " + i + " " + StringUnaryTestMode.values()[v] +
+ " typeName " + typeName +
+ " result is NULL " + (vectorResult == null) +
+ " does not match row-mode expected result is NULL " + (expectedResult == null) +
+ " functionName " + functionName +
+ " genericUdf " + genericUdf.getClass().getSimpleName());
+ }
+ } else {
+
+ if (!expectedResult.equals(vectorResult)) {
+ Assert.fail(
+ "Row " + i + " " + StringUnaryTestMode.values()[v] +
+ " typeName " + typeName +
+ " result \"" + vectorResult.toString() + "\"" +
+ " (" + vectorResult.getClass().getSimpleName() + ")" +
+ " does not match row-mode expected result \"" + expectedResult.toString() + "\"" +
+ " (" + expectedResult.getClass().getSimpleName() + ")" +
+ " functionName " + functionName +
+ " genericUdf " + genericUdf.getClass().getSimpleName());
+ }
+ }
+ }
+ }
+ }
+
+ /**
+  * Evaluates the function in row mode over every random row and stores a standard writable
+  * copy of each result in {@code resultObjects}.
+  *
+  * @param typeInfo       type of the input column (not read by this method)
+  * @param targetTypeInfo result type of the function
+  * @param columns        input column names (not read by this method)
+  * @param children       argument expressions of the function
+  * @param randomRows     the rows to evaluate, one result is produced per row
+  * @param rowInspector   object inspector describing a row
+  * @param genericUdf     the function under test
+  * @param resultObjects  output array, filled at the same index as the source row
+  */
+ private void doRowIfTest(TypeInfo typeInfo, TypeInfo targetTypeInfo,
+     List<String> columns, List<ExprNodeDesc> children,
+     Object[][] randomRows, ObjectInspector rowInspector,
+     GenericUDF genericUdf, Object[] resultObjects) throws Exception {
+
+   // Declare the expression with the function's result type, as doVectorIfTest does.
+   // The input type is not the result type for the length functions, which return int.
+   ExprNodeGenericFuncDesc exprDesc =
+       new ExprNodeGenericFuncDesc(targetTypeInfo, genericUdf, children);
+   HiveConf hiveConf = new HiveConf();
+   ExprNodeEvaluator evaluator =
+       ExprNodeEvaluatorFactory.get(exprDesc, hiveConf);
+   evaluator.initialize(rowInspector);
+
+   ObjectInspector objectInspector = TypeInfoUtils
+       .getStandardWritableObjectInspectorFromTypeInfo(targetTypeInfo);
+
+   final int rowCount = randomRows.length;
+   for (int i = 0; i < rowCount; i++) {
+     Object[] row = randomRows[i];
+     Object result = evaluator.evaluate(row);
+     // Copy the result so it is stored as a separate object per row.
+     Object copyResult =
+         ObjectInspectorUtils.copyToStandardObject(
+             result, objectInspector, ObjectInspectorCopyOption.WRITABLE);
+     resultObjects[i] = copyResult;
+   }
+ }
+
+ /**
+  * Extracts the result column of every logical row in {@code batch} and stores a standard
+  * writable copy in {@code resultObjects}, starting at {@code rowIndex}.
+  *
+  * @param batch                  the batch that has just been evaluated
+  * @param rowIndex               index in {@code resultObjects} of the batch's first row
+  * @param resultVectorExtractRow extractor initialized for the result column
+  * @param scrqtchRow             one-element scratch array that receives the extracted value
+  * @param targetTypeInfo         result type of the function
+  * @param resultObjects          output array
+  * @throws AssertionError if a row cannot be extracted; the original exception is the cause
+  */
+ private void extractResultObjects(VectorizedRowBatch batch, int rowIndex,
+     VectorExtractRow resultVectorExtractRow, Object[] scrqtchRow,
+     TypeInfo targetTypeInfo, Object[] resultObjects) {
+
+   ObjectInspector objectInspector = TypeInfoUtils
+       .getStandardWritableObjectInspectorFromTypeInfo(targetTypeInfo);
+
+   boolean selectedInUse = batch.selectedInUse;
+   int[] selected = batch.selected;
+   for (int logicalIndex = 0; logicalIndex < batch.size; logicalIndex++) {
+     // When the selected array is in use, logical rows map to batch rows through it.
+     final int batchIndex = (selectedInUse ? selected[logicalIndex] : logicalIndex);
+
+     try {
+       resultVectorExtractRow.extractRow(batch, batchIndex, scrqtchRow);
+     } catch (Exception e) {
+       // Fail the test, keeping the original exception as the cause so its stack trace
+       // is reported instead of only its toString().
+       throw new AssertionError(e.toString(), e);
+     }
+
+     Object copyResult =
+         ObjectInspectorUtils.copyToStandardObject(
+             scrqtchRow[0], objectInspector, ObjectInspectorCopyOption.WRITABLE);
+     resultObjects[rowIndex++] = copyResult;
+   }
+ }
+
+ /**
+  * Evaluates the function through a vector expression over every batch of
+  * {@code batchSource} and collects the results into {@code resultObjects}.
+  *
+  * In {@code ADAPTOR} mode HIVE_TEST_VECTOR_ADAPTOR_OVERRIDE is enabled before the
+  * expression is built; in every other mode the configuration is left at its defaults.
+  *
+  * @param typeInfo                   type of the input column (not read by this method)
+  * @param targetTypeInfo             result type of the function
+  * @param columns                    input column names
+  * @param typeInfos                  types of the input columns
+  * @param dataTypePhysicalVariations physical variations of the input columns
+  * @param children                   argument expressions of the function
+  * @param stringUnaryTestMode        the vectorized mode being exercised
+  * @param batchSource                source of the batches to evaluate
+  * @param batchContext               context used to create the row batch
+  * @param genericUdf                 the function under test
+  * @param resultObjects              output array, filled in batch iteration order
+  */
+ private void doVectorIfTest(TypeInfo typeInfo, TypeInfo targetTypeInfo,
+     List<String> columns,
+     TypeInfo[] typeInfos, DataTypePhysicalVariation[] dataTypePhysicalVariations,
+     List<ExprNodeDesc> children,
+     StringUnaryTestMode stringUnaryTestMode,
+     VectorRandomBatchSource batchSource, VectorizedRowBatchCtx batchContext,
+     GenericUDF genericUdf, Object[] resultObjects)
+     throws Exception {
+
+   ExprNodeGenericFuncDesc exprDesc =
+       new ExprNodeGenericFuncDesc(targetTypeInfo, genericUdf, children);
+
+   HiveConf hiveConf = new HiveConf();
+   if (stringUnaryTestMode == StringUnaryTestMode.ADAPTOR) {
+     hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_TEST_VECTOR_ADAPTOR_OVERRIDE, true);
+   }
+
+   VectorizationContext vectorizationContext =
+       new VectorizationContext(
+           "name",
+           columns,
+           Arrays.asList(typeInfos),
+           Arrays.asList(dataTypePhysicalVariations),
+           hiveConf);
+   VectorExpression vectorExpression = vectorizationContext.getVectorExpression(exprDesc);
+
+   VectorizedRowBatch batch = batchContext.createVectorizedRowBatch();
+
+   // The result is read from the column index right after the input columns
+   // (presumably the scratch column declared by the caller's batch context).
+   VectorExtractRow resultVectorExtractRow = new VectorExtractRow();
+   resultVectorExtractRow.init(new TypeInfo[] { targetTypeInfo }, new int[] { columns.size() });
+   Object[] scrqtchRow = new Object[1];
+
+   // The leftover "*DEBUG*" console print was removed; it only added noise to test output.
+
+   batchSource.resetBatchIteration();
+   int rowIndex = 0;
+   while (batchSource.fillNextBatch(batch)) {
+     vectorExpression.evaluate(batch);
+     extractResultObjects(batch, rowIndex, resultVectorExtractRow, scrqtchRow,
+         targetTypeInfo, resultObjects);
+     rowIndex += batch.size;
+   }
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/ce778adb/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorSubStr.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorSubStr.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorSubStr.java
new file mode 100644
index 0000000..284a47a
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorSubStr.java
@@ -0,0 +1,347 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
+import org.apache.hadoop.hive.ql.exec.FunctionInfo;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow;
+import org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource;
+import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
+import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.GenerationSpec;
+import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.StringGenerationOption;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIf;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.io.IntWritable;
+
+import junit.framework.Assert;
+
+import org.junit.Test;
+
+public class TestVectorSubStr {
+
+ /** Runs the randomized SUBSTR comparisons with a fixed seed. */
+ @Test
+ public void testString() throws Exception {
+   final Random seededRandom = new Random(83221);
+   doTests(seededRandom);
+ }
+
+ /**
+  * The evaluation modes whose results are compared by this test.
+  * {@code count} caches the number of declared modes.
+  */
+ public enum SubStrTestMode {
+   ROW_MODE, ADAPTOR, VECTOR_EXPRESSION;
+
+   static final int count = SubStrTestMode.values().length;
+ }
+
+ private void doTests(Random random)
+ throws Exception {
+
+ for (int i = 0; i < 50; i++) {
+ doTests(random, false);
+ doTests(random, true);
+ }
+ }
+
+ /**
+  * Runs one randomized SUBSTR comparison across all {@link SubStrTestMode} values.
+  *
+  * Builds a single STRING input column, picks a random constant start position (and, when
+  * {@code useLength} is set, a random constant length), generates 100000 random rows,
+  * evaluates {@code substr} once per test mode, and fails the test on the first row where a
+  * vectorized result differs from the row-mode result.
+  *
+  * @param random    random source used for the schema, the constants, the rows and the batches
+  * @param useLength whether to add the third (length) argument to the call
+  */
+ private void doTests(Random random, boolean useLength)
+     throws Exception {
+
+   TypeInfo typeInfo = TypeInfoFactory.stringTypeInfo;
+   TypeInfo targetTypeInfo = typeInfo;
+   String functionName = "substr";
+
+   List<GenerationSpec> generationSpecList = new ArrayList<GenerationSpec>();
+   List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList =
+       new ArrayList<DataTypePhysicalVariation>();
+
+   // Describe the single input column ("col0") and the column expression that reads it.
+   List<String> columns = new ArrayList<String>();
+   int columnNum = 0;
+   ExprNodeDesc col1Expr;
+   StringGenerationOption stringGenerationOption =
+       new StringGenerationOption(true, true);
+   generationSpecList.add(
+       GenerationSpec.createStringFamily(
+           typeInfo, stringGenerationOption));
+   explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE);
+
+   String columnName = "col" + (columnNum++);
+   col1Expr = new ExprNodeColumnDesc(typeInfo, columnName, "table", false);
+   columns.add(columnName);
+
+   VectorRandomRowSource rowSource = new VectorRandomRowSource();
+
+   rowSource.initGenerationSpecSchema(
+       random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true,
+       explicitDataTypePhysicalVariationList);
+
+   List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
+   children.add(col1Expr);
+
+   // Constant start position in the range [-10, 10].
+   final int position = 10 - random.nextInt(21);
+   Object scalar2Object =
+       Integer.valueOf(position);
+   ExprNodeDesc col2Expr = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, scalar2Object);
+   children.add(col2Expr);
+
+   if (useLength) {
+
+     // Constant length in the range [0, 11].
+     Object scalar3Object = random.nextInt(12);
+     ExprNodeDesc col3Expr = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, scalar3Object);
+     children.add(col3Expr);
+   }
+
+   //----------------------------------------------------------------------------------------------
+
+   String[] columnNames = columns.toArray(new String[0]);
+
+   // One scratch column of the result type is declared for the function output.
+   String[] outputScratchTypeNames = new String[] { targetTypeInfo.getTypeName() };
+   DataTypePhysicalVariation[] outputDataTypePhysicalVariations =
+       new DataTypePhysicalVariation[] { DataTypePhysicalVariation.NONE };
+
+   VectorizedRowBatchCtx batchContext =
+       new VectorizedRowBatchCtx(
+           columnNames,
+           rowSource.typeInfos(),
+           rowSource.dataTypePhysicalVariations(),
+           /* dataColumnNums */ null,
+           /* partitionColumnCount */ 0,
+           /* virtualColumnCount */ 0,
+           /* neededVirtualColumns */ null,
+           outputScratchTypeNames,
+           outputDataTypePhysicalVariations);
+
+   Object[][] randomRows = rowSource.randomRows(100000);
+
+   VectorRandomBatchSource batchSource =
+       VectorRandomBatchSource.createInterestingBatches(
+           random,
+           rowSource,
+           randomRows,
+           null);
+
+   // Resolve the function by name; a SemanticException during lookup fails the test.
+   GenericUDF genericUdf;
+   FunctionInfo funcInfo = null;
+   try {
+     funcInfo = FunctionRegistry.getFunctionInfo(functionName);
+   } catch (SemanticException e) {
+     Assert.fail("Failed to load " + functionName + " " + e);
+   }
+   genericUdf = funcInfo.getGenericUDF();
+
+   // Evaluate the same rows once per test mode; results are stored by mode ordinal.
+   final int rowCount = randomRows.length;
+   Object[][] resultObjectsArray = new Object[SubStrTestMode.count][];
+   for (int i = 0; i < SubStrTestMode.count; i++) {
+
+     Object[] resultObjects = new Object[rowCount];
+     resultObjectsArray[i] = resultObjects;
+
+     SubStrTestMode subStrTestMode = SubStrTestMode.values()[i];
+     switch (subStrTestMode) {
+     case ROW_MODE:
+       doRowIfTest(
+           typeInfo, targetTypeInfo,
+           columns, children, randomRows, rowSource.rowStructObjectInspector(),
+           genericUdf, resultObjects);
+       break;
+     case ADAPTOR:
+     case VECTOR_EXPRESSION:
+       doVectorIfTest(
+           typeInfo,
+           targetTypeInfo,
+           columns,
+           rowSource.typeInfos(),
+           rowSource.dataTypePhysicalVariations(),
+           children,
+           subStrTestMode,
+           batchSource,
+           batchContext,
+           genericUdf,
+           resultObjects);
+       break;
+     default:
+       // The message previously named the STRING Unary test by mistake.
+       throw new RuntimeException("Unexpected SubStr test mode " + subStrTestMode);
+     }
+   }
+
+   for (int i = 0; i < rowCount; i++) {
+     // Row-mode is the expected value.
+     Object expectedResult = resultObjectsArray[0][i];
+
+     // Index 0 is ROW_MODE (the first enum constant), so comparison starts at index 1.
+     for (int v = 1; v < SubStrTestMode.count; v++) {
+       Object vectorResult = resultObjectsArray[v][i];
+       if (expectedResult == null || vectorResult == null) {
+         // Exactly one of the two results being NULL is a mismatch.
+         if (expectedResult != null || vectorResult != null) {
+           Assert.fail(
+               "Row " + i +
+               " " + SubStrTestMode.values()[v] +
+               " result is NULL " + (vectorResult == null ? "YES" : "NO result " + vectorResult.toString()) +
+               " does not match row-mode expected result is NULL " +
+               (expectedResult == null ? "YES" : "NO result " + expectedResult.toString()) +
+               " row values " + Arrays.toString(randomRows[i]));
+         }
+       } else {
+
+         if (!expectedResult.equals(vectorResult)) {
+           Assert.fail(
+               "Row " + i +
+               " " + SubStrTestMode.values()[v] +
+               " result " + vectorResult.toString() +
+               " (" + vectorResult.getClass().getSimpleName() + ")" +
+               " does not match row-mode expected result " + expectedResult.toString() +
+               " (" + expectedResult.getClass().getSimpleName() + ")" +
+               " row values " + Arrays.toString(randomRows[i]));
+         }
+       }
+     }
+   }
+ }
+
+ /**
+  * Evaluates the function in row mode over every random row and stores a standard writable
+  * copy of each result in {@code resultObjects}, at the same index as the source row.
+  */
+ private void doRowIfTest(TypeInfo typeInfo, TypeInfo targetTypeInfo,
+     List<String> columns, List<ExprNodeDesc> children,
+     Object[][] randomRows, ObjectInspector rowInspector,
+     GenericUDF genericUdf, Object[] resultObjects) throws Exception {
+
+   final ExprNodeGenericFuncDesc funcDesc =
+       new ExprNodeGenericFuncDesc(typeInfo, genericUdf, children);
+
+   final HiveConf conf = new HiveConf();
+   final ExprNodeEvaluator rowEvaluator = ExprNodeEvaluatorFactory.get(funcDesc, conf);
+   rowEvaluator.initialize(rowInspector);
+
+   final ObjectInspector resultInspector =
+       TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(targetTypeInfo);
+
+   int resultIndex = 0;
+   for (Object[] inputRow : randomRows) {
+     final Object rawResult = rowEvaluator.evaluate(inputRow);
+     // Copy the result so it is stored as a separate object per row.
+     resultObjects[resultIndex++] =
+         ObjectInspectorUtils.copyToStandardObject(
+             rawResult, resultInspector, ObjectInspectorCopyOption.WRITABLE);
+   }
+ }
+
+ /**
+  * Extracts the result column of every logical row in {@code batch} and stores a standard
+  * writable copy in {@code resultObjects}, starting at {@code rowIndex}.
+  *
+  * @param batch                  the batch that has just been evaluated
+  * @param rowIndex               index in {@code resultObjects} of the batch's first row
+  * @param resultVectorExtractRow extractor initialized for the result column
+  * @param scrqtchRow             one-element scratch array that receives the extracted value
+  * @param targetTypeInfo         result type of the function
+  * @param resultObjects          output array
+  * @throws AssertionError if a row cannot be extracted; the original exception is the cause
+  */
+ private void extractResultObjects(VectorizedRowBatch batch, int rowIndex,
+     VectorExtractRow resultVectorExtractRow, Object[] scrqtchRow,
+     TypeInfo targetTypeInfo, Object[] resultObjects) {
+
+   ObjectInspector objectInspector = TypeInfoUtils
+       .getStandardWritableObjectInspectorFromTypeInfo(targetTypeInfo);
+
+   boolean selectedInUse = batch.selectedInUse;
+   int[] selected = batch.selected;
+   for (int logicalIndex = 0; logicalIndex < batch.size; logicalIndex++) {
+     // When the selected array is in use, logical rows map to batch rows through it.
+     final int batchIndex = (selectedInUse ? selected[logicalIndex] : logicalIndex);
+
+     try {
+       resultVectorExtractRow.extractRow(batch, batchIndex, scrqtchRow);
+     } catch (Exception e) {
+       // Fail the test, keeping the original exception as the cause so its stack trace
+       // is reported instead of only its toString().
+       throw new AssertionError(e.toString(), e);
+     }
+
+     Object copyResult =
+         ObjectInspectorUtils.copyToStandardObject(
+             scrqtchRow[0], objectInspector, ObjectInspectorCopyOption.WRITABLE);
+     resultObjects[rowIndex++] = copyResult;
+   }
+ }
+
+ /**
+  * Evaluates the function through a vector expression over every batch of
+  * {@code batchSource} and collects the results into {@code resultObjects}.
+  *
+  * In {@code ADAPTOR} mode HIVE_TEST_VECTOR_ADAPTOR_OVERRIDE is enabled before the
+  * expression is built; in every other mode the configuration is left at its defaults.
+  *
+  * @param typeInfo                   type of the input column (not read by this method)
+  * @param targetTypeInfo             result type of the function
+  * @param columns                    input column names
+  * @param typeInfos                  types of the input columns
+  * @param dataTypePhysicalVariations physical variations of the input columns
+  * @param children                   argument expressions of the function
+  * @param subStrTestMode             the vectorized mode being exercised
+  * @param batchSource                source of the batches to evaluate
+  * @param batchContext               context used to create the row batch
+  * @param genericUdf                 the function under test
+  * @param resultObjects              output array, filled in batch iteration order
+  */
+ private void doVectorIfTest(TypeInfo typeInfo, TypeInfo targetTypeInfo,
+     List<String> columns,
+     TypeInfo[] typeInfos, DataTypePhysicalVariation[] dataTypePhysicalVariations,
+     List<ExprNodeDesc> children,
+     SubStrTestMode subStrTestMode,
+     VectorRandomBatchSource batchSource, VectorizedRowBatchCtx batchContext,
+     GenericUDF genericUdf, Object[] resultObjects)
+     throws Exception {
+
+   ExprNodeGenericFuncDesc exprDesc =
+       new ExprNodeGenericFuncDesc(targetTypeInfo, genericUdf, children);
+
+   HiveConf hiveConf = new HiveConf();
+   if (subStrTestMode == SubStrTestMode.ADAPTOR) {
+     hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_TEST_VECTOR_ADAPTOR_OVERRIDE, true);
+   }
+
+   VectorizationContext vectorizationContext =
+       new VectorizationContext(
+           "name",
+           columns,
+           Arrays.asList(typeInfos),
+           Arrays.asList(dataTypePhysicalVariations),
+           hiveConf);
+   VectorExpression vectorExpression = vectorizationContext.getVectorExpression(exprDesc);
+
+   VectorizedRowBatch batch = batchContext.createVectorizedRowBatch();
+
+   // The result is read from the column index right after the input columns
+   // (presumably the scratch column declared by the caller's batch context).
+   VectorExtractRow resultVectorExtractRow = new VectorExtractRow();
+   resultVectorExtractRow.init(new TypeInfo[] { targetTypeInfo }, new int[] { columns.size() });
+   Object[] scrqtchRow = new Object[1];
+
+   // The leftover "*DEBUG*" console print was removed; it only added noise to test output.
+
+   batchSource.resetBatchIteration();
+   int rowIndex = 0;
+   while (batchSource.fillNextBatch(batch)) {
+     vectorExpression.evaluate(batch);
+     extractResultObjects(batch, rowIndex, resultVectorExtractRow, scrqtchRow,
+         targetTypeInfo, resultObjects);
+     rowIndex += batch.size;
+   }
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/ce778adb/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTimestampExtract.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTimestampExtract.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTimestampExtract.java
index 2a52eea..fa8f465 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTimestampExtract.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTimestampExtract.java
@@ -36,6 +36,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource;
import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
+import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.GenerationSpec;
import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.TestVectorDateAddSub.ColumnScalarMode;
@@ -86,8 +87,6 @@ import org.junit.Test;
public class TestVectorTimestampExtract {
- private static final boolean corruptTimestampStrings = false;
-
@Test
public void testTimestamp() throws Exception {
Random random = new Random(7436);
@@ -130,53 +129,6 @@ public class TestVectorTimestampExtract {
doIfTestOneTimestampExtract(random, typeName, "year");
}
- private static final String alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
-
- private Object randomTimestampStringFamily(
- Random random, TypeInfo dateTimeStringTypeInfo, boolean wantWritable) {
-
- String randomTimestampString =
- VectorRandomRowSource.randomPrimitiveTimestampStringObject(random);
- if (corruptTimestampStrings && random.nextInt(40) == 39) {
-
- // Randomly corrupt.
- int index = random.nextInt(randomTimestampString.length());
- char[] chars = randomTimestampString.toCharArray();
- chars[index] = alphabet.charAt(random.nextInt(alphabet.length()));
- randomTimestampString = String.valueOf(chars);
- }
-
- PrimitiveCategory dateTimeStringPrimitiveCategory =
- ((PrimitiveTypeInfo) dateTimeStringTypeInfo).getPrimitiveCategory();
- switch (dateTimeStringPrimitiveCategory) {
- case STRING:
- return randomTimestampString;
- case CHAR:
- {
- HiveChar hiveChar =
- new HiveChar(randomTimestampString, ((CharTypeInfo) dateTimeStringTypeInfo).getLength());
- if (wantWritable) {
- return new HiveCharWritable(hiveChar);
- } else {
- return hiveChar;
- }
- }
- case VARCHAR:
- {
- HiveVarchar hiveVarchar =
- new HiveVarchar(
- randomTimestampString, ((VarcharTypeInfo) dateTimeStringTypeInfo).getLength());
- if (wantWritable) {
- return new HiveVarcharWritable(hiveVarchar);
- } else {
- return hiveVarchar;
- }
- }
- default:
- throw new RuntimeException("Unexpected string family category " + dateTimeStringPrimitiveCategory);
- }
- }
-
private void doIfTestOneTimestampExtract(Random random, String dateTimeStringTypeName,
String extractFunctionName)
throws Exception {
@@ -190,14 +142,21 @@ public class TestVectorTimestampExtract {
dateTimeStringPrimitiveCategory == PrimitiveCategory.CHAR ||
dateTimeStringPrimitiveCategory == PrimitiveCategory.VARCHAR);
- List<String> explicitTypeNameList = new ArrayList<String>();
+ List<GenerationSpec> generationSpecList = new ArrayList<GenerationSpec>();
List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList =
new ArrayList<DataTypePhysicalVariation>();
List<String> columns = new ArrayList<String>();
int columnNum = 0;
ExprNodeDesc col1Expr;
- explicitTypeNameList.add(dateTimeStringTypeName);
+ if (!isStringFamily) {
+ generationSpecList.add(
+ GenerationSpec.createSameType(dateTimeStringTypeInfo));
+ } else {
+ generationSpecList.add(
+ GenerationSpec.createStringFamilyOtherTypeValue(
+ dateTimeStringTypeInfo, TypeInfoFactory.timestampTypeInfo));
+ }
explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE);
String columnName = "col" + (columnNum++);
@@ -206,8 +165,8 @@ public class TestVectorTimestampExtract {
VectorRandomRowSource rowSource = new VectorRandomRowSource();
- rowSource.initExplicitSchema(
- random, explicitTypeNameList, /* maxComplexDepth */ 0, /* allowNull */ true,
+ rowSource.initGenerationSpecSchema(
+ random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true,
explicitDataTypePhysicalVariationList);
List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
@@ -219,18 +178,6 @@ public class TestVectorTimestampExtract {
Object[][] randomRows = rowSource.randomRows(100000);
- if (isStringFamily) {
- for (int i = 0; i < randomRows.length; i++) {
- Object[] row = randomRows[i];
- Object object = row[columnNum - 1];
- if (row[0] != null) {
- row[0] =
- randomTimestampStringFamily(
- random, dateTimeStringTypeInfo, /* wantWritable */ true);
- }
- }
- }
-
VectorRandomBatchSource batchSource =
VectorRandomBatchSource.createInterestingBatches(
random,
@@ -405,9 +352,11 @@ public class TestVectorTimestampExtract {
Text text;
if (object == null) {
text = null;
- } else {
+ } else if (object instanceof String) {
text = new Text();
text.set((String) object);
+ } else {
+ text = (Text) object;
}
result = evaluator.evaluate(text);
}
[2/2] hive git commit: HIVE-19565: Vectorization: Fix NULL / Wrong
Results issues in STRING Functions (Matt McCline, reviewed by Teddy Choi)
Posted by mm...@apache.org.
HIVE-19565: Vectorization: Fix NULL / Wrong Results issues in STRING Functions (Matt McCline, reviewed by Teddy Choi)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ce778adb
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ce778adb
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ce778adb
Branch: refs/heads/master
Commit: ce778adbcba2e933ef6cee00dec9f8d2d5198191
Parents: 368d9cf
Author: Matt McCline <mm...@hortonworks.com>
Authored: Fri Jun 15 00:57:57 2018 -0500
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Fri Jun 15 00:57:57 2018 -0500
----------------------------------------------------------------------
.../CharScalarConcatStringGroupCol.java | 51 ---
.../StringGroupColConcatCharScalar.java | 51 ---
.../StringGroupColConcatStringScalar.java | 3 +-
.../StringGroupColConcatVarCharScalar.java | 52 ---
.../expressions/StringGroupConcatColCol.java | 2 +-
.../ql/exec/vector/expressions/StringLTrim.java | 25 +-
.../ql/exec/vector/expressions/StringRTrim.java | 24 +-
.../StringScalarConcatStringGroupCol.java | 2 +-
.../expressions/StringSubstrColStart.java | 11 +-
.../expressions/StringSubstrColStartLen.java | 11 +-
.../ql/exec/vector/expressions/StringTrim.java | 35 +-
.../VarCharScalarConcatStringGroupCol.java | 52 ---
.../hive/ql/udf/generic/GenericUDFConcat.java | 11 +-
.../ql/exec/vector/VectorRandomRowSource.java | 332 +++++++++++++-
.../expressions/TestVectorCastStatement.java | 86 ++--
.../expressions/TestVectorDateAddSub.java | 84 +---
.../vector/expressions/TestVectorDateDiff.java | 112 ++---
.../expressions/TestVectorStringConcat.java | 427 +++++++++++++++++++
.../TestVectorStringExpressions.java | 16 +-
.../expressions/TestVectorStringUnary.java | 368 ++++++++++++++++
.../vector/expressions/TestVectorSubStr.java | 347 +++++++++++++++
.../expressions/TestVectorTimestampExtract.java | 81 +---
22 files changed, 1634 insertions(+), 549 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/ce778adb/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CharScalarConcatStringGroupCol.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CharScalarConcatStringGroupCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CharScalarConcatStringGroupCol.java
deleted file mode 100644
index 712b8de..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CharScalarConcatStringGroupCol.java
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.exec.vector.expressions;
-
-import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
-
-/**
- * Vectorized instruction to concatenate a scalar to a string column and put
- * the result in an output column.
- */
-public class CharScalarConcatStringGroupCol extends StringScalarConcatStringGroupCol {
- private static final long serialVersionUID = 1L;
-
- public CharScalarConcatStringGroupCol(byte[] value, int colNum, int outputColumnNum) {
- super(value, colNum, outputColumnNum);
- }
-
- public CharScalarConcatStringGroupCol() {
- super();
- }
-
- @Override
- public VectorExpressionDescriptor.Descriptor getDescriptor() {
- return (new VectorExpressionDescriptor.Builder())
- .setMode(
- VectorExpressionDescriptor.Mode.PROJECTION)
- .setNumArguments(2)
- .setArgumentTypes(
- VectorExpressionDescriptor.ArgumentType.CHAR,
- VectorExpressionDescriptor.ArgumentType.STRING_FAMILY)
- .setInputExpressionTypes(
- VectorExpressionDescriptor.InputExpressionType.SCALAR,
- VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
- }
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/ce778adb/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatCharScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatCharScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatCharScalar.java
deleted file mode 100644
index bbebe6c..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatCharScalar.java
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.exec.vector.expressions;
-
-import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
-
-/**
- * Vectorized instruction to concatenate a string column to a scalar and put
- * the result in an output column.
- */
-public class StringGroupColConcatCharScalar extends StringGroupColConcatStringScalar {
- private static final long serialVersionUID = 1L;
-
- public StringGroupColConcatCharScalar(int colNum, byte[] value, int outputColumnNum) {
- super(colNum, value, outputColumnNum);
- }
-
- public StringGroupColConcatCharScalar() {
- super();
- }
-
- @Override
- public VectorExpressionDescriptor.Descriptor getDescriptor() {
- return (new VectorExpressionDescriptor.Builder())
- .setMode(
- VectorExpressionDescriptor.Mode.PROJECTION)
- .setNumArguments(2)
- .setArgumentTypes(
- VectorExpressionDescriptor.ArgumentType.STRING_FAMILY,
- VectorExpressionDescriptor.ArgumentType.CHAR)
- .setInputExpressionTypes(
- VectorExpressionDescriptor.InputExpressionType.COLUMN,
- VectorExpressionDescriptor.InputExpressionType.SCALAR).build();
- }
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/ce778adb/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java
index 9194e8b..896de85 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java
@@ -18,7 +18,6 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
-import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
@@ -164,7 +163,7 @@ public class StringGroupColConcatStringScalar extends VectorExpression {
.setNumArguments(2)
.setArgumentTypes(
VectorExpressionDescriptor.ArgumentType.STRING_FAMILY,
- VectorExpressionDescriptor.ArgumentType.STRING)
+ VectorExpressionDescriptor.ArgumentType.STRING_FAMILY)
.setInputExpressionTypes(
VectorExpressionDescriptor.InputExpressionType.COLUMN,
VectorExpressionDescriptor.InputExpressionType.SCALAR).build();
http://git-wip-us.apache.org/repos/asf/hive/blob/ce778adb/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatVarCharScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatVarCharScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatVarCharScalar.java
deleted file mode 100644
index 7349410..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatVarCharScalar.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.exec.vector.expressions;
-
-import org.apache.hadoop.hive.common.type.HiveVarchar;
-import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
-
-/**
- * Vectorized instruction to concatenate a string column to a scalar and put
- * the result in an output column.
- */
-public class StringGroupColConcatVarCharScalar extends StringGroupColConcatStringScalar {
- private static final long serialVersionUID = 1L;
-
- public StringGroupColConcatVarCharScalar(int colNum, HiveVarchar value, int outputColumnNum) {
- super(colNum, value.getValue().getBytes(), outputColumnNum);
- }
-
- public StringGroupColConcatVarCharScalar() {
- super();
- }
-
- @Override
- public VectorExpressionDescriptor.Descriptor getDescriptor() {
- return (new VectorExpressionDescriptor.Builder())
- .setMode(
- VectorExpressionDescriptor.Mode.PROJECTION)
- .setNumArguments(2)
- .setArgumentTypes(
- VectorExpressionDescriptor.ArgumentType.STRING_FAMILY,
- VectorExpressionDescriptor.ArgumentType.VARCHAR)
- .setInputExpressionTypes(
- VectorExpressionDescriptor.InputExpressionType.COLUMN,
- VectorExpressionDescriptor.InputExpressionType.SCALAR).build();
- }
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/ce778adb/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java
index 1c9433b..ceb7b26 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java
@@ -128,7 +128,7 @@ public class StringGroupConcatColCol extends VectorExpression {
}
} else {
for(int i = 0; i != n; i++) {
- if (!inV2.isNull[0]) {
+ if (!inV2.isNull[i]) {
outV.setConcat(i, vector1[0], start1[0], len1[0], vector2[i], start2[i], len2[i]);
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/ce778adb/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLTrim.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLTrim.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLTrim.java
index 84f03cc..8a41e76 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLTrim.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLTrim.java
@@ -23,6 +23,8 @@ import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
public class StringLTrim extends StringUnaryUDFDirect {
private static final long serialVersionUID = 1L;
+ private static final byte[] EMPTY_BYTES = new byte[0];
+
public StringLTrim(int inputColumn, int outputColumnNum) {
super(inputColumn, outputColumnNum);
}
@@ -36,14 +38,23 @@ public class StringLTrim extends StringUnaryUDFDirect {
* Operate on the data in place, and set the output by reference
* to improve performance. Ignore null handling. That will be handled separately.
*/
- protected void func(BytesColumnVector outV, byte[][] vector, int[] start, int[] length, int i) {
- int j = start[i];
-
- // skip past blank characters
- while(j < start[i] + vector[i].length && vector[i][j] == 0x20) {
- j++;
+ protected void func(BytesColumnVector outV, byte[][] vector, int[] start, int[] length,
+ int batchIndex) {
+ byte[] bytes = vector[batchIndex];
+ final int startIndex = start[batchIndex];
+
+ // Skip past blank characters.
+ final int end = startIndex + length[batchIndex];
+ int index = startIndex;
+ while(index < end && bytes[index] == 0x20) {
+ index++;
}
- outV.setVal(i, vector[i], j, length[i] - (j - start[i]));
+ final int resultLength = end - index;
+ if (resultLength == 0) {
+ outV.setVal(batchIndex, EMPTY_BYTES, 0, 0);
+ return;
+ }
+ outV.setVal(batchIndex, bytes, index, resultLength);
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/ce778adb/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringRTrim.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringRTrim.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringRTrim.java
index 5c087aa..6a3a220 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringRTrim.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringRTrim.java
@@ -23,6 +23,8 @@ import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
public class StringRTrim extends StringUnaryUDFDirect {
private static final long serialVersionUID = 1L;
+ private static final byte[] EMPTY_BYTES = new byte[0];
+
public StringRTrim(int inputColumn, int outputColumnNum) {
super(inputColumn, outputColumnNum);
}
@@ -36,15 +38,23 @@ public class StringRTrim extends StringUnaryUDFDirect {
* Operate on the data in place, and set the output by reference
* to improve performance. Ignore null handling. That will be handled separately.
*/
- protected void func(BytesColumnVector outV, byte[][] vector, int[] start, int[] length, int i) {
- int j = start[i] + length[i] - 1;
+ protected void func(BytesColumnVector outV, byte[][] vector, int[] start, int[] length,
+ int batchIndex) {
+
+ byte[] bytes = vector[batchIndex];
+ final int startIndex = start[batchIndex];
- // skip trailing blank characters
- while(j >= start[i] && vector[i][j] == 0x20) {
- j--;
+ // Skip trailing blank characters.
+ int index = startIndex + length[batchIndex] - 1;
+ while(index >= startIndex && bytes[index] == 0x20) {
+ index--;
}
- // set output vector
- outV.setVal(i, vector[i], start[i], (j - start[i]) + 1);
+ final int resultLength = index - startIndex + 1;
+ if (resultLength == 0) {
+ outV.setVal(batchIndex, EMPTY_BYTES, 0, 0);
+ return;
+ }
+ outV.setVal(batchIndex, bytes, startIndex, resultLength);
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/ce778adb/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java
index db679b0..b099910 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java
@@ -163,7 +163,7 @@ public class StringScalarConcatStringGroupCol extends VectorExpression {
VectorExpressionDescriptor.Mode.PROJECTION)
.setNumArguments(2)
.setArgumentTypes(
- VectorExpressionDescriptor.ArgumentType.STRING,
+ VectorExpressionDescriptor.ArgumentType.STRING_FAMILY,
VectorExpressionDescriptor.ArgumentType.STRING_FAMILY)
.setInputExpressionTypes(
VectorExpressionDescriptor.InputExpressionType.SCALAR,
http://git-wip-us.apache.org/repos/asf/hive/blob/ce778adb/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java
index 411fc4b..2b97504 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java
@@ -146,12 +146,7 @@ public class StringSubstrColStart extends VectorExpression {
outputColVector.isRepeating = false;
if (inV.isRepeating) {
- if (!inV.noNulls && inV.isNull[0]) {
- outputIsNull[0] = true;
- outputColVector.noNulls = false;
- outputColVector.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length);
- return;
- } else {
+ if (inV.noNulls || !inV.isNull[0]) {
outputIsNull[0] = false;
int offset = getSubstrStartOffset(vector[0], start[0], len[0], startIdx);
if (offset != -1) {
@@ -159,6 +154,10 @@ public class StringSubstrColStart extends VectorExpression {
} else {
outputColVector.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length);
}
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ outputColVector.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length);
}
outputColVector.isRepeating = true;
return;
http://git-wip-us.apache.org/repos/asf/hive/blob/ce778adb/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java
index e28c0a7..fff3032 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java
@@ -167,12 +167,7 @@ public class StringSubstrColStartLen extends VectorExpression {
outputColVector.isRepeating = false;
if (inV.isRepeating) {
-
- if (!inV.noNulls && inV.isNull[0]) {
- outputIsNull[0] = true;
- outputColVector.noNulls = false;
- outputColVector.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length);
- } else {
+ if (inV.noNulls || !inV.isNull[0]) {
outputIsNull[0] = false;
populateSubstrOffsets(vector[0], start[0], len[0], startIdx, length, offsetArray);
if (offsetArray[0] != -1) {
@@ -180,6 +175,10 @@ public class StringSubstrColStartLen extends VectorExpression {
} else {
outputColVector.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length);
}
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ outputColVector.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length);
}
outputColVector.isRepeating = true;
return;
http://git-wip-us.apache.org/repos/asf/hive/blob/ce778adb/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringTrim.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringTrim.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringTrim.java
index 458ac7d..76afe7c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringTrim.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringTrim.java
@@ -23,6 +23,8 @@ import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
public class StringTrim extends StringUnaryUDFDirect {
private static final long serialVersionUID = 1L;
+ private static final byte[] EMPTY_BYTES = new byte[0];
+
public StringTrim(int inputColumn, int outputColumnNum) {
super(inputColumn, outputColumnNum);
}
@@ -37,20 +39,31 @@ public class StringTrim extends StringUnaryUDFDirect {
* Operate on the data in place, and set the output by reference
* to improve performance. Ignore null handling. That will be handled separately.
*/
- protected void func(BytesColumnVector outV, byte[][] vector, int[] start, int[] length, int i) {
- int l = start[i];
- int r = start[i] + length[i] - 1;
+ protected void func(BytesColumnVector outV, byte[][] vector, int[] start, int[] length,
+ int batchIndex) {
- // skip blank character on left
- while(l <= r && vector[i][l] == 0x20) {
- l++;
+ byte[] bytes = vector[batchIndex];
+ final int startIndex = start[batchIndex];
+ final int end = startIndex + length[batchIndex];
+ int leftIndex = startIndex;
+ while(leftIndex < end && bytes[leftIndex] == 0x20) {
+ leftIndex++;
}
-
- // skip blank characters on right
- while(l <= r && vector[i][r] == 0x20) {
- r--;
+ if (leftIndex == end) {
+ outV.setVal(batchIndex, EMPTY_BYTES, 0, 0);
+ return;
}
- outV.setVal(i, vector[i], l, (r - l) + 1);
+ // Have at least 1 non-blank; Skip trailing blank characters.
+ int rightIndex = end - 1;
+ final int rightLimit = leftIndex + 1;
+ while(rightIndex >= rightLimit && bytes[rightIndex] == 0x20) {
+ rightIndex--;
+ }
+ final int resultLength = rightIndex - leftIndex + 1;
+ if (resultLength <= 0) {
+ throw new RuntimeException("Not expected");
+ }
+ outV.setVal(batchIndex, bytes, leftIndex, resultLength);
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/ce778adb/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VarCharScalarConcatStringGroupCol.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VarCharScalarConcatStringGroupCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VarCharScalarConcatStringGroupCol.java
deleted file mode 100644
index 76e83e0..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VarCharScalarConcatStringGroupCol.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.exec.vector.expressions;
-
-import org.apache.hadoop.hive.common.type.HiveVarchar;
-import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
-
-/**
- * Vectorized instruction to concatenate a scalar to a string column and put
- * the result in an output column.
- */
-public class VarCharScalarConcatStringGroupCol extends StringScalarConcatStringGroupCol {
- private static final long serialVersionUID = 1L;
-
- public VarCharScalarConcatStringGroupCol(HiveVarchar value, int colNum, int outputColumnNum) {
- super(value.getValue().getBytes(), colNum, outputColumnNum);
- }
-
- public VarCharScalarConcatStringGroupCol() {
- super();
- }
-
- @Override
- public VectorExpressionDescriptor.Descriptor getDescriptor() {
- return (new VectorExpressionDescriptor.Builder())
- .setMode(
- VectorExpressionDescriptor.Mode.PROJECTION)
- .setNumArguments(2)
- .setArgumentTypes(
- VectorExpressionDescriptor.ArgumentType.VARCHAR,
- VectorExpressionDescriptor.ArgumentType.STRING_FAMILY)
- .setInputExpressionTypes(
- VectorExpressionDescriptor.InputExpressionType.SCALAR,
- VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
- }
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/ce778adb/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcat.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcat.java
index 62a7560..92588dd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcat.java
@@ -25,11 +25,7 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
import org.apache.hadoop.hive.ql.exec.vector.expressions.StringGroupConcatColCol;
import org.apache.hadoop.hive.ql.exec.vector.expressions.StringGroupColConcatStringScalar;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.StringGroupColConcatCharScalar;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.StringGroupColConcatVarCharScalar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.StringScalarConcatStringGroupCol;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.CharScalarConcatStringGroupCol;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.VarCharScalarConcatStringGroupCol;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
@@ -53,11 +49,10 @@ extended = "Returns NULL if any argument is NULL.\n"
+ "Example:\n"
+ " > SELECT _FUNC_('abc', 'def') FROM src LIMIT 1;\n"
+ " 'abcdef'")
-@VectorizedExpressions({StringGroupConcatColCol.class,
+@VectorizedExpressions({
+ StringGroupConcatColCol.class,
StringGroupColConcatStringScalar.class,
- StringGroupColConcatCharScalar.class, StringGroupColConcatVarCharScalar.class,
- StringScalarConcatStringGroupCol.class,
- CharScalarConcatStringGroupCol.class, VarCharScalarConcatStringGroupCol.class})
+ StringScalarConcatStringGroupCol.class})
public class GenericUDFConcat extends GenericUDF {
private transient ObjectInspector[] argumentOIs;
private transient StringConverter[] stringConverters;
http://git-wip-us.apache.org/repos/asf/hive/blob/ce778adb/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java
index 641ff10..0e4dcfd 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java
@@ -20,12 +20,17 @@ package org.apache.hadoop.hive.ql.exec.vector;
import java.sql.Date;
import java.sql.Timestamp;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.text.ParseException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;
+import org.apache.commons.lang.StringUtils;
+
import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation;
import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveDecimal;
@@ -81,6 +86,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
import org.apache.hive.common.util.DateUtils;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.LongWritable;
import com.google.common.base.Preconditions;
import com.google.common.base.Charsets;
@@ -114,6 +120,8 @@ public class VectorRandomRowSource {
private StructObjectInspector rowStructObjectInspector;
+ private List<GenerationSpec> generationSpecList;
+
private String[] alphabets;
private boolean allowNull;
@@ -121,6 +129,92 @@ public class VectorRandomRowSource {
private boolean addEscapables;
private String needsEscapeStr;
+ public static class StringGenerationOption {
+
+ private boolean generateSentences;
+ private boolean addPadding;
+
+ public StringGenerationOption(boolean generateSentences, boolean addPadding) {
+ this.generateSentences = generateSentences;
+ this.addPadding = addPadding;
+ }
+
+ public boolean getGenerateSentences() {
+ return generateSentences;
+ }
+
+ public boolean getAddPadding() {
+ return addPadding;
+ }
+ }
+
+ public static class GenerationSpec {
+
+ public static enum GenerationKind {
+ SAME_TYPE,
+ OMIT_GENERATION,
+ STRING_FAMILY,
+ STRING_FAMILY_OTHER_TYPE_VALUE,
+ TIMESTAMP_MILLISECONDS
+ }
+
+ private final GenerationKind generationKind;
+ private final TypeInfo typeInfo;
+ private final TypeInfo sourceTypeInfo;
+ private final StringGenerationOption stringGenerationOption;
+
+ private GenerationSpec(GenerationKind generationKind, TypeInfo typeInfo,
+ TypeInfo sourceTypeInfo, StringGenerationOption stringGenerationOption) {
+ this.generationKind = generationKind;
+ this.typeInfo = typeInfo;
+ this.sourceTypeInfo = sourceTypeInfo;
+ this.stringGenerationOption = stringGenerationOption;
+ }
+
+ public GenerationKind getGenerationKind() {
+ return generationKind;
+ }
+
+ public TypeInfo getTypeInfo() {
+ return typeInfo;
+ }
+
+ public TypeInfo getSourceTypeInfo() {
+ return sourceTypeInfo;
+ }
+
+ public StringGenerationOption getStringGenerationOption() {
+ return stringGenerationOption;
+ }
+
+ public static GenerationSpec createSameType(TypeInfo typeInfo) {
+ return new GenerationSpec(
+ GenerationKind.SAME_TYPE, typeInfo, null, null);
+ }
+
+ public static GenerationSpec createOmitGeneration(TypeInfo typeInfo) {
+ return new GenerationSpec(
+ GenerationKind.OMIT_GENERATION, typeInfo, null, null);
+ }
+
+ public static GenerationSpec createStringFamily(TypeInfo typeInfo,
+ StringGenerationOption stringGenerationOption) {
+ return new GenerationSpec(
+ GenerationKind.STRING_FAMILY, typeInfo, null, stringGenerationOption);
+ }
+
+ public static GenerationSpec createStringFamilyOtherTypeValue(TypeInfo typeInfo,
+ TypeInfo otherTypeTypeInfo) {
+ return new GenerationSpec(
+ GenerationKind.STRING_FAMILY_OTHER_TYPE_VALUE, typeInfo, otherTypeTypeInfo, null);
+ }
+
+ public static GenerationSpec createTimestampMilliseconds(TypeInfo typeInfo) {
+ return new GenerationSpec(
+ GenerationKind.TIMESTAMP_MILLISECONDS, typeInfo, null, null);
+ }
+ }
+
public List<String> typeNames() {
return typeNames;
}
@@ -186,8 +280,26 @@ public class VectorRandomRowSource {
boolean allowNull, List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList) {
this.r = r;
this.allowNull = allowNull;
+
+ List<GenerationSpec> generationSpecList = new ArrayList<GenerationSpec>();
+ for (String explicitTypeName : explicitTypeNameList) {
+ TypeInfo typeInfo =
+ TypeInfoUtils.getTypeInfoFromTypeString(explicitTypeName);
+ generationSpecList.add(
+ GenerationSpec.createSameType(typeInfo));
+ }
+
+ chooseSchema(
+ SupportedTypes.ALL, null, generationSpecList, explicitDataTypePhysicalVariationList,
+ maxComplexDepth);
+ }
+
+ public void initGenerationSpecSchema(Random r, List<GenerationSpec> generationSpecList, int maxComplexDepth, // One GenerationSpec per column; chooseSchema sets columnCount to the list size.
+ boolean allowNull, List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList) { // chooseSchema reads the variation list at every column index.
+ this.r = r;
+ this.allowNull = allowNull;
chooseSchema(
- SupportedTypes.ALL, null, explicitTypeNameList, explicitDataTypePhysicalVariationList,
+ SupportedTypes.ALL, null, generationSpecList, explicitDataTypePhysicalVariationList, // The null argument is chooseSchema's allowedTypeNameSet.
maxComplexDepth);
}
@@ -418,14 +530,14 @@ public class VectorRandomRowSource {
}
private void chooseSchema(SupportedTypes supportedTypes, Set<String> allowedTypeNameSet,
- List<String> explicitTypeNameList,
+ List<GenerationSpec> generationSpecList,
List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList,
int maxComplexDepth) {
HashSet<Integer> hashSet = null;
final boolean allTypes;
final boolean onlyOne;
- if (explicitTypeNameList != null) {
- columnCount = explicitTypeNameList.size();
+ if (generationSpecList != null) {
+ columnCount = generationSpecList.size();
allTypes = false;
onlyOne = false;
} else if (allowedTypeNameSet != null) {
@@ -472,8 +584,8 @@ public class VectorRandomRowSource {
final String typeName;
DataTypePhysicalVariation dataTypePhysicalVariation = DataTypePhysicalVariation.NONE;
- if (explicitTypeNameList != null) {
- typeName = explicitTypeNameList.get(c);
+ if (generationSpecList != null) {
+ typeName = generationSpecList.get(c).getTypeInfo().getTypeName();
dataTypePhysicalVariation = explicitDataTypePhysicalVariationList.get(c);
} else if (onlyOne || allowedTypeNameSet != null) {
typeName = getRandomTypeName(r, supportedTypes, allowedTypeNameSet);
@@ -563,6 +675,154 @@ public class VectorRandomRowSource {
rowStructObjectInspector = ObjectInspectorFactory.
getStandardStructObjectInspector(columnNames, objectInspectorList);
alphabets = new String[columnCount];
+
+ this.generationSpecList = generationSpecList;
+ }
+
+ private static ThreadLocal<DateFormat> DATE_FORMAT = // Per-thread formatter; declared ahead of the two constants below, whose initializers parse with it.
+ new ThreadLocal<DateFormat>() {
+ @Override
+ protected DateFormat initialValue() {
+ return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); // NOTE(review): no time zone is set, so parsing presumably uses the JVM default -- confirm.
+ }
+ };
+
+ private static long MIN_FOUR_DIGIT_YEAR_MILLIS = parseToMillis("0001-01-01 00:00:00"); // Inclusive lower bound used by randomRow() for TIMESTAMP_MILLISECONDS.
+ private static long MAX_FOUR_DIGIT_YEAR_MILLIS = parseToMillis("9999-01-01 00:00:00"); // Inclusive upper bound; this is the start of year 9999, not its end.
+
+ private static long parseToMillis(String s) { // Parses s with this thread's DATE_FORMAT and returns getTime() of the parsed date.
+ try {
+ return DATE_FORMAT.get().parse(s).getTime();
+ } catch (ParseException ex) {
+ throw new RuntimeException(ex); // Rethrown unchecked with the cause kept; callers here are static field initializers.
+ }
+ }
+
+ private static String[] randomWords = // Word pool that randomStringFamily draws from when generating sentences.
+ new String[] {
+ "groovy",
+ "attack",
+ "wacky",
+ "kiss",
+ "to",
+ "the",
+ "a",
+ "thoughtless",
+ "blushing",
+ "pay",
+ "rule",
+ "profuse",
+ "need",
+ "smell",
+ "bucket",
+ "board",
+ "eggs",
+ "laughable",
+ "idiotic",
+ "direful",
+ "thoughtful",
+ "curious",
+ "show",
+ "surge",
+ "opines",
+ "cowl",
+ "signal",
+ ""}; // The empty word is part of the pool; randomStringFamily skips capitalizing it.
+ private static int randomWordCount = randomWords.length; // Exclusive bound passed to random.nextInt when picking a word.
+
+ private static Object toStringFamilyObject(TypeInfo typeInfo, String string, boolean isWritable) { // Wraps string as a STRING, CHAR or VARCHAR value of typeInfo, writable or plain per isWritable.
+
+ PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo; // Fails with ClassCastException when typeInfo is not a PrimitiveTypeInfo.
+ PrimitiveCategory primitiveCategory =
+ primitiveTypeInfo.getPrimitiveCategory();
+ Object object;
+ switch (primitiveCategory) {
+ case STRING:
+ if (isWritable) {
+ object = new Text(string); // Writable form of a STRING.
+ } else {
+ object = string; // Plain form is the String itself.
+ }
+ break;
+ case CHAR:
+ {
+ HiveChar hiveChar =
+ new HiveChar(
+ string, ((CharTypeInfo) typeInfo).getLength()); // Length taken from the CHAR type; NOTE(review): HiveChar presumably enforces it -- confirm.
+ if (isWritable) {
+ object = new HiveCharWritable(hiveChar);
+ } else {
+ object = hiveChar;
+ }
+ }
+ break;
+ case VARCHAR:
+ {
+ HiveVarchar hiveVarchar =
+ new HiveVarchar(
+ string, ((VarcharTypeInfo) typeInfo).getLength()); // Length taken from the VARCHAR type; NOTE(review): HiveVarchar presumably enforces it -- confirm.
+ if (isWritable) {
+ object = new HiveVarcharWritable(hiveVarchar);
+ } else {
+ object = hiveVarchar;
+ }
+ }
+ break;
+ default:
+ throw new RuntimeException("Unexpected string family category " + primitiveCategory); // Any category other than STRING, CHAR, VARCHAR is rejected.
+ }
+ return object;
+ }
+
+ public static Object randomStringFamilyOtherTypeValue(Random random, TypeInfo typeInfo, // Returns a typeInfo string-family value whose text is a random specialValueTypeInfo value.
+ TypeInfo specialValueTypeInfo, boolean isWritable) { // isWritable is forwarded to toStringFamilyObject to pick writable vs. plain form.
+ String string;
+ string =
+ VectorRandomRowSource.randomPrimitiveObject(
+ random, (PrimitiveTypeInfo) specialValueTypeInfo).toString(); // NOTE(review): would throw NullPointerException if randomPrimitiveObject returned null -- confirm it never does.
+ return toStringFamilyObject(typeInfo, string, isWritable);
+ }
+
+ public static Object randomStringFamily(Random random, TypeInfo typeInfo, // Builds random text and wraps it as a typeInfo string-family value via toStringFamilyObject.
+ StringGenerationOption stringGenerationOption, boolean isWritable) { // stringGenerationOption may be null (handled below).
+
+ String string;
+ if (stringGenerationOption == null) { // No option: the text is simply a random value of typeInfo itself.
+ string =
+ VectorRandomRowSource.randomPrimitiveObject(
+ random, (PrimitiveTypeInfo) typeInfo).toString();
+ } else {
+ boolean generateSentences = stringGenerationOption.getGenerateSentences();
+ boolean addPadding = stringGenerationOption.getAddPadding();
+ StringBuilder sb = new StringBuilder();
+ if (addPadding && random.nextBoolean()) { // Leading padding is added on a coin flip when enabled.
+ sb.append(StringUtils.leftPad("", random.nextInt(5))); // Pad width 0-4; NOTE(review): presumably spaces (leftPad's default pad) -- confirm.
+ }
+ if (generateSentences) {
+ boolean capitalizeFirstWord = random.nextBoolean();
+ final int n = random.nextInt(10); // 0 to 9 words; 0 leaves the sentence part empty.
+ for (int i = 0; i < n; i++) {
+ String randomWord = randomWords[random.nextInt(randomWordCount)];
+ if (randomWord.length() > 0 && // The empty word cannot be capitalized (charAt(0) would fail).
+ ((i == 0 && capitalizeFirstWord) || random.nextInt(20) == 0)) { // First word when chosen, otherwise a 1-in-20 chance.
+ randomWord = Character.toUpperCase(randomWord.charAt(0)) + randomWord.substring(1);
+ }
+ if (i > 0) {
+ sb.append(" "); // Single-space separator between words.
+ }
+ sb.append(randomWord);
+ }
+ } else {
+ sb.append( // Not sentences: same random typeInfo text as the null-option path, but padding may surround it.
+ VectorRandomRowSource.randomPrimitiveObject(
+ random, (PrimitiveTypeInfo) typeInfo).toString());
+ }
+ if (addPadding && random.nextBoolean()) { // Trailing padding is decided by its own coin flip.
+ sb.append(StringUtils.leftPad("", random.nextInt(5))); // Appended after the text, so it acts as trailing padding of width 0-4.
+ }
+ string = sb.toString();
+ }
+ return toStringFamilyObject(typeInfo, string, isWritable);
+ }
public Object[][] randomRows(int n) {
@@ -577,8 +837,64 @@ public class VectorRandomRowSource {
public Object[] randomRow() {
final Object row[] = new Object[columnCount];
- for (int c = 0; c < columnCount; c++) {
- row[c] = randomWritable(c);
+
+ if (generationSpecList == null) { // No specs were given to chooseSchema: every column uses randomWritable(c).
+ for (int c = 0; c < columnCount; c++) {
+ row[c] = randomWritable(c);
+ }
+ } else {
+ for (int c = 0; c < columnCount; c++) { // Specs present: each column is generated according to its GenerationKind.
+ GenerationSpec generationSpec = generationSpecList.get(c);
+ GenerationSpec.GenerationKind generationKind = generationSpec.getGenerationKind();
+ Object object;
+ switch (generationKind) {
+ case SAME_TYPE:
+ object = randomWritable(c); // Same generation as the no-spec path.
+ break;
+ case OMIT_GENERATION:
+ object = null; // Column is deliberately left without a value.
+ break;
+ case STRING_FAMILY:
+ {
+ TypeInfo typeInfo = generationSpec.getTypeInfo();
+ StringGenerationOption stringGenerationOption =
+ generationSpec.getStringGenerationOption();
+ object = randomStringFamily(
+ r, typeInfo, stringGenerationOption, true); // true = isWritable. This path never assigns null itself.
+ }
+ break;
+ case STRING_FAMILY_OTHER_TYPE_VALUE:
+ {
+ TypeInfo typeInfo = generationSpec.getTypeInfo();
+ TypeInfo otherTypeTypeInfo = generationSpec.getSourceTypeInfo();
+ object = randomStringFamilyOtherTypeValue(
+ r, typeInfo, otherTypeTypeInfo, true); // true = isWritable. This path never assigns null itself.
+ }
+ break;
+ case TIMESTAMP_MILLISECONDS:
+ {
+ LongWritable longWritable = (LongWritable) randomWritable(c); // NOTE(review): cast assumes this column's writable is a LongWritable -- confirm the spec's type.
+ if (longWritable != null) { // A null from randomWritable is passed through unchanged.
+
+ while (true) { // Redraw until the value lies within the inclusive four-digit-year bounds.
+ long longValue = longWritable.get();
+ if (longValue >= MIN_FOUR_DIGIT_YEAR_MILLIS &&
+ longValue <= MAX_FOUR_DIGIT_YEAR_MILLIS) {
+ break;
+ }
+ longWritable.set( // Out of range: replace with a fresh random long and test again.
+ (Long) VectorRandomRowSource.randomPrimitiveObject(
+ r, (PrimitiveTypeInfo) TypeInfoFactory.longTypeInfo)); // NOTE(review): may need many redraws if values span the full long range -- confirm.
+ }
+ }
+ object = longWritable;
+ }
+ break;
+ default:
+ throw new RuntimeException("Unexpected generationKind " + generationKind);
+ }
+ row[c] = object;
+ }
}
return row;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/ce778adb/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCastStatement.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCastStatement.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCastStatement.java
index 30e8906..9e61fcd 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCastStatement.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCastStatement.java
@@ -35,6 +35,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource;
import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
+import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.GenerationSpec;
import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -252,24 +253,6 @@ public class TestVectorCastStatement {
final int decimal64Scale =
(isDecimal64 ? ((DecimalTypeInfo) typeInfo).getScale() : 0);
- List<String> explicitTypeNameList = new ArrayList<String>();
- List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList = new ArrayList<DataTypePhysicalVariation>();
- explicitTypeNameList.add(typeName);
- explicitDataTypePhysicalVariationList.add(dataTypePhysicalVariation);
-
- VectorRandomRowSource rowSource = new VectorRandomRowSource();
-
- rowSource.initExplicitSchema(
- random, explicitTypeNameList, /* maxComplexDepth */ 0, /* allowNull */ true,
- explicitDataTypePhysicalVariationList);
-
- List<String> columns = new ArrayList<String>();
- columns.add("col0");
- ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(typeInfo, "col0", "table", false);
-
- List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
- children.add(col1Expr);
-
//----------------------------------------------------------------------------------------------
String targetTypeName;
@@ -287,53 +270,38 @@ public class TestVectorCastStatement {
//----------------------------------------------------------------------------------------------
- String[] columnNames = columns.toArray(new String[0]);
-
- Object[][] randomRows = rowSource.randomRows(100000);
-
+ GenerationSpec generationSpec;
if (needsValidDataTypeData(targetTypeInfo) &&
(primitiveCategory == PrimitiveCategory.STRING ||
primitiveCategory == PrimitiveCategory.CHAR ||
primitiveCategory == PrimitiveCategory.VARCHAR)) {
-
- // Regenerate string family with valid data for target data type.
- final int rowCount = randomRows.length;
- for (int i = 0; i < rowCount; i++) {
- Object object = randomRows[i][0];
- if (object == null) {
- continue;
- }
- String string =
- VectorRandomRowSource.randomPrimitiveObject(
- random, (PrimitiveTypeInfo) targetTypeInfo).toString();
- Object newObject;
- switch (primitiveCategory) {
- case STRING:
- newObject = new Text(string);
- break;
- case CHAR:
- {
- HiveChar hiveChar =
- new HiveChar(
- string, ((CharTypeInfo) typeInfo).getLength());
- newObject = new HiveCharWritable(hiveChar);
- }
- break;
- case VARCHAR:
- {
- HiveVarchar hiveVarchar =
- new HiveVarchar(
- string, ((VarcharTypeInfo) typeInfo).getLength());
- newObject = new HiveVarcharWritable(hiveVarchar);
- }
- break;
- default:
- throw new RuntimeException("Unexpected string family category " + primitiveCategory);
- }
- randomRows[i][0] = newObject;
- }
+ generationSpec = GenerationSpec.createStringFamilyOtherTypeValue(typeInfo, targetTypeInfo);
+ } else {
+ generationSpec = GenerationSpec.createSameType(typeInfo);
}
+ List<GenerationSpec> generationSpecList = new ArrayList<GenerationSpec>();
+ List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList = new ArrayList<DataTypePhysicalVariation>();
+ generationSpecList.add(generationSpec);
+ explicitDataTypePhysicalVariationList.add(dataTypePhysicalVariation);
+
+ VectorRandomRowSource rowSource = new VectorRandomRowSource();
+
+ rowSource.initGenerationSpecSchema(
+ random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true,
+ explicitDataTypePhysicalVariationList);
+
+ List<String> columns = new ArrayList<String>();
+ columns.add("col0");
+ ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(typeInfo, "col0", "table", false);
+
+ List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
+ children.add(col1Expr);
+
+ String[] columnNames = columns.toArray(new String[0]);
+
+ Object[][] randomRows = rowSource.randomRows(100000);
+
VectorRandomBatchSource batchSource =
VectorRandomBatchSource.createInterestingBatches(
random,
http://git-wip-us.apache.org/repos/asf/hive/blob/ce778adb/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateAddSub.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateAddSub.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateAddSub.java
index 0f658c6..f5deca5 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateAddSub.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateAddSub.java
@@ -36,6 +36,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource;
import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
+import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.GenerationSpec;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
@@ -156,52 +157,6 @@ public class TestVectorDateAddSub {
}
}
- private static final String alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
-
- private Object randomDateStringFamily(
- Random random, TypeInfo dateTimeStringTypeInfo, boolean wantWritable) {
-
- String randomDateString = VectorRandomRowSource.randomPrimitiveDateStringObject(random);
- if (random.nextInt(40) == 39) {
-
- // Randomly corrupt.
- int index = random.nextInt(randomDateString.length());
- char[] chars = randomDateString.toCharArray();
- chars[index] = alphabet.charAt(random.nextInt(alphabet.length()));
- randomDateString = String.valueOf(chars);
- }
-
- PrimitiveCategory dateTimeStringPrimitiveCategory =
- ((PrimitiveTypeInfo) dateTimeStringTypeInfo).getPrimitiveCategory();
- switch (dateTimeStringPrimitiveCategory) {
- case STRING:
- return randomDateString;
- case CHAR:
- {
- HiveChar hiveChar =
- new HiveChar(randomDateString, ((CharTypeInfo) dateTimeStringTypeInfo).getLength());
- if (wantWritable) {
- return new HiveCharWritable(hiveChar);
- } else {
- return hiveChar;
- }
- }
- case VARCHAR:
- {
- HiveVarchar hiveVarchar =
- new HiveVarchar(
- randomDateString, ((VarcharTypeInfo) dateTimeStringTypeInfo).getLength());
- if (wantWritable) {
- return new HiveVarcharWritable(hiveVarchar);
- } else {
- return hiveVarchar;
- }
- }
- default:
- throw new RuntimeException("Unexpected string family category " + dateTimeStringPrimitiveCategory);
- }
- }
-
private void doDateAddSubTestsWithDiffColumnScalar(Random random, String dateTimeStringTypeName,
String integerTypeName, ColumnScalarMode columnScalarMode, boolean isAdd)
throws Exception {
@@ -220,7 +175,7 @@ public class TestVectorDateAddSub {
PrimitiveCategory integerPrimitiveCategory =
((PrimitiveTypeInfo) integerTypeInfo).getPrimitiveCategory();
- List<String> explicitTypeNameList = new ArrayList<String>();
+ List<GenerationSpec> generationSpecList = new ArrayList<GenerationSpec>();
List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList =
new ArrayList<DataTypePhysicalVariation>();
@@ -229,7 +184,14 @@ public class TestVectorDateAddSub {
ExprNodeDesc col1Expr;
if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN ||
columnScalarMode == ColumnScalarMode.COLUMN_SCALAR) {
- explicitTypeNameList.add(dateTimeStringTypeName);
+ if (!isStringFamily) {
+ generationSpecList.add(
+ GenerationSpec.createSameType(dateTimeStringTypeInfo));
+ } else {
+ generationSpecList.add(
+ GenerationSpec.createStringFamilyOtherTypeValue(
+ dateTimeStringTypeInfo, TypeInfoFactory.dateTypeInfo));
+ }
explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE);
String columnName = "col" + (columnNum++);
@@ -243,15 +205,16 @@ public class TestVectorDateAddSub {
random, (PrimitiveTypeInfo) dateTimeStringTypeInfo);
} else {
scalar1Object =
- randomDateStringFamily(
- random, dateTimeStringTypeInfo, /* wantWritable */ false);
+ VectorRandomRowSource.randomStringFamilyOtherTypeValue(
+ random, dateTimeStringTypeInfo, TypeInfoFactory.dateTypeInfo, false);
}
col1Expr = new ExprNodeConstantDesc(dateTimeStringTypeInfo, scalar1Object);
}
ExprNodeDesc col2Expr;
if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN ||
columnScalarMode == ColumnScalarMode.SCALAR_COLUMN) {
- explicitTypeNameList.add(integerTypeName);
+ generationSpecList.add(
+ GenerationSpec.createSameType(integerTypeInfo));
explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE);
String columnName = "col" + (columnNum++);
@@ -277,27 +240,12 @@ public class TestVectorDateAddSub {
VectorRandomRowSource rowSource = new VectorRandomRowSource();
- rowSource.initExplicitSchema(
- random, explicitTypeNameList, /* maxComplexDepth */ 0, /* allowNull */ true,
+ rowSource.initGenerationSpecSchema(
+ random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true,
explicitDataTypePhysicalVariationList);
Object[][] randomRows = rowSource.randomRows(100000);
- if (isStringFamily) {
- if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN ||
- columnScalarMode == ColumnScalarMode.COLUMN_SCALAR) {
- for (int i = 0; i < randomRows.length; i++) {
- Object[] row = randomRows[i];
- Object object = row[columnNum - 1];
- if (row[0] != null) {
- row[0] =
- randomDateStringFamily(
- random, dateTimeStringTypeInfo, /* wantWritable */ true);
- }
- }
- }
- }
-
if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN ||
columnScalarMode == ColumnScalarMode.SCALAR_COLUMN) {
http://git-wip-us.apache.org/repos/asf/hive/blob/ce778adb/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateDiff.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateDiff.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateDiff.java
index 80a1118..dce7ccf 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateDiff.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateDiff.java
@@ -36,6 +36,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource;
import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
+import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.GenerationSpec;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
@@ -70,8 +71,6 @@ import org.junit.Test;
public class TestVectorDateDiff {
- private static final boolean corruptDateStrings = false;
-
@Test
public void testDateDate() throws Exception {
Random random = new Random(7743);
@@ -152,52 +151,6 @@ public class TestVectorDateDiff {
}
}
- private static final String alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
-
- private Object randomDateStringFamily(
- Random random, TypeInfo dateTimeStringTypeInfo, boolean wantWritable) {
-
- String randomDateString = VectorRandomRowSource.randomPrimitiveDateStringObject(random);
- if (corruptDateStrings && random.nextInt(40) == 39) {
-
- // Randomly corrupt.
- int index = random.nextInt(randomDateString.length());
- char[] chars = randomDateString.toCharArray();
- chars[index] = alphabet.charAt(random.nextInt(alphabet.length()));
- randomDateString = String.valueOf(chars);
- }
-
- PrimitiveCategory dateTimeStringPrimitiveCategory =
- ((PrimitiveTypeInfo) dateTimeStringTypeInfo).getPrimitiveCategory();
- switch (dateTimeStringPrimitiveCategory) {
- case STRING:
- return randomDateString;
- case CHAR:
- {
- HiveChar hiveChar =
- new HiveChar(randomDateString, ((CharTypeInfo) dateTimeStringTypeInfo).getLength());
- if (wantWritable) {
- return new HiveCharWritable(hiveChar);
- } else {
- return hiveChar;
- }
- }
- case VARCHAR:
- {
- HiveVarchar hiveVarchar =
- new HiveVarchar(
- randomDateString, ((VarcharTypeInfo) dateTimeStringTypeInfo).getLength());
- if (wantWritable) {
- return new HiveVarcharWritable(hiveVarchar);
- } else {
- return hiveVarchar;
- }
- }
- default:
- throw new RuntimeException("Unexpected string family category " + dateTimeStringPrimitiveCategory);
- }
- }
-
private void doDateDiffTestsWithDiffColumnScalar(Random random, String dateTimeStringTypeName1,
String dateTimeStringTypeName2, ColumnScalarMode columnScalarMode)
throws Exception {
@@ -220,7 +173,7 @@ public class TestVectorDateDiff {
dateTimeStringPrimitiveCategory2 == PrimitiveCategory.CHAR ||
dateTimeStringPrimitiveCategory2 == PrimitiveCategory.VARCHAR);
- List<String> explicitTypeNameList = new ArrayList<String>();
+ List<GenerationSpec> generationSpecList = new ArrayList<GenerationSpec>();
List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList =
new ArrayList<DataTypePhysicalVariation>();
@@ -229,7 +182,14 @@ public class TestVectorDateDiff {
ExprNodeDesc col1Expr;
if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN ||
columnScalarMode == ColumnScalarMode.COLUMN_SCALAR) {
- explicitTypeNameList.add(dateTimeStringTypeName1);
+ if (!isStringFamily1) {
+ generationSpecList.add(
+ GenerationSpec.createSameType(dateTimeStringTypeInfo1));
+ } else {
+ generationSpecList.add(
+ GenerationSpec.createStringFamilyOtherTypeValue(
+ dateTimeStringTypeInfo1, TypeInfoFactory.dateTypeInfo));
+ }
explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE);
String columnName = "col" + (columnNum++);
@@ -243,15 +203,23 @@ public class TestVectorDateDiff {
random, (PrimitiveTypeInfo) dateTimeStringTypeInfo1);
} else {
scalar1Object =
- randomDateStringFamily(
- random, dateTimeStringTypeInfo1, /* wantWritable */ false);
+ VectorRandomRowSource.randomStringFamilyOtherTypeValue(
+ random, dateTimeStringTypeInfo1, TypeInfoFactory.dateTypeInfo, false);
}
col1Expr = new ExprNodeConstantDesc(dateTimeStringTypeInfo1, scalar1Object);
}
ExprNodeDesc col2Expr;
if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN ||
columnScalarMode == ColumnScalarMode.SCALAR_COLUMN) {
- explicitTypeNameList.add(dateTimeStringTypeName2);
+ if (!isStringFamily2) {
+ generationSpecList.add(
+ GenerationSpec.createSameType(dateTimeStringTypeInfo2));
+ } else {
+ generationSpecList.add(
+ GenerationSpec.createStringFamilyOtherTypeValue(
+ dateTimeStringTypeInfo2, TypeInfoFactory.dateTypeInfo));
+ }
+
explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE);
String columnName = "col" + (columnNum++);
@@ -265,8 +233,8 @@ public class TestVectorDateDiff {
random, (PrimitiveTypeInfo) dateTimeStringTypeInfo2);
} else {
scalar2Object =
- randomDateStringFamily(
- random, dateTimeStringTypeInfo2, /* wantWritable */ false);
+ VectorRandomRowSource.randomStringFamilyOtherTypeValue(
+ random, dateTimeStringTypeInfo2, TypeInfoFactory.dateTypeInfo, false);
}
col2Expr = new ExprNodeConstantDesc(dateTimeStringTypeInfo2, scalar2Object);
}
@@ -281,42 +249,12 @@ public class TestVectorDateDiff {
VectorRandomRowSource rowSource = new VectorRandomRowSource();
- rowSource.initExplicitSchema(
- random, explicitTypeNameList, /* maxComplexDepth */ 0, /* allowNull */ true,
+ rowSource.initGenerationSpecSchema(
+ random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true,
explicitDataTypePhysicalVariationList);
Object[][] randomRows = rowSource.randomRows(100000);
- if (isStringFamily1) {
- if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN ||
- columnScalarMode == ColumnScalarMode.COLUMN_SCALAR) {
- for (int i = 0; i < randomRows.length; i++) {
- Object[] row = randomRows[i];
- Object object = row[columnNum - 1];
- if (row[0] != null) {
- row[0] =
- randomDateStringFamily(
- random, dateTimeStringTypeInfo1, /* wantWritable */ true);
- }
- }
- }
- }
-
- if (isStringFamily2) {
- if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN ||
- columnScalarMode == ColumnScalarMode.SCALAR_COLUMN) {
- for (int i = 0; i < randomRows.length; i++) {
- Object[] row = randomRows[i];
- Object object = row[columnNum - 1];
- if (row[columnNum - 1] != null) {
- row[columnNum - 1] =
- randomDateStringFamily(
- random, dateTimeStringTypeInfo2, /* wantWritable */ true);
- }
- }
- }
- }
-
VectorRandomBatchSource batchSource =
VectorRandomBatchSource.createInterestingBatches(
random,
http://git-wip-us.apache.org/repos/asf/hive/blob/ce778adb/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringConcat.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringConcat.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringConcat.java
new file mode 100644
index 0000000..a87a8b4
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringConcat.java
@@ -0,0 +1,427 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation;
+import org.apache.hadoop.hive.common.type.HiveChar;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
+import org.apache.hadoop.hive.ql.exec.FunctionInfo;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow;
+import org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource;
+import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
+import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.GenerationSpec;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFDateAdd;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFDateSub;
+import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
+import org.apache.hadoop.hive.serde2.io.ShortWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+
+import junit.framework.Assert;
+
+import org.junit.Test;
+
+public class TestVectorStringConcat {
+
+  /** Runs the concat comparison with STRING arguments on both sides. */
+  @Test
+  public void testString() throws Exception {
+    // Fixed seed keeps the generated rows reproducible from run to run.
+    final Random rng = new Random(12882);
+    doStringConcatTests(rng, "string", "string");
+  }
+
+  /** Runs the concat comparison for type pairings that put CHAR on at least one side. */
+  @Test
+  public void testChar() throws Exception {
+    final Random rng = new Random(12882);
+
+    // Each entry is { first argument type, second argument type }; order matters because the
+    // same Random instance is shared across all pairings.
+    final String[][] typePairs = {
+        { "char(20)", "char(10)" },
+        { "char(20)", "string" },
+        { "char(20)", "varchar(10)" },
+        { "string", "char(10)" },
+    };
+    for (String[] typePair : typePairs) {
+      doStringConcatTests(rng, typePair[0], typePair[1]);
+    }
+  }
+
+  /** Runs the concat comparison for type pairings that put VARCHAR on at least one side. */
+  @Test
+  public void testVarchar() throws Exception {
+    final Random rng = new Random(12882);
+
+    // Each entry is { first argument type, second argument type }; order matters because the
+    // same Random instance is shared across all pairings.
+    final String[][] typePairs = {
+        { "varchar(20)", "varchar(10)" },
+        { "varchar(20)", "string" },
+        { "varchar(20)", "char(10)" },
+        { "string", "varchar(10)" },
+    };
+    for (String[] typePair : typePairs) {
+      doStringConcatTests(rng, typePair[0], typePair[1]);
+    }
+  }
+
+  /**
+   * The evaluation paths that are run over the same rows. The comparison loop in this class
+   * treats the results at index 0 (ROW_MODE) as the expected values, so ROW_MODE must stay first.
+   */
+  public enum StringConcatTestMode {
+    ROW_MODE, ADAPTOR, VECTOR_EXPRESSION;
+
+    /** Number of test modes; used to size and walk the per-mode result arrays. */
+    static final int count = StringConcatTestMode.values().length;
+  }
+
+  /**
+   * Which of the two concat arguments are batch columns and which is a constant: both columns,
+   * column then scalar, or scalar then column.
+   */
+  public enum ColumnScalarMode {
+    COLUMN_COLUMN, COLUMN_SCALAR, SCALAR_COLUMN;
+
+    /** Number of column/scalar arrangements. */
+    static final int count = ColumnScalarMode.values().length;
+  }
+
+ private void doStringConcatTests(Random random, String stringTypeName1, String stringTypeName2)
+ throws Exception {
+ for (ColumnScalarMode columnScalarMode : ColumnScalarMode.values()) {
+ doStringConcatTestsWithDiffColumnScalar(
+ random, stringTypeName1, stringTypeName2, columnScalarMode);
+ }
+ }
+
+  /**
+   * Builds a CONCAT expression over two string-family arguments, evaluates it over randomly
+   * generated rows in every {@link StringConcatTestMode}, and fails on the first row where a
+   * vectorized result differs from the row-mode result.
+   *
+   * @param random source of randomness for scalar constants, rows and batches
+   * @param stringTypeName1 type name of the first argument, e.g. "char(20)"
+   * @param stringTypeName2 type name of the second argument, e.g. "varchar(10)"
+   * @param columnScalarMode which arguments are columns and which is a constant
+   * @throws Exception if building or evaluating the expression fails
+   */
+  private void doStringConcatTestsWithDiffColumnScalar(Random random,
+      String stringTypeName1, String stringTypeName2, ColumnScalarMode columnScalarMode)
+      throws Exception {
+
+    TypeInfo stringTypeInfo1 =
+        TypeInfoUtils.getTypeInfoFromTypeString(stringTypeName1);
+    TypeInfo stringTypeInfo2 =
+        TypeInfoUtils.getTypeInfoFromTypeString(stringTypeName2);
+
+    String functionName = "concat";
+
+    List<GenerationSpec> generationSpecList = new ArrayList<GenerationSpec>();
+    List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList =
+        new ArrayList<DataTypePhysicalVariation>();
+
+    // Each argument becomes either a generated column or a random constant, depending on the
+    // column/scalar arrangement. Only column arguments contribute to the row schema.
+    List<String> columns = new ArrayList<String>();
+    int columnNum = 0;
+    ExprNodeDesc col1Expr;
+    if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN ||
+        columnScalarMode == ColumnScalarMode.COLUMN_SCALAR) {
+      generationSpecList.add(
+          GenerationSpec.createSameType(stringTypeInfo1));
+
+      explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE);
+
+      String columnName = "col" + (columnNum++);
+      col1Expr = new ExprNodeColumnDesc(stringTypeInfo1, columnName, "table", false);
+      columns.add(columnName);
+    } else {
+      Object scalar1Object =
+          VectorRandomRowSource.randomPrimitiveObject(
+              random, (PrimitiveTypeInfo) stringTypeInfo1);
+      col1Expr = new ExprNodeConstantDesc(stringTypeInfo1, scalar1Object);
+    }
+    ExprNodeDesc col2Expr;
+    if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN ||
+        columnScalarMode == ColumnScalarMode.SCALAR_COLUMN) {
+      generationSpecList.add(
+          GenerationSpec.createSameType(stringTypeInfo2));
+      explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE);
+
+      String columnName = "col" + (columnNum++);
+      col2Expr = new ExprNodeColumnDesc(stringTypeInfo2, columnName, "table", false);
+      columns.add(columnName);
+    } else {
+      Object scalar2Object =
+          VectorRandomRowSource.randomPrimitiveObject(
+              random, (PrimitiveTypeInfo) stringTypeInfo2);
+      col2Expr = new ExprNodeConstantDesc(stringTypeInfo2, scalar2Object);
+    }
+
+    List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
+    children.add(col1Expr);
+    children.add(col2Expr);
+
+    //----------------------------------------------------------------------------------------------
+
+    String[] columnNames = columns.toArray(new String[0]);
+
+    VectorRandomRowSource rowSource = new VectorRandomRowSource();
+
+    rowSource.initGenerationSpecSchema(
+        random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true,
+        explicitDataTypePhysicalVariationList);
+
+    Object[][] randomRows = rowSource.randomRows(100000);
+
+    VectorRandomBatchSource batchSource =
+        VectorRandomBatchSource.createInterestingBatches(
+            random,
+            rowSource,
+            randomRows,
+            null);
+
+    String[] outputScratchTypeNames = new String[] { "string" };
+
+    VectorizedRowBatchCtx batchContext =
+        new VectorizedRowBatchCtx(
+            columnNames,
+            rowSource.typeInfos(),
+            rowSource.dataTypePhysicalVariations(),
+            /* dataColumnNums */ null,
+            /* partitionColumnCount */ 0,
+            /* virtualColumnCount */ 0,
+            /* neededVirtualColumns */ null,
+            outputScratchTypeNames,
+            null);
+
+    FunctionInfo funcInfo = null;
+    try {
+      funcInfo = FunctionRegistry.getFunctionInfo(functionName);
+    } catch (SemanticException e) {
+      Assert.fail("Failed to load " + functionName + " " + e);
+    }
+    // Report a missing function as a test failure instead of a NullPointerException below.
+    Assert.assertNotNull("Function " + functionName + " was not found", funcInfo);
+    GenericUDF genericUdf = funcInfo.getGenericUDF();
+
+    final StringConcatTestMode[] testModes = StringConcatTestMode.values();
+    final int rowCount = randomRows.length;
+    Object[][] resultObjectsArray = new Object[StringConcatTestMode.count][];
+    for (int i = 0; i < StringConcatTestMode.count; i++) {
+
+      Object[] resultObjects = new Object[rowCount];
+      resultObjectsArray[i] = resultObjects;
+
+      StringConcatTestMode stringConcatTestMode = testModes[i];
+      switch (stringConcatTestMode) {
+      case ROW_MODE:
+        doRowStringConcatTest(
+            stringTypeInfo1,
+            stringTypeInfo2,
+            columns,
+            children,
+            randomRows,
+            columnScalarMode,
+            rowSource.rowStructObjectInspector(),
+            genericUdf,
+            resultObjects);
+        break;
+      case ADAPTOR:
+      case VECTOR_EXPRESSION:
+        doVectorStringConcatTest(
+            stringTypeInfo1,
+            stringTypeInfo2,
+            columns,
+            rowSource.typeInfos(),
+            children,
+            stringConcatTestMode,
+            columnScalarMode,
+            batchSource,
+            batchContext,
+            rowSource.rowStructObjectInspector(),
+            genericUdf,
+            resultObjects);
+        break;
+      default:
+        throw new RuntimeException(
+            "Unexpected string concat test mode " + stringConcatTestMode);
+      }
+    }
+
+    for (int i = 0; i < rowCount; i++) {
+      // Row-mode is the expected value.
+      Object expectedResult = resultObjectsArray[0][i];
+
+      for (int v = 1; v < StringConcatTestMode.count; v++) {
+        Object vectorResult = resultObjectsArray[v][i];
+        if (expectedResult == null || vectorResult == null) {
+          // At least one side is NULL: the results agree only when both are NULL.
+          if (expectedResult != null || vectorResult != null) {
+            Assert.fail(
+                "Row " + i + " " + testModes[v] +
+                " " + columnScalarMode +
+                " result is NULL " + (vectorResult == null) +
+                " does not match row-mode expected result is NULL " + (expectedResult == null) +
+                " row values " + Arrays.toString(randomRows[i]));
+          }
+        } else {
+
+          if (!expectedResult.equals(vectorResult)) {
+            Assert.fail(
+                "Row " + i + " " + testModes[v] +
+                " " + columnScalarMode +
+                " result \"" + vectorResult.toString() + "\"" +
+                " (" + vectorResult.getClass().getSimpleName() + ")" +
+                " does not match row-mode expected result \"" + expectedResult.toString() + "\"" +
+                " (" + expectedResult.getClass().getSimpleName() + ")" +
+                " row values " + Arrays.toString(randomRows[i]));
+          }
+        }
+      }
+    }
+  }
+
+  /**
+   * Evaluates the concat expression row by row with an {@link ExprNodeEvaluator} and stores a
+   * standard writable copy of each result; these results serve as the expected values.
+   *
+   * @param stringTypeInfo type of the first concat argument (only used in the debug line)
+   * @param integerTypeInfo type of the second concat argument (only used in the debug line).
+   *     NOTE(review): despite the name, the caller in this class passes a string-family type --
+   *     the name looks carried over from another test; confirm before renaming.
+   * @param columns column names of the generated rows (not read here)
+   * @param children the two argument expressions of the concat call
+   * @param randomRows input rows, one Object[] per row
+   * @param columnScalarMode column/scalar arrangement (only used in the debug line)
+   * @param rowInspector object inspector describing one input row
+   * @param genericUdf the concat UDF instance
+   * @param resultObjects output array, filled with one result per input row
+   * @throws Exception if the evaluator cannot be initialized or evaluation fails
+   */
+  private void doRowStringConcatTest(TypeInfo stringTypeInfo, TypeInfo integerTypeInfo,
+      List<String> columns, List<ExprNodeDesc> children,
+      Object[][] randomRows, ColumnScalarMode columnScalarMode,
+      ObjectInspector rowInspector,
+      GenericUDF genericUdf, Object[] resultObjects) throws Exception {
+
+    // Labels match the debug line printed by doVectorStringConcatTest so the two are comparable.
+    System.out.println(
+        "*DEBUG* stringTypeInfo1 " + stringTypeInfo.toString() +
+        " stringTypeInfo2 " + integerTypeInfo +
+        " stringConcatTestMode ROW_MODE" +
+        " columnScalarMode " + columnScalarMode +
+        " genericUdf " + genericUdf.toString());
+
+    ExprNodeGenericFuncDesc exprDesc =
+        new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, genericUdf, children);
+
+    HiveConf hiveConf = new HiveConf();
+    ExprNodeEvaluator evaluator =
+        ExprNodeEvaluatorFactory.get(exprDesc, hiveConf);
+    evaluator.initialize(rowInspector);
+
+    ObjectInspector objectInspector = evaluator.getOutputOI();
+
+    final int rowCount = randomRows.length;
+    for (int i = 0; i < rowCount; i++) {
+      Object[] row = randomRows[i];
+      Object result = evaluator.evaluate(row);
+      // Copy each result before the next evaluate call so the stored value is independent of it.
+      Object copyResult =
+          ObjectInspectorUtils.copyToStandardObject(
+              result, objectInspector, ObjectInspectorCopyOption.WRITABLE);
+      resultObjects[i] = copyResult;
+    }
+  }
+
+  /**
+   * Copies the result of every logical row of {@code batch} into {@code resultObjects},
+   * starting at {@code rowIndex} and honoring the batch's selected array when it is in use.
+   */
+  private void extractResultObjects(VectorizedRowBatch batch, int rowIndex,
+      VectorExtractRow resultVectorExtractRow, Object[] scrqtchRow,
+      ObjectInspector objectInspector, Object[] resultObjects) {
+
+    final boolean useSelected = batch.selectedInUse;
+    final int[] selectedRows = batch.selected;
+    int outputIndex = rowIndex;
+    for (int logical = 0; logical < batch.size; logical++) {
+      // Map the logical position to the physical row inside the batch.
+      final int physical;
+      if (useSelected) {
+        physical = selectedRows[logical];
+      } else {
+        physical = logical;
+      }
+      resultVectorExtractRow.extractRow(batch, physical, scrqtchRow);
+
+      final Object standardCopy =
+          ObjectInspectorUtils.copyToStandardObject(
+              scrqtchRow[0], objectInspector, ObjectInspectorCopyOption.WRITABLE);
+      resultObjects[outputIndex] = standardCopy;
+      outputIndex++;
+    }
+  }
+
+ private void doVectorStringConcatTest(TypeInfo stringTypeInfo1, TypeInfo stringTypeInfo2,
+ List<String> columns,
+ TypeInfo[] typeInfos,
+ List<ExprNodeDesc> children,
+ StringConcatTestMode stringConcatTestMode, ColumnScalarMode columnScalarMode,
+ VectorRandomBatchSource batchSource, VectorizedRowBatchCtx batchContext,
+ ObjectInspector rowInspector,
+ GenericUDF genericUdf, Object[] resultObjects)
+ throws Exception {
+
+ HiveConf hiveConf = new HiveConf();
+ if (stringConcatTestMode == StringConcatTestMode.ADAPTOR) {
+ hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_TEST_VECTOR_ADAPTOR_OVERRIDE, true);
+ }
+
+ DataTypePhysicalVariation[] dataTypePhysicalVariations = new DataTypePhysicalVariation[2];
+ Arrays.fill(dataTypePhysicalVariations, DataTypePhysicalVariation.NONE);
+
+ ExprNodeGenericFuncDesc exprDesc =
+ new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, genericUdf, children);
+
+ //---------------------------------------
+ // Just so we can get the output type...
+
+ ExprNodeEvaluator evaluator =
+ ExprNodeEvaluatorFactory.get(exprDesc, hiveConf);
+ evaluator.initialize(rowInspector);
+
+ ObjectInspector objectInspector = evaluator.getOutputOI();
+ TypeInfo outputTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(objectInspector);
+
+ /*
+ * Again with correct output type...
+ */
+ exprDesc =
+ new ExprNodeGenericFuncDesc(outputTypeInfo, genericUdf, children);
+ //---------------------------------------
+
+ VectorizationContext vectorizationContext =
+ new VectorizationContext(
+ "name",
+ columns,
+ Arrays.asList(typeInfos),
+ Arrays.asList(dataTypePhysicalVariations),
+ hiveConf);
+ VectorExpression vectorExpression = vectorizationContext.getVectorExpression(exprDesc);
+ vectorExpression.transientInit();
+
+ VectorizedRowBatch batch = batchContext.createVectorizedRowBatch();
+
+ VectorExtractRow resultVectorExtractRow = new VectorExtractRow();
+ resultVectorExtractRow.init(
+ new TypeInfo[] { outputTypeInfo }, new int[] { columns.size() });
+ Object[] scrqtchRow = new Object[1];
+
+ System.out.println(
+ "*DEBUG* stringTypeInfo1 " + stringTypeInfo1.toString() +
+ " stringTypeInfo2 " + stringTypeInfo2.toString() +
+ " stringConcatTestMode " + stringConcatTestMode +
+ " columnScalarMode " + columnScalarMode +
+ " vectorExpression " + vectorExpression.toString());
+
+ batchSource.resetBatchIteration();
+ int rowIndex = 0;
+ while (true) {
+ if (!batchSource.fillNextBatch(batch)) {
+ break;
+ }
+ vectorExpression.evaluate(batch);
+ extractResultObjects(batch, rowIndex, resultVectorExtractRow, scrqtchRow,
+ objectInspector, resultObjects);
+ rowIndex += batch.size;
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/ce778adb/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java
index 65daeaa..902f29e 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java
@@ -4541,8 +4541,8 @@ public class TestVectorStringExpressions {
// has nulls, not repeating
VectorizedRowBatch batch = makeStringBatch();
- StringGroupColConcatCharScalar expr =
- new StringGroupColConcatCharScalar(
+ StringGroupColConcatStringScalar expr =
+ new StringGroupColConcatStringScalar(
0, new HiveChar(new String(red), 6).getStrippedValue().getBytes(), 1);
expr.evaluate(batch);
BytesColumnVector outCol = (BytesColumnVector) batch.cols[1];
@@ -4605,7 +4605,9 @@ public class TestVectorStringExpressions {
// has nulls, not repeating
VectorizedRowBatch batch = makeStringBatch();
- StringGroupColConcatVarCharScalar expr = new StringGroupColConcatVarCharScalar(0, new HiveVarchar(new String(red), 14), 1);
+ StringGroupColConcatStringScalar expr =
+ new StringGroupColConcatStringScalar(
+ 0, new HiveVarchar(new String(red), 14).getValue().getBytes(), 1);
expr.evaluate(batch);
BytesColumnVector outCol = (BytesColumnVector) batch.cols[1];
@@ -4729,8 +4731,8 @@ public class TestVectorStringExpressions {
// has nulls, not repeating
VectorizedRowBatch batch = makeStringBatch();
- CharScalarConcatStringGroupCol expr =
- new CharScalarConcatStringGroupCol(
+ StringScalarConcatStringGroupCol expr =
+ new StringScalarConcatStringGroupCol(
new HiveChar(new String(red), 6).getStrippedValue().getBytes(), 0, 1);
expr.evaluate(batch);
BytesColumnVector outCol = (BytesColumnVector) batch.cols[1];
@@ -4793,7 +4795,9 @@ public class TestVectorStringExpressions {
// has nulls, not repeating
VectorizedRowBatch batch = makeStringBatch();
- VarCharScalarConcatStringGroupCol expr = new VarCharScalarConcatStringGroupCol(new HiveVarchar(new String(red), 14), 0, 1);
+ StringScalarConcatStringGroupCol expr =
+ new StringScalarConcatStringGroupCol(
+ new HiveVarchar(new String(red), 14).getValue().getBytes(), 0, 1);
expr.evaluate(batch);
BytesColumnVector outCol = (BytesColumnVector) batch.cols[1];