Posted to commits@hive.apache.org by jc...@apache.org on 2019/11/25 20:29:38 UTC
[hive] branch master updated: HIVE-22435: Exception when using
VectorTopNKeyOperator operator (Krisztian Kasa, reviewed by Rajesh Balamohan,
Jesus Camacho Rodriguez)
This is an automated email from the ASF dual-hosted git repository.
jcamacho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 0a44c15 HIVE-22435: Exception when using VectorTopNKeyOperator operator (Krisztian Kasa, reviewed by Rajesh Balamohan, Jesus Camacho Rodriguez)
0a44c15 is described below
commit 0a44c151e4eab623f8f031e76382126119d1c06c
Author: Krisztian Kasa <kk...@cloudera.com>
AuthorDate: Mon Nov 25 11:48:50 2019 -0800
HIVE-22435: Exception when using VectorTopNKeyOperator operator (Krisztian Kasa, reviewed by Rajesh Balamohan, Jesus Camacho Rodriguez)
---
.../test/resources/testconfiguration.properties | 2 +
.../apache/hadoop/hive/ql/exec/TopNKeyFilter.java | 54 ++
.../hadoop/hive/ql/exec/TopNKeyOperator.java | 39 +-
.../hive/ql/exec/vector/VectorTopNKeyOperator.java | 78 ++-
.../vector/wrapper/VectorHashKeyWrapperBatch.java | 15 +
.../VectorHashKeyWrapperGeneralComparator.java | 136 +++++
.../hive/ql/optimizer/physical/Vectorizer.java | 2 +-
ql/src/test/queries/clientpositive/topnkey.q | 54 --
.../queries/clientpositive/topnkey_order_null.q | 83 +++
.../test/queries/clientpositive/vector_topnkey.q | 46 +-
.../test/results/clientpositive/llap/topnkey.q.out | 389 -------------
.../clientpositive/llap/topnkey_order_null.q.out | 282 ++++++++++
.../clientpositive/llap/vector_topnkey.q.out | 612 ++++++---------------
.../test/results/clientpositive/tez/topnkey.q.out | 289 ----------
.../clientpositive/tez/topnkey_order_null.q.out | 282 ++++++++++
.../clientpositive/tez/vector_topnkey.q.out | 612 ++++++---------------
ql/src/test/results/clientpositive/topnkey.q.out | 375 -------------
.../clientpositive/topnkey_order_null.q.out | 282 ++++++++++
.../results/clientpositive/vector_topnkey.q.out | 510 +++++------------
.../serde2/objectinspector/ObjectComparator.java | 45 ++
20 files changed, 1736 insertions(+), 2451 deletions(-)
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 2918a68..3711b33 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -24,6 +24,7 @@ minitez.query.files.shared=delete_orig_table.q,\
orc_merge12.q,\
orc_vectorization_ppd.q,\
topnkey.q,\
+ topnkey_order_null.q,\
update_orig_table.q,\
vector_join_part_col_char.q,\
vector_non_string_partition.q,\
@@ -215,6 +216,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
subquery_in.q,\
temp_table.q,\
topnkey.q,\
+ topnkey_order_null.q,\
union2.q,\
union3.q,\
union4.q,\
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyFilter.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyFilter.java
new file mode 100644
index 0000000..4998766
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyFilter.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec;
+
+import java.util.Comparator;
+import java.util.PriorityQueue;
+
+/**
+ * Filters out keys that are not among the top N seen so far.
+ * An instance of this class is wrapped in {@link TopNKeyOperator} and
+ * {@link org.apache.hadoop.hive.ql.exec.vector.VectorTopNKeyOperator}
+ * @param <T> - Type of {@link KeyWrapper}. Each key is stored in a KeyWrapper instance.
+ */
+public class TopNKeyFilter<T extends KeyWrapper> {
+ private final PriorityQueue<T> priorityQueue;
+ private final int topN;
+
+ public TopNKeyFilter(int topN, Comparator<T> comparator) {
+ // We need a reversed comparator because the PriorityQueue.poll() method is used for filtering out keys.
+    // E.g. for ORDER BY key1 ASC, a call to poll() should remove the largest key.
+ this.priorityQueue = new PriorityQueue<>(topN + 1, comparator.reversed());
+ this.topN = topN;
+ }
+
+ public boolean canForward(T kw) {
+ if (!priorityQueue.contains(kw)) {
+ priorityQueue.offer((T) kw.copyKey());
+ }
+ if (priorityQueue.size() > topN) {
+ priorityQueue.poll();
+ }
+
+ return priorityQueue.contains(kw);
+ }
+
+ public void clear() {
+ priorityQueue.clear();
+ }
+}
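The new TopNKeyFilter keeps a bounded PriorityQueue built on the reversed key comparator, so poll() always evicts the key that sorts last and only rows whose keys rank in the top N seen so far are forwarded. As a minimal standalone sketch of that idea (plain Integers stand in for KeyWrapper; the class and values below are illustrative, not part of the patch):

import java.util.Comparator;
import java.util.PriorityQueue;

public class TopNFilterSketch {
  public static void main(String[] args) {
    int topN = 2;
    // Reversed comparator: poll() evicts the currently largest key,
    // so the queue retains only the smallest topN keys seen so far.
    PriorityQueue<Integer> queue = new PriorityQueue<>(topN + 1, Comparator.<Integer>naturalOrder().reversed());
    int[] keys = {7, 5, 6, 5, 7};
    for (int key : keys) {
      if (!queue.contains(key)) {
        queue.offer(key);
      }
      if (queue.size() > topN) {
        queue.poll();   // drop the largest key once capacity is exceeded
      }
      // Forward the row only if its key is still in the queue.
      System.out.println("key=" + key + " forward=" + queue.contains(key));
    }
    // Prints: 7 true, 5 true, 6 true, 5 true, 7 false
  }
}

Note that canForward() offers kw.copyKey() rather than kw itself, presumably so the queue does not retain the key wrapper instance that is reused for every incoming row.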
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java
index d16500e..b3ab701 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -30,7 +30,6 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import java.io.Serializable;
import java.util.Comparator;
-import java.util.PriorityQueue;
import static org.apache.hadoop.hive.ql.plan.api.OperatorType.TOPNKEY;
@@ -41,11 +40,7 @@ public class TopNKeyOperator extends Operator<TopNKeyDesc> implements Serializab
private static final long serialVersionUID = 1L;
- // Maximum number of keys to hold
- private transient int topN;
-
- // Priority queue that holds occurred keys
- private transient PriorityQueue<KeyWrapper> priorityQueue;
+ private transient TopNKeyFilter<KeyWrapper> topNKeyFilter;
private transient KeyWrapper keyWrapper;
@@ -86,7 +81,8 @@ public class TopNKeyOperator extends Operator<TopNKeyDesc> implements Serializab
protected void initializeOp(Configuration hconf) throws HiveException {
super.initializeOp(hconf);
- this.topN = conf.getTopN();
+ String columnSortOrder = conf.getColumnSortOrder();
+ String nullSortOrder = conf.getNullOrder();
ObjectInspector rowInspector = inputObjInspectors[0];
ObjectInspector standardObjInspector = ObjectInspectorUtils.getStandardObjectInspector(rowInspector);
@@ -107,13 +103,8 @@ public class TopNKeyOperator extends Operator<TopNKeyDesc> implements Serializab
standardKeyObjectInspectors[i] = standardKeyFields[i].initialize(standardObjInspector);
}
- String columnSortOrder = conf.getColumnSortOrder();
- String nullSortOrder = conf.getNullOrder();
-
- // We need a reversed comparator because the PriorityQueue.poll() method is used for filtering out keys.
- // Ex.: When ORDER BY key1 ASC then call of poll() should remove the largest key.
- priorityQueue = new PriorityQueue<>(topN + 1,
- new KeyWrapperComparator(standardKeyObjectInspectors, columnSortOrder, nullSortOrder).reversed());
+ this.topNKeyFilter = new TopNKeyFilter<>(conf.getTopN(), new TopNKeyOperator.KeyWrapperComparator(
+ standardKeyObjectInspectors, columnSortOrder, nullSortOrder));
KeyWrapperFactory keyWrapperFactory = new KeyWrapperFactory(keyFields, keyObjectInspectors,
standardKeyObjectInspectors);
@@ -122,28 +113,16 @@ public class TopNKeyOperator extends Operator<TopNKeyDesc> implements Serializab
@Override
public void process(Object row, int tag) throws HiveException {
- if (canProcess(row, tag)) {
- forward(row, outputObjInspector);
- }
- }
-
- protected boolean canProcess(Object row, int tag) throws HiveException {
keyWrapper.getNewKey(row, inputObjInspectors[tag]);
keyWrapper.setHashKey();
-
- if (!priorityQueue.contains(keyWrapper)) {
- priorityQueue.offer(keyWrapper.copyKey());
- }
- if (priorityQueue.size() > topN) {
- priorityQueue.poll();
+ if (topNKeyFilter.canForward(keyWrapper)) {
+ forward(row, outputObjInspector);
}
-
- return priorityQueue.contains(keyWrapper);
}
@Override
protected final void closeOp(boolean abort) throws HiveException {
- priorityQueue.clear();
+ topNKeyFilter.clear();
super.closeOp(abort);
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java
index c80bc80..5faa038 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -21,31 +21,32 @@ import com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.CompilationOpContext;
import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.TopNKeyFilter;
import org.apache.hadoop.hive.ql.exec.TopNKeyOperator;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.wrapper.VectorHashKeyWrapperBase;
+import org.apache.hadoop.hive.ql.exec.vector.wrapper.VectorHashKeyWrapperBatch;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.TopNKeyDesc;
import org.apache.hadoop.hive.ql.plan.VectorDesc;
import org.apache.hadoop.hive.ql.plan.VectorTopNKeyDesc;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.ql.plan.api.OperatorType;
/**
 * VectorTopNKeyOperator passes only rows that contain top N keys.
*/
-public class VectorTopNKeyOperator extends TopNKeyOperator implements VectorizationOperator {
+public class VectorTopNKeyOperator extends Operator<TopNKeyDesc> implements VectorizationOperator {
private static final long serialVersionUID = 1L;
private VectorTopNKeyDesc vectorDesc;
private VectorizationContext vContext;
- // Extract row
- private transient Object[] extractedRow;
- private transient VectorExtractRow vectorExtractRow;
-
// Batch processing
private transient int[] temporarySelected;
+ private transient VectorHashKeyWrapperBatch keyWrappersBatch;
+ private transient TopNKeyFilter<VectorHashKeyWrapperBase> topNKeyFilter;
public VectorTopNKeyOperator(CompilationOpContext ctx, OperatorDesc conf,
VectorizationContext vContext, VectorDesc vectorDesc) {
@@ -70,17 +71,18 @@ public class VectorTopNKeyOperator extends TopNKeyOperator implements Vectorizat
protected void initializeOp(Configuration hconf) throws HiveException {
super.initializeOp(hconf);
- VectorExpression.doTransientInit(vectorDesc.getKeyExpressions(), hconf);
- for (VectorExpression keyExpression : vectorDesc.getKeyExpressions()) {
+ VectorExpression[] keyExpressions = vectorDesc.getKeyExpressions();
+ VectorExpression.doTransientInit(keyExpressions, hconf);
+ for (VectorExpression keyExpression : keyExpressions) {
keyExpression.init(hconf);
}
- vectorExtractRow = new VectorExtractRow();
- vectorExtractRow.init((StructObjectInspector) inputObjInspectors[0],
- vContext.getProjectedColumns());
- extractedRow = new Object[vectorExtractRow.getCount()];
-
temporarySelected = new int [VectorizedRowBatch.DEFAULT_SIZE];
+
+ keyWrappersBatch = VectorHashKeyWrapperBatch.compileKeyWrapperBatch(keyExpressions);
+ this.topNKeyFilter = new TopNKeyFilter<>(conf.getTopN(), keyWrappersBatch.getComparator(
+ conf.getColumnSortOrder(),
+ conf.getNullOrder()));
}
@Override
@@ -99,6 +101,9 @@ public class VectorTopNKeyOperator extends TopNKeyOperator implements Vectorizat
keyExpression.evaluate(batch);
}
+ keyWrappersBatch.evaluateBatch(batch);
+ VectorHashKeyWrapperBase[] keyWrappers = keyWrappersBatch.getVectorHashKeyWrappers();
+
// Filter rows with top n keys
int size = 0;
int[] selected = new int[batch.selected.length];
@@ -110,11 +115,8 @@ public class VectorTopNKeyOperator extends TopNKeyOperator implements Vectorizat
j = i;
}
- // Get keys
- vectorExtractRow.extractRow(batch, j, extractedRow);
-
// Select a row in the priority queue
- if (canProcess(extractedRow, tag)) {
+ if (topNKeyFilter.canForward(keyWrappers[i])) {
selected[size++] = j;
}
}
@@ -154,4 +156,44 @@ public class VectorTopNKeyOperator extends TopNKeyOperator implements Vectorizat
op.setNextVectorBatchGroupStatus(isLastGroupBatch);
}
}
+
+ @Override
+ public String getName() {
+ return TopNKeyOperator.getOperatorName();
+ }
+
+ @Override
+ public OperatorType getType() {
+ return OperatorType.TOPNKEY;
+ }
+
+ @Override
+ protected void closeOp(boolean abort) throws HiveException {
+ topNKeyFilter.clear();
+ super.closeOp(abort);
+ }
+
+  // Because a TopNKeyOperator works like a FilterOperator with a top-n key condition, its properties
+  // for optimizers have the same values. The following methods are the same as in FilterOperator:
+  // supportSkewJoinOptimization, columnNamesRowResolvedCanBeObtained,
+  // supportAutomaticSortMergeJoin, and supportUnionRemoveOptimization.
+ @Override
+ public boolean supportSkewJoinOptimization() {
+ return true;
+ }
+
+ @Override
+ public boolean columnNamesRowResolvedCanBeObtained() {
+ return true;
+ }
+
+ @Override
+ public boolean supportAutomaticSortMergeJoin() {
+ return true;
+ }
+
+ @Override
+ public boolean supportUnionRemoveOptimization() {
+ return true;
+ }
}
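In the vectorized path the filter is applied per batch: the key expressions are evaluated into VectorHashKeyWrapperBase instances and the batch's selected array is rebuilt from the rows whose keys pass, which is also why VectorExtractRow is no longer needed. A minimal sketch of that selected-array pattern, with a long[] key column and a plain Long queue standing in for the Hive wrappers (names and values are illustrative only):

import java.util.Arrays;
import java.util.Comparator;
import java.util.PriorityQueue;

public class VectorTopNSketch {
  public static void main(String[] args) {
    int topN = 3;
    PriorityQueue<Long> filter = new PriorityQueue<>(topN + 1, Comparator.<Long>naturalOrder().reversed());
    long[] keyColumn = {9L, 1L, 4L, 7L, 2L, 8L};  // stands in for an evaluated key expression
    int[] selected = new int[keyColumn.length];   // indices of surviving rows, like batch.selected
    int size = 0;
    for (int row = 0; row < keyColumn.length; row++) {
      long key = keyColumn[row];
      if (!filter.contains(key)) {
        filter.offer(key);
      }
      if (filter.size() > topN) {
        filter.poll();              // evict the key that sorts last
      }
      if (filter.contains(key)) {
        selected[size++] = row;     // forward this row
      }
    }
    System.out.println(Arrays.toString(Arrays.copyOf(selected, size)));
    // Prints [0, 1, 2, 3, 4]: each row whose key is within the top 3 seen so far is forwarded;
    // row 5 (key 8) is filtered out. Downstream operators still apply the final ORDER BY ... LIMIT.
  }
}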
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperBatch.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperBatch.java
index dd31991..0786c82 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperBatch.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperBatch.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.wrapper;
+import java.util.Comparator;
+
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -1072,5 +1074,18 @@ public class VectorHashKeyWrapperBatch extends VectorColumnSetInfo {
}
return variableSize;
}
+
+ public Comparator<VectorHashKeyWrapperBase> getComparator(String columnSortOrder, String nullOrder) {
+ VectorHashKeyWrapperGeneralComparator comparator =
+ new VectorHashKeyWrapperGeneralComparator(columnVectorTypes.length);
+ for (int i = 0; i < columnVectorTypes.length; ++i) {
+ final int columnTypeSpecificIndex = columnTypeSpecificIndices[i];
+ ColumnVector.Type columnVectorType = columnVectorTypes[i];
+ comparator.addColumnComparator(
+ i, columnTypeSpecificIndex, columnVectorType, columnSortOrder.charAt(i), nullOrder.charAt(i));
+ }
+
+ return comparator;
+ }
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperGeneralComparator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperGeneralComparator.java
new file mode 100644
index 0000000..8cb4847
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperGeneralComparator.java
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector.wrapper;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
+import org.apache.hadoop.hive.ql.util.NullOrdering;
+
+/**
+ * An implementation of {@link Comparator} to compare {@link VectorHashKeyWrapperBase} instances.
+ */
+public class VectorHashKeyWrapperGeneralComparator
+ implements Comparator<VectorHashKeyWrapperBase>, Serializable {
+
+ /**
+ * Compare {@link VectorHashKeyWrapperBase} instances only by one column.
+ */
+ private static class VectorHashKeyWrapperBaseComparator
+ implements Comparator<VectorHashKeyWrapperBase>, Serializable {
+
+ private final int keyIndex;
+ private final Comparator<VectorHashKeyWrapperBase> comparator;
+ private final int nullResult;
+
+ VectorHashKeyWrapperBaseComparator(int keyIndex, Comparator<VectorHashKeyWrapperBase> comparator, char nullOrder) {
+ this.keyIndex = keyIndex;
+ this.comparator = comparator;
+ switch (NullOrdering.fromSign(nullOrder)) {
+ case NULLS_FIRST:
+ this.nullResult = 1;
+ break;
+ default:
+ this.nullResult = -1;
+ }
+ }
+
+ @Override
+ public int compare(VectorHashKeyWrapperBase o1, VectorHashKeyWrapperBase o2) {
+ boolean isNull1 = o1.isNull(keyIndex);
+ boolean isNull2 = o2.isNull(keyIndex);
+
+ if (isNull1 && isNull2) {
+ return 0;
+ }
+ if (isNull1) {
+ return -nullResult;
+ }
+ if (isNull2) {
+ return nullResult;
+ }
+ return comparator.compare(o1, o2);
+ }
+ }
+
+ private final List<VectorHashKeyWrapperBaseComparator> comparators;
+
+ public VectorHashKeyWrapperGeneralComparator(int numberOfColumns) {
+ this.comparators = new ArrayList<>(numberOfColumns);
+ }
+
+ public void addColumnComparator(int keyIndex, int columnTypeSpecificIndex, ColumnVector.Type columnVectorType,
+ char sortOrder, char nullOrder) {
+ Comparator<VectorHashKeyWrapperBase> comparator;
+ switch (columnVectorType) {
+ case LONG:
+ case DECIMAL_64:
+ comparator = (o1, o2) ->
+ Long.compare(o1.getLongValue(columnTypeSpecificIndex), o2.getLongValue(columnTypeSpecificIndex));
+ break;
+ case DOUBLE:
+ comparator = (o1, o2) -> Double.compare(
+ o1.getDoubleValue(columnTypeSpecificIndex), o2.getDoubleValue(columnTypeSpecificIndex));
+ break;
+ case BYTES:
+ comparator = (o1, o2) -> StringExpr.compare(
+ o1.getBytes(columnTypeSpecificIndex),
+ o1.getByteStart(columnTypeSpecificIndex),
+ o1.getByteLength(columnTypeSpecificIndex),
+ o2.getBytes(columnTypeSpecificIndex),
+ o2.getByteStart(columnTypeSpecificIndex),
+ o2.getByteLength(columnTypeSpecificIndex));
+ break;
+ case DECIMAL:
+ comparator = (o1, o2) ->
+ o1.getDecimal(columnTypeSpecificIndex).compareTo(o2.getDecimal(columnTypeSpecificIndex));
+ break;
+ case TIMESTAMP:
+ comparator = (o1, o2) ->
+ o1.getTimestamp(columnTypeSpecificIndex).compareTo(o2.getTimestamp(columnTypeSpecificIndex));
+ break;
+ case INTERVAL_DAY_TIME:
+ comparator = (o1, o2) -> o1.getIntervalDayTime(columnTypeSpecificIndex)
+ .compareTo(o2.getIntervalDayTime(columnTypeSpecificIndex));
+ break;
+ default:
+ throw new RuntimeException("Unexpected column vector columnVectorType " + columnVectorType);
+ }
+
+ comparators.add(
+ new VectorHashKeyWrapperBaseComparator(
+ keyIndex,
+ sortOrder == '-' ? comparator.reversed() : comparator,
+ nullOrder));
+ }
+
+ @Override
+ public int compare(VectorHashKeyWrapperBase o1, VectorHashKeyWrapperBase o2) {
+ for (Comparator<VectorHashKeyWrapperBase> comparator : comparators) {
+ int c = comparator.compare(o1, o2);
+ if (c != 0) {
+ return c;
+ }
+ }
+ return 0;
+ }
+}
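VectorHashKeyWrapperGeneralComparator compares column by column: null checks come first and use a nullResult that depends only on the null ordering, while the type-specific comparator is reversed for descending keys. A self-contained sketch of that composition, using boxed Integers and the '+'/'-' sort signs seen in the plan output below, and assuming 'a' marks NULLS FIRST and 'z' NULLS LAST (the class and values are illustrative, not part of the patch):

import java.util.Arrays;
import java.util.Comparator;
import java.util.List;

public class NullOrderComparatorSketch {
  // One column: '-' reverses the non-null comparison; the null sign only decides where nulls go.
  static Comparator<Integer> column(char sortOrder, char nullOrder) {
    Comparator<Integer> base = Comparator.naturalOrder();
    Comparator<Integer> ordered = sortOrder == '-' ? base.reversed() : base;
    int nullResult = nullOrder == 'a' ? 1 : -1;   // mirrors the nullResult field above
    return (o1, o2) -> {
      if (o1 == null && o2 == null) {
        return 0;
      }
      if (o1 == null) {
        return -nullResult;
      }
      if (o2 == null) {
        return nullResult;
      }
      return ordered.compare(o1, o2);
    };
  }

  public static void main(String[] args) {
    List<Integer> keys = Arrays.asList(5, null, 7, 6, null);
    keys.sort(column('+', 'a'));   // ASC NULLS FIRST
    System.out.println(keys);      // [null, null, 5, 6, 7]
    keys.sort(column('-', 'z'));   // DESC NULLS LAST
    System.out.println(keys);      // [7, 6, 5, null, null]
  }
}

For multi-column keys the new class simply chains these per-column comparators and returns the first non-zero result, giving the lexicographic ordering of the key columns.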
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 4cc02b4..bb5f9df 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -4334,7 +4334,7 @@ public class Vectorizer implements PhysicalPlanResolver {
TopNKeyDesc topNKeyDesc = (TopNKeyDesc) topNKeyOperator.getConf();
List<ExprNodeDesc> keyColumns = topNKeyDesc.getKeyColumns();
- VectorExpression[] keyExpressions = vContext.getVectorExpressions(keyColumns);
+ VectorExpression[] keyExpressions = vContext.getVectorExpressionsUpConvertDecimal64(keyColumns);
vectorTopNKeyDesc.setKeyExpressions(keyExpressions);
return OperatorFactory.getVectorOperator(
topNKeyOperator.getCompilationOpContext(), topNKeyDesc,
diff --git a/ql/src/test/queries/clientpositive/topnkey.q b/ql/src/test/queries/clientpositive/topnkey.q
index 283f426..057b6a4 100644
--- a/ql/src/test/queries/clientpositive/topnkey.q
+++ b/ql/src/test/queries/clientpositive/topnkey.q
@@ -28,57 +28,3 @@ explain vectorization detail
SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5;
SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5;
-
-CREATE TABLE t_test(
- a int,
- b int,
- c int
-);
-
-INSERT INTO t_test VALUES
-(NULL, NULL, NULL),
-(5, 2, 3),
-(NULL, NULL, NULL),
-(NULL, NULL, NULL),
-(6, 2, 1),
-(7, 8, 4), (7, 8, 4), (7, 8, 4),
-(5, 1, 2), (5, 1, 2), (5, 1, 2),
-(NULL, NULL, NULL);
-
-EXPLAIN
-SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3;
-SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3;
-
-
-EXPLAIN
-SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2;
-SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2;
-SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS FIRST LIMIT 2;
-SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS LAST LIMIT 2;
-SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC LIMIT 2;
-SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS FIRST LIMIT 2;
-SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS LAST LIMIT 2;
-
-DROP TABLE IF EXISTS t_test;
-
-CREATE TABLE t_test(
- a int,
- b int,
- c int
-);
-
-INSERT INTO t_test VALUES
-(7, 8, 4), (7, 8, 4), (7, 8, 4),
-(NULL, NULL, NULL),
-(5, 2, 3),
-(NULL, NULL, NULL),
-(NULL, NULL, NULL),
-(6, 2, 1),
-(5, 1, 2), (5, 1, 2), (5, 1, 2),
-(NULL, NULL, NULL);
-
-SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC LIMIT 2;
-SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS FIRST LIMIT 2;
-SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS LAST LIMIT 2;
-
-DROP TABLE IF EXISTS t_test;
diff --git a/ql/src/test/queries/clientpositive/topnkey_order_null.q b/ql/src/test/queries/clientpositive/topnkey_order_null.q
new file mode 100644
index 0000000..8d04104
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/topnkey_order_null.q
@@ -0,0 +1,83 @@
+SET hive.vectorized.execution.enabled=false;
+SET hive.optimize.topnkey=true;
+
+CREATE TABLE t_test(
+ a int,
+ b int,
+ c int
+);
+
+INSERT INTO t_test VALUES
+(NULL, NULL, NULL),
+(5, 2, 3),
+(NULL, NULL, NULL),
+(NULL, NULL, NULL),
+(6, 2, 1),
+(7, 8, 4), (7, 8, 4), (7, 8, 4),
+(5, 1, 2), (5, 1, 2), (5, 1, 2),
+(NULL, NULL, NULL);
+
+SET hive.vectorized.execution.enabled=false;
+SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2;
+SET hive.vectorized.execution.enabled=true;
+SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2;
+
+SET hive.vectorized.execution.enabled=false;
+SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS FIRST LIMIT 2;
+SET hive.vectorized.execution.enabled=true;
+SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS FIRST LIMIT 2;
+
+SET hive.vectorized.execution.enabled=false;
+SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS LAST LIMIT 2;
+SET hive.vectorized.execution.enabled=true;
+SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS LAST LIMIT 2;
+
+SET hive.vectorized.execution.enabled=false;
+SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC LIMIT 2;
+SET hive.vectorized.execution.enabled=true;
+SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC LIMIT 2;
+
+SET hive.vectorized.execution.enabled=false;
+SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS FIRST LIMIT 2;
+SET hive.vectorized.execution.enabled=true;
+SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS FIRST LIMIT 2;
+
+SET hive.vectorized.execution.enabled=false;
+SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS LAST LIMIT 2;
+SET hive.vectorized.execution.enabled=true;
+SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS LAST LIMIT 2;
+
+DROP TABLE IF EXISTS t_test;
+
+CREATE TABLE t_test(
+ a int,
+ b int,
+ c int
+);
+
+INSERT INTO t_test VALUES
+(7, 8, 4), (7, 8, 4), (7, 8, 4),
+(NULL, NULL, NULL),
+(5, 2, 3),
+(NULL, NULL, NULL),
+(NULL, NULL, NULL),
+(6, 2, 1),
+(5, 1, 2), (5, 1, 2), (5, 1, 2),
+(NULL, NULL, NULL);
+
+SET hive.vectorized.execution.enabled=false;
+SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC LIMIT 2;
+SET hive.vectorized.execution.enabled=true;
+SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC LIMIT 2;
+
+SET hive.vectorized.execution.enabled=false;
+SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS FIRST LIMIT 2;
+SET hive.vectorized.execution.enabled=true;
+SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS FIRST LIMIT 2;
+
+SET hive.vectorized.execution.enabled=false;
+SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS LAST LIMIT 2;
+SET hive.vectorized.execution.enabled=true;
+SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS LAST LIMIT 2;
+
+DROP TABLE IF EXISTS t_test;
diff --git a/ql/src/test/queries/clientpositive/vector_topnkey.q b/ql/src/test/queries/clientpositive/vector_topnkey.q
index e1b7d26..85c5880 100644
--- a/ql/src/test/queries/clientpositive/vector_topnkey.q
+++ b/ql/src/test/queries/clientpositive/vector_topnkey.q
@@ -1,4 +1,3 @@
---! qt:dataset:src
set hive.mapred.mode=nonstrict;
set hive.vectorized.execution.enabled=true;
set hive.optimize.topnkey=true;
@@ -14,17 +13,34 @@ set hive.tez.dynamic.partition.pruning=true;
set hive.stats.fetch.column.stats=true;
set hive.cbo.enable=true;
-explain vectorization detail
-SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5;
-
-SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5;
-
-explain vectorization detail
-SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5;
-
-SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5;
-
-explain vectorization detail
-SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5;
-
-SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5;
+CREATE TABLE t_test(
+ cint1 int,
+ cint2 int,
+ cdouble double,
+ cvarchar varchar(50),
+ cdecimal1 decimal(10,2),
+ cdecimal2 decimal(38,5)
+);
+
+INSERT INTO t_test VALUES
+(NULL, NULL, NULL, NULL, NULL, NULL),
+(8, 9, 2.0, 'one', 2.0, 2.0), (8, 9, 2.0, 'one', 2.0, 2.0),
+(4, 2, 3.3, 'two', 3.3, 3.3),
+(NULL, NULL, NULL, NULL, NULL, NULL),
+(NULL, NULL, NULL, NULL, NULL, NULL),
+(6, 2, 1.8, 'three', 1.8, 1.8),
+(7, 8, 4.5, 'four', 4.5, 4.5), (7, 8, 4.5, 'four', 4.5, 4.5), (7, 8, 4.5, 'four', 4.5, 4.5),
+(4, 1, 2.0, 'five', 2.0, 2.0), (4, 1, 2.0, 'five', 2.0, 2.0), (4, 1, 2.0, 'five', 2.0, 2.0),
+(NULL, NULL, NULL, NULL, NULL, NULL);
+
+EXPLAIN VECTORIZATION DETAIL
+SELECT cint1 FROM t_test GROUP BY cint1 ORDER BY cint1 LIMIT 3;
+
+SELECT cint1 FROM t_test GROUP BY cint1 ORDER BY cint1 LIMIT 3;
+SELECT cint1, cint2 FROM t_test GROUP BY cint1, cint2 ORDER BY cint1, cint2 LIMIT 3;
+SELECT cint1, cint2 FROM t_test GROUP BY cint1, cint2 ORDER BY cint1 DESC, cint2 LIMIT 3;
+SELECT cint1, cdouble FROM t_test GROUP BY cint1, cdouble ORDER BY cint1, cdouble LIMIT 3;
+SELECT cvarchar, cdouble FROM t_test GROUP BY cvarchar, cdouble ORDER BY cvarchar, cdouble LIMIT 3;
+SELECT cdecimal1, cdecimal2 FROM t_test GROUP BY cdecimal1, cdecimal2 ORDER BY cdecimal1, cdecimal2 LIMIT 3;
+
+DROP TABLE t_test;
\ No newline at end of file
diff --git a/ql/src/test/results/clientpositive/llap/topnkey.q.out b/ql/src/test/results/clientpositive/llap/topnkey.q.out
index ea73547..1e77587 100644
--- a/ql/src/test/results/clientpositive/llap/topnkey.q.out
+++ b/ql/src/test/results/clientpositive/llap/topnkey.q.out
@@ -421,392 +421,3 @@ POSTHOOK: Input: default@src
0 val_0
0 val_0
0 val_0
-PREHOOK: query: CREATE TABLE t_test(
- a int,
- b int,
- c int
-)
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@t_test
-POSTHOOK: query: CREATE TABLE t_test(
- a int,
- b int,
- c int
-)
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@t_test
-PREHOOK: query: INSERT INTO t_test VALUES
-(NULL, NULL, NULL),
-(5, 2, 3),
-(NULL, NULL, NULL),
-(NULL, NULL, NULL),
-(6, 2, 1),
-(7, 8, 4), (7, 8, 4), (7, 8, 4),
-(5, 1, 2), (5, 1, 2), (5, 1, 2),
-(NULL, NULL, NULL)
-PREHOOK: type: QUERY
-PREHOOK: Input: _dummy_database@_dummy_table
-PREHOOK: Output: default@t_test
-POSTHOOK: query: INSERT INTO t_test VALUES
-(NULL, NULL, NULL),
-(5, 2, 3),
-(NULL, NULL, NULL),
-(NULL, NULL, NULL),
-(6, 2, 1),
-(7, 8, 4), (7, 8, 4), (7, 8, 4),
-(5, 1, 2), (5, 1, 2), (5, 1, 2),
-(NULL, NULL, NULL)
-POSTHOOK: type: QUERY
-POSTHOOK: Input: _dummy_database@_dummy_table
-POSTHOOK: Output: default@t_test
-POSTHOOK: Lineage: t_test.a SCRIPT []
-POSTHOOK: Lineage: t_test.b SCRIPT []
-POSTHOOK: Lineage: t_test.c SCRIPT []
-PREHOOK: query: EXPLAIN
-SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t_test
-#### A masked pattern was here ####
-POSTHOOK: query: EXPLAIN
-SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t_test
-#### A masked pattern was here ####
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
- Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: t_test
- Statistics: Num rows: 12 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: a (type: int), b (type: int)
- outputColumnNames: a, b
- Statistics: Num rows: 12 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
- Top N Key Operator
- sort order: ++
- keys: a (type: int), b (type: int)
- null sort order: zz
- Statistics: Num rows: 12 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
- top n: 3
- Group By Operator
- keys: a (type: int), b (type: int)
- minReductionHashAggr: 0.3333333
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: int)
- null sort order: zz
- sort order: ++
- Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
- TopN Hash Memory Usage: 0.1
- Execution mode: llap
- LLAP IO: no inputs
- Reducer 2
- Execution mode: llap
- Reduce Operator Tree:
- Group By Operator
- keys: KEY._col0 (type: int), KEY._col1 (type: int)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: int)
- null sort order: zz
- sort order: ++
- Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
- TopN Hash Memory Usage: 0.1
- Reducer 3
- Execution mode: llap
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
- Limit
- Number of rows: 3
- Statistics: Num rows: 3 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 3 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: 3
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t_test
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t_test
-#### A masked pattern was here ####
-5 1
-5 2
-6 2
-PREHOOK: query: EXPLAIN
-SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t_test
-#### A masked pattern was here ####
-POSTHOOK: query: EXPLAIN
-SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t_test
-#### A masked pattern was here ####
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
- Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: t_test
- Statistics: Num rows: 12 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: a (type: int), b (type: int)
- outputColumnNames: a, b
- Statistics: Num rows: 12 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
- Top N Key Operator
- sort order: +
- keys: a (type: int)
- null sort order: z
- Statistics: Num rows: 12 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
- top n: 2
- Group By Operator
- aggregations: count(b)
- keys: a (type: int)
- minReductionHashAggr: 0.6666666
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- null sort order: z
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- TopN Hash Memory Usage: 0.1
- value expressions: _col1 (type: bigint)
- Execution mode: llap
- LLAP IO: no inputs
- Reducer 2
- Execution mode: llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- keys: KEY._col0 (type: int)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- null sort order: z
- sort order: +
- Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- TopN Hash Memory Usage: 0.1
- value expressions: _col1 (type: bigint)
- Reducer 3
- Execution mode: llap
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Limit
- Number of rows: 2
- Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: 2
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t_test
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t_test
-#### A masked pattern was here ####
-5 4
-6 1
-PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS FIRST LIMIT 2
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t_test
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS FIRST LIMIT 2
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t_test
-#### A masked pattern was here ####
-NULL 0
-5 4
-PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS LAST LIMIT 2
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t_test
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS LAST LIMIT 2
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t_test
-#### A masked pattern was here ####
-5 4
-6 1
-PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC LIMIT 2
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t_test
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC LIMIT 2
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t_test
-#### A masked pattern was here ####
-5 4
-6 1
-PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS FIRST LIMIT 2
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t_test
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS FIRST LIMIT 2
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t_test
-#### A masked pattern was here ####
-NULL 0
-5 4
-PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS LAST LIMIT 2
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t_test
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS LAST LIMIT 2
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t_test
-#### A masked pattern was here ####
-5 4
-6 1
-PREHOOK: query: DROP TABLE IF EXISTS t_test
-PREHOOK: type: DROPTABLE
-PREHOOK: Input: default@t_test
-PREHOOK: Output: default@t_test
-POSTHOOK: query: DROP TABLE IF EXISTS t_test
-POSTHOOK: type: DROPTABLE
-POSTHOOK: Input: default@t_test
-POSTHOOK: Output: default@t_test
-PREHOOK: query: CREATE TABLE t_test(
- a int,
- b int,
- c int
-)
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@t_test
-POSTHOOK: query: CREATE TABLE t_test(
- a int,
- b int,
- c int
-)
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@t_test
-PREHOOK: query: INSERT INTO t_test VALUES
-(7, 8, 4), (7, 8, 4), (7, 8, 4),
-(NULL, NULL, NULL),
-(5, 2, 3),
-(NULL, NULL, NULL),
-(NULL, NULL, NULL),
-(6, 2, 1),
-(5, 1, 2), (5, 1, 2), (5, 1, 2),
-(NULL, NULL, NULL)
-PREHOOK: type: QUERY
-PREHOOK: Input: _dummy_database@_dummy_table
-PREHOOK: Output: default@t_test
-POSTHOOK: query: INSERT INTO t_test VALUES
-(7, 8, 4), (7, 8, 4), (7, 8, 4),
-(NULL, NULL, NULL),
-(5, 2, 3),
-(NULL, NULL, NULL),
-(NULL, NULL, NULL),
-(6, 2, 1),
-(5, 1, 2), (5, 1, 2), (5, 1, 2),
-(NULL, NULL, NULL)
-POSTHOOK: type: QUERY
-POSTHOOK: Input: _dummy_database@_dummy_table
-POSTHOOK: Output: default@t_test
-POSTHOOK: Lineage: t_test.a SCRIPT []
-POSTHOOK: Lineage: t_test.b SCRIPT []
-POSTHOOK: Lineage: t_test.c SCRIPT []
-PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC LIMIT 2
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t_test
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC LIMIT 2
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t_test
-#### A masked pattern was here ####
-7 3
-6 1
-PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS FIRST LIMIT 2
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t_test
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS FIRST LIMIT 2
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t_test
-#### A masked pattern was here ####
-NULL 0
-7 3
-PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS LAST LIMIT 2
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t_test
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS LAST LIMIT 2
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t_test
-#### A masked pattern was here ####
-7 3
-6 1
-PREHOOK: query: DROP TABLE IF EXISTS t_test
-PREHOOK: type: DROPTABLE
-PREHOOK: Input: default@t_test
-PREHOOK: Output: default@t_test
-POSTHOOK: query: DROP TABLE IF EXISTS t_test
-POSTHOOK: type: DROPTABLE
-POSTHOOK: Input: default@t_test
-POSTHOOK: Output: default@t_test
diff --git a/ql/src/test/results/clientpositive/llap/topnkey_order_null.q.out b/ql/src/test/results/clientpositive/llap/topnkey_order_null.q.out
new file mode 100644
index 0000000..fef6b70
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/topnkey_order_null.q.out
@@ -0,0 +1,282 @@
+PREHOOK: query: CREATE TABLE t_test(
+ a int,
+ b int,
+ c int
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t_test
+POSTHOOK: query: CREATE TABLE t_test(
+ a int,
+ b int,
+ c int
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t_test
+PREHOOK: query: INSERT INTO t_test VALUES
+(NULL, NULL, NULL),
+(5, 2, 3),
+(NULL, NULL, NULL),
+(NULL, NULL, NULL),
+(6, 2, 1),
+(7, 8, 4), (7, 8, 4), (7, 8, 4),
+(5, 1, 2), (5, 1, 2), (5, 1, 2),
+(NULL, NULL, NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t_test
+POSTHOOK: query: INSERT INTO t_test VALUES
+(NULL, NULL, NULL),
+(5, 2, 3),
+(NULL, NULL, NULL),
+(NULL, NULL, NULL),
+(6, 2, 1),
+(7, 8, 4), (7, 8, 4), (7, 8, 4),
+(5, 1, 2), (5, 1, 2), (5, 1, 2),
+(NULL, NULL, NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t_test
+POSTHOOK: Lineage: t_test.a SCRIPT []
+POSTHOOK: Lineage: t_test.b SCRIPT []
+POSTHOOK: Lineage: t_test.c SCRIPT []
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+5 4
+6 1
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+5 4
+6 1
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS FIRST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS FIRST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+NULL 0
+5 4
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS FIRST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS FIRST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+NULL 0
+5 4
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS LAST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS LAST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+5 4
+6 1
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS LAST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS LAST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+5 4
+6 1
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+5 4
+6 1
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+5 4
+6 1
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS FIRST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS FIRST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+NULL 0
+5 4
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS FIRST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS FIRST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+NULL 0
+5 4
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS LAST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS LAST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+5 4
+6 1
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS LAST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS LAST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+5 4
+6 1
+PREHOOK: query: DROP TABLE IF EXISTS t_test
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@t_test
+PREHOOK: Output: default@t_test
+POSTHOOK: query: DROP TABLE IF EXISTS t_test
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: default@t_test
+PREHOOK: query: CREATE TABLE t_test(
+ a int,
+ b int,
+ c int
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t_test
+POSTHOOK: query: CREATE TABLE t_test(
+ a int,
+ b int,
+ c int
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t_test
+PREHOOK: query: INSERT INTO t_test VALUES
+(7, 8, 4), (7, 8, 4), (7, 8, 4),
+(NULL, NULL, NULL),
+(5, 2, 3),
+(NULL, NULL, NULL),
+(NULL, NULL, NULL),
+(6, 2, 1),
+(5, 1, 2), (5, 1, 2), (5, 1, 2),
+(NULL, NULL, NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t_test
+POSTHOOK: query: INSERT INTO t_test VALUES
+(7, 8, 4), (7, 8, 4), (7, 8, 4),
+(NULL, NULL, NULL),
+(5, 2, 3),
+(NULL, NULL, NULL),
+(NULL, NULL, NULL),
+(6, 2, 1),
+(5, 1, 2), (5, 1, 2), (5, 1, 2),
+(NULL, NULL, NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t_test
+POSTHOOK: Lineage: t_test.a SCRIPT []
+POSTHOOK: Lineage: t_test.b SCRIPT []
+POSTHOOK: Lineage: t_test.c SCRIPT []
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+7 3
+6 1
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+7 3
+6 1
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS FIRST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS FIRST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+NULL 0
+7 3
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS FIRST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS FIRST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+NULL 0
+7 3
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS LAST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS LAST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+7 3
+6 1
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS LAST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS LAST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+7 3
+6 1
+PREHOOK: query: DROP TABLE IF EXISTS t_test
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@t_test
+PREHOOK: Output: default@t_test
+POSTHOOK: query: DROP TABLE IF EXISTS t_test
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: default@t_test
diff --git a/ql/src/test/results/clientpositive/llap/vector_topnkey.q.out b/ql/src/test/results/clientpositive/llap/vector_topnkey.q.out
index 31d971b..c140bdf 100644
--- a/ql/src/test/results/clientpositive/llap/vector_topnkey.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_topnkey.q.out
@@ -1,215 +1,66 @@
-PREHOOK: query: explain vectorization detail
-SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5
+PREHOOK: query: CREATE TABLE t_test(
+ cint1 int,
+ cint2 int,
+ cdouble double,
+ cvarchar varchar(50),
+ cdecimal1 decimal(10,2),
+ cdecimal2 decimal(38,5)
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t_test
+POSTHOOK: query: CREATE TABLE t_test(
+ cint1 int,
+ cint2 int,
+ cdouble double,
+ cvarchar varchar(50),
+ cdecimal1 decimal(10,2),
+ cdecimal2 decimal(38,5)
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t_test
+PREHOOK: query: INSERT INTO t_test VALUES
+(NULL, NULL, NULL, NULL, NULL, NULL),
+(8, 9, 2.0, 'one', 2.0, 2.0), (8, 9, 2.0, 'one', 2.0, 2.0),
+(4, 2, 3.3, 'two', 3.3, 3.3),
+(NULL, NULL, NULL, NULL, NULL, NULL),
+(NULL, NULL, NULL, NULL, NULL, NULL),
+(6, 2, 1.8, 'three', 1.8, 1.8),
+(7, 8, 4.5, 'four', 4.5, 4.5), (7, 8, 4.5, 'four', 4.5, 4.5), (7, 8, 4.5, 'four', 4.5, 4.5),
+(4, 1, 2.0, 'five', 2.0, 2.0), (4, 1, 2.0, 'five', 2.0, 2.0), (4, 1, 2.0, 'five', 2.0, 2.0),
+(NULL, NULL, NULL, NULL, NULL, NULL)
PREHOOK: type: QUERY
-PREHOOK: Input: default@src
-#### A masked pattern was here ####
-POSTHOOK: query: explain vectorization detail
-SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t_test
+POSTHOOK: query: INSERT INTO t_test VALUES
+(NULL, NULL, NULL, NULL, NULL, NULL),
+(8, 9, 2.0, 'one', 2.0, 2.0), (8, 9, 2.0, 'one', 2.0, 2.0),
+(4, 2, 3.3, 'two', 3.3, 3.3),
+(NULL, NULL, NULL, NULL, NULL, NULL),
+(NULL, NULL, NULL, NULL, NULL, NULL),
+(6, 2, 1.8, 'three', 1.8, 1.8),
+(7, 8, 4.5, 'four', 4.5, 4.5), (7, 8, 4.5, 'four', 4.5, 4.5), (7, 8, 4.5, 'four', 4.5, 4.5),
+(4, 1, 2.0, 'five', 2.0, 2.0), (4, 1, 2.0, 'five', 2.0, 2.0), (4, 1, 2.0, 'five', 2.0, 2.0),
+(NULL, NULL, NULL, NULL, NULL, NULL)
POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
-#### A masked pattern was here ####
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
- Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: src
- Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- TableScan Vectorization:
- native: true
- vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
- Select Operator
- expressions: key (type: string), UDFToInteger(substr(value, 5)) (type: int)
- outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 4]
- selectExpressions: CastStringToLong(col 3:string)(children: StringSubstrColStart(col 1:string, start 4) -> 3:string) -> 4:int
- Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- Top N Key Operator
- sort order: +
- keys: _col0 (type: string)
- null sort order: z
- Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- top n: 5
- Top N Key Vectorization:
- className: VectorTopNKeyOperator
- keyExpressions: col 0:string
- native: true
- Group By Operator
- aggregations: sum(_col1)
- Group By Vectorization:
- aggregators: VectorUDAFSumLong(col 4:int) -> bigint
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 0:string
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0]
- keys: _col0 (type: string)
- minReductionHashAggr: 0.5
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: z
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Reduce Sink Vectorization:
- className: VectorReduceSinkStringOperator
- keyColumns: 0:string
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumns: 1:bigint
- Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
- TopN Hash Memory Usage: 0.1
- value expressions: _col1 (type: bigint)
- Execution mode: vectorized, llap
- LLAP IO: no inputs
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 2
- includeColumns: [0, 1]
- dataColumns: key:string, value:string
- partitionColumnCount: 0
- scratchColumnTypeNames: [string, bigint]
- Reducer 2
- Execution mode: vectorized, llap
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- reduceColumnNullOrder: z
- reduceColumnSortOrder: +
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 2
- dataColumns: KEY._col0:string, VALUE._col0:bigint
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Operator Tree:
- Group By Operator
- aggregations: sum(VALUE._col0)
- Group By Vectorization:
- aggregators: VectorUDAFSumLong(col 1:bigint) -> bigint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:string
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0]
- keys: KEY._col0 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: z
- sort order: +
- Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
- keyColumns: 0:string
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumns: 1:bigint
- Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
- TopN Hash Memory Usage: 0.1
- value expressions: _col1 (type: bigint)
- Reducer 3
- Execution mode: vectorized, llap
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- reduceColumnNullOrder: z
- reduceColumnSortOrder: +
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 2
- dataColumns: KEY.reducesinkkey0:string, VALUE._col0:bigint
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint)
- outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1]
- Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
- Limit
- Number of rows: 5
- Limit Vectorization:
- className: VectorLimitOperator
- native: true
- Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
- Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: 5
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t_test
+POSTHOOK: Lineage: t_test.cdecimal1 SCRIPT []
+POSTHOOK: Lineage: t_test.cdecimal2 SCRIPT []
+POSTHOOK: Lineage: t_test.cdouble SCRIPT []
+POSTHOOK: Lineage: t_test.cint1 SCRIPT []
+POSTHOOK: Lineage: t_test.cint2 SCRIPT []
+POSTHOOK: Lineage: t_test.cvarchar SCRIPT []
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT cint1 FROM t_test GROUP BY cint1 ORDER BY cint1 LIMIT 3
PREHOOK: type: QUERY
-PREHOOK: Input: default@src
+PREHOOK: Input: default@t_test
#### A masked pattern was here ####
-POSTHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT cint1 FROM t_test GROUP BY cint1 ORDER BY cint1 LIMIT 3
POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
-#### A masked pattern was here ####
-0 0
-10 10
-100 200
-103 206
-104 208
-PREHOOK: query: explain vectorization detail
-SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src
-#### A masked pattern was here ####
-POSTHOOK: query: explain vectorization detail
-SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
+POSTHOOK: Input: default@t_test
#### A masked pattern was here ####
PLAN VECTORIZATION:
enabled: true
@@ -231,53 +82,53 @@ STAGE PLANS:
Map 1
Map Operator Tree:
TableScan
- alias: src
- Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ alias: t_test
+ Statistics: Num rows: 14 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
- vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+ vectorizationSchemaColumns: [0:cint1:int, 1:cint2:int, 2:cdouble:double, 3:cvarchar:varchar(50), 4:cdecimal1:decimal(10,2)/DECIMAL_64, 5:cdecimal2:decimal(38,5), 6:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
Select Operator
- expressions: key (type: string)
- outputColumnNames: key
+ expressions: cint1 (type: int)
+ outputColumnNames: cint1
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0]
- Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE
Top N Key Operator
sort order: +
- keys: key (type: string)
+ keys: cint1 (type: int)
null sort order: z
- Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
- top n: 5
+ Statistics: Num rows: 14 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE
+ top n: 3
Top N Key Vectorization:
className: VectorTopNKeyOperator
- keyExpressions: col 0:string
+ keyExpressions: col 0:int
native: true
Group By Operator
Group By Vectorization:
className: VectorGroupByOperator
groupByMode: HASH
- keyExpressions: col 0:string
+ keyExpressions: col 0:int
native: false
vectorProcessingMode: HASH
projectedOutputColumnNums: []
- keys: key (type: string)
- minReductionHashAggr: 0.5
+ keys: cint1 (type: int)
+ minReductionHashAggr: 0.64285713
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 5 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
- key expressions: _col0 (type: string)
+ key expressions: _col0 (type: int)
null sort order: z
sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ Map-reduce partition columns: _col0 (type: int)
Reduce Sink Vectorization:
- className: VectorReduceSinkStringOperator
- keyColumns: 0:string
+ className: VectorReduceSinkLongOperator
+ keyColumns: 0:int
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 5 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
Execution mode: vectorized, llap
LLAP IO: no inputs
@@ -291,9 +142,9 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
- dataColumnCount: 2
+ dataColumnCount: 6
includeColumns: [0]
- dataColumns: key:string, value:string
+ dataColumns: cint1:int, cint2:int, cdouble:double, cvarchar:varchar(50), cdecimal1:decimal(10,2)/DECIMAL_64, cdecimal2:decimal(38,5)
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
@@ -308,7 +159,7 @@ STAGE PLANS:
vectorized: true
rowBatchContext:
dataColumnCount: 1
- dataColumns: KEY._col0:string
+ dataColumns: KEY._col0:int
partitionColumnCount: 0
scratchColumnTypeNames: []
Reduce Operator Tree:
@@ -316,24 +167,24 @@ STAGE PLANS:
Group By Vectorization:
className: VectorGroupByOperator
groupByMode: MERGEPARTIAL
- keyExpressions: col 0:string
+ keyExpressions: col 0:int
native: false
vectorProcessingMode: MERGE_PARTIAL
projectedOutputColumnNums: []
- keys: KEY._col0 (type: string)
+ keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 5 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
- key expressions: _col0 (type: string)
+ key expressions: _col0 (type: int)
null sort order: z
sort order: +
Reduce Sink Vectorization:
className: VectorReduceSinkObjectHashOperator
- keyColumns: 0:string
+ keyColumns: 0:int
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 5 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
Reducer 3
Execution mode: vectorized, llap
@@ -347,30 +198,30 @@ STAGE PLANS:
vectorized: true
rowBatchContext:
dataColumnCount: 1
- dataColumns: KEY.reducesinkkey0:string
+ dataColumns: KEY.reducesinkkey0:int
partitionColumnCount: 0
scratchColumnTypeNames: []
Reduce Operator Tree:
Select Operator
- expressions: KEY.reducesinkkey0 (type: string)
+ expressions: KEY.reducesinkkey0 (type: int)
outputColumnNames: _col0
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0]
- Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 5 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Limit
- Number of rows: 5
+ Number of rows: 3
Limit Vectorization:
className: VectorLimitOperator
native: true
- Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -378,238 +229,81 @@ STAGE PLANS:
Stage: Stage-0
Fetch Operator
- limit: 5
+ limit: 3
Processor Tree:
ListSink
-PREHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5
+PREHOOK: query: SELECT cint1 FROM t_test GROUP BY cint1 ORDER BY cint1 LIMIT 3
PREHOOK: type: QUERY
-PREHOOK: Input: default@src
+PREHOOK: Input: default@t_test
#### A masked pattern was here ####
-POSTHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5
+POSTHOOK: query: SELECT cint1 FROM t_test GROUP BY cint1 ORDER BY cint1 LIMIT 3
POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
+POSTHOOK: Input: default@t_test
#### A masked pattern was here ####
-0
-10
-100
-103
-104
-PREHOOK: query: explain vectorization detail
-SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+4
+6
+7
+PREHOOK: query: SELECT cint1, cint2 FROM t_test GROUP BY cint1, cint2 ORDER BY cint1, cint2 LIMIT 3
PREHOOK: type: QUERY
-PREHOOK: Input: default@src
+PREHOOK: Input: default@t_test
#### A masked pattern was here ####
-POSTHOOK: query: explain vectorization detail
-SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+POSTHOOK: query: SELECT cint1, cint2 FROM t_test GROUP BY cint1, cint2 ORDER BY cint1, cint2 LIMIT 3
POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
+POSTHOOK: Input: default@t_test
#### A masked pattern was here ####
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
+4 1
+4 2
+6 2
+PREHOOK: query: SELECT cint1, cint2 FROM t_test GROUP BY cint1, cint2 ORDER BY cint1 DESC, cint2 LIMIT 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
#### A masked pattern was here ####
- Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
- Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+POSTHOOK: query: SELECT cint1, cint2 FROM t_test GROUP BY cint1, cint2 ORDER BY cint1 DESC, cint2 LIMIT 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: src1
- filterExpr: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
- TableScan Vectorization:
- native: true
- vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
- Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: SelectColumnIsNotNull(col 0:string)
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0]
- Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Reduce Sink Vectorization:
- className: VectorReduceSinkStringOperator
- keyColumns: 0:string
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: vectorized, llap
- LLAP IO: no inputs
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 2
- includeColumns: [0]
- dataColumns: key:string, value:string
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Map 4
- Map Operator Tree:
- TableScan
- alias: src2
- filterExpr: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- TableScan Vectorization:
- native: true
- vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
- Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: SelectColumnIsNotNull(col 0:string)
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1]
- Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Reduce Sink Vectorization:
- className: VectorReduceSinkStringOperator
- keyColumns: 0:string
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumns: 1:string
- Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: string)
- Execution mode: vectorized, llap
- LLAP IO: no inputs
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 2
- includeColumns: [0, 1]
- dataColumns: key:string, value:string
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reducer 2
- Execution mode: llap
- Reduce Operator Tree:
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col2
- Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: string), _col2 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: z
- sort order: +
- Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
- TopN Hash Memory Usage: 0.1
- value expressions: _col1 (type: string)
- MergeJoin Vectorization:
- enabled: false
- enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
- Reducer 3
- Execution mode: vectorized, llap
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- reduceColumnNullOrder: z
- reduceColumnSortOrder: +
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 2
- dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
- outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1]
- Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
- Limit
- Number of rows: 5
- Limit Vectorization:
- className: VectorLimitOperator
- native: true
- Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
- Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: 5
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+8 9
+7 8
+6 2
+PREHOOK: query: SELECT cint1, cdouble FROM t_test GROUP BY cint1, cdouble ORDER BY cint1, cdouble LIMIT 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cint1, cdouble FROM t_test GROUP BY cint1, cdouble ORDER BY cint1, cdouble LIMIT 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+4 2.0
+4 3.3
+6 1.8
+PREHOOK: query: SELECT cvarchar, cdouble FROM t_test GROUP BY cvarchar, cdouble ORDER BY cvarchar, cdouble LIMIT 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cvarchar, cdouble FROM t_test GROUP BY cvarchar, cdouble ORDER BY cvarchar, cdouble LIMIT 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+five 2.0
+four 4.5
+one 2.0
+PREHOOK: query: SELECT cdecimal1, cdecimal2 FROM t_test GROUP BY cdecimal1, cdecimal2 ORDER BY cdecimal1, cdecimal2 LIMIT 3
PREHOOK: type: QUERY
-PREHOOK: Input: default@src
+PREHOOK: Input: default@t_test
#### A masked pattern was here ####
-POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+POSTHOOK: query: SELECT cdecimal1, cdecimal2 FROM t_test GROUP BY cdecimal1, cdecimal2 ORDER BY cdecimal1, cdecimal2 LIMIT 3
POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
+POSTHOOK: Input: default@t_test
#### A masked pattern was here ####
-0 val_0
-0 val_0
-0 val_0
-0 val_0
-0 val_0
+1.80 1.80000
+2.00 2.00000
+3.30 3.30000
+PREHOOK: query: DROP TABLE t_test
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@t_test
+PREHOOK: Output: default@t_test
+POSTHOOK: query: DROP TABLE t_test
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: default@t_test
diff --git a/ql/src/test/results/clientpositive/tez/topnkey.q.out b/ql/src/test/results/clientpositive/tez/topnkey.q.out
index 54891b8..45947d0 100644
--- a/ql/src/test/results/clientpositive/tez/topnkey.q.out
+++ b/ql/src/test/results/clientpositive/tez/topnkey.q.out
@@ -365,292 +365,3 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
0 val_0
0 val_0
0 val_0
-PREHOOK: query: CREATE TABLE t_test(
- a int,
- b int,
- c int
-)
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@t_test
-POSTHOOK: query: CREATE TABLE t_test(
- a int,
- b int,
- c int
-)
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@t_test
-PREHOOK: query: INSERT INTO t_test VALUES
-(NULL, NULL, NULL),
-(5, 2, 3),
-(NULL, NULL, NULL),
-(NULL, NULL, NULL),
-(6, 2, 1),
-(7, 8, 4), (7, 8, 4), (7, 8, 4),
-(5, 1, 2), (5, 1, 2), (5, 1, 2),
-(NULL, NULL, NULL)
-PREHOOK: type: QUERY
-PREHOOK: Input: _dummy_database@_dummy_table
-PREHOOK: Output: default@t_test
-POSTHOOK: query: INSERT INTO t_test VALUES
-(NULL, NULL, NULL),
-(5, 2, 3),
-(NULL, NULL, NULL),
-(NULL, NULL, NULL),
-(6, 2, 1),
-(7, 8, 4), (7, 8, 4), (7, 8, 4),
-(5, 1, 2), (5, 1, 2), (5, 1, 2),
-(NULL, NULL, NULL)
-POSTHOOK: type: QUERY
-POSTHOOK: Input: _dummy_database@_dummy_table
-POSTHOOK: Output: default@t_test
-POSTHOOK: Lineage: t_test.a SCRIPT []
-POSTHOOK: Lineage: t_test.b SCRIPT []
-POSTHOOK: Lineage: t_test.c SCRIPT []
-PREHOOK: query: EXPLAIN
-SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t_test
-PREHOOK: Output: hdfs://### HDFS PATH ###
-POSTHOOK: query: EXPLAIN
-SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t_test
-POSTHOOK: Output: hdfs://### HDFS PATH ###
-Plan optimized by CBO.
-
-Vertex dependency in root stage
-Reducer 2 <- Map 1 (SIMPLE_EDGE)
-Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
-
-Stage-0
- Fetch Operator
- limit:3
- Stage-1
- Reducer 3
- File Output Operator [FS_9]
- Limit [LIM_8] (rows=3 width=5)
- Number of rows:3
- Select Operator [SEL_7] (rows=6 width=4)
- Output:["_col0","_col1"]
- <-Reducer 2 [SIMPLE_EDGE]
- SHUFFLE [RS_6]
- null sort order:zz,sort order:++
- Group By Operator [GBY_4] (rows=6 width=4)
- Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
- <-Map 1 [SIMPLE_EDGE]
- SHUFFLE [RS_3]
- PartitionCols:_col0, _col1,null sort order:zz,sort order:++
- Group By Operator [GBY_2] (rows=6 width=4)
- Output:["_col0","_col1"],keys:a, b
- Top N Key Operator [TNK_10] (rows=12 width=6)
- keys:a, b,null sort order:zz,sort order:++,top n:3
- Select Operator [SEL_1] (rows=12 width=6)
- Output:["a","b"]
- TableScan [TS_0] (rows=12 width=6)
- default@t_test,t_test,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b"]
-
-PREHOOK: query: SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t_test
-PREHOOK: Output: hdfs://### HDFS PATH ###
-POSTHOOK: query: SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t_test
-POSTHOOK: Output: hdfs://### HDFS PATH ###
-5 1
-5 2
-6 2
-PREHOOK: query: EXPLAIN
-SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t_test
-PREHOOK: Output: hdfs://### HDFS PATH ###
-POSTHOOK: query: EXPLAIN
-SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t_test
-POSTHOOK: Output: hdfs://### HDFS PATH ###
-Plan optimized by CBO.
-
-Vertex dependency in root stage
-Reducer 2 <- Map 1 (SIMPLE_EDGE)
-Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
-
-Stage-0
- Fetch Operator
- limit:2
- Stage-1
- Reducer 3
- File Output Operator [FS_9]
- Limit [LIM_8] (rows=2 width=10)
- Number of rows:2
- Select Operator [SEL_7] (rows=4 width=10)
- Output:["_col0","_col1"]
- <-Reducer 2 [SIMPLE_EDGE]
- SHUFFLE [RS_6]
- null sort order:z,sort order:+
- Group By Operator [GBY_4] (rows=4 width=10)
- Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0
- <-Map 1 [SIMPLE_EDGE]
- SHUFFLE [RS_3]
- PartitionCols:_col0,null sort order:z,sort order:+
- Group By Operator [GBY_2] (rows=4 width=10)
- Output:["_col0","_col1"],aggregations:["count(b)"],keys:a
- Top N Key Operator [TNK_10] (rows=12 width=6)
- keys:a,null sort order:z,sort order:+,top n:2
- Select Operator [SEL_1] (rows=12 width=6)
- Output:["a","b"]
- TableScan [TS_0] (rows=12 width=6)
- default@t_test,t_test,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b"]
-
-PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t_test
-PREHOOK: Output: hdfs://### HDFS PATH ###
-POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t_test
-POSTHOOK: Output: hdfs://### HDFS PATH ###
-5 4
-6 1
-PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS FIRST LIMIT 2
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t_test
-PREHOOK: Output: hdfs://### HDFS PATH ###
-POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS FIRST LIMIT 2
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t_test
-POSTHOOK: Output: hdfs://### HDFS PATH ###
-NULL 0
-5 4
-PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS LAST LIMIT 2
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t_test
-PREHOOK: Output: hdfs://### HDFS PATH ###
-POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS LAST LIMIT 2
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t_test
-POSTHOOK: Output: hdfs://### HDFS PATH ###
-5 4
-6 1
-PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC LIMIT 2
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t_test
-PREHOOK: Output: hdfs://### HDFS PATH ###
-POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC LIMIT 2
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t_test
-POSTHOOK: Output: hdfs://### HDFS PATH ###
-5 4
-6 1
-PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS FIRST LIMIT 2
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t_test
-PREHOOK: Output: hdfs://### HDFS PATH ###
-POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS FIRST LIMIT 2
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t_test
-POSTHOOK: Output: hdfs://### HDFS PATH ###
-NULL 0
-5 4
-PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS LAST LIMIT 2
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t_test
-PREHOOK: Output: hdfs://### HDFS PATH ###
-POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS LAST LIMIT 2
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t_test
-POSTHOOK: Output: hdfs://### HDFS PATH ###
-5 4
-6 1
-PREHOOK: query: DROP TABLE IF EXISTS t_test
-PREHOOK: type: DROPTABLE
-PREHOOK: Input: default@t_test
-PREHOOK: Output: default@t_test
-POSTHOOK: query: DROP TABLE IF EXISTS t_test
-POSTHOOK: type: DROPTABLE
-POSTHOOK: Input: default@t_test
-POSTHOOK: Output: default@t_test
-PREHOOK: query: CREATE TABLE t_test(
- a int,
- b int,
- c int
-)
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@t_test
-POSTHOOK: query: CREATE TABLE t_test(
- a int,
- b int,
- c int
-)
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@t_test
-PREHOOK: query: INSERT INTO t_test VALUES
-(7, 8, 4), (7, 8, 4), (7, 8, 4),
-(NULL, NULL, NULL),
-(5, 2, 3),
-(NULL, NULL, NULL),
-(NULL, NULL, NULL),
-(6, 2, 1),
-(5, 1, 2), (5, 1, 2), (5, 1, 2),
-(NULL, NULL, NULL)
-PREHOOK: type: QUERY
-PREHOOK: Input: _dummy_database@_dummy_table
-PREHOOK: Output: default@t_test
-POSTHOOK: query: INSERT INTO t_test VALUES
-(7, 8, 4), (7, 8, 4), (7, 8, 4),
-(NULL, NULL, NULL),
-(5, 2, 3),
-(NULL, NULL, NULL),
-(NULL, NULL, NULL),
-(6, 2, 1),
-(5, 1, 2), (5, 1, 2), (5, 1, 2),
-(NULL, NULL, NULL)
-POSTHOOK: type: QUERY
-POSTHOOK: Input: _dummy_database@_dummy_table
-POSTHOOK: Output: default@t_test
-POSTHOOK: Lineage: t_test.a SCRIPT []
-POSTHOOK: Lineage: t_test.b SCRIPT []
-POSTHOOK: Lineage: t_test.c SCRIPT []
-PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC LIMIT 2
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t_test
-PREHOOK: Output: hdfs://### HDFS PATH ###
-POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC LIMIT 2
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t_test
-POSTHOOK: Output: hdfs://### HDFS PATH ###
-7 3
-6 1
-PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS FIRST LIMIT 2
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t_test
-PREHOOK: Output: hdfs://### HDFS PATH ###
-POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS FIRST LIMIT 2
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t_test
-POSTHOOK: Output: hdfs://### HDFS PATH ###
-NULL 0
-7 3
-PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS LAST LIMIT 2
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t_test
-PREHOOK: Output: hdfs://### HDFS PATH ###
-POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS LAST LIMIT 2
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t_test
-POSTHOOK: Output: hdfs://### HDFS PATH ###
-7 3
-6 1
-PREHOOK: query: DROP TABLE IF EXISTS t_test
-PREHOOK: type: DROPTABLE
-PREHOOK: Input: default@t_test
-PREHOOK: Output: default@t_test
-POSTHOOK: query: DROP TABLE IF EXISTS t_test
-POSTHOOK: type: DROPTABLE
-POSTHOOK: Input: default@t_test
-POSTHOOK: Output: default@t_test
diff --git a/ql/src/test/results/clientpositive/tez/topnkey_order_null.q.out b/ql/src/test/results/clientpositive/tez/topnkey_order_null.q.out
new file mode 100644
index 0000000..656c97d
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/topnkey_order_null.q.out
@@ -0,0 +1,282 @@
+PREHOOK: query: CREATE TABLE t_test(
+ a int,
+ b int,
+ c int
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t_test
+POSTHOOK: query: CREATE TABLE t_test(
+ a int,
+ b int,
+ c int
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t_test
+PREHOOK: query: INSERT INTO t_test VALUES
+(NULL, NULL, NULL),
+(5, 2, 3),
+(NULL, NULL, NULL),
+(NULL, NULL, NULL),
+(6, 2, 1),
+(7, 8, 4), (7, 8, 4), (7, 8, 4),
+(5, 1, 2), (5, 1, 2), (5, 1, 2),
+(NULL, NULL, NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t_test
+POSTHOOK: query: INSERT INTO t_test VALUES
+(NULL, NULL, NULL),
+(5, 2, 3),
+(NULL, NULL, NULL),
+(NULL, NULL, NULL),
+(6, 2, 1),
+(7, 8, 4), (7, 8, 4), (7, 8, 4),
+(5, 1, 2), (5, 1, 2), (5, 1, 2),
+(NULL, NULL, NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t_test
+POSTHOOK: Lineage: t_test.a SCRIPT []
+POSTHOOK: Lineage: t_test.b SCRIPT []
+POSTHOOK: Lineage: t_test.c SCRIPT []
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+5 4
+6 1
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+5 4
+6 1
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS FIRST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS FIRST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+NULL 0
+5 4
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS FIRST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS FIRST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+NULL 0
+5 4
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS LAST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS LAST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+5 4
+6 1
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS LAST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS LAST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+5 4
+6 1
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+5 4
+6 1
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+5 4
+6 1
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS FIRST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS FIRST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+NULL 0
+5 4
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS FIRST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS FIRST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+NULL 0
+5 4
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS LAST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS LAST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+5 4
+6 1
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS LAST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS LAST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+5 4
+6 1
+PREHOOK: query: DROP TABLE IF EXISTS t_test
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@t_test
+PREHOOK: Output: default@t_test
+POSTHOOK: query: DROP TABLE IF EXISTS t_test
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: default@t_test
+PREHOOK: query: CREATE TABLE t_test(
+ a int,
+ b int,
+ c int
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t_test
+POSTHOOK: query: CREATE TABLE t_test(
+ a int,
+ b int,
+ c int
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t_test
+PREHOOK: query: INSERT INTO t_test VALUES
+(7, 8, 4), (7, 8, 4), (7, 8, 4),
+(NULL, NULL, NULL),
+(5, 2, 3),
+(NULL, NULL, NULL),
+(NULL, NULL, NULL),
+(6, 2, 1),
+(5, 1, 2), (5, 1, 2), (5, 1, 2),
+(NULL, NULL, NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t_test
+POSTHOOK: query: INSERT INTO t_test VALUES
+(7, 8, 4), (7, 8, 4), (7, 8, 4),
+(NULL, NULL, NULL),
+(5, 2, 3),
+(NULL, NULL, NULL),
+(NULL, NULL, NULL),
+(6, 2, 1),
+(5, 1, 2), (5, 1, 2), (5, 1, 2),
+(NULL, NULL, NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t_test
+POSTHOOK: Lineage: t_test.a SCRIPT []
+POSTHOOK: Lineage: t_test.b SCRIPT []
+POSTHOOK: Lineage: t_test.c SCRIPT []
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+7 3
+6 1
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+7 3
+6 1
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS FIRST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS FIRST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+NULL 0
+7 3
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS FIRST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS FIRST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+NULL 0
+7 3
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS LAST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS LAST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+7 3
+6 1
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS LAST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS LAST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+7 3
+6 1
+PREHOOK: query: DROP TABLE IF EXISTS t_test
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@t_test
+PREHOOK: Output: default@t_test
+POSTHOOK: query: DROP TABLE IF EXISTS t_test
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: default@t_test
diff --git a/ql/src/test/results/clientpositive/tez/vector_topnkey.q.out b/ql/src/test/results/clientpositive/tez/vector_topnkey.q.out
index aca08fb..d179013 100644
--- a/ql/src/test/results/clientpositive/tez/vector_topnkey.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_topnkey.q.out
@@ -1,214 +1,66 @@
-PREHOOK: query: explain vectorization detail
-SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5
+PREHOOK: query: CREATE TABLE t_test(
+ cint1 int,
+ cint2 int,
+ cdouble double,
+ cvarchar varchar(50),
+ cdecimal1 decimal(10,2),
+ cdecimal2 decimal(38,5)
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t_test
+POSTHOOK: query: CREATE TABLE t_test(
+ cint1 int,
+ cint2 int,
+ cdouble double,
+ cvarchar varchar(50),
+ cdecimal1 decimal(10,2),
+ cdecimal2 decimal(38,5)
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t_test
+PREHOOK: query: INSERT INTO t_test VALUES
+(NULL, NULL, NULL, NULL, NULL, NULL),
+(8, 9, 2.0, 'one', 2.0, 2.0), (8, 9, 2.0, 'one', 2.0, 2.0),
+(4, 2, 3.3, 'two', 3.3, 3.3),
+(NULL, NULL, NULL, NULL, NULL, NULL),
+(NULL, NULL, NULL, NULL, NULL, NULL),
+(6, 2, 1.8, 'three', 1.8, 1.8),
+(7, 8, 4.5, 'four', 4.5, 4.5), (7, 8, 4.5, 'four', 4.5, 4.5), (7, 8, 4.5, 'four', 4.5, 4.5),
+(4, 1, 2.0, 'five', 2.0, 2.0), (4, 1, 2.0, 'five', 2.0, 2.0), (4, 1, 2.0, 'five', 2.0, 2.0),
+(NULL, NULL, NULL, NULL, NULL, NULL)
PREHOOK: type: QUERY
-PREHOOK: Input: default@src
-PREHOOK: Output: hdfs://### HDFS PATH ###
-POSTHOOK: query: explain vectorization detail
-SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
-POSTHOOK: Output: hdfs://### HDFS PATH ###
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
- Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: src
- Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- TableScan Vectorization:
- native: true
- vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
- Select Operator
- expressions: key (type: string), UDFToInteger(substr(value, 5)) (type: int)
- outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 4]
- selectExpressions: CastStringToLong(col 3:string)(children: StringSubstrColStart(col 1:string, start 4) -> 3:string) -> 4:int
- Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- Top N Key Operator
- sort order: +
- keys: _col0 (type: string)
- null sort order: z
- Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- top n: 5
- Top N Key Vectorization:
- className: VectorTopNKeyOperator
- keyExpressions: col 0:string
- native: true
- Group By Operator
- aggregations: sum(_col1)
- Group By Vectorization:
- aggregators: VectorUDAFSumLong(col 4:int) -> bigint
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 0:string
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0]
- keys: _col0 (type: string)
- minReductionHashAggr: 0.5
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: z
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Reduce Sink Vectorization:
- className: VectorReduceSinkStringOperator
- keyColumns: 0:string
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumns: 1:bigint
- Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
- TopN Hash Memory Usage: 0.1
- value expressions: _col1 (type: bigint)
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 2
- includeColumns: [0, 1]
- dataColumns: key:string, value:string
- partitionColumnCount: 0
- scratchColumnTypeNames: [string, bigint]
- Reducer 2
- Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- reduceColumnNullOrder: z
- reduceColumnSortOrder: +
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 2
- dataColumns: KEY._col0:string, VALUE._col0:bigint
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Operator Tree:
- Group By Operator
- aggregations: sum(VALUE._col0)
- Group By Vectorization:
- aggregators: VectorUDAFSumLong(col 1:bigint) -> bigint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:string
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0]
- keys: KEY._col0 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: z
- sort order: +
- Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
- keyColumns: 0:string
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumns: 1:bigint
- Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
- TopN Hash Memory Usage: 0.1
- value expressions: _col1 (type: bigint)
- Reducer 3
- Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- reduceColumnNullOrder: z
- reduceColumnSortOrder: +
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 2
- dataColumns: KEY.reducesinkkey0:string, VALUE._col0:bigint
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint)
- outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1]
- Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
- Limit
- Number of rows: 5
- Limit Vectorization:
- className: VectorLimitOperator
- native: true
- Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
- Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: 5
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src
-PREHOOK: Output: hdfs://### HDFS PATH ###
-POSTHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t_test
+POSTHOOK: query: INSERT INTO t_test VALUES
+(NULL, NULL, NULL, NULL, NULL, NULL),
+(8, 9, 2.0, 'one', 2.0, 2.0), (8, 9, 2.0, 'one', 2.0, 2.0),
+(4, 2, 3.3, 'two', 3.3, 3.3),
+(NULL, NULL, NULL, NULL, NULL, NULL),
+(NULL, NULL, NULL, NULL, NULL, NULL),
+(6, 2, 1.8, 'three', 1.8, 1.8),
+(7, 8, 4.5, 'four', 4.5, 4.5), (7, 8, 4.5, 'four', 4.5, 4.5), (7, 8, 4.5, 'four', 4.5, 4.5),
+(4, 1, 2.0, 'five', 2.0, 2.0), (4, 1, 2.0, 'five', 2.0, 2.0), (4, 1, 2.0, 'five', 2.0, 2.0),
+(NULL, NULL, NULL, NULL, NULL, NULL)
POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
-POSTHOOK: Output: hdfs://### HDFS PATH ###
-0 0
-10 10
-100 200
-103 206
-104 208
-PREHOOK: query: explain vectorization detail
-SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t_test
+POSTHOOK: Lineage: t_test.cdecimal1 SCRIPT []
+POSTHOOK: Lineage: t_test.cdecimal2 SCRIPT []
+POSTHOOK: Lineage: t_test.cdouble SCRIPT []
+POSTHOOK: Lineage: t_test.cint1 SCRIPT []
+POSTHOOK: Lineage: t_test.cint2 SCRIPT []
+POSTHOOK: Lineage: t_test.cvarchar SCRIPT []
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT cint1 FROM t_test GROUP BY cint1 ORDER BY cint1 LIMIT 3
PREHOOK: type: QUERY
-PREHOOK: Input: default@src
+PREHOOK: Input: default@t_test
PREHOOK: Output: hdfs://### HDFS PATH ###
-POSTHOOK: query: explain vectorization detail
-SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT cint1 FROM t_test GROUP BY cint1 ORDER BY cint1 LIMIT 3
POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
+POSTHOOK: Input: default@t_test
POSTHOOK: Output: hdfs://### HDFS PATH ###
PLAN VECTORIZATION:
enabled: true
@@ -230,53 +82,53 @@ STAGE PLANS:
Map 1
Map Operator Tree:
TableScan
- alias: src
- Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ alias: t_test
+ Statistics: Num rows: 14 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
- vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+ vectorizationSchemaColumns: [0:cint1:int, 1:cint2:int, 2:cdouble:double, 3:cvarchar:varchar(50), 4:cdecimal1:decimal(10,2)/DECIMAL_64, 5:cdecimal2:decimal(38,5), 6:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
Select Operator
- expressions: key (type: string)
- outputColumnNames: key
+ expressions: cint1 (type: int)
+ outputColumnNames: cint1
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0]
- Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE
Top N Key Operator
sort order: +
- keys: key (type: string)
+ keys: cint1 (type: int)
null sort order: z
- Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
- top n: 5
+ Statistics: Num rows: 14 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE
+ top n: 3
Top N Key Vectorization:
className: VectorTopNKeyOperator
- keyExpressions: col 0:string
+ keyExpressions: col 0:int
native: true
Group By Operator
Group By Vectorization:
className: VectorGroupByOperator
groupByMode: HASH
- keyExpressions: col 0:string
+ keyExpressions: col 0:int
native: false
vectorProcessingMode: HASH
projectedOutputColumnNums: []
- keys: key (type: string)
- minReductionHashAggr: 0.5
+ keys: cint1 (type: int)
+ minReductionHashAggr: 0.64285713
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 5 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
- key expressions: _col0 (type: string)
+ key expressions: _col0 (type: int)
null sort order: z
sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ Map-reduce partition columns: _col0 (type: int)
Reduce Sink Vectorization:
- className: VectorReduceSinkStringOperator
- keyColumns: 0:string
+ className: VectorReduceSinkLongOperator
+ keyColumns: 0:int
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 5 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
Execution mode: vectorized
Map Vectorization:
@@ -289,9 +141,9 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
- dataColumnCount: 2
+ dataColumnCount: 6
includeColumns: [0]
- dataColumns: key:string, value:string
+ dataColumns: cint1:int, cint2:int, cdouble:double, cvarchar:varchar(50), cdecimal1:decimal(10,2)/DECIMAL_64, cdecimal2:decimal(38,5)
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
@@ -306,7 +158,7 @@ STAGE PLANS:
vectorized: true
rowBatchContext:
dataColumnCount: 1
- dataColumns: KEY._col0:string
+ dataColumns: KEY._col0:int
partitionColumnCount: 0
scratchColumnTypeNames: []
Reduce Operator Tree:
@@ -314,24 +166,24 @@ STAGE PLANS:
Group By Vectorization:
className: VectorGroupByOperator
groupByMode: MERGEPARTIAL
- keyExpressions: col 0:string
+ keyExpressions: col 0:int
native: false
vectorProcessingMode: MERGE_PARTIAL
projectedOutputColumnNums: []
- keys: KEY._col0 (type: string)
+ keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 5 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
- key expressions: _col0 (type: string)
+ key expressions: _col0 (type: int)
null sort order: z
sort order: +
Reduce Sink Vectorization:
className: VectorReduceSinkObjectHashOperator
- keyColumns: 0:string
+ keyColumns: 0:int
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 5 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
Reducer 3
Execution mode: vectorized
@@ -345,30 +197,30 @@ STAGE PLANS:
vectorized: true
rowBatchContext:
dataColumnCount: 1
- dataColumns: KEY.reducesinkkey0:string
+ dataColumns: KEY.reducesinkkey0:int
partitionColumnCount: 0
scratchColumnTypeNames: []
Reduce Operator Tree:
Select Operator
- expressions: KEY.reducesinkkey0 (type: string)
+ expressions: KEY.reducesinkkey0 (type: int)
outputColumnNames: _col0
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0]
- Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 5 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Limit
- Number of rows: 5
+ Number of rows: 3
Limit Vectorization:
className: VectorLimitOperator
native: true
- Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -376,235 +228,81 @@ STAGE PLANS:
Stage: Stage-0
Fetch Operator
- limit: 5
+ limit: 3
Processor Tree:
ListSink
-PREHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5
+PREHOOK: query: SELECT cint1 FROM t_test GROUP BY cint1 ORDER BY cint1 LIMIT 3
PREHOOK: type: QUERY
-PREHOOK: Input: default@src
+PREHOOK: Input: default@t_test
PREHOOK: Output: hdfs://### HDFS PATH ###
-POSTHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5
+POSTHOOK: query: SELECT cint1 FROM t_test GROUP BY cint1 ORDER BY cint1 LIMIT 3
POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
+POSTHOOK: Input: default@t_test
POSTHOOK: Output: hdfs://### HDFS PATH ###
-0
-10
-100
-103
-104
-PREHOOK: query: explain vectorization detail
-SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+4
+6
+7
+PREHOOK: query: SELECT cint1, cint2 FROM t_test GROUP BY cint1, cint2 ORDER BY cint1, cint2 LIMIT 3
PREHOOK: type: QUERY
-PREHOOK: Input: default@src
+PREHOOK: Input: default@t_test
PREHOOK: Output: hdfs://### HDFS PATH ###
-POSTHOOK: query: explain vectorization detail
-SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+POSTHOOK: query: SELECT cint1, cint2 FROM t_test GROUP BY cint1, cint2 ORDER BY cint1, cint2 LIMIT 3
POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
+POSTHOOK: Input: default@t_test
POSTHOOK: Output: hdfs://### HDFS PATH ###
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
- Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: src1
- filterExpr: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
- TableScan Vectorization:
- native: true
- vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
- Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: SelectColumnIsNotNull(col 0:string)
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0]
- Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Reduce Sink Vectorization:
- className: VectorReduceSinkStringOperator
- keyColumns: 0:string
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 2
- includeColumns: [0]
- dataColumns: key:string, value:string
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Map 4
- Map Operator Tree:
- TableScan
- alias: src2
- filterExpr: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- TableScan Vectorization:
- native: true
- vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
- Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: SelectColumnIsNotNull(col 0:string)
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1]
- Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Reduce Sink Vectorization:
- className: VectorReduceSinkStringOperator
- keyColumns: 0:string
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumns: 1:string
- Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: string)
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 2
- includeColumns: [0, 1]
- dataColumns: key:string, value:string
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reducer 2
- Reduce Operator Tree:
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col2
- Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: string), _col2 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: z
- sort order: +
- Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
- TopN Hash Memory Usage: 0.1
- value expressions: _col1 (type: string)
- MergeJoin Vectorization:
- enabled: false
- enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
- Reducer 3
- Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- reduceColumnNullOrder: z
- reduceColumnSortOrder: +
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 2
- dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
- outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1]
- Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
- Limit
- Number of rows: 5
- Limit Vectorization:
- className: VectorLimitOperator
- native: true
- Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
- Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: 5
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+4 1
+4 2
+6 2
+PREHOOK: query: SELECT cint1, cint2 FROM t_test GROUP BY cint1, cint2 ORDER BY cint1 DESC, cint2 LIMIT 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT cint1, cint2 FROM t_test GROUP BY cint1, cint2 ORDER BY cint1 DESC, cint2 LIMIT 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+8 9
+7 8
+6 2
+PREHOOK: query: SELECT cint1, cdouble FROM t_test GROUP BY cint1, cdouble ORDER BY cint1, cdouble LIMIT 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT cint1, cdouble FROM t_test GROUP BY cint1, cdouble ORDER BY cint1, cdouble LIMIT 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+4 2.0
+4 3.3
+6 1.8
+PREHOOK: query: SELECT cvarchar, cdouble FROM t_test GROUP BY cvarchar, cdouble ORDER BY cvarchar, cdouble LIMIT 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT cvarchar, cdouble FROM t_test GROUP BY cvarchar, cdouble ORDER BY cvarchar, cdouble LIMIT 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+five 2.0
+four 4.5
+one 2.0
+PREHOOK: query: SELECT cdecimal1, cdecimal2 FROM t_test GROUP BY cdecimal1, cdecimal2 ORDER BY cdecimal1, cdecimal2 LIMIT 3
PREHOOK: type: QUERY
-PREHOOK: Input: default@src
+PREHOOK: Input: default@t_test
PREHOOK: Output: hdfs://### HDFS PATH ###
-POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+POSTHOOK: query: SELECT cdecimal1, cdecimal2 FROM t_test GROUP BY cdecimal1, cdecimal2 ORDER BY cdecimal1, cdecimal2 LIMIT 3
POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
+POSTHOOK: Input: default@t_test
POSTHOOK: Output: hdfs://### HDFS PATH ###
-0 val_0
-0 val_0
-0 val_0
-0 val_0
-0 val_0
+1.80 1.80000
+2.00 2.00000
+3.30 3.30000
+PREHOOK: query: DROP TABLE t_test
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@t_test
+PREHOOK: Output: default@t_test
+POSTHOOK: query: DROP TABLE t_test
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: default@t_test
diff --git a/ql/src/test/results/clientpositive/topnkey.q.out b/ql/src/test/results/clientpositive/topnkey.q.out
index 67691ac..cecbe89 100644
--- a/ql/src/test/results/clientpositive/topnkey.q.out
+++ b/ql/src/test/results/clientpositive/topnkey.q.out
@@ -442,378 +442,3 @@ POSTHOOK: Input: default@src
0 val_0
0 val_0
0 val_0
-PREHOOK: query: CREATE TABLE t_test(
- a int,
- b int,
- c int
-)
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@t_test
-POSTHOOK: query: CREATE TABLE t_test(
- a int,
- b int,
- c int
-)
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@t_test
-PREHOOK: query: INSERT INTO t_test VALUES
-(NULL, NULL, NULL),
-(5, 2, 3),
-(NULL, NULL, NULL),
-(NULL, NULL, NULL),
-(6, 2, 1),
-(7, 8, 4), (7, 8, 4), (7, 8, 4),
-(5, 1, 2), (5, 1, 2), (5, 1, 2),
-(NULL, NULL, NULL)
-PREHOOK: type: QUERY
-PREHOOK: Input: _dummy_database@_dummy_table
-PREHOOK: Output: default@t_test
-POSTHOOK: query: INSERT INTO t_test VALUES
-(NULL, NULL, NULL),
-(5, 2, 3),
-(NULL, NULL, NULL),
-(NULL, NULL, NULL),
-(6, 2, 1),
-(7, 8, 4), (7, 8, 4), (7, 8, 4),
-(5, 1, 2), (5, 1, 2), (5, 1, 2),
-(NULL, NULL, NULL)
-POSTHOOK: type: QUERY
-POSTHOOK: Input: _dummy_database@_dummy_table
-POSTHOOK: Output: default@t_test
-POSTHOOK: Lineage: t_test.a SCRIPT []
-POSTHOOK: Lineage: t_test.b SCRIPT []
-POSTHOOK: Lineage: t_test.c SCRIPT []
-PREHOOK: query: EXPLAIN
-SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t_test
-#### A masked pattern was here ####
-POSTHOOK: query: EXPLAIN
-SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t_test
-#### A masked pattern was here ####
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
- Stage-0 depends on stages: Stage-2
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: t_test
- Statistics: Num rows: 12 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: a (type: int), b (type: int)
- outputColumnNames: a, b
- Statistics: Num rows: 12 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: a (type: int), b (type: int)
- minReductionHashAggr: 0.99
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: int)
- null sort order: zz
- sort order: ++
- Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- TopN Hash Memory Usage: 0.1
- Reduce Operator Tree:
- Group By Operator
- keys: KEY._col0 (type: int), KEY._col1 (type: int)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-2
- Map Reduce
- Map Operator Tree:
- TableScan
- Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: int)
- null sort order: zz
- sort order: ++
- Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- TopN Hash Memory Usage: 0.1
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Limit
- Number of rows: 3
- Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: 3
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t_test
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t_test
-#### A masked pattern was here ####
-5 1
-5 2
-6 2
-PREHOOK: query: EXPLAIN
-SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t_test
-#### A masked pattern was here ####
-POSTHOOK: query: EXPLAIN
-SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t_test
-#### A masked pattern was here ####
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
- Stage-0 depends on stages: Stage-2
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: t_test
- Statistics: Num rows: 12 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: a (type: int), b (type: int)
- outputColumnNames: a, b
- Statistics: Num rows: 12 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: count(b)
- keys: a (type: int)
- minReductionHashAggr: 0.99
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- null sort order: z
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
- TopN Hash Memory Usage: 0.1
- value expressions: _col1 (type: bigint)
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- keys: KEY._col0 (type: int)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-2
- Map Reduce
- Map Operator Tree:
- TableScan
- Reduce Output Operator
- key expressions: _col0 (type: int)
- null sort order: z
- sort order: +
- Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
- TopN Hash Memory Usage: 0.1
- value expressions: _col1 (type: bigint)
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
- Limit
- Number of rows: 2
- Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: 2
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t_test
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t_test
-#### A masked pattern was here ####
-5 4
-6 1
-PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS FIRST LIMIT 2
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t_test
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS FIRST LIMIT 2
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t_test
-#### A masked pattern was here ####
-NULL 0
-5 4
-PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS LAST LIMIT 2
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t_test
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS LAST LIMIT 2
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t_test
-#### A masked pattern was here ####
-5 4
-6 1
-PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC LIMIT 2
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t_test
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC LIMIT 2
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t_test
-#### A masked pattern was here ####
-5 4
-6 1
-PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS FIRST LIMIT 2
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t_test
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS FIRST LIMIT 2
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t_test
-#### A masked pattern was here ####
-NULL 0
-5 4
-PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS LAST LIMIT 2
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t_test
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS LAST LIMIT 2
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t_test
-#### A masked pattern was here ####
-5 4
-6 1
-PREHOOK: query: DROP TABLE IF EXISTS t_test
-PREHOOK: type: DROPTABLE
-PREHOOK: Input: default@t_test
-PREHOOK: Output: default@t_test
-POSTHOOK: query: DROP TABLE IF EXISTS t_test
-POSTHOOK: type: DROPTABLE
-POSTHOOK: Input: default@t_test
-POSTHOOK: Output: default@t_test
-PREHOOK: query: CREATE TABLE t_test(
- a int,
- b int,
- c int
-)
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@t_test
-POSTHOOK: query: CREATE TABLE t_test(
- a int,
- b int,
- c int
-)
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@t_test
-PREHOOK: query: INSERT INTO t_test VALUES
-(7, 8, 4), (7, 8, 4), (7, 8, 4),
-(NULL, NULL, NULL),
-(5, 2, 3),
-(NULL, NULL, NULL),
-(NULL, NULL, NULL),
-(6, 2, 1),
-(5, 1, 2), (5, 1, 2), (5, 1, 2),
-(NULL, NULL, NULL)
-PREHOOK: type: QUERY
-PREHOOK: Input: _dummy_database@_dummy_table
-PREHOOK: Output: default@t_test
-POSTHOOK: query: INSERT INTO t_test VALUES
-(7, 8, 4), (7, 8, 4), (7, 8, 4),
-(NULL, NULL, NULL),
-(5, 2, 3),
-(NULL, NULL, NULL),
-(NULL, NULL, NULL),
-(6, 2, 1),
-(5, 1, 2), (5, 1, 2), (5, 1, 2),
-(NULL, NULL, NULL)
-POSTHOOK: type: QUERY
-POSTHOOK: Input: _dummy_database@_dummy_table
-POSTHOOK: Output: default@t_test
-POSTHOOK: Lineage: t_test.a SCRIPT []
-POSTHOOK: Lineage: t_test.b SCRIPT []
-POSTHOOK: Lineage: t_test.c SCRIPT []
-PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC LIMIT 2
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t_test
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC LIMIT 2
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t_test
-#### A masked pattern was here ####
-7 3
-6 1
-PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS FIRST LIMIT 2
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t_test
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS FIRST LIMIT 2
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t_test
-#### A masked pattern was here ####
-NULL 0
-7 3
-PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS LAST LIMIT 2
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t_test
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS LAST LIMIT 2
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t_test
-#### A masked pattern was here ####
-7 3
-6 1
-PREHOOK: query: DROP TABLE IF EXISTS t_test
-PREHOOK: type: DROPTABLE
-PREHOOK: Input: default@t_test
-PREHOOK: Output: default@t_test
-POSTHOOK: query: DROP TABLE IF EXISTS t_test
-POSTHOOK: type: DROPTABLE
-POSTHOOK: Input: default@t_test
-POSTHOOK: Output: default@t_test
diff --git a/ql/src/test/results/clientpositive/topnkey_order_null.q.out b/ql/src/test/results/clientpositive/topnkey_order_null.q.out
new file mode 100644
index 0000000..fef6b70
--- /dev/null
+++ b/ql/src/test/results/clientpositive/topnkey_order_null.q.out
@@ -0,0 +1,282 @@
+PREHOOK: query: CREATE TABLE t_test(
+ a int,
+ b int,
+ c int
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t_test
+POSTHOOK: query: CREATE TABLE t_test(
+ a int,
+ b int,
+ c int
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t_test
+PREHOOK: query: INSERT INTO t_test VALUES
+(NULL, NULL, NULL),
+(5, 2, 3),
+(NULL, NULL, NULL),
+(NULL, NULL, NULL),
+(6, 2, 1),
+(7, 8, 4), (7, 8, 4), (7, 8, 4),
+(5, 1, 2), (5, 1, 2), (5, 1, 2),
+(NULL, NULL, NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t_test
+POSTHOOK: query: INSERT INTO t_test VALUES
+(NULL, NULL, NULL),
+(5, 2, 3),
+(NULL, NULL, NULL),
+(NULL, NULL, NULL),
+(6, 2, 1),
+(7, 8, 4), (7, 8, 4), (7, 8, 4),
+(5, 1, 2), (5, 1, 2), (5, 1, 2),
+(NULL, NULL, NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t_test
+POSTHOOK: Lineage: t_test.a SCRIPT []
+POSTHOOK: Lineage: t_test.b SCRIPT []
+POSTHOOK: Lineage: t_test.c SCRIPT []
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+5 4
+6 1
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+5 4
+6 1
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS FIRST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS FIRST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+NULL 0
+5 4
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS FIRST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS FIRST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+NULL 0
+5 4
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS LAST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS LAST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+5 4
+6 1
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS LAST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS LAST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+5 4
+6 1
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+5 4
+6 1
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+5 4
+6 1
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS FIRST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS FIRST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+NULL 0
+5 4
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS FIRST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS FIRST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+NULL 0
+5 4
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS LAST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS LAST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+5 4
+6 1
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS LAST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS LAST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+5 4
+6 1
+PREHOOK: query: DROP TABLE IF EXISTS t_test
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@t_test
+PREHOOK: Output: default@t_test
+POSTHOOK: query: DROP TABLE IF EXISTS t_test
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: default@t_test
+PREHOOK: query: CREATE TABLE t_test(
+ a int,
+ b int,
+ c int
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t_test
+POSTHOOK: query: CREATE TABLE t_test(
+ a int,
+ b int,
+ c int
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t_test
+PREHOOK: query: INSERT INTO t_test VALUES
+(7, 8, 4), (7, 8, 4), (7, 8, 4),
+(NULL, NULL, NULL),
+(5, 2, 3),
+(NULL, NULL, NULL),
+(NULL, NULL, NULL),
+(6, 2, 1),
+(5, 1, 2), (5, 1, 2), (5, 1, 2),
+(NULL, NULL, NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t_test
+POSTHOOK: query: INSERT INTO t_test VALUES
+(7, 8, 4), (7, 8, 4), (7, 8, 4),
+(NULL, NULL, NULL),
+(5, 2, 3),
+(NULL, NULL, NULL),
+(NULL, NULL, NULL),
+(6, 2, 1),
+(5, 1, 2), (5, 1, 2), (5, 1, 2),
+(NULL, NULL, NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t_test
+POSTHOOK: Lineage: t_test.a SCRIPT []
+POSTHOOK: Lineage: t_test.b SCRIPT []
+POSTHOOK: Lineage: t_test.c SCRIPT []
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+7 3
+6 1
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+7 3
+6 1
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS FIRST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS FIRST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+NULL 0
+7 3
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS FIRST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS FIRST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+NULL 0
+7 3
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS LAST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS LAST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+7 3
+6 1
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS LAST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS LAST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+7 3
+6 1
+PREHOOK: query: DROP TABLE IF EXISTS t_test
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@t_test
+PREHOOK: Output: default@t_test
+POSTHOOK: query: DROP TABLE IF EXISTS t_test
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: default@t_test
diff --git a/ql/src/test/results/clientpositive/vector_topnkey.q.out b/ql/src/test/results/clientpositive/vector_topnkey.q.out
index 7010c0b..1830eae 100644
--- a/ql/src/test/results/clientpositive/vector_topnkey.q.out
+++ b/ql/src/test/results/clientpositive/vector_topnkey.q.out
@@ -1,186 +1,66 @@
-PREHOOK: query: explain vectorization detail
-SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5
+PREHOOK: query: CREATE TABLE t_test(
+ cint1 int,
+ cint2 int,
+ cdouble double,
+ cvarchar varchar(50),
+ cdecimal1 decimal(10,2),
+ cdecimal2 decimal(38,5)
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t_test
+POSTHOOK: query: CREATE TABLE t_test(
+ cint1 int,
+ cint2 int,
+ cdouble double,
+ cvarchar varchar(50),
+ cdecimal1 decimal(10,2),
+ cdecimal2 decimal(38,5)
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t_test
+PREHOOK: query: INSERT INTO t_test VALUES
+(NULL, NULL, NULL, NULL, NULL, NULL),
+(8, 9, 2.0, 'one', 2.0, 2.0), (8, 9, 2.0, 'one', 2.0, 2.0),
+(4, 2, 3.3, 'two', 3.3, 3.3),
+(NULL, NULL, NULL, NULL, NULL, NULL),
+(NULL, NULL, NULL, NULL, NULL, NULL),
+(6, 2, 1.8, 'three', 1.8, 1.8),
+(7, 8, 4.5, 'four', 4.5, 4.5), (7, 8, 4.5, 'four', 4.5, 4.5), (7, 8, 4.5, 'four', 4.5, 4.5),
+(4, 1, 2.0, 'five', 2.0, 2.0), (4, 1, 2.0, 'five', 2.0, 2.0), (4, 1, 2.0, 'five', 2.0, 2.0),
+(NULL, NULL, NULL, NULL, NULL, NULL)
PREHOOK: type: QUERY
-PREHOOK: Input: default@src
-#### A masked pattern was here ####
-POSTHOOK: query: explain vectorization detail
-SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
-#### A masked pattern was here ####
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
- Stage-0 depends on stages: Stage-2
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: src
- Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- TableScan Vectorization:
- native: true
- vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
- Select Operator
- expressions: key (type: string), UDFToInteger(substr(value, 5)) (type: int)
- outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 4]
- selectExpressions: CastStringToLong(col 3:string)(children: StringSubstrColStart(col 1:string, start 4) -> 3:string) -> 4:int
- Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: sum(_col1)
- Group By Vectorization:
- aggregators: VectorUDAFSumLong(col 4:int) -> bigint
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 0:string
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0]
- keys: _col0 (type: string)
- minReductionHashAggr: 0.99
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: z
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
- TopN Hash Memory Usage: 0.1
- value expressions: _col1 (type: bigint)
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 2
- includeColumns: [0, 1]
- dataColumns: key:string, value:string
- partitionColumnCount: 0
- scratchColumnTypeNames: [string, bigint]
- Reduce Vectorization:
- enabled: false
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
- enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Reduce Operator Tree:
- Group By Operator
- aggregations: sum(VALUE._col0)
- keys: KEY._col0 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-2
- Map Reduce
- Map Operator Tree:
- TableScan
- TableScan Vectorization:
- native: true
- vectorizationSchemaColumns: [0:_col0:string, 1:_col1:bigint]
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: z
- sort order: +
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
- TopN Hash Memory Usage: 0.1
- value expressions: _col1 (type: bigint)
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
- inputFormatFeatureSupport: []
- featureSupportInUse: []
- inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 2
- includeColumns: [0, 1]
- dataColumns: _col0:string, _col1:bigint
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Vectorization:
- enabled: false
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
- enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
- Limit
- Number of rows: 5
- Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: 5
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t_test
+POSTHOOK: query: INSERT INTO t_test VALUES
+(NULL, NULL, NULL, NULL, NULL, NULL),
+(8, 9, 2.0, 'one', 2.0, 2.0), (8, 9, 2.0, 'one', 2.0, 2.0),
+(4, 2, 3.3, 'two', 3.3, 3.3),
+(NULL, NULL, NULL, NULL, NULL, NULL),
+(NULL, NULL, NULL, NULL, NULL, NULL),
+(6, 2, 1.8, 'three', 1.8, 1.8),
+(7, 8, 4.5, 'four', 4.5, 4.5), (7, 8, 4.5, 'four', 4.5, 4.5), (7, 8, 4.5, 'four', 4.5, 4.5),
+(4, 1, 2.0, 'five', 2.0, 2.0), (4, 1, 2.0, 'five', 2.0, 2.0), (4, 1, 2.0, 'five', 2.0, 2.0),
+(NULL, NULL, NULL, NULL, NULL, NULL)
POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
-#### A masked pattern was here ####
-0 0
-10 10
-100 200
-103 206
-104 208
-PREHOOK: query: explain vectorization detail
-SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t_test
+POSTHOOK: Lineage: t_test.cdecimal1 SCRIPT []
+POSTHOOK: Lineage: t_test.cdecimal2 SCRIPT []
+POSTHOOK: Lineage: t_test.cdouble SCRIPT []
+POSTHOOK: Lineage: t_test.cint1 SCRIPT []
+POSTHOOK: Lineage: t_test.cint2 SCRIPT []
+POSTHOOK: Lineage: t_test.cvarchar SCRIPT []
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT cint1 FROM t_test GROUP BY cint1 ORDER BY cint1 LIMIT 3
PREHOOK: type: QUERY
-PREHOOK: Input: default@src
+PREHOOK: Input: default@t_test
#### A masked pattern was here ####
-POSTHOOK: query: explain vectorization detail
-SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT cint1 FROM t_test GROUP BY cint1 ORDER BY cint1 LIMIT 3
POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
+POSTHOOK: Input: default@t_test
#### A masked pattern was here ####
PLAN VECTORIZATION:
enabled: true
@@ -196,43 +76,43 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: src
- Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ alias: t_test
+ Statistics: Num rows: 14 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
- vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+ vectorizationSchemaColumns: [0:cint1:int, 1:cint2:int, 2:cdouble:double, 3:cvarchar:varchar(50), 4:cdecimal1:decimal(10,2)/DECIMAL_64, 5:cdecimal2:decimal(38,5), 6:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
Select Operator
- expressions: key (type: string)
- outputColumnNames: key
+ expressions: cint1 (type: int)
+ outputColumnNames: cint1
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0]
- Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
Group By Vectorization:
className: VectorGroupByOperator
groupByMode: HASH
- keyExpressions: col 0:string
+ keyExpressions: col 0:int
native: false
vectorProcessingMode: HASH
projectedOutputColumnNums: []
- keys: key (type: string)
+ keys: cint1 (type: int)
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
- key expressions: _col0 (type: string)
+ key expressions: _col0 (type: int)
null sort order: z
sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ Map-reduce partition columns: _col0 (type: int)
Reduce Sink Vectorization:
className: VectorReduceSinkOperator
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
Execution mode: vectorized
Map Vectorization:
@@ -245,9 +125,9 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
- dataColumnCount: 2
+ dataColumnCount: 6
includeColumns: [0]
- dataColumns: key:string, value:string
+ dataColumns: cint1:int, cint2:int, cdouble:double, cvarchar:varchar(50), cdecimal1:decimal(10,2)/DECIMAL_64, cdecimal2:decimal(38,5)
partitionColumnCount: 0
scratchColumnTypeNames: []
Reduce Vectorization:
@@ -256,10 +136,10 @@ STAGE PLANS:
enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
Reduce Operator Tree:
Group By Operator
- keys: KEY._col0 (type: string)
+ keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
table:
@@ -273,9 +153,9 @@ STAGE PLANS:
TableScan
TableScan Vectorization:
native: true
- vectorizationSchemaColumns: [0:_col0:string]
+ vectorizationSchemaColumns: [0:_col0:int]
Reduce Output Operator
- key expressions: _col0 (type: string)
+ key expressions: _col0 (type: int)
null sort order: z
sort order: +
Reduce Sink Vectorization:
@@ -283,7 +163,7 @@ STAGE PLANS:
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
Execution mode: vectorized
Map Vectorization:
@@ -298,7 +178,7 @@ STAGE PLANS:
rowBatchContext:
dataColumnCount: 1
includeColumns: [0]
- dataColumns: _col0:string
+ dataColumns: _col0:int
partitionColumnCount: 0
scratchColumnTypeNames: []
Reduce Vectorization:
@@ -307,15 +187,15 @@ STAGE PLANS:
enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
Reduce Operator Tree:
Select Operator
- expressions: KEY.reducesinkkey0 (type: string)
+ expressions: KEY.reducesinkkey0 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
Limit
- Number of rows: 5
- Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE
+ Number of rows: 3
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -323,179 +203,81 @@ STAGE PLANS:
Stage: Stage-0
Fetch Operator
- limit: 5
+ limit: 3
Processor Tree:
ListSink
-PREHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5
+PREHOOK: query: SELECT cint1 FROM t_test GROUP BY cint1 ORDER BY cint1 LIMIT 3
PREHOOK: type: QUERY
-PREHOOK: Input: default@src
+PREHOOK: Input: default@t_test
#### A masked pattern was here ####
-POSTHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5
+POSTHOOK: query: SELECT cint1 FROM t_test GROUP BY cint1 ORDER BY cint1 LIMIT 3
POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
+POSTHOOK: Input: default@t_test
#### A masked pattern was here ####
-0
-10
-100
-103
-104
-PREHOOK: query: explain vectorization detail
-SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+4
+6
+7
+PREHOOK: query: SELECT cint1, cint2 FROM t_test GROUP BY cint1, cint2 ORDER BY cint1, cint2 LIMIT 3
PREHOOK: type: QUERY
-PREHOOK: Input: default@src
+PREHOOK: Input: default@t_test
#### A masked pattern was here ####
-POSTHOOK: query: explain vectorization detail
-SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+POSTHOOK: query: SELECT cint1, cint2 FROM t_test GROUP BY cint1, cint2 ORDER BY cint1, cint2 LIMIT 3
POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
+POSTHOOK: Input: default@t_test
#### A masked pattern was here ####
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
- Stage-0 depends on stages: Stage-2
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: src1
- filterExpr: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
- TableScan
- alias: src2
- filterExpr: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: string)
- Map Vectorization:
- enabled: false
- enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false
- Reduce Vectorization:
- enabled: false
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
- enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col2
- Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: string), _col2 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-2
- Map Reduce
- Map Operator Tree:
- TableScan
- TableScan Vectorization:
- native: true
- vectorizationSchemaColumns: [0:_col0:string, 1:_col1:string]
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: z
- sort order: +
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
- TopN Hash Memory Usage: 0.1
- value expressions: _col1 (type: string)
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
- inputFormatFeatureSupport: []
- featureSupportInUse: []
- inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 2
- includeColumns: [0, 1]
- dataColumns: _col0:string, _col1:string
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Vectorization:
- enabled: false
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
- enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
- Limit
- Number of rows: 5
- Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: 5
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+4 1
+4 2
+6 2
+PREHOOK: query: SELECT cint1, cint2 FROM t_test GROUP BY cint1, cint2 ORDER BY cint1 DESC, cint2 LIMIT 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cint1, cint2 FROM t_test GROUP BY cint1, cint2 ORDER BY cint1 DESC, cint2 LIMIT 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+8 9
+7 8
+6 2
+PREHOOK: query: SELECT cint1, cdouble FROM t_test GROUP BY cint1, cdouble ORDER BY cint1, cdouble LIMIT 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cint1, cdouble FROM t_test GROUP BY cint1, cdouble ORDER BY cint1, cdouble LIMIT 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+4 2.0
+4 3.3
+6 1.8
+PREHOOK: query: SELECT cvarchar, cdouble FROM t_test GROUP BY cvarchar, cdouble ORDER BY cvarchar, cdouble LIMIT 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cvarchar, cdouble FROM t_test GROUP BY cvarchar, cdouble ORDER BY cvarchar, cdouble LIMIT 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+five 2.0
+four 4.5
+one 2.0
+PREHOOK: query: SELECT cdecimal1, cdecimal2 FROM t_test GROUP BY cdecimal1, cdecimal2 ORDER BY cdecimal1, cdecimal2 LIMIT 3
PREHOOK: type: QUERY
-PREHOOK: Input: default@src
+PREHOOK: Input: default@t_test
#### A masked pattern was here ####
-POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+POSTHOOK: query: SELECT cdecimal1, cdecimal2 FROM t_test GROUP BY cdecimal1, cdecimal2 ORDER BY cdecimal1, cdecimal2 LIMIT 3
POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
+POSTHOOK: Input: default@t_test
#### A masked pattern was here ####
-0 val_0
-0 val_0
-0 val_0
-0 val_0
-0 val_0
+1.80 1.80000
+2.00 2.00000
+3.30 3.30000
+PREHOOK: query: DROP TABLE t_test
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@t_test
+PREHOOK: Output: default@t_test
+POSTHOOK: query: DROP TABLE t_test
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: default@t_test
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectComparator.java b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectComparator.java
new file mode 100644
index 0000000..9fb7787
--- /dev/null
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectComparator.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.objectinspector;
+
+import java.util.Comparator;
+
+import static org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.NullValueOption;
+
+/**
+ * This class wraps the ObjectInspectorUtils.compare method and implements java.util.Comparator.
+ */
+public class ObjectComparator implements Comparator<Object> {
+
+ private final ObjectInspector objectInspector1;
+ private final ObjectInspector objectInspector2;
+ private final NullValueOption nullSortOrder;
+ private final MapEqualComparer mapEqualComparer = new FullMapEqualComparer();
+
+ public ObjectComparator(ObjectInspector objectInspector1, ObjectInspector objectInspector2,
+ NullValueOption nullSortOrder) {
+ this.objectInspector1 = objectInspector1;
+ this.objectInspector2 = objectInspector2;
+ this.nullSortOrder = nullSortOrder;
+ }
+
+ @Override
+ public int compare(Object o1, Object o2) {
+ return ObjectInspectorUtils.compare(o1, objectInspector1, o2, objectInspector2, mapEqualComparer, nullSortOrder);
+ }
+}
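
A minimal usage sketch of the new comparator (not part of the patch above, and only an illustration): it wraps the same java int object inspector on both sides and compares a few boxed values. The PrimitiveObjectInspectorFactory inspector and the MINVALUE/MAXVALUE constants on ObjectInspectorUtils.NullValueOption are assumed to be available as elsewhere in the serde2 package; under the MAXVALUE assumption, NULL is treated as the largest value, which corresponds to the ORDER BY ... ASC NULLS LAST results in the q.out files above.

import org.apache.hadoop.hive.serde2.objectinspector.ObjectComparator;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.NullValueOption;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class ObjectComparatorSketch {
  public static void main(String[] args) {
    // Both sides use the same inspector here; the constructor also accepts two
    // different inspectors when the left and right objects come from different schemas.
    ObjectInspector intOI = PrimitiveObjectInspectorFactory.javaIntObjectInspector;

    // Assumption: NullValueOption.MAXVALUE makes NULL compare as the largest value,
    // i.e. NULL keys sort last in ascending order.
    ObjectComparator cmp = new ObjectComparator(intOI, intOI, NullValueOption.MAXVALUE);

    System.out.println(cmp.compare(5, 7));        // negative: 5 sorts before 7
    System.out.println(cmp.compare(null, 7));     // positive under the MAXVALUE assumption above
    System.out.println(cmp.compare(null, null));  // 0: equal keys
  }
}

Because ObjectComparator implements java.util.Comparator, the same instance can be handed to a TreeMap, PriorityQueue, or Collections.sort over reduce-side keys, which is the kind of ordered structure a top-n key filter needs when honoring a per-column null sort order.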