Posted to commits@hive.apache.org by jc...@apache.org on 2015/12/21 02:18:10 UTC
[03/15] hive git commit: HIVE-11531: Add mysql-style LIMIT support to Hive, or improve ROW_NUMBER performance-wise (Hui Zheng, reviewed by Sergey Shelukhin, Jesus Camacho Rodriguez)
HIVE-11531: Add mysql-style LIMIT support to Hive, or improve ROW_NUMBER performance-wise (Hui Zheng, reviewed by Sergey Shelukhin, Jesus Camacho Rodriguez)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6273bee3
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6273bee3
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6273bee3
Branch: refs/heads/branch-2.0
Commit: 6273bee32601c759f16293fae119d7a7bb7473e1
Parents: a59d4ff
Author: Hui Zheng <hu...@yahoo-corp.jp>
Authored: Mon Dec 21 02:11:51 2015 +0100
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Mon Dec 21 02:11:51 2015 +0100
----------------------------------------------------------------------
.../hadoop/hive/ql/exec/LimitOperator.java | 6 +-
.../ql/exec/vector/VectorLimitOperator.java | 26 +-
.../hive/ql/optimizer/GenMapRedUtils.java | 3 +-
.../hive/ql/optimizer/GlobalLimitOptimizer.java | 22 +-
.../ql/optimizer/LimitPushdownOptimizer.java | 5 +-
.../calcite/reloperators/HiveSortLimit.java | 4 +
.../calcite/translator/ASTBuilder.java | 5 +-
.../calcite/translator/ASTConverter.java | 11 +-
.../hadoop/hive/ql/parse/CalcitePlanner.java | 15 +-
.../hadoop/hive/ql/parse/GlobalLimitCtx.java | 10 +-
.../org/apache/hadoop/hive/ql/parse/HiveLexer.g | 1 +
.../apache/hadoop/hive/ql/parse/HiveParser.g | 5 +-
.../hadoop/hive/ql/parse/IdentifiersParser.g | 2 +-
.../hadoop/hive/ql/parse/MapReduceCompiler.java | 3 +-
.../hadoop/hive/ql/parse/QBParseInfo.java | 20 +-
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 27 +-
.../apache/hadoop/hive/ql/plan/LimitDesc.java | 19 +
.../test/queries/clientpositive/offset_limit.q | 25 +
.../offset_limit_global_optimizer.q | 45 +
.../clientpositive/offset_limit_ppd_optimizer.q | 80 +
.../clientpositive/vectorization_offset_limit.q | 10 +
.../results/clientpositive/offset_limit.q.out | 257 ++
.../offset_limit_global_optimizer.q.out | 3390 ++++++++++++++++++
.../offset_limit_ppd_optimizer.q.out | 1377 +++++++
.../vectorization_offset_limit.q.out | 118 +
25 files changed, 5442 insertions(+), 44 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/6273bee3/ql/src/java/org/apache/hadoop/hive/ql/exec/LimitOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/LimitOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/LimitOperator.java
index 8fe96be..fc85bea 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/LimitOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/LimitOperator.java
@@ -34,6 +34,7 @@ public class LimitOperator extends Operator<LimitDesc> implements Serializable {
private static final long serialVersionUID = 1L;
protected transient int limit;
+ protected transient int offset;
protected transient int leastRow;
protected transient int currCount;
protected transient boolean isMap;
@@ -43,15 +44,18 @@ public class LimitOperator extends Operator<LimitDesc> implements Serializable {
super.initializeOp(hconf);
limit = conf.getLimit();
leastRow = conf.getLeastRows();
+ offset = (conf.getOffset() == null) ? 0 : conf.getOffset();
currCount = 0;
isMap = hconf.getBoolean("mapred.task.is.map", true);
}
@Override
public void process(Object row, int tag) throws HiveException {
- if (currCount < limit) {
+ if (offset <= currCount && currCount < (offset + limit)) {
forward(row, inputObjInspectors[tag]);
currCount++;
+ } else if (offset > currCount) {
+ currCount++;
} else {
setDone(true);
}
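
The rewritten process() above forwards only rows whose running count falls in the half-open window [offset, offset + limit): rows before the window are counted and dropped, and the operator marks itself done once the window has been emitted. A minimal standalone sketch of that window arithmetic follows; the class name and the driver loop are invented for illustration only.

// Hypothetical illustration of the LimitOperator window logic above;
// the class and the row loop are invented for this sketch.
public class OffsetLimitWindowSketch {
  public static void main(String[] args) {
    int offset = 3, limit = 4;
    int currCount = 0;
    for (int row = 0; row < 20; row++) {
      if (offset <= currCount && currCount < (offset + limit)) {
        System.out.println("forward row " + row);  // rows 3..6 inclusive
        currCount++;
      } else if (offset > currCount) {
        currCount++;                               // still skipping the offset
      } else {
        System.out.println("done");                // stands in for setDone(true)
        break;
      }
    }
  }
}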
http://git-wip-us.apache.org/repos/asf/hive/blob/6273bee3/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java
index 2f4e46b..4cb91d4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java
@@ -42,12 +42,30 @@ public class VectorLimitOperator extends LimitOperator {
public void process(Object row, int tag) throws HiveException {
VectorizedRowBatch batch = (VectorizedRowBatch) row;
- if (currCount < limit) {
- batch.size = Math.min(batch.size, limit - currCount);
- forward(row, inputObjInspectors[tag]);
+ if (currCount + batch.size < offset) {
currCount += batch.size;
- } else {
+ } else if (currCount >= offset + limit) {
setDone(true);
+ } else {
+ int skipSize = 0;
+ if (currCount < offset) {
+ skipSize = offset - currCount;
+ }
+ //skip skipSize rows of batch
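+ // by rewriting the selected[] index array below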
+ batch.size = Math.min(batch.size, offset + limit - currCount);
+ if (batch.selectedInUse == false) {
+ batch.selectedInUse = true;
+ batch.selected = new int[batch.size];
+ for (int i = 0; i < batch.size - skipSize; i++) {
+ batch.selected[i] = skipSize + i;
+ }
+ } else {
+ for (int i = 0; i < batch.size - skipSize; i++) {
+ batch.selected[i] = batch.selected[skipSize + i];
+ }
+ }
+ forward(row, inputObjInspectors[tag]);
+ currCount += batch.size;
}
}
}
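
The vectorized variant applies the same window per batch: a batch that ends before the offset is skipped by counting alone, a batch that starts past the window stops the operator, and a batch straddling a boundary is trimmed by rewriting the selected[] index array so its first skipSize rows are dropped. A rough standalone model of the bookkeeping follows, with the VectorizedRowBatch and selected[] mechanics elided and the batch sizes assumed; the offset and limit match the LIMIT 490,20 case exercised in offset_limit_global_optimizer.q.

// Rough model of the per-batch offset/limit bookkeeping above; the
// VectorizedRowBatch and selected[] rewrite are deliberately elided.
public class VectorOffsetLimitSketch {
  public static void main(String[] args) {
    int offset = 490, limit = 20;
    int currCount = 0;
    int[] batchSizes = {200, 200, 200, 200};  // assumed incoming batch sizes
    for (int batchSize : batchSizes) {
      if (currCount + batchSize < offset) {
        currCount += batchSize;                // batch ends before the window
      } else if (currCount >= offset + limit) {
        System.out.println("done");            // stands in for setDone(true)
        break;
      } else {
        int skipSize = Math.max(0, offset - currCount);
        int emit = Math.min(batchSize, offset + limit - currCount);
        System.out.println("forward rows [" + skipSize + ", " + emit + ")");
        currCount += emit;                     // mirrors currCount += batch.size
      }
    }
  }
}

For the assumed sizes this skips the first two batches outright, forwards rows [90, 110) of the third, and stops on the fourth.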
http://git-wip-us.apache.org/repos/asf/hive/blob/6273bee3/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
index 0cd7b62..a42df71 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
@@ -544,7 +544,8 @@ public final class GenMapRedUtils {
} else {
long sizePerRow = HiveConf.getLongVar(parseCtx.getConf(),
HiveConf.ConfVars.HIVELIMITMAXROWSIZE);
- sizeNeeded = parseCtx.getGlobalLimitCtx().getGlobalLimit() * sizePerRow;
+ sizeNeeded = (parseCtx.getGlobalLimitCtx().getGlobalOffset()
+ + parseCtx.getGlobalLimitCtx().getGlobalLimit()) * sizePerRow;
// for the optimization that reduce number of input file, we limit number
// of files allowed. If more than specific number of files have to be
// selected, we skip this optimization. Since having too many files as
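
The estimate above drives the global limit optimization's decision to read only a few input files. The scan must still materialize the offset rows it will later discard, so the byte budget grows from limit * sizePerRow to (offset + limit) * sizePerRow. A worked example with assumed values:

// Worked example of the sizeNeeded estimate above, with assumed values
// (hive.limit.row.max.size = 100 bytes per row, query LIMIT 490,10).
public class SizeNeededSketch {
  public static void main(String[] args) {
    long sizePerRow = 100L;          // assumed HIVELIMITMAXROWSIZE
    long globalOffset = 490L;
    long globalLimit = 10L;
    // The scan still has to produce the offset rows it discards, so the
    // budget is (offset + limit) * sizePerRow, not limit * sizePerRow.
    long sizeNeeded = (globalOffset + globalLimit) * sizePerRow;
    System.out.println(sizeNeeded);  // 50000, versus 1000 before the patch
  }
}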
http://git-wip-us.apache.org/repos/asf/hive/blob/6273bee3/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GlobalLimitOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GlobalLimitOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GlobalLimitOptimizer.java
index 74d6292..2062df1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GlobalLimitOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GlobalLimitOptimizer.java
@@ -27,6 +27,7 @@ import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.plan.LimitDesc;
import org.apache.hadoop.hive.ql.exec.LimitOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorUtils;
@@ -93,16 +94,19 @@ public class GlobalLimitOptimizer extends Transform {
// SELECT * FROM (SELECT col1 as col2 (SELECT * FROM ...) t1 LIMIT ...) t2);
//
TableScanOperator ts = (TableScanOperator) topOps.values().toArray()[0];
- Integer tempGlobalLimit = checkQbpForGlobalLimit(ts);
+ LimitOperator tempGlobalLimit = checkQbpForGlobalLimit(ts);
// query qualify for the optimization
- if (tempGlobalLimit != null && tempGlobalLimit != 0) {
+ if (tempGlobalLimit != null) {
+ LimitDesc tempGlobalLimitDesc = tempGlobalLimit.getConf();
Table tab = ts.getConf().getTableMetadata();
Set<FilterOperator> filterOps = OperatorUtils.findOperators(ts, FilterOperator.class);
if (!tab.isPartitioned()) {
if (filterOps.size() == 0) {
- globalLimitCtx.enableOpt(tempGlobalLimit);
+ Integer tempOffset = tempGlobalLimitDesc.getOffset();
+ globalLimitCtx.enableOpt(tempGlobalLimitDesc.getLimit(),
+ (tempOffset == null) ? 0 : tempOffset);
}
} else {
// check if the pruner only contains partition columns
@@ -114,11 +118,15 @@ public class GlobalLimitOptimizer extends Transform {
// If there is any unknown partition, create a map-reduce job for
// the filter to prune correctly
if (!partsList.hasUnknownPartitions()) {
- globalLimitCtx.enableOpt(tempGlobalLimit);
+ Integer tempOffset = tempGlobalLimitDesc.getOffset();
+ globalLimitCtx.enableOpt(tempGlobalLimitDesc.getLimit(),
+ (tempOffset == null) ? 0 : tempOffset);
}
}
}
if (globalLimitCtx.isEnable()) {
+ LOG.info("Qualify the optimize that reduces input size for 'offset' for offset "
+ + globalLimitCtx.getGlobalOffset());
LOG.info("Qualify the optimize that reduces input size for 'limit' for limit "
+ globalLimitCtx.getGlobalLimit());
}
@@ -143,7 +151,7 @@ public class GlobalLimitOptimizer extends Transform {
* if there is no limit, or more than one, return null
*/
- private static Integer checkQbpForGlobalLimit(TableScanOperator ts) {
+ private static LimitOperator checkQbpForGlobalLimit(TableScanOperator ts) {
Set<Class<? extends Operator<?>>> searchedClasses =
new ImmutableSet.Builder<Class<? extends Operator<?>>>()
.add(ReduceSinkOperator.class)
@@ -185,10 +193,10 @@ public class GlobalLimitOptimizer extends Transform {
// Otherwise, return null
Collection<Operator<?>> limitOps = ops.get(LimitOperator.class);
if (limitOps.size() == 1) {
- return ((LimitOperator) limitOps.iterator().next()).getConf().getLimit();
+ return (LimitOperator) limitOps.iterator().next();
}
else if (limitOps.size() == 0) {
- return 0;
+ return null;
}
return null;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/6273bee3/ql/src/java/org/apache/hadoop/hive/ql/optimizer/LimitPushdownOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/LimitPushdownOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/LimitPushdownOptimizer.java
index 5a55061..4ca2d7d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/LimitPushdownOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/LimitPushdownOptimizer.java
@@ -41,6 +41,7 @@ import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.LimitDesc;
/**
* Make RS calculate top-K selection for limit clause.
@@ -130,7 +131,9 @@ public class LimitPushdownOptimizer extends Transform {
return false;
}
LimitOperator limit = (LimitOperator) nd;
- rs.getConf().setTopN(limit.getConf().getLimit());
+ LimitDesc limitDesc = limit.getConf();
+ Integer offset = limitDesc.getOffset();
+ rs.getConf().setTopN(limitDesc.getLimit() + ((offset == null) ? 0 : offset));
rs.getConf().setTopNMemoryUsage(((LimitPushdownContext) procCtx).threshold);
if (rs.getNumChild() == 1 && rs.getChildren().get(0) instanceof GroupByOperator) {
rs.getConf().setMapGroupBy(true);
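
Because the downstream LimitOperator still discards the first offset rows, the ReduceSink's top-K heap has to retain offset + limit rows rather than limit alone. A small sketch of that sizing, with hypothetical values:

// Sketch of the top-N sizing above: the ReduceSink keeps offset + limit
// rows because the LimitOperator after it still drops the first offset.
public class TopNSizingSketch {
  public static void main(String[] args) {
    Integer offset = 10;   // null when the query has no offset
    int limit = 20;
    int topN = limit + ((offset == null) ? 0 : offset);
    System.out.println(topN);  // 30 rows kept per reducer for LIMIT 10,20
  }
}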
http://git-wip-us.apache.org/repos/asf/hive/blob/6273bee3/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortLimit.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortLimit.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortLimit.java
index 0c8728d..6ed2914 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortLimit.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortLimit.java
@@ -83,6 +83,10 @@ public class HiveSortLimit extends Sort implements HiveRelNode {
return fetch;
}
+ public RexNode getOffsetExpr() {
+ return offset;
+ }
+
public void setInputRefToCallMap(ImmutableMap<Integer, RexNode> refToCall) {
this.mapOfInputRefToRexCall = refToCall;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/6273bee3/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java
index 1f5d919..425514d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java
@@ -128,9 +128,10 @@ class ASTBuilder {
return ASTBuilder.construct(HiveParser.TOK_HAVING, "TOK_HAVING").add(cond).node();
}
- static ASTNode limit(Object value) {
+ static ASTNode limit(Object offset, Object limit) {
return ASTBuilder.construct(HiveParser.TOK_LIMIT, "TOK_LIMIT")
- .add(HiveParser.Number, value.toString()).node();
+ .add(HiveParser.Number, offset.toString())
+ .add(HiveParser.Number, limit.toString()).node();
}
static ASTNode selectExpr(ASTNode expr, String alias) {
http://git-wip-us.apache.org/repos/asf/hive/blob/6273bee3/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
index d026e58..3f2267d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
@@ -251,10 +251,13 @@ public class ASTConverter {
hiveAST.order = orderAst;
}
- RexNode limitExpr = hiveSortLimit.getFetchExpr();
- if (limitExpr != null) {
- Object val = ((RexLiteral) limitExpr).getValue2();
- hiveAST.limit = ASTBuilder.limit(val);
+ RexNode offsetExpr = hiveSortLimit.getOffsetExpr();
+ RexNode fetchExpr = hiveSortLimit.getFetchExpr();
+ if (fetchExpr != null) {
+ Object offset = (offsetExpr == null) ?
+ new Integer(0) : ((RexLiteral) offsetExpr).getValue2();
+ Object fetch = ((RexLiteral) fetchExpr).getValue2();
+ hiveAST.limit = ASTBuilder.limit(offset, fetch);
}
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/6273bee3/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 20945a0..0ca6949 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -26,6 +26,7 @@ import java.util.BitSet;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashMap;
+import java.util.AbstractMap.SimpleEntry;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
@@ -2440,13 +2441,17 @@ public class CalcitePlanner extends SemanticAnalyzer {
private RelNode genLimitLogicalPlan(QB qb, RelNode srcRel) throws SemanticException {
HiveRelNode sortRel = null;
QBParseInfo qbp = getQBParseInfo(qb);
- Integer limit = qbp.getDestToLimit().get(qbp.getClauseNames().iterator().next());
-
- if (limit != null) {
- RexNode fetch = cluster.getRexBuilder().makeExactLiteral(BigDecimal.valueOf(limit));
+ SimpleEntry<Integer,Integer> entry =
+ qbp.getDestToLimit().get(qbp.getClauseNames().iterator().next());
+ Integer offset = (entry == null) ? 0 : entry.getKey();
+ Integer fetch = (entry == null) ? null : entry.getValue();
+
+ if (fetch != null) {
+ RexNode offsetRN = cluster.getRexBuilder().makeExactLiteral(BigDecimal.valueOf(offset));
+ RexNode fetchRN = cluster.getRexBuilder().makeExactLiteral(BigDecimal.valueOf(fetch));
RelTraitSet traitSet = cluster.traitSetOf(HiveRelNode.CONVENTION);
RelCollation canonizedCollation = traitSet.canonize(RelCollations.EMPTY);
- sortRel = new HiveSortLimit(cluster, traitSet, srcRel, canonizedCollation, null, fetch);
+ sortRel = new HiveSortLimit(cluster, traitSet, srcRel, canonizedCollation, offsetRN, fetchRN);
RowResolver outputRR = new RowResolver();
if (!RowResolver.add(outputRR, relToHiveRR.get(srcRel))) {
http://git-wip-us.apache.org/repos/asf/hive/blob/6273bee3/ql/src/java/org/apache/hadoop/hive/ql/parse/GlobalLimitCtx.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/GlobalLimitCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/GlobalLimitCtx.java
index 6cd636c..c37f9ce 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/GlobalLimitCtx.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/GlobalLimitCtx.java
@@ -27,6 +27,7 @@ public class GlobalLimitCtx {
private boolean enable;
private int globalLimit;
+ private int globalOffset;
private boolean hasTransformOrUDTF;
private LimitDesc lastReduceLimitDesc;
@@ -38,6 +39,10 @@ public class GlobalLimitCtx {
return globalLimit;
}
+ public int getGlobalOffset() {
+ return globalOffset;
+ }
+
public boolean ifHasTransformOrUDTF() {
return hasTransformOrUDTF;
}
@@ -58,20 +63,23 @@ public class GlobalLimitCtx {
return enable;
}
- public void enableOpt(int globalLimit) {
+ public void enableOpt(int globalLimit, int globalOffset) {
this.enable = true;
this.globalLimit = globalLimit;
+ this.globalOffset = globalOffset;
}
public void disableOpt() {
this.enable = false;
this.globalLimit = -1;
+ this.globalOffset = 0;
this.lastReduceLimitDesc = null;
}
public void reset() {
enable = false;
globalLimit = -1;
+ globalOffset = 0;
hasTransformOrUDTF = false;
lastReduceLimitDesc = null;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/6273bee3/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
index 395152f..1c72b1c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
@@ -185,6 +185,7 @@ KW_DEFERRED: 'DEFERRED';
KW_SERDEPROPERTIES: 'SERDEPROPERTIES';
KW_DBPROPERTIES: 'DBPROPERTIES';
KW_LIMIT: 'LIMIT';
+KW_OFFSET: 'OFFSET';
KW_SET: 'SET';
KW_UNSET: 'UNSET';
KW_TBLPROPERTIES: 'TBLPROPERTIES';
http://git-wip-us.apache.org/repos/asf/hive/blob/6273bee3/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
index f6ea2a3..d5051ce 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
@@ -249,6 +249,7 @@ TOK_INDEXPROPERTIES;
TOK_INDEXPROPLIST;
TOK_TABTYPE;
TOK_LIMIT;
+TOK_OFFSET;
TOK_TABLEPROPERTY;
TOK_IFEXISTS;
TOK_IFNOTEXISTS;
@@ -497,6 +498,7 @@ import org.apache.hadoop.hive.conf.HiveConf;
xlateMap.put("KW_WITH", "WITH");
xlateMap.put("KW_SERDEPROPERTIES", "SERDEPROPERTIES");
xlateMap.put("KW_LIMIT", "LIMIT");
+ xlateMap.put("KW_OFFSET", "OFFSET");
xlateMap.put("KW_SET", "SET");
xlateMap.put("KW_PROPERTIES", "TBLPROPERTIES");
xlateMap.put("KW_VALUE_TYPE", "\$VALUE\$");
@@ -2362,7 +2364,8 @@ limitClause
@init { pushMsg("limit clause", state); }
@after { popMsg(state); }
:
- KW_LIMIT num=Number -> ^(TOK_LIMIT $num)
+ KW_LIMIT ((offset=Number COMMA)? num=Number) -> ^(TOK_LIMIT ($offset)? $num)
+ | KW_LIMIT num=Number KW_OFFSET offset=Number -> ^(TOK_LIMIT ($offset)? $num)
;
//DELETE FROM <tableName> WHERE ...;
http://git-wip-us.apache.org/repos/asf/hive/blob/6273bee3/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
index bac0d22..15ca754 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
@@ -653,7 +653,7 @@ nonReserved
| KW_ENABLE | KW_ESCAPED | KW_EXCLUSIVE | KW_EXPLAIN | KW_EXPORT | KW_FIELDS | KW_FILE | KW_FILEFORMAT
| KW_FIRST | KW_FORMAT | KW_FORMATTED | KW_FUNCTIONS | KW_HOLD_DDLTIME | KW_HOUR | KW_IDXPROPERTIES | KW_IGNORE
| KW_INDEX | KW_INDEXES | KW_INPATH | KW_INPUTDRIVER | KW_INPUTFORMAT | KW_ITEMS | KW_JAR
- | KW_KEYS | KW_KEY_TYPE | KW_LIMIT | KW_LINES | KW_LOAD | KW_LOCATION | KW_LOCK | KW_LOCKS | KW_LOGICAL | KW_LONG
+ | KW_KEYS | KW_KEY_TYPE | KW_LIMIT | KW_OFFSET | KW_LINES | KW_LOAD | KW_LOCATION | KW_LOCK | KW_LOCKS | KW_LOGICAL | KW_LONG
| KW_MAPJOIN | KW_MATERIALIZED | KW_METADATA | KW_MINUS | KW_MINUTE | KW_MONTH | KW_MSCK | KW_NOSCAN | KW_NO_DROP | KW_OFFLINE
| KW_OPTION | KW_OUTPUTDRIVER | KW_OUTPUTFORMAT | KW_OVERWRITE | KW_OWNER | KW_PARTITIONED | KW_PARTITIONS | KW_PLUS | KW_PRETTY
| KW_PRINCIPALS | KW_PROTECTION | KW_PURGE | KW_READ | KW_READONLY | KW_REBUILD | KW_RECORDREADER | KW_RECORDWRITER
http://git-wip-us.apache.org/repos/asf/hive/blob/6273bee3/ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java
index d41253f..5b08ed2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java
@@ -215,7 +215,8 @@ public class MapReduceCompiler extends TaskCompiler {
//
long sizePerRow = HiveConf.getLongVar(conf,
HiveConf.ConfVars.HIVELIMITMAXROWSIZE);
- estimatedInput = globalLimitCtx.getGlobalLimit() * sizePerRow;
+ estimatedInput = (globalLimitCtx.getGlobalOffset() +
+ globalLimitCtx.getGlobalLimit()) * sizePerRow;
long minSplitSize = HiveConf.getLongVar(conf,
HiveConf.ConfVars.MAPREDMINSPLITSIZE);
long estimatedNumMap = inputSummary.getLength() / minSplitSize + 1;
http://git-wip-us.apache.org/repos/asf/hive/blob/6273bee3/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java
index 186c2a8..3a226e7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java
@@ -26,6 +26,7 @@ import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.AbstractMap.SimpleEntry;
import org.antlr.runtime.tree.Tree;
import org.slf4j.Logger;
@@ -99,7 +100,10 @@ public class QBParseInfo {
/* Order by clause */
private final HashMap<String, ASTNode> destToOrderby;
- private final HashMap<String, Integer> destToLimit;
+ // Use SimpleEntry to save the offset and rowcount of limit clause
+ // KEY of SimpleEntry: offset
+ // VALUE of SimpleEntry: rowcount
+ private final HashMap<String, SimpleEntry<Integer, Integer>> destToLimit;
private int outerQueryLimit;
// used by GroupBy
@@ -128,7 +132,7 @@ public class QBParseInfo {
destToDistributeby = new HashMap<String, ASTNode>();
destToSortby = new HashMap<String, ASTNode>();
destToOrderby = new HashMap<String, ASTNode>();
- destToLimit = new HashMap<String, Integer>();
+ destToLimit = new HashMap<String, SimpleEntry<Integer, Integer>>();
insertIntoTables = new HashSet<String>();
destRollups = new HashSet<String>();
destCubes = new HashSet<String>();
@@ -440,12 +444,16 @@ public class QBParseInfo {
exprToColumnAlias.put(expr, alias);
}
- public void setDestLimit(String dest, Integer limit) {
- destToLimit.put(dest, limit);
+ public void setDestLimit(String dest, Integer offset, Integer limit) {
+ destToLimit.put(dest, new SimpleEntry<>(offset, limit));
}
public Integer getDestLimit(String dest) {
- return destToLimit.get(dest);
+ return destToLimit.get(dest) == null ? null : destToLimit.get(dest).getValue();
+ }
+
+ public Integer getDestLimitOffset(String dest) {
+ return destToLimit.get(dest) == null ? 0 : destToLimit.get(dest).getKey();
}
/**
@@ -566,7 +574,7 @@ public class QBParseInfo {
return tableSpecs.get(tName.next());
}
- public HashMap<String, Integer> getDestToLimit() {
+ public HashMap<String, SimpleEntry<Integer,Integer>> getDestToLimit() {
return destToLimit;
}
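
A minimal sketch of the destToLimit convention introduced above (SimpleEntry key = offset, value = rowcount), with a hypothetical clause name; the two lookups mirror getDestLimitOffset and getDestLimit:

// Minimal sketch of the destToLimit bookkeeping; the clause name is
// hypothetical and the null handling mirrors the accessors above.
import java.util.AbstractMap.SimpleEntry;
import java.util.HashMap;

public class DestToLimitSketch {
  public static void main(String[] args) {
    HashMap<String, SimpleEntry<Integer, Integer>> destToLimit = new HashMap<>();
    destToLimit.put("insclause-0", new SimpleEntry<>(10, 20)); // LIMIT 10,20
    SimpleEntry<Integer, Integer> e = destToLimit.get("insclause-0");
    Integer offset = (e == null) ? 0 : e.getKey();      // getDestLimitOffset
    Integer limit = (e == null) ? null : e.getValue();  // getDestLimit
    System.out.println(offset + "," + limit);           // 10,20
  }
}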
http://git-wip-us.apache.org/repos/asf/hive/blob/6273bee3/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 5803a9c..b888860 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -1328,7 +1328,14 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
break;
case HiveParser.TOK_LIMIT:
- qbp.setDestLimit(ctx_1.dest, new Integer(ast.getChild(0).getText()));
+ if (ast.getChildCount() == 2) {
+ qbp.setDestLimit(ctx_1.dest,
+ new Integer(ast.getChild(0).getText()),
+ new Integer(ast.getChild(1).getText()));
+ } else {
+ qbp.setDestLimit(ctx_1.dest, new Integer(0),
+ new Integer(ast.getChild(0).getText()));
+ }
break;
case HiveParser.TOK_ANALYZE:
@@ -6801,7 +6808,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
}
@SuppressWarnings("nls")
- private Operator genLimitPlan(String dest, QB qb, Operator input, int limit)
+ private Operator genLimitPlan(String dest, QB qb, Operator input, int offset, int limit)
throws SemanticException {
// A map-only job can be optimized - instead of converting it to a
// map-reduce job, we can have another map
@@ -6812,7 +6819,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
RowResolver inputRR = opParseCtx.get(input).getRowResolver();
- LimitDesc limitDesc = new LimitDesc(limit);
+ LimitDesc limitDesc = new LimitDesc(offset, limit);
globalLimitCtx.setLastReduceLimitDesc(limitDesc);
Operator limitMap = putOpInsertMap(OperatorFactory.getAndMakeChild(
@@ -6922,14 +6929,14 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
@SuppressWarnings("nls")
private Operator genLimitMapRedPlan(String dest, QB qb, Operator input,
- int limit, boolean extraMRStep) throws SemanticException {
+ int offset, int limit, boolean extraMRStep) throws SemanticException {
// A map-only job can be optimized - instead of converting it to a
// map-reduce job, we can have another map
// job to do the same to avoid the cost of sorting in the map-reduce phase.
// A better approach would be to
// write into a local file and then have a map-only job.
// Add the limit operator to get the value fields
- Operator curr = genLimitPlan(dest, qb, input, limit);
+ Operator curr = genLimitPlan(dest, qb, input, offset, limit);
// the client requested that an extra map-reduce step be performed
if (!extraMRStep) {
@@ -6938,7 +6945,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
// Create a reduceSink operator followed by another limit
curr = genReduceSinkPlan(dest, qb, curr, 1, false);
- return genLimitPlan(dest, qb, curr, limit);
+ return genLimitPlan(dest, qb, curr, offset, limit);
}
private ArrayList<ExprNodeDesc> getPartitionColsFromBucketCols(String dest, QB qb, Table tab,
@@ -8871,6 +8878,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
curr = genSelectPlan(dest, qb, curr, gbySource);
Integer limit = qbp.getDestLimit(dest);
+ Integer offset = (qbp.getDestLimitOffset(dest) == null) ? 0 : qbp.getDestLimitOffset(dest);
// Expressions are not supported currently without a alias.
@@ -8915,7 +8923,8 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
if (limit != null) {
// In case of order by, only 1 reducer is used, so no need of
// another shuffle
- curr = genLimitMapRedPlan(dest, qb, curr, limit.intValue(), !hasOrderBy);
+ curr = genLimitMapRedPlan(dest, qb, curr, offset.intValue(),
+ limit.intValue(), !hasOrderBy);
}
} else {
// exact limit can be taken care of by the fetch operator
@@ -8928,8 +8937,8 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
extraMRStep = false;
}
- curr = genLimitMapRedPlan(dest, qb, curr, limit.intValue(),
- extraMRStep);
+ curr = genLimitMapRedPlan(dest, qb, curr, offset.intValue(),
+ limit.intValue(), extraMRStep);
qb.getParseInfo().setOuterQueryLimit(limit.intValue());
}
if (!SessionState.get().getHiveOperation().equals(HiveOperation.CREATEVIEW)) {
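
The TOK_LIMIT handling above relies on the child layout produced by the grammar change: with two Number children the first is the offset and the second the row count, while a single child means the offset defaults to 0. A hedged standalone sketch of that mapping, with the AST stubbed out as a string array:

// Sketch of the TOK_LIMIT child layout consumed above; the String[]
// stands in for ast.getChild(i).getText() and is invented for this sketch.
public class TokLimitChildrenSketch {
  static int[] destLimit(String[] children) {
    if (children.length == 2) {
      return new int[] { Integer.parseInt(children[0]),
                         Integer.parseInt(children[1]) };  // offset, limit
    }
    return new int[] { 0, Integer.parseInt(children[0]) }; // offset defaults to 0
  }
  public static void main(String[] args) {
    int[] a = destLimit(new String[] { "10", "20" }); // LIMIT 10,20 or LIMIT 20 OFFSET 10
    int[] b = destLimit(new String[] { "20" });       // LIMIT 20
    System.out.println(a[0] + "," + a[1] + " | " + b[0] + "," + b[1]); // 10,20 | 0,20
  }
}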
http://git-wip-us.apache.org/repos/asf/hive/blob/6273bee3/ql/src/java/org/apache/hadoop/hive/ql/plan/LimitDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/LimitDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/LimitDesc.java
index f88bf63..8448a41 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/LimitDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/LimitDesc.java
@@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.plan.Explain.Level;
@Explain(displayName = "Limit", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED })
public class LimitDesc extends AbstractOperatorDesc {
private static final long serialVersionUID = 1L;
+ private int offset = 0;
private int limit;
private int leastRows = -1;
@@ -37,6 +38,24 @@ public class LimitDesc extends AbstractOperatorDesc {
this.limit = limit;
}
+ public LimitDesc(final int offset, final int limit) {
+ this.offset = offset;
+ this.limit = limit;
+ }
+
+ /**
+ * To avoid printing the offset when it is 0, we return null,
+ * hence Integer instead of int.
+ */
+ @Explain(displayName = "Offset of rows", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED })
+ public Integer getOffset() {
+ return (offset == 0) ? null : new Integer(offset);
+ }
+
+ public void setOffset(Integer offset) {
+ this.offset = offset;
+ }
+
@Explain(displayName = "Number of rows", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED })
public int getLimit() {
return limit;
http://git-wip-us.apache.org/repos/asf/hive/blob/6273bee3/ql/src/test/queries/clientpositive/offset_limit.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/offset_limit.q b/ql/src/test/queries/clientpositive/offset_limit.q
new file mode 100644
index 0000000..80d559a
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/offset_limit.q
@@ -0,0 +1,25 @@
+EXPLAIN
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 10,10;
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 10,10;
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 0,10;
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 1,10;
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 300,100;
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 100 OFFSET 300;
+
+set hive.cbo.enable=false;
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 10,10;
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 0,10;
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 1,10;
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 300,100;
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 100 OFFSET 300;
+
http://git-wip-us.apache.org/repos/asf/hive/blob/6273bee3/ql/src/test/queries/clientpositive/offset_limit_global_optimizer.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/offset_limit_global_optimizer.q b/ql/src/test/queries/clientpositive/offset_limit_global_optimizer.q
new file mode 100644
index 0000000..5ddb9a6
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/offset_limit_global_optimizer.q
@@ -0,0 +1,45 @@
+set hive.limit.optimize.enable=true;
+set hive.limit.row.max.size=12;
+set hive.mapred.mode=nonstrict;
+
+EXPLAIN EXTENDED
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 400,10;
+
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 400,10;
+
+EXPLAIN EXTENDED
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,10;
+
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,10;
+
+EXPLAIN EXTENDED
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,20;
+
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,20;
+
+EXPLAIN EXTENDED
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,600;
+
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,600;
+
+set hive.cbo.enable=false;
+
+EXPLAIN EXTENDED
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 400,10;
+
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 400,10;
+
+EXPLAIN EXTENDED
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,10;
+
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,10;
+
+EXPLAIN EXTENDED
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,20;
+
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,20;
+
+EXPLAIN EXTENDED
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,600;
+
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,600;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/6273bee3/ql/src/test/queries/clientpositive/offset_limit_ppd_optimizer.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/offset_limit_ppd_optimizer.q b/ql/src/test/queries/clientpositive/offset_limit_ppd_optimizer.q
new file mode 100644
index 0000000..2895203
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/offset_limit_ppd_optimizer.q
@@ -0,0 +1,80 @@
+set hive.explain.user=false;
+set hive.limit.pushdown.memory.usage=0.3f;
+set hive.optimize.reducededuplication.min.reducer=1;
+
+explain
+select key,value from src order by key limit 10,20;
+select key,value from src order by key limit 10,20;
+
+explain
+select key,value from src order by key desc limit 10,20;
+select key,value from src order by key desc limit 10,20;
+
+explain
+select value, sum(key + 1) as sum from src group by value order by value limit 10,20;
+select value, sum(key + 1) as sum from src group by value order by value limit 10,20;
+
+-- deduped RS
+explain
+select value,avg(key + 1) from src group by value order by value limit 10,20;
+select value,avg(key + 1) from src group by value order by value limit 10,20;
+
+-- distincts
+explain
+select distinct(cdouble) as dis from alltypesorc order by dis limit 10,20;
+select distinct(cdouble) as dis from alltypesorc order by dis limit 10,20;
+
+explain
+select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 10,20;
+select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 10,20;
+
+explain
+select ctinyint, count(cdouble) from (select ctinyint, cdouble from alltypesorc group by ctinyint, cdouble) t1 group by ctinyint order by ctinyint limit 10,20;
+select ctinyint, count(cdouble) from (select ctinyint, cdouble from alltypesorc group by ctinyint, cdouble) t1 group by ctinyint order by ctinyint limit 10,20;
+
+-- multi distinct
+explain
+select ctinyint, count(distinct(cstring1)), count(distinct(cstring2)) from alltypesorc group by ctinyint order by ctinyint limit 10,20;
+select ctinyint, count(distinct(cstring1)), count(distinct(cstring2)) from alltypesorc group by ctinyint order by ctinyint limit 10,20;
+
+-- limit zero
+explain
+select key,value from src order by key limit 0,0;
+select key,value from src order by key limit 0,0;
+
+-- 2MR (applied to last RS)
+explain
+select value, sum(key) as sum from src group by value order by sum limit 10,20;
+select value, sum(key) as sum from src group by value order by sum limit 10,20;
+
+set hive.map.aggr=false;
+-- map aggregation disabled
+explain
+select value, sum(key) as sum from src group by value order by value limit 10,20;
+select value, sum(key) as sum from src group by value order by value limit 10,20;
+
+set hive.limit.pushdown.memory.usage=0.00002f;
+
+-- flush for order-by
+explain
+select key,value,value,value,value,value,value,value,value from src order by key limit 30,70;
+select key,value,value,value,value,value,value,value,value from src order by key limit 30,70;
+
+-- flush for group-by
+explain
+select sum(key) as sum from src group by concat(key,value,value,value,value,value,value,value,value,value) order by sum limit 30,70;
+select sum(key) as sum from src group by concat(key,value,value,value,value,value,value,value,value,value) order by sum limit 30,70;
+
+-- subqueries
+explain
+select * from
+(select key, count(1) from src group by key order by key limit 10,20) subq
+join
+(select key, count(1) from src group by key limit 20,20) subq2
+on subq.key=subq2.key limit 3,5;
+select * from
+(select key, count(1) from src group by key order by key limit 10,20) subq
+join
+(select key, count(1) from src group by key order by key limit 20,20) subq2
+on subq.key=subq2.key limit 3,5;
+
http://git-wip-us.apache.org/repos/asf/hive/blob/6273bee3/ql/src/test/queries/clientpositive/vectorization_offset_limit.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vectorization_offset_limit.q b/ql/src/test/queries/clientpositive/vectorization_offset_limit.q
new file mode 100644
index 0000000..3d01154
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vectorization_offset_limit.q
@@ -0,0 +1,10 @@
+set hive.explain.user=false;
+SET hive.vectorized.execution.enabled=true;
+set hive.mapred.mode=nonstrict;
+
+explain SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 3,2;
+SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 3,2;
+
+explain
+select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 10,3;
+select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 10,3;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/6273bee3/ql/src/test/results/clientpositive/offset_limit.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/offset_limit.q.out b/ql/src/test/results/clientpositive/offset_limit.q.out
new file mode 100644
index 0000000..2092c1d
--- /dev/null
+++ b/ql/src/test/results/clientpositive/offset_limit.q.out
@@ -0,0 +1,257 @@
+PREHOOK: query: EXPLAIN
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 10,10
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 10,10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(substr(value, 5))
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: double)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: double)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Offset of rows: 10
+ Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 10,10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 10,10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+116 116.0
+118 236.0
+119 357.0
+12 24.0
+120 240.0
+125 250.0
+126 126.0
+128 384.0
+129 258.0
+131 131.0
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 0,10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 0,10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 0.0
+10 10.0
+100 200.0
+103 206.0
+104 208.0
+105 105.0
+11 11.0
+111 111.0
+113 226.0
+114 114.0
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 1,10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 1,10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+10 10.0
+100 200.0
+103 206.0
+104 208.0
+105 105.0
+11 11.0
+111 111.0
+113 226.0
+114 114.0
+116 116.0
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 300,100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 300,100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+86 86.0
+87 87.0
+9 9.0
+90 270.0
+92 92.0
+95 190.0
+96 96.0
+97 194.0
+98 196.0
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 100 OFFSET 300
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 100 OFFSET 300
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+86 86.0
+87 87.0
+9 9.0
+90 270.0
+92 92.0
+95 190.0
+96 96.0
+97 194.0
+98 196.0
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 10,10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 10,10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+116 116.0
+118 236.0
+119 357.0
+12 24.0
+120 240.0
+125 250.0
+126 126.0
+128 384.0
+129 258.0
+131 131.0
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 0,10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 0,10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 0.0
+10 10.0
+100 200.0
+103 206.0
+104 208.0
+105 105.0
+11 11.0
+111 111.0
+113 226.0
+114 114.0
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 1,10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 1,10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+10 10.0
+100 200.0
+103 206.0
+104 208.0
+105 105.0
+11 11.0
+111 111.0
+113 226.0
+114 114.0
+116 116.0
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 300,100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 300,100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+86 86.0
+87 87.0
+9 9.0
+90 270.0
+92 92.0
+95 190.0
+96 96.0
+97 194.0
+98 196.0
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 100 OFFSET 300
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 100 OFFSET 300
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+86 86.0
+87 87.0
+9 9.0
+90 270.0
+92 92.0
+95 190.0
+96 96.0
+97 194.0
+98 196.0