You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2015/01/24 01:16:37 UTC
svn commit: r1654439 [1/10] - in /hive/trunk: itests/src/test/resources/
itests/util/src/main/java/org/apache/hadoop/hive/ql/
ql/src/java/org/apache/hadoop/hive/ql/optimizer/
ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/
ql/src/java/org/apache...
Author: hashutosh
Date: Sat Jan 24 00:16:36 2015
New Revision: 1654439
URL: http://svn.apache.org/r1654439
Log:
HIVE-9341 : Apply ColumnPruning for noop PTFs (Navis via Ashutosh Chauhan)
Added:
hive/trunk/ql/src/test/results/clientpositive/tez/ptf_streaming.q.out
Modified:
hive/trunk/itests/src/test/resources/testconfiguration.properties
hive/trunk/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkSkewJoinProcFactory.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java
hive/trunk/ql/src/test/queries/clientpositive/ptf.q
hive/trunk/ql/src/test/queries/clientpositive/ptf_streaming.q
hive/trunk/ql/src/test/results/clientpositive/ptf.q.out
hive/trunk/ql/src/test/results/clientpositive/ptf_streaming.q.out
hive/trunk/ql/src/test/results/clientpositive/spark/ptf.q.out
hive/trunk/ql/src/test/results/clientpositive/spark/ptf_streaming.q.out
hive/trunk/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/ptf.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out
hive/trunk/ql/src/test/results/clientpositive/vectorized_ptf.q.out
Modified: hive/trunk/itests/src/test/resources/testconfiguration.properties
URL: http://svn.apache.org/viewvc/hive/trunk/itests/src/test/resources/testconfiguration.properties?rev=1654439&r1=1654438&r2=1654439&view=diff
==============================================================================
--- hive/trunk/itests/src/test/resources/testconfiguration.properties (original)
+++ hive/trunk/itests/src/test/resources/testconfiguration.properties Sat Jan 24 00:16:36 2015
@@ -139,6 +139,7 @@ minitez.query.files.shared=alter_merge_2
orc_vectorization_ppd.q,\
parallel.q,\
ptf.q,\
+ ptf_streaming.q,\
sample1.q,\
selectDistinctStar.q,\
script_env_var1.q,\
Modified: hive/trunk/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java
URL: http://svn.apache.org/viewvc/hive/trunk/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java?rev=1654439&r1=1654438&r2=1654439&view=diff
==============================================================================
--- hive/trunk/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java (original)
+++ hive/trunk/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java Sat Jan 24 00:16:36 2015
@@ -976,7 +976,9 @@ public class QTestUtil {
}
command = "";
}
-
+ if (SessionState.get() != null) {
+ SessionState.get().setLastCommand(null); // reset
+ }
return rc;
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java?rev=1654439&r1=1654438&r2=1654439&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java Sat Jan 24 00:16:36 2015
@@ -62,6 +62,7 @@ import org.apache.hadoop.hive.ql.plan.Ta
import org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef;
import org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef;
import org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef;
+import org.apache.hadoop.hive.ql.udf.ptf.Noop;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
@@ -266,26 +267,28 @@ public final class ColumnPrunerProcFacto
PTFDesc conf = op.getConf();
//Since we cannot know what columns will be needed by a PTF chain,
//we do not prune columns on PTFOperator for PTF chains.
- if (!conf.forWindowing()) {
+ if (!conf.forWindowing() && !Noop.class.isInstance(conf.getFuncDef().getTFunction())) {
return super.process(nd, stack, cppCtx, nodeOutputs);
}
-
- WindowTableFunctionDef def = (WindowTableFunctionDef) conf.getFuncDef();
- ArrayList<ColumnInfo> sig = new ArrayList<ColumnInfo>();
-
List<String> prunedCols = cppCtx.getPrunedColList(op.getChildOperators().get(0));
- //we create a copy of prunedCols to create a list of pruned columns for PTFOperator
- prunedCols = new ArrayList<String>(prunedCols);
- prunedColumnsList(prunedCols, def);
RowResolver oldRR = cppCtx.getOpToParseCtxMap().get(op).getRowResolver();
+
+ WindowTableFunctionDef def = null;
+ if (conf.forWindowing()) {
+ def = (WindowTableFunctionDef) conf.getFuncDef();
+ prunedCols = prunedColumnsList(prunedCols, def);
+ }
+ ArrayList<ColumnInfo> sig = new ArrayList<ColumnInfo>();
RowResolver newRR = buildPrunedRR(prunedCols, oldRR, sig);
- cppCtx.getPrunedColLists().put(op, prunedInputList(prunedCols, def));
cppCtx.getOpToParseCtxMap().get(op).setRowResolver(newRR);
op.getSchema().setSignature(sig);
+
+ prunedCols = def == null ? prunedCols : prunedInputList(prunedCols, def);
+ cppCtx.getPrunedColLists().put(op, prunedCols);
return null;
}
- private static RowResolver buildPrunedRR(List<String> prunedCols,
+ private RowResolver buildPrunedRR(List<String> prunedCols,
RowResolver oldRR, ArrayList<ColumnInfo> sig) throws SemanticException{
RowResolver newRR = new RowResolver();
HashSet<String> prunedColsSet = new HashSet<String>(prunedCols);
@@ -302,7 +305,10 @@ public final class ColumnPrunerProcFacto
/*
* add any input columns referenced in WindowFn args or expressions.
*/
- private void prunedColumnsList(List<String> prunedCols, WindowTableFunctionDef tDef) {
+ private ArrayList<String> prunedColumnsList(List<String> prunedCols,
+ WindowTableFunctionDef tDef) {
+ //we create a copy of prunedCols to create a list of pruned columns for PTFOperator
+ ArrayList<String> mergedColList = new ArrayList<String>(prunedCols);
if ( tDef.getWindowFunctions() != null ) {
for(WindowFunctionDef wDef : tDef.getWindowFunctions() ) {
if ( wDef.getArgs() == null) {
@@ -310,22 +316,23 @@ public final class ColumnPrunerProcFacto
}
for(PTFExpressionDef arg : wDef.getArgs()) {
ExprNodeDesc exprNode = arg.getExprNode();
- Utilities.mergeUniqElems(prunedCols, exprNode.getCols());
+ Utilities.mergeUniqElems(mergedColList, exprNode.getCols());
}
}
}
if(tDef.getPartition() != null){
for(PTFExpressionDef col : tDef.getPartition().getExpressions()){
ExprNodeDesc exprNode = col.getExprNode();
- Utilities.mergeUniqElems(prunedCols, exprNode.getCols());
+ Utilities.mergeUniqElems(mergedColList, exprNode.getCols());
}
}
if(tDef.getOrder() != null){
for(PTFExpressionDef col : tDef.getOrder().getExpressions()){
ExprNodeDesc exprNode = col.getExprNode();
- Utilities.mergeUniqElems(prunedCols, exprNode.getCols());
+ Utilities.mergeUniqElems(mergedColList, exprNode.getCols());
}
}
+ return mergedColList;
}
/*
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkSkewJoinProcFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkSkewJoinProcFactory.java?rev=1654439&r1=1654438&r2=1654439&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkSkewJoinProcFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkSkewJoinProcFactory.java Sat Jan 24 00:16:36 2015
@@ -99,7 +99,7 @@ public class SparkSkewJoinProcFactory {
private static void splitTask(SparkTask currentTask, ReduceWork reduceWork,
ParseContext parseContext) throws SemanticException {
SparkWork currentWork = currentTask.getWork();
- Set<Operator<? extends OperatorDesc>> reduceSinkSet =
+ Set<Operator<?>> reduceSinkSet =
SparkMapJoinResolver.getOp(reduceWork, ReduceSinkOperator.class);
if (currentWork.getChildren(reduceWork).size() == 1 && canSplit(currentWork)
&& reduceSinkSet.size() == 1) {
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java?rev=1654439&r1=1654438&r2=1654439&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java Sat Jan 24 00:16:36 2015
@@ -134,7 +134,9 @@ public class PredicatePushDown implement
topNodes.addAll(pGraphContext.getTopOps().values());
ogw.startWalking(topNodes, null);
- LOG.debug("After PPD:\n" + Operator.toString(pctx.getTopOps().values()));
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("After PPD:\n" + Operator.toString(pctx.getTopOps().values()));
+ }
return pGraphContext;
}
Modified: hive/trunk/ql/src/test/queries/clientpositive/ptf.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/ptf.q?rev=1654439&r1=1654438&r2=1654439&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/ptf.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/ptf.q Sat Jan 24 00:16:36 2015
@@ -1,6 +1,16 @@
-- SORT_QUERY_RESULTS
--1. test1
+explain
+select p_mfgr, p_name, p_size,
+rank() over (partition by p_mfgr order by p_name) as r,
+dense_rank() over (partition by p_mfgr order by p_name) as dr,
+sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1
+from noop(on part
+ partition by p_mfgr
+ order by p_name
+ );
+
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -11,6 +21,14 @@ from noop(on part
);
-- 2. testJoinWithNoop
+explain
+select p_mfgr, p_name,
+p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz
+from noop (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j
+distribute by j.p_mfgr
+sort by j.p_name)
+;
+
select p_mfgr, p_name,
p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz
from noop (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j
@@ -19,12 +37,28 @@ sort by j.p_name)
;
-- 3. testOnlyPTF
+explain
+select p_mfgr, p_name, p_size
+from noop(on part
+partition by p_mfgr
+order by p_name);
+
select p_mfgr, p_name, p_size
from noop(on part
partition by p_mfgr
order by p_name);
-- 4. testPTFAlias
+explain
+select p_mfgr, p_name, p_size,
+rank() over (partition by p_mfgr order by p_name) as r,
+dense_rank() over (partition by p_mfgr order by p_name) as dr,
+sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1
+from noop(on part
+ partition by p_mfgr
+ order by p_name
+ ) abc;
+
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -35,6 +69,17 @@ from noop(on part
) abc;
-- 5. testPTFAndWhereWithWindowing
+explain
+select p_mfgr, p_name, p_size,
+rank() over (partition by p_mfgr order by p_name) as r,
+dense_rank() over (partition by p_mfgr order by p_name) as dr,
+p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz
+from noop(on part
+ partition by p_mfgr
+ order by p_name
+ )
+;
+
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -46,6 +91,18 @@ from noop(on part
;
-- 6. testSWQAndPTFAndGBy
+explain
+select p_mfgr, p_name, p_size,
+rank() over (partition by p_mfgr order by p_name) as r,
+dense_rank() over (partition by p_mfgr order by p_name) as dr,
+p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz
+from noop(on part
+ partition by p_mfgr
+ order by p_name
+ )
+group by p_mfgr, p_name, p_size
+;
+
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -58,6 +115,13 @@ group by p_mfgr, p_name, p_size
;
-- 7. testJoin
+explain
+select abc.*
+from noop(on part
+partition by p_mfgr
+order by p_name
+) abc join part p1 on abc.p_partkey = p1.p_partkey;
+
select abc.*
from noop(on part
partition by p_mfgr
@@ -65,6 +129,13 @@ order by p_name
) abc join part p1 on abc.p_partkey = p1.p_partkey;
-- 8. testJoinRight
+explain
+select abc.*
+from part p1 join noop(on part
+partition by p_mfgr
+order by p_name
+) abc on abc.p_partkey = p1.p_partkey;
+
select abc.*
from part p1 join noop(on part
partition by p_mfgr
@@ -72,6 +143,13 @@ order by p_name
) abc on abc.p_partkey = p1.p_partkey;
-- 9. testNoopWithMap
+explain
+select p_mfgr, p_name, p_size,
+rank() over (partition by p_mfgr order by p_name, p_size desc) as r
+from noopwithmap(on part
+partition by p_mfgr
+order by p_name, p_size desc);
+
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name, p_size desc) as r
from noopwithmap(on part
@@ -79,6 +157,15 @@ partition by p_mfgr
order by p_name, p_size desc);
-- 10. testNoopWithMapWithWindowing
+explain
+select p_mfgr, p_name, p_size,
+rank() over (partition by p_mfgr order by p_name) as r,
+dense_rank() over (partition by p_mfgr order by p_name) as dr,
+sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1
+from noopwithmap(on part
+ partition by p_mfgr
+ order by p_name);
+
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -88,6 +175,16 @@ from noopwithmap(on part
order by p_name);
-- 11. testHavingWithWindowingPTFNoGBY
+explain
+select p_mfgr, p_name, p_size,
+rank() over (partition by p_mfgr order by p_name) as r,
+dense_rank() over (partition by p_mfgr order by p_name) as dr,
+sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1
+from noop(on part
+partition by p_mfgr
+order by p_name)
+;
+
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -98,6 +195,16 @@ order by p_name)
;
-- 12. testFunctionChain
+explain
+select p_mfgr, p_name, p_size,
+rank() over (partition by p_mfgr order by p_name) as r,
+dense_rank() over (partition by p_mfgr order by p_name) as dr,
+sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1
+from noop(on noopwithmap(on noop(on part
+partition by p_mfgr
+order by p_mfgr, p_name
+)));
+
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -108,6 +215,19 @@ order by p_mfgr, p_name
)));
-- 13. testPTFAndWindowingInSubQ
+explain
+select p_mfgr, p_name,
+sub1.cd, sub1.s1
+from (select p_mfgr, p_name,
+count(p_size) over (partition by p_mfgr order by p_name) as cd,
+p_retailprice,
+sum(p_retailprice) over w1 as s1
+from noop(on part
+partition by p_mfgr
+order by p_name)
+window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following)
+) sub1 ;
+
select p_mfgr, p_name,
sub1.cd, sub1.s1
from (select p_mfgr, p_name,
@@ -121,6 +241,19 @@ window w1 as (partition by p_mfgr order
) sub1 ;
-- 14. testPTFJoinWithWindowingWithCount
+explain
+select abc.p_mfgr, abc.p_name,
+rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r,
+dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr,
+count(abc.p_name) over (distribute by abc.p_mfgr sort by abc.p_name) as cd,
+abc.p_retailprice, sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row) as s1,
+abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over (distribute by abc.p_mfgr sort by abc.p_name) as deltaSz
+from noop(on part
+partition by p_mfgr
+order by p_name
+) abc join part p1 on abc.p_partkey = p1.p_partkey
+;
+
select abc.p_mfgr, abc.p_name,
rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r,
dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr,
@@ -134,6 +267,12 @@ order by p_name
;
-- 15. testDistinctInSelectWithPTF
+explain
+select DISTINCT p_mfgr, p_name, p_size
+from noop(on part
+partition by p_mfgr
+order by p_name);
+
select DISTINCT p_mfgr, p_name, p_size
from noop(on part
partition by p_mfgr
@@ -147,6 +286,14 @@ sum(p_retailprice) as s
from part
group by p_mfgr, p_brand;
+explain
+select p_mfgr, p_brand, s,
+sum(s) over w1 as s1
+from noop(on mfgr_price_view
+partition by p_mfgr
+order by p_mfgr)
+window w1 as ( partition by p_mfgr order by p_brand rows between 2 preceding and current row);
+
select p_mfgr, p_brand, s,
sum(s) over w1 as s1
from noop(on mfgr_price_view
@@ -173,6 +320,22 @@ dr INT,
cud DOUBLE,
fv1 INT);
+explain
+from noop(on part
+partition by p_mfgr
+order by p_name)
+INSERT OVERWRITE TABLE part_4 select p_mfgr, p_name, p_size,
+rank() over (distribute by p_mfgr sort by p_name) as r,
+dense_rank() over (distribute by p_mfgr sort by p_name) as dr,
+sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s
+INSERT OVERWRITE TABLE part_5 select p_mfgr,p_name, p_size,
+round(sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row),1) as s2,
+rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as r,
+dense_rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as dr,
+cume_dist() over (distribute by p_mfgr sort by p_mfgr, p_name) as cud,
+first_value(p_size, true) over w1 as fv1
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following);
+
from noop(on part
partition by p_mfgr
order by p_name)
@@ -193,6 +356,23 @@ select * from part_4;
select * from part_5;
-- 18. testMulti2OperatorsFunctionChainWithMap
+explain
+select p_mfgr, p_name,
+rank() over (partition by p_mfgr,p_name) as r,
+dense_rank() over (partition by p_mfgr,p_name) as dr,
+p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1
+from noop(on
+ noopwithmap(on
+ noop(on
+ noop(on part
+ partition by p_mfgr
+ order by p_mfgr)
+ )
+ partition by p_mfgr,p_name
+ order by p_mfgr,p_name)
+ partition by p_mfgr,p_name
+ order by p_mfgr,p_name) ;
+
select p_mfgr, p_name,
rank() over (partition by p_mfgr,p_name) as r,
dense_rank() over (partition by p_mfgr,p_name) as dr,
@@ -210,6 +390,23 @@ from noop(on
order by p_mfgr,p_name) ;
-- 19. testMulti3OperatorsFunctionChain
+explain
+select p_mfgr, p_name,
+rank() over (partition by p_mfgr order by p_name) as r,
+dense_rank() over (partition by p_mfgr order by p_name) as dr,
+p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1
+from noop(on
+ noop(on
+ noop(on
+ noop(on part
+ partition by p_mfgr
+ order by p_mfgr)
+ )
+ partition by p_mfgr,p_name
+ order by p_mfgr,p_name)
+ partition by p_mfgr
+ order by p_mfgr ) ;
+
select p_mfgr, p_name,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -227,6 +424,21 @@ from noop(on
order by p_mfgr ) ;
-- 20. testMultiOperatorChainWithNoWindowing
+explain
+select p_mfgr, p_name,
+rank() over (partition by p_mfgr order by p_name) as r,
+dense_rank() over (partition by p_mfgr order by p_name) as dr,
+p_size, sum(p_size) over (partition by p_mfgr order by p_name) as s1
+from noop(on
+ noop(on
+ noop(on
+ noop(on part
+ partition by p_mfgr,p_name
+ order by p_mfgr,p_name)
+ )
+ partition by p_mfgr
+ order by p_mfgr));
+
select p_mfgr, p_name,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -243,6 +455,23 @@ from noop(on
-- 21. testMultiOperatorChainEndsWithNoopMap
+explain
+select p_mfgr, p_name,
+rank() over (partition by p_mfgr,p_name) as r,
+dense_rank() over (partition by p_mfgr,p_name) as dr,
+p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1
+from noopwithmap(on
+ noop(on
+ noop(on
+ noop(on part
+ partition by p_mfgr,p_name
+ order by p_mfgr,p_name)
+ )
+ partition by p_mfgr
+ order by p_mfgr)
+ partition by p_mfgr,p_name
+ order by p_mfgr,p_name);
+
select p_mfgr, p_name,
rank() over (partition by p_mfgr,p_name) as r,
dense_rank() over (partition by p_mfgr,p_name) as dr,
@@ -260,6 +489,22 @@ from noopwithmap(on
order by p_mfgr,p_name);
-- 22. testMultiOperatorChainWithDiffPartitionForWindow1
+explain
+select p_mfgr, p_name,
+rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r,
+dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr,
+p_size,
+sum(p_size) over (partition by p_mfgr,p_name order by p_mfgr,p_name rows between unbounded preceding and current row) as s1,
+sum(p_size) over (partition by p_mfgr,p_name order by p_mfgr,p_name rows between unbounded preceding and current row) as s2
+from noop(on
+ noopwithmap(on
+ noop(on part
+ partition by p_mfgr, p_name
+ order by p_mfgr, p_name)
+ partition by p_mfgr
+ order by p_mfgr
+ ));
+
select p_mfgr, p_name,
rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r,
dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr,
@@ -276,6 +521,20 @@ from noop(on
));
-- 23. testMultiOperatorChainWithDiffPartitionForWindow2
+explain
+select p_mfgr, p_name,
+rank() over (partition by p_mfgr order by p_name) as r,
+dense_rank() over (partition by p_mfgr order by p_name) as dr,
+p_size,
+sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s1,
+sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s2
+from noopwithmap(on
+ noop(on
+ noop(on part
+ partition by p_mfgr, p_name
+ order by p_mfgr, p_name)
+ ));
+
select p_mfgr, p_name,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
Modified: hive/trunk/ql/src/test/queries/clientpositive/ptf_streaming.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/ptf_streaming.q?rev=1654439&r1=1654438&r2=1654439&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/ptf_streaming.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/ptf_streaming.q Sat Jan 24 00:16:36 2015
@@ -1,8 +1,16 @@
-create temporary function noopstreaming as 'org.apache.hadoop.hive.ql.udf.ptf.NoopStreaming$NoopStreamingResolver';
-
-- SORT_QUERY_RESULTS
--1. test1
+explain
+select p_mfgr, p_name, p_size,
+rank() over (partition by p_mfgr order by p_name) as r,
+dense_rank() over (partition by p_mfgr order by p_name) as dr,
+sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1
+from noopstreaming(on part
+ partition by p_mfgr
+ order by p_name
+ );
+
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -12,7 +20,15 @@ from noopstreaming(on part
order by p_name
);
- -- 2. testJoinWithNoop
+-- 2. testJoinWithNoop
+explain
+select p_mfgr, p_name,
+p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz
+from noopstreaming (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j
+distribute by j.p_mfgr
+sort by j.p_name)
+;
+
select p_mfgr, p_name,
p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz
from noopstreaming (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j
@@ -21,6 +37,13 @@ sort by j.p_name)
;
-- 7. testJoin
+explain
+select abc.*
+from noopstreaming(on part
+partition by p_mfgr
+order by p_name
+) abc join part p1 on abc.p_partkey = p1.p_partkey;
+
select abc.*
from noopstreaming(on part
partition by p_mfgr
@@ -28,6 +51,13 @@ order by p_name
) abc join part p1 on abc.p_partkey = p1.p_partkey;
-- 9. testNoopWithMap
+explain
+select p_mfgr, p_name, p_size,
+rank() over (partition by p_mfgr order by p_name, p_size desc) as r
+from noopwithmapstreaming(on part
+partition by p_mfgr
+order by p_name, p_size desc);
+
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name, p_size desc) as r
from noopwithmapstreaming(on part
@@ -35,6 +65,15 @@ partition by p_mfgr
order by p_name, p_size desc);
-- 10. testNoopWithMapWithWindowing
+explain
+select p_mfgr, p_name, p_size,
+rank() over (partition by p_mfgr order by p_name) as r,
+dense_rank() over (partition by p_mfgr order by p_name) as dr,
+sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1
+from noopwithmapstreaming(on part
+ partition by p_mfgr
+ order by p_name);
+
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -44,6 +83,16 @@ from noopwithmapstreaming(on part
order by p_name);
-- 12. testFunctionChain
+explain
+select p_mfgr, p_name, p_size,
+rank() over (partition by p_mfgr order by p_name) as r,
+dense_rank() over (partition by p_mfgr order by p_name) as dr,
+sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1
+from noopstreaming(on noopwithmapstreaming(on noopstreaming(on part
+partition by p_mfgr
+order by p_mfgr, p_name
+)));
+
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -54,6 +103,16 @@ order by p_mfgr, p_name
)));
-- 12.1 testFunctionChain
+explain
+select p_mfgr, p_name, p_size,
+rank() over (partition by p_mfgr order by p_name) as r,
+dense_rank() over (partition by p_mfgr order by p_name) as dr,
+sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1
+from noopstreaming(on noopwithmap(on noopstreaming(on part
+partition by p_mfgr
+order by p_mfgr, p_name
+)));
+
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -64,6 +123,16 @@ order by p_mfgr, p_name
)));
-- 12.2 testFunctionChain
+explain
+select p_mfgr, p_name, p_size,
+rank() over (partition by p_mfgr order by p_name) as r,
+dense_rank() over (partition by p_mfgr order by p_name) as dr,
+sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1
+from noop(on noopwithmapstreaming(on noopstreaming(on part
+partition by p_mfgr
+order by p_mfgr, p_name
+)));
+
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -74,6 +143,19 @@ order by p_mfgr, p_name
)));
-- 14. testPTFJoinWithWindowingWithCount
+explain
+select abc.p_mfgr, abc.p_name,
+rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r,
+dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr,
+count(abc.p_name) over (distribute by abc.p_mfgr sort by abc.p_name) as cd,
+abc.p_retailprice, sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row) as s1,
+abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over (distribute by abc.p_mfgr sort by abc.p_name) as deltaSz
+from noopstreaming(on part
+partition by p_mfgr
+order by p_name
+) abc join part p1 on abc.p_partkey = p1.p_partkey
+;
+
select abc.p_mfgr, abc.p_name,
rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r,
dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr,
@@ -87,6 +169,23 @@ order by p_name
;
-- 18. testMulti2OperatorsFunctionChainWithMap
+explain
+select p_mfgr, p_name,
+rank() over (partition by p_mfgr,p_name) as r,
+dense_rank() over (partition by p_mfgr,p_name) as dr,
+p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1
+from noopstreaming(on
+ noopwithmap(on
+ noop(on
+ noopstreaming(on part
+ partition by p_mfgr
+ order by p_mfgr)
+ )
+ partition by p_mfgr,p_name
+ order by p_mfgr,p_name)
+ partition by p_mfgr,p_name
+ order by p_mfgr,p_name) ;
+
select p_mfgr, p_name,
rank() over (partition by p_mfgr,p_name) as r,
dense_rank() over (partition by p_mfgr,p_name) as dr,
@@ -104,6 +203,23 @@ from noopstreaming(on
order by p_mfgr,p_name) ;
-- 19. testMulti3OperatorsFunctionChain
+explain
+select p_mfgr, p_name,
+rank() over (partition by p_mfgr order by p_name) as r,
+dense_rank() over (partition by p_mfgr order by p_name) as dr,
+p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1
+from noop(on
+ noopstreaming(on
+ noop(on
+ noopstreaming(on part
+ partition by p_mfgr
+ order by p_mfgr)
+ )
+ partition by p_mfgr,p_name
+ order by p_mfgr,p_name)
+ partition by p_mfgr
+ order by p_mfgr ) ;
+
select p_mfgr, p_name,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -121,6 +237,20 @@ from noop(on
order by p_mfgr ) ;
-- 23. testMultiOperatorChainWithDiffPartitionForWindow2
+explain
+select p_mfgr, p_name,
+rank() over (partition by p_mfgr order by p_name) as r,
+dense_rank() over (partition by p_mfgr order by p_name) as dr,
+p_size,
+sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s1,
+sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s2
+from noopwithmapstreaming(on
+ noop(on
+ noopstreaming(on part
+ partition by p_mfgr, p_name
+ order by p_mfgr, p_name)
+ ));
+
select p_mfgr, p_name,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,