You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/04/07 17:09:43 UTC
svn commit: r1465408 - in /hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
test/queries/clientpositive/windowing_expressions.q
test/results/clientpositive/windowing_expressions.q.out
Author: hashutosh
Date: Sun Apr 7 15:09:42 2013
New Revision: 1465408
URL: http://svn.apache.org/r1465408
Log:
HIVE-4302 : Fix how RowSchema and RowResolver are set on ReduceSinkOp that precedes PTFOp (Harish Butani via Ashutosh Chauhan)
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
hive/trunk/ql/src/test/queries/clientpositive/windowing_expressions.q
hive/trunk/ql/src/test/results/clientpositive/windowing_expressions.q.out
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1465408&r1=1465407&r2=1465408&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Sun Apr 7 15:09:42 2013
@@ -10486,6 +10486,7 @@ public class SemanticAnalyzer extends Ba
Map<String, ExprNodeDesc> colExprMap,
List<String> outputColumnNames,
StringBuilder orderString,
+ RowResolver rsOpRR,
RowResolver extractRR) throws SemanticException {
ArrayList<PTFExpressionDef> partColList = tabDef.getPartition().getExpressions();
@@ -10515,16 +10516,12 @@ public class SemanticAnalyzer extends Ba
orderCols.add(colDef.getExprNode());
}
+ ArrayList<ColumnInfo> colInfoList = inputRR.getColumnInfos();
/*
- * We add the column to value columns or output column names
- * only if it is not a virtual column
+ * construct the ReduceSinkRR
*/
- ArrayList<ColumnInfo> colInfoList = inputRR.getColumnInfos();
- LinkedHashMap<String[], ColumnInfo> colsAddedByHaving =
- new LinkedHashMap<String[], ColumnInfo>();
int pos = 0;
for (ColumnInfo colInfo : colInfoList) {
- if (!colInfo.isHiddenVirtualCol()) {
ExprNodeDesc valueColExpr = new ExprNodeColumnDesc(colInfo.getType(), colInfo
.getInternalName(), colInfo.getTabAlias(), colInfo
.getIsVirtualCol());
@@ -10534,6 +10531,21 @@ public class SemanticAnalyzer extends Ba
outputColumnNames.add(outColName);
String[] alias = inputRR.reverseLookup(colInfo.getInternalName());
+ ColumnInfo newColInfo = new ColumnInfo(
+ outColName, colInfo.getType(), alias[0],
+ colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol());
+ rsOpRR.put(alias[0], alias[1], newColInfo);
+ }
+
+ /*
+ * construct the ExtractRR
+ */
+ LinkedHashMap<String[], ColumnInfo> colsAddedByHaving =
+ new LinkedHashMap<String[], ColumnInfo>();
+ pos = 0;
+ for (ColumnInfo colInfo : colInfoList) {
+ if (!colInfo.isHiddenVirtualCol()) {
+ String[] alias = inputRR.reverseLookup(colInfo.getInternalName());
/*
* if we have already encountered this colInfo internalName.
* We encounter it again because it must be put for the Having clause.
@@ -10544,7 +10556,7 @@ public class SemanticAnalyzer extends Ba
}
ASTNode astNode = PTFTranslator.getASTNode(colInfo, inputRR);
ColumnInfo eColInfo = new ColumnInfo(
- outColName, colInfo.getType(), alias[0],
+ SemanticAnalyzer.getColumnInternalName(pos++), colInfo.getType(), alias[0],
colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol());
if ( astNode == null ) {
@@ -10578,6 +10590,8 @@ public class SemanticAnalyzer extends Ba
*/
RowResolver rr = opParseCtx.get(input).getRowResolver();
PTFDesc ptfDesc = translatePTFInvocationSpec(ptfQSpec, rr);
+
+ RowResolver rsOpRR = new RowResolver();
/*
* Build an RR for the Extract Op from the ResuceSink Op's RR.
* Why?
@@ -10647,13 +10661,14 @@ public class SemanticAnalyzer extends Ba
colExprMap,
outputColumnNames,
orderString,
+ rsOpRR,
extractOpRR);
input = putOpInsertMap(OperatorFactory.getAndMakeChild(PlanUtils
.getReduceSinkDesc(orderCols,
valueCols, outputColumnNames, false,
-1, partCols, orderString.toString(), -1),
- new RowSchema(rr.getColumnInfos()), input), rr);
+ new RowSchema(rsOpRR.getColumnInfos()), input), rsOpRR);
input.setColumnExprMap(colExprMap);
}
@@ -10775,7 +10790,7 @@ public class SemanticAnalyzer extends Ba
.getReduceSinkDesc(orderCols,
valueCols, outputColumnNames, false,
-1, partCols, orderString.toString(), -1),
- new RowSchema(inputRR.getColumnInfos()), input), rsNewRR);
+ new RowSchema(rsNewRR.getColumnInfos()), input), rsNewRR);
input.setColumnExprMap(colExprMap);
Modified: hive/trunk/ql/src/test/queries/clientpositive/windowing_expressions.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/windowing_expressions.q?rev=1465408&r1=1465407&r2=1465408&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/windowing_expressions.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/windowing_expressions.q Sun Apr 7 15:09:42 2013
@@ -57,3 +57,10 @@ select s, lag(s, 3, 'fred') over (partit
select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part;
select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by p_type,p_mfgr rows between unbounded preceding and current row) from part;
+
+-- multi table insert test
+create table t1 (a1 int, b1 string);
+create table t2 (a1 int, b1 string);
+from (select sum(i) over (), s from over10k) tt insert overwrite table t1 select * insert overwrite table t2 select * ;
+select * from t1 limit 3;
+select * from t2 limit 3;
Modified: hive/trunk/ql/src/test/results/clientpositive/windowing_expressions.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/windowing_expressions.q.out?rev=1465408&r1=1465407&r2=1465408&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/windowing_expressions.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/windowing_expressions.q.out Sun Apr 7 15:09:42 2013
@@ -662,3 +662,59 @@ Manufacturer#5 1241.29
Manufacturer#5 1424.0900000000001
Manufacturer#5 1515.25
Manufacturer#5 1534.532
+PREHOOK: query: -- multi table insert test
+create table t1 (a1 int, b1 string)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- multi table insert test
+create table t1 (a1 int, b1 string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@t1
+PREHOOK: query: create table t2 (a1 int, b1 string)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table t2 (a1 int, b1 string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@t2
+PREHOOK: query: from (select sum(i) over (), s from over10k) tt insert overwrite table t1 select * insert overwrite table t2 select *
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over10k
+PREHOOK: Output: default@t1
+PREHOOK: Output: default@t2
+POSTHOOK: query: from (select sum(i) over (), s from over10k) tt insert overwrite table t1 select * insert overwrite table t2 select *
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over10k
+POSTHOOK: Output: default@t1
+POSTHOOK: Output: default@t2
+POSTHOOK: Lineage: t1.a1 SCRIPT [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), (over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), (over10k)over10k.FieldSchema(name:i, type:int, comment:null), (over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), (over10k)over10k.FieldSchema(name:f, type:float, comment:null), (over10k)over10k.FieldSchema(name:d, type:double, comment:null), (over10k)over10k.FieldSchema(name:bo, type:boolean, comment:null), (over10k)over10k.FieldSchema(name:s, type:string, comment:null), (over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), (over10k)over10k.FieldSchema(name:dec, type:decimal, comment:null), (over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), (over10k)over10k.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), (over10k)over10k.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: t1.b1 SCRIPT [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), (over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), (over10k)over10k.FieldSchema(name:i, type:int, comment:null), (over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), (over10k)over10k.FieldSchema(name:f, type:float, comment:null), (over10k)over10k.FieldSchema(name:d, type:double, comment:null), (over10k)over10k.FieldSchema(name:bo, type:boolean, comment:null), (over10k)over10k.FieldSchema(name:s, type:string, comment:null), (over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), (over10k)over10k.FieldSchema(name:dec, type:decimal, comment:null), (over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), (over10k)over10k.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), (over10k)over10k.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: t2.a1 SCRIPT [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), (over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), (over10k)over10k.FieldSchema(name:i, type:int, comment:null), (over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), (over10k)over10k.FieldSchema(name:f, type:float, comment:null), (over10k)over10k.FieldSchema(name:d, type:double, comment:null), (over10k)over10k.FieldSchema(name:bo, type:boolean, comment:null), (over10k)over10k.FieldSchema(name:s, type:string, comment:null), (over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), (over10k)over10k.FieldSchema(name:dec, type:decimal, comment:null), (over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), (over10k)over10k.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), (over10k)over10k.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: t2.b1 SCRIPT [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), (over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), (over10k)over10k.FieldSchema(name:i, type:int, comment:null), (over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), (over10k)over10k.FieldSchema(name:f, type:float, comment:null), (over10k)over10k.FieldSchema(name:d, type:double, comment:null), (over10k)over10k.FieldSchema(name:bo, type:boolean, comment:null), (over10k)over10k.FieldSchema(name:s, type:string, comment:null), (over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), (over10k)over10k.FieldSchema(name:dec, type:decimal, comment:null), (over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), (over10k)over10k.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), (over10k)over10k.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+PREHOOK: query: select * from t1 limit 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t1 limit 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: t1.a1 SCRIPT [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), (over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), (over10k)over10k.FieldSchema(name:i, type:int, comment:null), (over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), (over10k)over10k.FieldSchema(name:f, type:float, comment:null), (over10k)over10k.FieldSchema(name:d, type:double, comment:null), (over10k)over10k.FieldSchema(name:bo, type:boolean, comment:null), (over10k)over10k.FieldSchema(name:s, type:string, comment:null), (over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), (over10k)over10k.FieldSchema(name:dec, type:decimal, comment:null), (over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), (over10k)over10k.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), (over10k)over10k.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: t1.b1 SCRIPT [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), (over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), (over10k)over10k.FieldSchema(name:i, type:int, comment:null), (over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), (over10k)over10k.FieldSchema(name:f, type:float, comment:null), (over10k)over10k.FieldSchema(name:d, type:double, comment:null), (over10k)over10k.FieldSchema(name:bo, type:boolean, comment:null), (over10k)over10k.FieldSchema(name:s, type:string, comment:null), (over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), (over10k)over10k.FieldSchema(name:dec, type:decimal, comment:null), (over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), (over10k)over10k.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), (over10k)over10k.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: t2.a1 SCRIPT [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), (over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), (over10k)over10k.FieldSchema(name:i, type:int, comment:null), (over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), (over10k)over10k.FieldSchema(name:f, type:float, comment:null), (over10k)over10k.FieldSchema(name:d, type:double, comment:null), (over10k)over10k.FieldSchema(name:bo, type:boolean, comment:null), (over10k)over10k.FieldSchema(name:s, type:string, comment:null), (over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), (over10k)over10k.FieldSchema(name:dec, type:decimal, comment:null), (over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), (over10k)over10k.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), (over10k)over10k.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: t2.b1 SCRIPT [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), (over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), (over10k)over10k.FieldSchema(name:i, type:int, comment:null), (over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), (over10k)over10k.FieldSchema(name:f, type:float, comment:null), (over10k)over10k.FieldSchema(name:d, type:double, comment:null), (over10k)over10k.FieldSchema(name:bo, type:boolean, comment:null), (over10k)over10k.FieldSchema(name:s, type:string, comment:null), (over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), (over10k)over10k.FieldSchema(name:dec, type:decimal, comment:null), (over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), (over10k)over10k.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), (over10k)over10k.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+656584379 bob davidson
+656584379 alice zipper
+656584379 katie davidson
+PREHOOK: query: select * from t2 limit 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t2 limit 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: t1.a1 SCRIPT [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), (over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), (over10k)over10k.FieldSchema(name:i, type:int, comment:null), (over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), (over10k)over10k.FieldSchema(name:f, type:float, comment:null), (over10k)over10k.FieldSchema(name:d, type:double, comment:null), (over10k)over10k.FieldSchema(name:bo, type:boolean, comment:null), (over10k)over10k.FieldSchema(name:s, type:string, comment:null), (over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), (over10k)over10k.FieldSchema(name:dec, type:decimal, comment:null), (over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), (over10k)over10k.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), (over10k)over10k.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: t1.b1 SCRIPT [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), (over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), (over10k)over10k.FieldSchema(name:i, type:int, comment:null), (over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), (over10k)over10k.FieldSchema(name:f, type:float, comment:null), (over10k)over10k.FieldSchema(name:d, type:double, comment:null), (over10k)over10k.FieldSchema(name:bo, type:boolean, comment:null), (over10k)over10k.FieldSchema(name:s, type:string, comment:null), (over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), (over10k)over10k.FieldSchema(name:dec, type:decimal, comment:null), (over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), (over10k)over10k.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), (over10k)over10k.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: t2.a1 SCRIPT [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), (over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), (over10k)over10k.FieldSchema(name:i, type:int, comment:null), (over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), (over10k)over10k.FieldSchema(name:f, type:float, comment:null), (over10k)over10k.FieldSchema(name:d, type:double, comment:null), (over10k)over10k.FieldSchema(name:bo, type:boolean, comment:null), (over10k)over10k.FieldSchema(name:s, type:string, comment:null), (over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), (over10k)over10k.FieldSchema(name:dec, type:decimal, comment:null), (over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), (over10k)over10k.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), (over10k)over10k.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: t2.b1 SCRIPT [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), (over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), (over10k)over10k.FieldSchema(name:i, type:int, comment:null), (over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), (over10k)over10k.FieldSchema(name:f, type:float, comment:null), (over10k)over10k.FieldSchema(name:d, type:double, comment:null), (over10k)over10k.FieldSchema(name:bo, type:boolean, comment:null), (over10k)over10k.FieldSchema(name:s, type:string, comment:null), (over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), (over10k)over10k.FieldSchema(name:dec, type:decimal, comment:null), (over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), (over10k)over10k.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), (over10k)over10k.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+656584379 bob davidson
+656584379 alice zipper
+656584379 katie davidson