You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/04/07 17:09:43 UTC

svn commit: r1465408 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java test/queries/clientpositive/windowing_expressions.q test/results/clientpositive/windowing_expressions.q.out

Author: hashutosh
Date: Sun Apr  7 15:09:42 2013
New Revision: 1465408

URL: http://svn.apache.org/r1465408
Log:
HIVE-4302 : Fix how RowSchema and RowResolver are set on ReduceSinkOp that precedes PTFOp (Harish Butani via Ashutosh Chauhan)

Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
    hive/trunk/ql/src/test/queries/clientpositive/windowing_expressions.q
    hive/trunk/ql/src/test/results/clientpositive/windowing_expressions.q.out

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1465408&r1=1465407&r2=1465408&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Sun Apr  7 15:09:42 2013
@@ -10486,6 +10486,7 @@ public class SemanticAnalyzer extends Ba
       Map<String, ExprNodeDesc> colExprMap,
       List<String> outputColumnNames,
       StringBuilder orderString,
+      RowResolver rsOpRR,
       RowResolver extractRR) throws SemanticException {
 
     ArrayList<PTFExpressionDef> partColList = tabDef.getPartition().getExpressions();
@@ -10515,16 +10516,12 @@ public class SemanticAnalyzer extends Ba
       orderCols.add(colDef.getExprNode());
     }
 
+    ArrayList<ColumnInfo> colInfoList = inputRR.getColumnInfos();
     /*
-     * We add the column to value columns or output column names
-     * only if it is not a virtual column
+     * construct the ReduceSinkRR
      */
-    ArrayList<ColumnInfo> colInfoList = inputRR.getColumnInfos();
-    LinkedHashMap<String[], ColumnInfo> colsAddedByHaving =
-        new LinkedHashMap<String[], ColumnInfo>();
     int pos = 0;
     for (ColumnInfo colInfo : colInfoList) {
-      if (!colInfo.isHiddenVirtualCol()) {
         ExprNodeDesc valueColExpr = new ExprNodeColumnDesc(colInfo.getType(), colInfo
             .getInternalName(), colInfo.getTabAlias(), colInfo
             .getIsVirtualCol());
@@ -10534,6 +10531,21 @@ public class SemanticAnalyzer extends Ba
         outputColumnNames.add(outColName);
 
         String[] alias = inputRR.reverseLookup(colInfo.getInternalName());
+        ColumnInfo newColInfo = new ColumnInfo(
+            outColName, colInfo.getType(), alias[0],
+            colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol());
+        rsOpRR.put(alias[0], alias[1], newColInfo);
+    }
+
+    /*
+     * construct the ExtractRR
+     */
+    LinkedHashMap<String[], ColumnInfo> colsAddedByHaving =
+        new LinkedHashMap<String[], ColumnInfo>();
+    pos = 0;
+    for (ColumnInfo colInfo : colInfoList) {
+      if (!colInfo.isHiddenVirtualCol()) {
+        String[] alias = inputRR.reverseLookup(colInfo.getInternalName());
         /*
          * if we have already encountered this colInfo internalName.
          * We encounter it again because it must be put for the Having clause.
@@ -10544,7 +10556,7 @@ public class SemanticAnalyzer extends Ba
         }
         ASTNode astNode = PTFTranslator.getASTNode(colInfo, inputRR);
         ColumnInfo eColInfo = new ColumnInfo(
-            outColName, colInfo.getType(), alias[0],
+            SemanticAnalyzer.getColumnInternalName(pos++), colInfo.getType(), alias[0],
             colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol());
 
         if ( astNode == null ) {
@@ -10578,6 +10590,8 @@ public class SemanticAnalyzer extends Ba
      */
     RowResolver rr = opParseCtx.get(input).getRowResolver();
     PTFDesc ptfDesc = translatePTFInvocationSpec(ptfQSpec, rr);
+
+    RowResolver rsOpRR = new RowResolver();
     /*
      * Build an RR for the Extract Op from the ResuceSink Op's RR.
      * Why?
@@ -10647,13 +10661,14 @@ public class SemanticAnalyzer extends Ba
           colExprMap,
           outputColumnNames,
           orderString,
+          rsOpRR,
           extractOpRR);
 
       input = putOpInsertMap(OperatorFactory.getAndMakeChild(PlanUtils
           .getReduceSinkDesc(orderCols,
               valueCols, outputColumnNames, false,
               -1, partCols, orderString.toString(), -1),
-          new RowSchema(rr.getColumnInfos()), input), rr);
+          new RowSchema(rsOpRR.getColumnInfos()), input), rsOpRR);
       input.setColumnExprMap(colExprMap);
     }
 
@@ -10775,7 +10790,7 @@ public class SemanticAnalyzer extends Ba
         .getReduceSinkDesc(orderCols,
             valueCols, outputColumnNames, false,
             -1, partCols, orderString.toString(), -1),
-        new RowSchema(inputRR.getColumnInfos()), input), rsNewRR);
+        new RowSchema(rsNewRR.getColumnInfos()), input), rsNewRR);
     input.setColumnExprMap(colExprMap);
 
 

Modified: hive/trunk/ql/src/test/queries/clientpositive/windowing_expressions.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/windowing_expressions.q?rev=1465408&r1=1465407&r2=1465408&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/windowing_expressions.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/windowing_expressions.q Sun Apr  7 15:09:42 2013
@@ -57,3 +57,10 @@ select s, lag(s, 3, 'fred') over (partit
 select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part;
 
 select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by p_type,p_mfgr rows between unbounded preceding and current row) from part;
+
+-- multi table insert test
+create table t1 (a1 int, b1 string); 
+create table t2 (a1 int, b1 string);
+from (select sum(i) over (), s from over10k) tt insert overwrite table t1 select * insert overwrite table t2 select * ;
+select * from t1 limit 3;
+select * from t2 limit 3;

Modified: hive/trunk/ql/src/test/results/clientpositive/windowing_expressions.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/windowing_expressions.q.out?rev=1465408&r1=1465407&r2=1465408&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/windowing_expressions.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/windowing_expressions.q.out Sun Apr  7 15:09:42 2013
@@ -662,3 +662,59 @@ Manufacturer#5	1241.29
 Manufacturer#5	1424.0900000000001
 Manufacturer#5	1515.25
 Manufacturer#5	1534.532
+PREHOOK: query: -- multi table insert test
+create table t1 (a1 int, b1 string)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- multi table insert test
+create table t1 (a1 int, b1 string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@t1
+PREHOOK: query: create table t2 (a1 int, b1 string)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table t2 (a1 int, b1 string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@t2
+PREHOOK: query: from (select sum(i) over (), s from over10k) tt insert overwrite table t1 select * insert overwrite table t2 select *
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over10k
+PREHOOK: Output: default@t1
+PREHOOK: Output: default@t2
+POSTHOOK: query: from (select sum(i) over (), s from over10k) tt insert overwrite table t1 select * insert overwrite table t2 select *
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over10k
+POSTHOOK: Output: default@t1
+POSTHOOK: Output: default@t2
+POSTHOOK: Lineage: t1.a1 SCRIPT [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), (over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), (over10k)over10k.FieldSchema(name:i, type:int, comment:null), (over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), (over10k)over10k.FieldSchema(name:f, type:float, comment:null), (over10k)over10k.FieldSchema(name:d, type:double, comment:null), (over10k)over10k.FieldSchema(name:bo, type:boolean, comment:null), (over10k)over10k.FieldSchema(name:s, type:string, comment:null), (over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), (over10k)over10k.FieldSchema(name:dec, type:decimal, comment:null), (over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), (over10k)over10k.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), (over10k)over10k.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: t1.b1 SCRIPT [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), (over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), (over10k)over10k.FieldSchema(name:i, type:int, comment:null), (over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), (over10k)over10k.FieldSchema(name:f, type:float, comment:null), (over10k)over10k.FieldSchema(name:d, type:double, comment:null), (over10k)over10k.FieldSchema(name:bo, type:boolean, comment:null), (over10k)over10k.FieldSchema(name:s, type:string, comment:null), (over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), (over10k)over10k.FieldSchema(name:dec, type:decimal, comment:null), (over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), (over10k)over10k.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), (over10k)over10k.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: t2.a1 SCRIPT [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), (over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), (over10k)over10k.FieldSchema(name:i, type:int, comment:null), (over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), (over10k)over10k.FieldSchema(name:f, type:float, comment:null), (over10k)over10k.FieldSchema(name:d, type:double, comment:null), (over10k)over10k.FieldSchema(name:bo, type:boolean, comment:null), (over10k)over10k.FieldSchema(name:s, type:string, comment:null), (over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), (over10k)over10k.FieldSchema(name:dec, type:decimal, comment:null), (over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), (over10k)over10k.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), (over10k)over10k.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: t2.b1 SCRIPT [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), (over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), (over10k)over10k.FieldSchema(name:i, type:int, comment:null), (over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), (over10k)over10k.FieldSchema(name:f, type:float, comment:null), (over10k)over10k.FieldSchema(name:d, type:double, comment:null), (over10k)over10k.FieldSchema(name:bo, type:boolean, comment:null), (over10k)over10k.FieldSchema(name:s, type:string, comment:null), (over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), (over10k)over10k.FieldSchema(name:dec, type:decimal, comment:null), (over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), (over10k)over10k.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), (over10k)over10k.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+PREHOOK: query: select * from t1 limit 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t1 limit 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: t1.a1 SCRIPT [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), (over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), (over10k)over10k.FieldSchema(name:i, type:int, comment:null), (over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), (over10k)over10k.FieldSchema(name:f, type:float, comment:null), (over10k)over10k.FieldSchema(name:d, type:double, comment:null), (over10k)over10k.FieldSchema(name:bo, type:boolean, comment:null), (over10k)over10k.FieldSchema(name:s, type:string, comment:null), (over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), (over10k)over10k.FieldSchema(name:dec, type:decimal, comment:null), (over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), (over10k)over10k.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), (over10k)over10k.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: t1.b1 SCRIPT [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), (over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), (over10k)over10k.FieldSchema(name:i, type:int, comment:null), (over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), (over10k)over10k.FieldSchema(name:f, type:float, comment:null), (over10k)over10k.FieldSchema(name:d, type:double, comment:null), (over10k)over10k.FieldSchema(name:bo, type:boolean, comment:null), (over10k)over10k.FieldSchema(name:s, type:string, comment:null), (over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), (over10k)over10k.FieldSchema(name:dec, type:decimal, comment:null), (over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), (over10k)over10k.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), (over10k)over10k.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: t2.a1 SCRIPT [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), (over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), (over10k)over10k.FieldSchema(name:i, type:int, comment:null), (over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), (over10k)over10k.FieldSchema(name:f, type:float, comment:null), (over10k)over10k.FieldSchema(name:d, type:double, comment:null), (over10k)over10k.FieldSchema(name:bo, type:boolean, comment:null), (over10k)over10k.FieldSchema(name:s, type:string, comment:null), (over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), (over10k)over10k.FieldSchema(name:dec, type:decimal, comment:null), (over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), (over10k)over10k.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), (over10k)over10k.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: t2.b1 SCRIPT [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), (over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), (over10k)over10k.FieldSchema(name:i, type:int, comment:null), (over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), (over10k)over10k.FieldSchema(name:f, type:float, comment:null), (over10k)over10k.FieldSchema(name:d, type:double, comment:null), (over10k)over10k.FieldSchema(name:bo, type:boolean, comment:null), (over10k)over10k.FieldSchema(name:s, type:string, comment:null), (over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), (over10k)over10k.FieldSchema(name:dec, type:decimal, comment:null), (over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), (over10k)over10k.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), (over10k)over10k.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+656584379	bob davidson
+656584379	alice zipper
+656584379	katie davidson
+PREHOOK: query: select * from t2 limit 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t2 limit 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: t1.a1 SCRIPT [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), (over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), (over10k)over10k.FieldSchema(name:i, type:int, comment:null), (over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), (over10k)over10k.FieldSchema(name:f, type:float, comment:null), (over10k)over10k.FieldSchema(name:d, type:double, comment:null), (over10k)over10k.FieldSchema(name:bo, type:boolean, comment:null), (over10k)over10k.FieldSchema(name:s, type:string, comment:null), (over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), (over10k)over10k.FieldSchema(name:dec, type:decimal, comment:null), (over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), (over10k)over10k.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), (over10k)over10k.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: t1.b1 SCRIPT [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), (over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), (over10k)over10k.FieldSchema(name:i, type:int, comment:null), (over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), (over10k)over10k.FieldSchema(name:f, type:float, comment:null), (over10k)over10k.FieldSchema(name:d, type:double, comment:null), (over10k)over10k.FieldSchema(name:bo, type:boolean, comment:null), (over10k)over10k.FieldSchema(name:s, type:string, comment:null), (over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), (over10k)over10k.FieldSchema(name:dec, type:decimal, comment:null), (over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), (over10k)over10k.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), (over10k)over10k.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: t2.a1 SCRIPT [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), (over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), (over10k)over10k.FieldSchema(name:i, type:int, comment:null), (over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), (over10k)over10k.FieldSchema(name:f, type:float, comment:null), (over10k)over10k.FieldSchema(name:d, type:double, comment:null), (over10k)over10k.FieldSchema(name:bo, type:boolean, comment:null), (over10k)over10k.FieldSchema(name:s, type:string, comment:null), (over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), (over10k)over10k.FieldSchema(name:dec, type:decimal, comment:null), (over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), (over10k)over10k.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), (over10k)over10k.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: t2.b1 SCRIPT [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), (over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), (over10k)over10k.FieldSchema(name:i, type:int, comment:null), (over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), (over10k)over10k.FieldSchema(name:f, type:float, comment:null), (over10k)over10k.FieldSchema(name:d, type:double, comment:null), (over10k)over10k.FieldSchema(name:bo, type:boolean, comment:null), (over10k)over10k.FieldSchema(name:s, type:string, comment:null), (over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), (over10k)over10k.FieldSchema(name:dec, type:decimal, comment:null), (over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), (over10k)over10k.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), (over10k)over10k.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+656584379	bob davidson
+656584379	alice zipper
+656584379	katie davidson