Posted to commits@hive.apache.org by nz...@apache.org on 2010/04/23 03:07:13 UTC

svn commit: r937123 - in /hadoop/hive/trunk: CHANGES.txt ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java ql/src/test/queries/clientpositive/lineage1.q ql/src/test/results/clientpositive/lineage1.q.out

Author: nzhang
Date: Fri Apr 23 01:07:12 2010
New Revision: 937123

URL: http://svn.apache.org/viewvc?rev=937123&view=rev
Log:
HIVE-1320. NPE with lineage in a query of union all on joins (Ashish Thusoo via Ning Zhang)

Added:
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/lineage1.q
    hadoop/hive/trunk/ql/src/test/results/clientpositive/lineage1.q.out
Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=937123&r1=937122&r2=937123&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Fri Apr 23 01:07:12 2010
@@ -387,6 +387,9 @@ Trunk -  Unreleased
     HIVE-1316. Increase hive client side memory
     (Ning Zhang via namit)
 
+    HIVE-1320. NPE with lineage in a query of union all on joins
+    (Ashish Thusoo via Ning Zhang)
+
 Release 0.5.0 -  Unreleased
 
   INCOMPATIBLE CHANGES

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java?rev=937123&r1=937122&r2=937123&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java Fri Apr 23 01:07:12 2010
@@ -393,8 +393,10 @@ public class OpProcFactory {
       ArrayList<ColumnInfo> inp_cols = inpOp.getSchema().getSignature();
       int cnt = 0;
       for(ColumnInfo ci : rs.getSignature()) {
-        lCtx.getIndex().mergeDependency(op, ci,
-            lCtx.getIndex().getDependency(inpOp, inp_cols.get(cnt++)));
+        Dependency inp_dep = lCtx.getIndex().getDependency(inpOp, inp_cols.get(cnt++));
+        if (inp_dep != null) {
+          lCtx.getIndex().mergeDependency(op, ci, inp_dep);
+        }
       }
       return null;
     }
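
For context, the hunk above guards against getDependency() returning null for a union input column before handing the result to mergeDependency(); passing that null through is evidently what raised the NullPointerException for UNION ALL queries over joins. Below is a minimal, self-contained sketch of the same guard pattern. SimpleIndex, Dependency and the string keys are simplified, hypothetical stand-ins for Hive's lineage Index and related classes, not the real API; the sketch only illustrates why skipping the merge when the input dependency is null avoids the NPE.

// Minimal sketch of the null-guard pattern from the patch above.
// All names here are hypothetical stand-ins, not Hive's actual lineage classes.
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class LineageNullGuardSketch {

  // Stand-in for a column's lineage: the base columns it derives from.
  static class Dependency {
    final List<String> baseCols = new ArrayList<String>();
  }

  // Stand-in for the lineage index, keyed here by a simple "operator:column" string.
  static class SimpleIndex {
    private final Map<String, Dependency> deps = new HashMap<String, Dependency>();

    // May return null when no dependency was ever recorded for the input column.
    Dependency getDependency(String opCol) {
      return deps.get(opCol);
    }

    // Merges the base columns of dep into the target column's entry.
    void mergeDependency(String opCol, Dependency dep) {
      Dependency existing = deps.get(opCol);
      if (existing == null) {
        existing = new Dependency();
        deps.put(opCol, existing);
      }
      // Dereferencing dep here is what fails if a caller passes null.
      existing.baseCols.addAll(dep.baseCols);
    }
  }

  public static void main(String[] args) {
    SimpleIndex index = new SimpleIndex();

    // Simulate a union input column with no recorded dependency, as can happen
    // for a branch of a UNION ALL over joins.
    Dependency inpDep = index.getDependency("union-input:_col0");

    // Unguarded merge (pre-patch behavior) would throw a NullPointerException:
    //   index.mergeDependency("union-output:_col0", inpDep);

    // Guarded merge, mirroring the patched loop in OpProcFactory:
    if (inpDep != null) {
      index.mergeDependency("union-output:_col0", inpDep);
    }
    System.out.println("merge skipped for null dependency; no NPE");
  }
}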

Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/lineage1.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/lineage1.q?rev=937123&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/lineage1.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/lineage1.q Fri Apr 23 01:07:12 2010
@@ -0,0 +1,30 @@
+drop table dest_l1;
+
+CREATE TABLE dest_l1(key INT, value STRING) STORED AS TEXTFILE;
+
+
+EXPLAIN
+INSERT OVERWRITE TABLE dest_l1
+SELECT j.*
+FROM (SELECT t1.key, p1.value 
+      FROM src1 t1 
+      LEFT OUTER JOIN src p1
+      ON (t1.key = p1.key)
+      UNION ALL
+      SELECT t2.key, p2.value
+      FROM src1 t2
+      LEFT OUTER JOIN src p2
+      ON (t2.key = p2.key)) j;
+
+INSERT OVERWRITE TABLE dest_l1
+SELECT j.*
+FROM (SELECT t1.key, p1.value
+      FROM src1 t1
+      LEFT OUTER JOIN src p1
+      ON (t1.key = p1.key)
+      UNION ALL
+      SELECT t2.key, p2.value
+      FROM src1 t2
+      LEFT OUTER JOIN src p2
+      ON (t2.key = p2.key)) j;
+

Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/lineage1.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/lineage1.q.out?rev=937123&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/lineage1.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/lineage1.q.out Fri Apr 23 01:07:12 2010
@@ -0,0 +1,290 @@
+PREHOOK: query: drop table dest_l1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table dest_l1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE dest_l1(key INT, value STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE dest_l1(key INT, value STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dest_l1
+PREHOOK: query: EXPLAIN
+INSERT OVERWRITE TABLE dest_l1
+SELECT j.*
+FROM (SELECT t1.key, p1.value 
+      FROM src1 t1 
+      LEFT OUTER JOIN src p1
+      ON (t1.key = p1.key)
+      UNION ALL
+      SELECT t2.key, p2.value
+      FROM src1 t2
+      LEFT OUTER JOIN src p2
+      ON (t2.key = p2.key)) j
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+INSERT OVERWRITE TABLE dest_l1
+SELECT j.*
+FROM (SELECT t1.key, p1.value 
+      FROM src1 t1 
+      LEFT OUTER JOIN src p1
+      ON (t1.key = p1.key)
+      UNION ALL
+      SELECT t2.key, p2.value
+      FROM src1 t2
+      LEFT OUTER JOIN src p2
+      ON (t2.key = p2.key)) j
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF src1 t1) (TOK_TABREF src p1) (= (. (TOK_TABLE_OR_COL t1) key) (. (TOK_TABLE_OR_COL p1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL t1) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL p1) value))))) (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF src1 t2) (TOK_TABREF src p2) (= (. (TOK_TABLE_OR_COL t2) key) (. (TOK_TABLE_OR_COL p2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL t2) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL p2) value)))))) j)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB dest_l1)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF j)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1, Stage-6
+  Stage-5 depends on stages: Stage-2 , consists of Stage-4, Stage-3
+  Stage-4
+  Stage-0 depends on stages: Stage-4, Stage-3
+  Stage-3
+  Stage-6 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        null-subquery1:j-subquery1:p1 
+          TableScan
+            alias: p1
+            Reduce Output Operator
+              key expressions:
+                    expr: key
+                    type: string
+              sort order: +
+              Map-reduce partition columns:
+                    expr: key
+                    type: string
+              tag: 1
+              value expressions:
+                    expr: value
+                    type: string
+        null-subquery1:j-subquery1:t1 
+          TableScan
+            alias: t1
+            Reduce Output Operator
+              key expressions:
+                    expr: key
+                    type: string
+              sort order: +
+              Map-reduce partition columns:
+                    expr: key
+                    type: string
+              tag: 0
+              value expressions:
+                    expr: key
+                    type: string
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Left Outer Join0 to 1
+          condition expressions:
+            0 {VALUE._col0}
+            1 {VALUE._col1}
+          handleSkewJoin: false
+          outputColumnNames: _col0, _col3
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col3
+                  type: string
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+        file:/data/users/athusoo/apache_workspaces/hive_trunk_ws1/build/ql/scratchdir/hive_2010-04-22_15-52-46_260_8748362802646516479/10002 
+          Union
+            Select Operator
+              expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: string
+              outputColumnNames: _col0, _col1
+              Select Operator
+                expressions:
+                      expr: UDFToInteger(_col0)
+                      type: int
+                      expr: _col1
+                      type: string
+                outputColumnNames: _col0, _col1
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 1
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      name: dest_l1
+        file:/data/users/athusoo/apache_workspaces/hive_trunk_ws1/build/ql/scratchdir/hive_2010-04-22_15-52-46_260_8748362802646516479/10004 
+          Union
+            Select Operator
+              expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: string
+              outputColumnNames: _col0, _col1
+              Select Operator
+                expressions:
+                      expr: UDFToInteger(_col0)
+                      type: int
+                      expr: _col1
+                      type: string
+                outputColumnNames: _col0, _col1
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 1
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      name: dest_l1
+
+  Stage: Stage-5
+    Conditional Operator
+
+  Stage: Stage-4
+    Move Operator
+      files:
+          hdfs directory: true
+          destination: file:/data/users/athusoo/apache_workspaces/hive_trunk_ws1/build/ql/scratchdir/hive_2010-04-22_15-52-46_260_8748362802646516479/10000
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: dest_l1
+
+  Stage: Stage-3
+    Map Reduce
+      Alias -> Map Operator Tree:
+        file:/data/users/athusoo/apache_workspaces/hive_trunk_ws1/build/ql/scratchdir/hive_2010-04-22_15-52-46_260_8748362802646516479/10003 
+            Reduce Output Operator
+              sort order: 
+              Map-reduce partition columns:
+                    expr: rand()
+                    type: double
+              tag: -1
+              value expressions:
+                    expr: key
+                    type: int
+                    expr: value
+                    type: string
+      Reduce Operator Tree:
+        Extract
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: dest_l1
+
+  Stage: Stage-6
+    Map Reduce
+      Alias -> Map Operator Tree:
+        null-subquery2:j-subquery2:p2 
+          TableScan
+            alias: p2
+            Reduce Output Operator
+              key expressions:
+                    expr: key
+                    type: string
+              sort order: +
+              Map-reduce partition columns:
+                    expr: key
+                    type: string
+              tag: 1
+              value expressions:
+                    expr: value
+                    type: string
+        null-subquery2:j-subquery2:t2 
+          TableScan
+            alias: t2
+            Reduce Output Operator
+              key expressions:
+                    expr: key
+                    type: string
+              sort order: +
+              Map-reduce partition columns:
+                    expr: key
+                    type: string
+              tag: 0
+              value expressions:
+                    expr: key
+                    type: string
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Left Outer Join0 to 1
+          condition expressions:
+            0 {VALUE._col0}
+            1 {VALUE._col1}
+          handleSkewJoin: false
+          outputColumnNames: _col0, _col3
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col3
+                  type: string
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE dest_l1
+SELECT j.*
+FROM (SELECT t1.key, p1.value
+      FROM src1 t1
+      LEFT OUTER JOIN src p1
+      ON (t1.key = p1.key)
+      UNION ALL
+      SELECT t2.key, p2.value
+      FROM src1 t2
+      LEFT OUTER JOIN src p2
+      ON (t2.key = p2.key)) j
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest_l1
+POSTHOOK: query: INSERT OVERWRITE TABLE dest_l1
+SELECT j.*
+FROM (SELECT t1.key, p1.value
+      FROM src1 t1
+      LEFT OUTER JOIN src p1
+      ON (t1.key = p1.key)
+      UNION ALL
+      SELECT t2.key, p2.value
+      FROM src1 t2
+      LEFT OUTER JOIN src p2
+      ON (t2.key = p2.key)) j
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest_l1
+POSTHOOK: Lineage: dest_l1.key EXPRESSION [(src1)t1.FieldSchema(name:key, type:string, comment:default), (src1)t2.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_l1.value EXPRESSION [(src)p2.FieldSchema(name:value, type:string, comment:default), (src)p1.FieldSchema(name:value, type:string, comment:default), ]