You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by nz...@apache.org on 2010/04/23 03:07:13 UTC
svn commit: r937123 - in /hadoop/hive/trunk: CHANGES.txt
ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java
ql/src/test/queries/clientpositive/lineage1.q
ql/src/test/results/clientpositive/lineage1.q.out
Author: nzhang
Date: Fri Apr 23 01:07:12 2010
New Revision: 937123
URL: http://svn.apache.org/viewvc?rev=937123&view=rev
Log:
HIVE-1320. NPE with lineage in a query of union alls on joins (Ashish Thusoo via Ning Zhang)
Added:
hadoop/hive/trunk/ql/src/test/queries/clientpositive/lineage1.q
hadoop/hive/trunk/ql/src/test/results/clientpositive/lineage1.q.out
Modified:
hadoop/hive/trunk/CHANGES.txt
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java
Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=937123&r1=937122&r2=937123&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Fri Apr 23 01:07:12 2010
@@ -387,6 +387,9 @@ Trunk - Unreleased
HIVE-1316. Increase hive client side memory
(Ning Zhang via namit)
+ HIVE-1320. NPE with lineage in a query of union all on joins
+ (Ashish Thusoo via Ning Zhang)
+
Release 0.5.0 - Unreleased
INCOMPATIBLE CHANGES
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java?rev=937123&r1=937122&r2=937123&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java Fri Apr 23 01:07:12 2010
@@ -393,8 +393,10 @@ public class OpProcFactory {
ArrayList<ColumnInfo> inp_cols = inpOp.getSchema().getSignature();
int cnt = 0;
for(ColumnInfo ci : rs.getSignature()) {
- lCtx.getIndex().mergeDependency(op, ci,
- lCtx.getIndex().getDependency(inpOp, inp_cols.get(cnt++)));
+ Dependency inp_dep = lCtx.getIndex().getDependency(inpOp, inp_cols.get(cnt++));
+ if (inp_dep != null) {
+ lCtx.getIndex().mergeDependency(op, ci, inp_dep);
+ }
}
return null;
}
Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/lineage1.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/lineage1.q?rev=937123&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/lineage1.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/lineage1.q Fri Apr 23 01:07:12 2010
@@ -0,0 +1,30 @@
+drop table dest_l1;
+
+CREATE TABLE dest_l1(key INT, value STRING) STORED AS TEXTFILE;
+
+
+EXPLAIN
+INSERT OVERWRITE TABLE dest_l1
+SELECT j.*
+FROM (SELECT t1.key, p1.value
+ FROM src1 t1
+ LEFT OUTER JOIN src p1
+ ON (t1.key = p1.key)
+ UNION ALL
+ SELECT t2.key, p2.value
+ FROM src1 t2
+ LEFT OUTER JOIN src p2
+ ON (t2.key = p2.key)) j;
+
+INSERT OVERWRITE TABLE dest_l1
+SELECT j.*
+FROM (SELECT t1.key, p1.value
+ FROM src1 t1
+ LEFT OUTER JOIN src p1
+ ON (t1.key = p1.key)
+ UNION ALL
+ SELECT t2.key, p2.value
+ FROM src1 t2
+ LEFT OUTER JOIN src p2
+ ON (t2.key = p2.key)) j;
+
Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/lineage1.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/lineage1.q.out?rev=937123&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/lineage1.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/lineage1.q.out Fri Apr 23 01:07:12 2010
@@ -0,0 +1,290 @@
+PREHOOK: query: drop table dest_l1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table dest_l1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE dest_l1(key INT, value STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE dest_l1(key INT, value STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dest_l1
+PREHOOK: query: EXPLAIN
+INSERT OVERWRITE TABLE dest_l1
+SELECT j.*
+FROM (SELECT t1.key, p1.value
+ FROM src1 t1
+ LEFT OUTER JOIN src p1
+ ON (t1.key = p1.key)
+ UNION ALL
+ SELECT t2.key, p2.value
+ FROM src1 t2
+ LEFT OUTER JOIN src p2
+ ON (t2.key = p2.key)) j
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+INSERT OVERWRITE TABLE dest_l1
+SELECT j.*
+FROM (SELECT t1.key, p1.value
+ FROM src1 t1
+ LEFT OUTER JOIN src p1
+ ON (t1.key = p1.key)
+ UNION ALL
+ SELECT t2.key, p2.value
+ FROM src1 t2
+ LEFT OUTER JOIN src p2
+ ON (t2.key = p2.key)) j
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF src1 t1) (TOK_TABREF src p1) (= (. (TOK_TABLE_OR_COL t1) key) (. (TOK_TABLE_OR_COL p1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL t1) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL p1) value))))) (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF src1 t2) (TOK_TABREF src p2) (= (. (TOK_TABLE_OR_COL t2) key) (. (TOK_TABLE_OR_COL p2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL t2) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL p2) value)))))) j)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB dest_l1)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF j)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-6
+ Stage-5 depends on stages: Stage-2 , consists of Stage-4, Stage-3
+ Stage-4
+ Stage-0 depends on stages: Stage-4, Stage-3
+ Stage-3
+ Stage-6 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ null-subquery1:j-subquery1:p1
+ TableScan
+ alias: p1
+ Reduce Output Operator
+ key expressions:
+ expr: key
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: key
+ type: string
+ tag: 1
+ value expressions:
+ expr: value
+ type: string
+ null-subquery1:j-subquery1:t1
+ TableScan
+ alias: t1
+ Reduce Output Operator
+ key expressions:
+ expr: key
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: key
+ type: string
+ tag: 0
+ value expressions:
+ expr: key
+ type: string
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ condition expressions:
+ 0 {VALUE._col0}
+ 1 {VALUE._col1}
+ handleSkewJoin: false
+ outputColumnNames: _col0, _col3
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col3
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+ file:/data/users/athusoo/apache_workspaces/hive_trunk_ws1/build/ql/scratchdir/hive_2010-04-22_15-52-46_260_8748362802646516479/10002
+ Union
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: UDFToInteger(_col0)
+ type: int
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: dest_l1
+ file:/data/users/athusoo/apache_workspaces/hive_trunk_ws1/build/ql/scratchdir/hive_2010-04-22_15-52-46_260_8748362802646516479/10004
+ Union
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: UDFToInteger(_col0)
+ type: int
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: dest_l1
+
+ Stage: Stage-5
+ Conditional Operator
+
+ Stage: Stage-4
+ Move Operator
+ files:
+ hdfs directory: true
+ destination: file:/data/users/athusoo/apache_workspaces/hive_trunk_ws1/build/ql/scratchdir/hive_2010-04-22_15-52-46_260_8748362802646516479/10000
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: dest_l1
+
+ Stage: Stage-3
+ Map Reduce
+ Alias -> Map Operator Tree:
+ file:/data/users/athusoo/apache_workspaces/hive_trunk_ws1/build/ql/scratchdir/hive_2010-04-22_15-52-46_260_8748362802646516479/10003
+ Reduce Output Operator
+ sort order:
+ Map-reduce partition columns:
+ expr: rand()
+ type: double
+ tag: -1
+ value expressions:
+ expr: key
+ type: int
+ expr: value
+ type: string
+ Reduce Operator Tree:
+ Extract
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: dest_l1
+
+ Stage: Stage-6
+ Map Reduce
+ Alias -> Map Operator Tree:
+ null-subquery2:j-subquery2:p2
+ TableScan
+ alias: p2
+ Reduce Output Operator
+ key expressions:
+ expr: key
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: key
+ type: string
+ tag: 1
+ value expressions:
+ expr: value
+ type: string
+ null-subquery2:j-subquery2:t2
+ TableScan
+ alias: t2
+ Reduce Output Operator
+ key expressions:
+ expr: key
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: key
+ type: string
+ tag: 0
+ value expressions:
+ expr: key
+ type: string
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ condition expressions:
+ 0 {VALUE._col0}
+ 1 {VALUE._col1}
+ handleSkewJoin: false
+ outputColumnNames: _col0, _col3
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col3
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE dest_l1
+SELECT j.*
+FROM (SELECT t1.key, p1.value
+ FROM src1 t1
+ LEFT OUTER JOIN src p1
+ ON (t1.key = p1.key)
+ UNION ALL
+ SELECT t2.key, p2.value
+ FROM src1 t2
+ LEFT OUTER JOIN src p2
+ ON (t2.key = p2.key)) j
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest_l1
+POSTHOOK: query: INSERT OVERWRITE TABLE dest_l1
+SELECT j.*
+FROM (SELECT t1.key, p1.value
+ FROM src1 t1
+ LEFT OUTER JOIN src p1
+ ON (t1.key = p1.key)
+ UNION ALL
+ SELECT t2.key, p2.value
+ FROM src1 t2
+ LEFT OUTER JOIN src p2
+ ON (t2.key = p2.key)) j
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest_l1
+POSTHOOK: Lineage: dest_l1.key EXPRESSION [(src1)t1.FieldSchema(name:key, type:string, comment:default), (src1)t2.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_l1.value EXPRESSION [(src)p2.FieldSchema(name:value, type:string, comment:default), (src)p1.FieldSchema(name:value, type:string, comment:default), ]