Posted to commits@hive.apache.org by na...@apache.org on 2010/04/07 01:04:12 UTC

svn commit: r931363 [1/27] - in /hadoop/hive/trunk: ./ contrib/src/test/results/clientpositive/ ql/src/java/org/apache/hadoop/hive/ql/ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/java/org/apache/hadoop/hive/ql/hooks/ ql/src/java/org/apache/hadoo...

Author: namit
Date: Tue Apr  6 23:04:06 2010
New Revision: 931363

URL: http://svn.apache.org/viewvc?rev=931363&view=rev
Log:
HIVE-1131. Add column lineage information to the pre execution hooks
(Ashish Thusoo via namit)
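
For reference, a minimal sketch of what a hook consuming the new lineage
argument might look like. The run(...) parameters mirror the call site added
in the Driver.java hunk below; the PreExecute/PostExecute interface changes
themselves appear in a later part of this commit, and the class name, package,
and the entrySet() view over LineageInfo's dependency index are assumptions
made purely for illustration:

    package org.example.hooks; // illustrative package, not part of this patch

    import java.util.Map;
    import java.util.Set;

    import org.apache.hadoop.hive.ql.hooks.LineageInfo;
    import org.apache.hadoop.hive.ql.hooks.LineageInfo.BaseColumnInfo;
    import org.apache.hadoop.hive.ql.hooks.LineageInfo.Dependency;
    import org.apache.hadoop.hive.ql.hooks.LineageInfo.DependencyKey;
    import org.apache.hadoop.hive.ql.hooks.PostExecute;
    import org.apache.hadoop.hive.ql.hooks.ReadEntity;
    import org.apache.hadoop.hive.ql.hooks.WriteEntity;
    import org.apache.hadoop.hive.ql.session.SessionState;
    import org.apache.hadoop.security.UserGroupInformation;

    public class LineageLoggingHook implements PostExecute {

      public void run(SessionState sess, Set<ReadEntity> inputs,
          Set<WriteEntity> outputs, LineageInfo lInfo,
          UserGroupInformation ugi) throws Exception {
        if (lInfo == null) {
          return; // no lineage was generated for this statement
        }
        // Assumed accessor: an entry-set view over the (DependencyKey -> Dependency)
        // index that LineageInfo keeps internally (see the field near the end of
        // the LineageInfo.java listing below).
        for (Map.Entry<DependencyKey, Dependency> e : lInfo.entrySet()) {
          DependencyKey target = e.getKey();
          Dependency dep = e.getValue();
          StringBuilder sb = new StringBuilder();
          sb.append(target.getDataContainer().getTable().getTableName())
            .append('.').append(target.getFieldSchema().getName())
            .append(' ').append(dep.getType()).append(" <- ");
          for (BaseColumnInfo base : dep.getBaseCols()) {
            // getColumn() may be null when the value depends on the whole source
            // row rather than on one column, e.g. count(1).
            sb.append(base.getTabAlias().getTable().getTableName()).append('.')
              .append(base.getColumn() == null ? "<row>" : base.getColumn().getName())
              .append(' ');
          }
          System.out.println(sb.toString());
        }
      }
    }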


Added:
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/hooks/LineageInfo.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/lib/Utils.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/ExprProcCtx.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/ExprProcFactory.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/Generator.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/LineageCtx.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/session/LineageState.java
Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/contrib/src/test/results/clientpositive/fileformat_base64.q.out
    hadoop/hive/trunk/contrib/src/test/results/clientpositive/serde_typedbytes.q.out
    hadoop/hive/trunk/contrib/src/test/results/clientpositive/serde_typedbytes2.q.out
    hadoop/hive/trunk/contrib/src/test/results/clientpositive/serde_typedbytes3.q.out
    hadoop/hive/trunk/contrib/src/test/results/clientpositive/serde_typedbytes4.q.out
    hadoop/hive/trunk/contrib/src/test/results/clientpositive/serde_typedbytes5.q.out
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/LateralViewJoinOperator.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecute.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/hooks/PreExecute.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MoveWork.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
    hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/PostExecutePrinter.java
    hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/PreExecutePrinter.java
    hadoop/hive/trunk/ql/src/test/results/clientnegative/fileformat_void_input.q.out
    hadoop/hive/trunk/ql/src/test/results/clientnegative/smb_bucketmapjoin.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/alter3.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/binary_output_format.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/bucket1.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/bucket2.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/bucket3.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/bucket4.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/bucket_groupby.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/bucketizedhiveinputformat.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin1.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin2.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin3.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin4.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin5.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin6.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/case_sensitivity.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/cast1.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/columnarserde_create_shortcut.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/combine1.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/create_escape.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/create_genericudf.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/create_insert_outputformat.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/create_like.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/create_udaf.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/create_view.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/disable_merge_for_bucketing.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/fileformat_sequencefile.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/fileformat_text.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/filter_join_breaktask.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/groupby1.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/groupby10.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/groupby11.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/groupby1_limit.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/groupby1_map.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/groupby1_map_nomap.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/groupby1_map_skew.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/groupby1_noskew.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/groupby2.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/groupby2_map.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/groupby2_map_skew.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/groupby2_noskew.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/groupby3.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/groupby3_map.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/groupby3_map_skew.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/groupby3_noskew.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/groupby4.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/groupby4_map.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/groupby4_map_skew.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/groupby4_noskew.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/groupby5.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/groupby5_map.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/groupby5_map_skew.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/groupby5_noskew.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/groupby6.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/groupby6_map.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/groupby6_map_skew.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/groupby6_noskew.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/groupby7.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/groupby7_map.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/groupby7_map_skew.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/groupby7_noskew.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/groupby8.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/groupby8_map.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/groupby8_map_skew.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/groupby8_noskew.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/groupby9.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/groupby_map_ppr.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/groupby_ppr.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/input11.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/input11_limit.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/input12.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/input13.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/input14.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/input14_limit.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/input17.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/input18.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/input1_limit.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/input20.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/input28.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/input30.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/input31.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/input32.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/input33.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/input34.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/input35.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/input36.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/input38.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/input39.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/input3_limit.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/input41.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/input5.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/input6.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/input7.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/input8.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/input9.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/input_columnarserde.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/input_dynamicserde.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/input_lazyserde.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/input_part1.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/input_part10.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/input_part2.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/input_part5.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/input_testsequencefile.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/input_testxpath.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/input_testxpath2.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/insert1.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/insertexternal1.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/join1.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/join14.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/join17.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/join2.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/join24.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/join25.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/join26.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/join27.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/join28.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/join29.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/join3.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/join30.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/join31.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/join32.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/join33.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/join34.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/join35.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/join36.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/join37.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/join38.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/join39.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/join4.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/join5.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/join6.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/join7.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/join8.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/join9.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/join_map_ppr.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/join_rc.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/loadpart1.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/mapreduce1.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/mapreduce2.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/mapreduce3.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/mapreduce4.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/mapreduce5.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/mapreduce6.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/mapreduce7.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/mapreduce8.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/merge1.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/multi_insert.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/notable_alias1.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/notable_alias2.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/null_column.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/partition_vs_table_metadata.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/partition_wise_fileformat.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/ppd_constant_expr.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/ppd_multi_insert.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/quote1.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/rand_partitionpruner2.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/rcfile_bigdata.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/rcfile_columnar.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/rcfile_default_format.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/rcfile_lazydecompress.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/rcfile_null_value.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/rcfile_union.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/reduce_deduplicate.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/sample1.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/sample2.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/sample4.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/sample5.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/sample6.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/sample7.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/scriptfile1.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/skewjoin.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin_6.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin_7.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/transform1.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/udf1.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/udf2.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/udf3.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/udf4.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/udf5.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/udf6.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/udf7.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/udf8.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_10_trims.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_concat_insert1.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_concat_insert2.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_concat_ws.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_get_json_object.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_length.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_reverse.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_testlength.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_testlength2.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/union10.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/union12.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/union17.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/union18.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/union19.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/union22.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/union3.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/union4.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/union6.q.out
    hadoop/hive/trunk/ql/src/test/results/compiler/plan/sample1.q.xml
    hadoop/hive/trunk/ql/src/test/results/compiler/plan/sample2.q.xml
    hadoop/hive/trunk/ql/src/test/results/compiler/plan/sample3.q.xml
    hadoop/hive/trunk/ql/src/test/results/compiler/plan/sample4.q.xml
    hadoop/hive/trunk/ql/src/test/results/compiler/plan/sample5.q.xml
    hadoop/hive/trunk/ql/src/test/results/compiler/plan/sample6.q.xml
    hadoop/hive/trunk/ql/src/test/results/compiler/plan/sample7.q.xml

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=931363&r1=931362&r2=931363&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Tue Apr  6 23:04:06 2010
@@ -66,6 +66,9 @@ Trunk -  Unreleased
     HIVE-1272. Add SymlinkTextInputFormat to Hive.
     (Guanghao Shen via zshao)
 
+    HIVE-1131. Add column lineage information to the pre execution hooks
+    (Ashish Thusoo via namit)
+
   IMPROVEMENTS
     HIVE-983. Function from_unixtime takes long.
     (Ning Zhang via zshao)

Modified: hadoop/hive/trunk/contrib/src/test/results/clientpositive/fileformat_base64.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/contrib/src/test/results/clientpositive/fileformat_base64.q.out?rev=931363&r1=931362&r2=931363&view=diff
==============================================================================
--- hadoop/hive/trunk/contrib/src/test/results/clientpositive/fileformat_base64.q.out (original)
+++ hadoop/hive/trunk/contrib/src/test/results/clientpositive/fileformat_base64.q.out Tue Apr  6 23:04:06 2010
@@ -47,7 +47,7 @@ POSTHOOK: type: DESCTABLE
 key	int	
 value	string	
 	 	 
-Detailed Table Information	Table(tableName:base64_test, dbName:default, owner:njain, createTime:1253817673, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:key, type:int, comment:null), FieldSchema(name:value, type:string, comment:null)], location:file:/data/users/njain/hive5/hive5/build/ql/test/data/warehouse/base64_test, inputFormat:org.apache.hadoop.hive.contrib.fileformat.base64.Base64TextInputFormat, outputFormat:org.apache.hadoop.hive.contrib.fileformat.base64.Base64TextOutputFormat, compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, parameters:{serialization.format=1}), bucketCols:[], sortCols:[], parameters:{}), partitionKeys:[], parameters:{})	
+Detailed Table Information	Table(tableName:base64_test, dbName:default, owner:athusoo, createTime:1270517100, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:key, type:int, comment:null), FieldSchema(name:value, type:string, comment:null)], location:file:/data/users/athusoo/apache_workspaces/hive_trunk_ws1/.ptest_3/build/contrib/test/data/warehouse/base64_test, inputFormat:org.apache.hadoop.hive.contrib.fileformat.base64.Base64TextInputFormat, outputFormat:org.apache.hadoop.hive.contrib.fileformat.base64.Base64TextOutputFormat, compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, parameters:{serialization.format=1}), bucketCols:[], sortCols:[], parameters:{}), partitionKeys:[], parameters:{transient_lastDdlTime=1270517100}, viewOriginalText:null, viewExpandedText:null, tableType:MANAGED_TABLE)	
 PREHOOK: query: FROM src
 INSERT OVERWRITE TABLE base64_test
 SELECT key, value WHERE key < 10
@@ -60,14 +60,18 @@ SELECT key, value WHERE key < 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 POSTHOOK: Output: default@base64_test
+POSTHOOK: Lineage: base64_test.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: base64_test.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
 PREHOOK: query: SELECT * FROM base64_test
 PREHOOK: type: QUERY
 PREHOOK: Input: default@base64_test
-PREHOOK: Output: file:/data/users/njain/hive5/hive5/build/ql/tmp/1384563275/10000
+PREHOOK: Output: file:/data/users/athusoo/apache_workspaces/hive_trunk_ws1/.ptest_3/build/contrib/scratchdir/hive_2010-04-05_18-25-05_682_3115169889278239261/10000
 POSTHOOK: query: SELECT * FROM base64_test
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@base64_test
-POSTHOOK: Output: file:/data/users/njain/hive5/hive5/build/ql/tmp/1384563275/10000
+POSTHOOK: Output: file:/data/users/athusoo/apache_workspaces/hive_trunk_ws1/.ptest_3/build/contrib/scratchdir/hive_2010-04-05_18-25-05_682_3115169889278239261/10000
+POSTHOOK: Lineage: base64_test.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: base64_test.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
 0	val_0
 4	val_4
 8	val_8
@@ -96,14 +100,22 @@ SELECT key, value WHERE key < 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 POSTHOOK: Output: default@base64_test
+POSTHOOK: Lineage: base64_test.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: base64_test.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: base64_test.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: base64_test.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
 PREHOOK: query: SELECT * FROM base64_test
 PREHOOK: type: QUERY
 PREHOOK: Input: default@base64_test
-PREHOOK: Output: file:/data/users/njain/hive5/hive5/build/ql/tmp/1705395068/10000
+PREHOOK: Output: file:/data/users/athusoo/apache_workspaces/hive_trunk_ws1/.ptest_3/build/contrib/scratchdir/hive_2010-04-05_18-25-10_137_3364760810303279176/10000
 POSTHOOK: query: SELECT * FROM base64_test
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@base64_test
-POSTHOOK: Output: file:/data/users/njain/hive5/hive5/build/ql/tmp/1705395068/10000
+POSTHOOK: Output: file:/data/users/athusoo/apache_workspaces/hive_trunk_ws1/.ptest_3/build/contrib/scratchdir/hive_2010-04-05_18-25-10_137_3364760810303279176/10000
+POSTHOOK: Lineage: base64_test.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: base64_test.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: base64_test.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: base64_test.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
 0	val_0
 4	val_4
 8	val_8
@@ -119,3 +131,7 @@ PREHOOK: type: DROPTABLE
 POSTHOOK: query: DROP TABLE base64_test
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Output: default@base64_test
+POSTHOOK: Lineage: base64_test.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: base64_test.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: base64_test.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: base64_test.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]

Modified: hadoop/hive/trunk/contrib/src/test/results/clientpositive/serde_typedbytes.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/contrib/src/test/results/clientpositive/serde_typedbytes.q.out?rev=931363&r1=931362&r2=931363&view=diff
==============================================================================
--- hadoop/hive/trunk/contrib/src/test/results/clientpositive/serde_typedbytes.q.out (original)
+++ hadoop/hive/trunk/contrib/src/test/results/clientpositive/serde_typedbytes.q.out Tue Apr  6 23:04:06 2010
@@ -88,7 +88,7 @@ STAGE PLANS:
     Move Operator
       files:
           hdfs directory: true
-          destination: file:/data/users/zshao/hadoop_hive/trunk/build/contrib/scratchdir/1970906774/10000
+          destination: file:/data/users/athusoo/apache_workspaces/hive_trunk_ws1/.ptest_3/build/contrib/scratchdir/hive_2010-04-05_18-25-42_688_7908626219117423181/10000
 
   Stage: Stage-0
     Move Operator
@@ -103,7 +103,7 @@ STAGE PLANS:
   Stage: Stage-2
     Map Reduce
       Alias -> Map Operator Tree:
-        file:/data/users/zshao/hadoop_hive/trunk/build/contrib/scratchdir/1458788735/10002 
+        file:/data/users/athusoo/apache_workspaces/hive_trunk_ws1/.ptest_3/build/contrib/scratchdir/hive_2010-04-05_18-25-42_688_7908626219117423181/10002 
             Reduce Output Operator
               sort order: 
               Map-reduce partition columns:
@@ -151,14 +151,18 @@ INSERT OVERWRITE TABLE dest1 SELECT tkey
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.key SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
 PREHOOK: query: SELECT dest1.* FROM dest1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dest1
-PREHOOK: Output: file:/data/users/zshao/hadoop_hive/trunk/build/contrib/scratchdir/141823915/10000
+PREHOOK: Output: file:/data/users/athusoo/apache_workspaces/hive_trunk_ws1/.ptest_3/build/contrib/scratchdir/hive_2010-04-05_18-25-49_238_6880226342565911331/10000
 POSTHOOK: query: SELECT dest1.* FROM dest1
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@dest1
-POSTHOOK: Output: file:/data/users/zshao/hadoop_hive/trunk/build/contrib/scratchdir/141823915/10000
+POSTHOOK: Output: file:/data/users/athusoo/apache_workspaces/hive_trunk_ws1/.ptest_3/build/contrib/scratchdir/hive_2010-04-05_18-25-49_238_6880226342565911331/10000
+POSTHOOK: Lineage: dest1.key SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
 238	val_238
 86	val_86
 311	val_311
@@ -664,3 +668,5 @@ PREHOOK: type: DROPTABLE
 POSTHOOK: query: drop table dest1
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.key SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]

Modified: hadoop/hive/trunk/contrib/src/test/results/clientpositive/serde_typedbytes2.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/contrib/src/test/results/clientpositive/serde_typedbytes2.q.out?rev=931363&r1=931362&r2=931363&view=diff
==============================================================================
--- hadoop/hive/trunk/contrib/src/test/results/clientpositive/serde_typedbytes2.q.out (original)
+++ hadoop/hive/trunk/contrib/src/test/results/clientpositive/serde_typedbytes2.q.out Tue Apr  6 23:04:06 2010
@@ -81,7 +81,7 @@ STAGE PLANS:
     Move Operator
       files:
           hdfs directory: true
-          destination: file:/data/users/zshao/hadoop_hive/trunk/build/contrib/scratchdir/124860625/10000
+          destination: file:/data/users/athusoo/apache_workspaces/hive_trunk_ws1/.ptest_3/build/contrib/scratchdir/hive_2010-04-05_18-25-52_775_2497627275831457391/10000
 
   Stage: Stage-0
     Move Operator
@@ -96,7 +96,7 @@ STAGE PLANS:
   Stage: Stage-2
     Map Reduce
       Alias -> Map Operator Tree:
-        file:/data/users/zshao/hadoop_hive/trunk/build/contrib/scratchdir/1806478393/10002 
+        file:/data/users/athusoo/apache_workspaces/hive_trunk_ws1/.ptest_3/build/contrib/scratchdir/hive_2010-04-05_18-25-52_775_2497627275831457391/10002 
             Reduce Output Operator
               sort order: 
               Map-reduce partition columns:
@@ -144,14 +144,18 @@ INSERT OVERWRITE TABLE dest1 SELECT tkey
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.key SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
 PREHOOK: query: SELECT dest1.* FROM dest1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dest1
-PREHOOK: Output: file:/data/users/zshao/hadoop_hive/trunk/build/contrib/scratchdir/1698289796/10000
+PREHOOK: Output: file:/data/users/athusoo/apache_workspaces/hive_trunk_ws1/.ptest_3/build/contrib/scratchdir/hive_2010-04-05_18-25-57_501_345062460969212417/10000
 POSTHOOK: query: SELECT dest1.* FROM dest1
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@dest1
-POSTHOOK: Output: file:/data/users/zshao/hadoop_hive/trunk/build/contrib/scratchdir/1698289796/10000
+POSTHOOK: Output: file:/data/users/athusoo/apache_workspaces/hive_trunk_ws1/.ptest_3/build/contrib/scratchdir/hive_2010-04-05_18-25-57_501_345062460969212417/10000
+POSTHOOK: Lineage: dest1.key SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
 238	val_238
 86	val_86
 311	val_311
@@ -657,3 +661,5 @@ PREHOOK: type: DROPTABLE
 POSTHOOK: query: drop table dest1
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.key SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]

Modified: hadoop/hive/trunk/contrib/src/test/results/clientpositive/serde_typedbytes3.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/contrib/src/test/results/clientpositive/serde_typedbytes3.q.out?rev=931363&r1=931362&r2=931363&view=diff
==============================================================================
--- hadoop/hive/trunk/contrib/src/test/results/clientpositive/serde_typedbytes3.q.out (original)
+++ hadoop/hive/trunk/contrib/src/test/results/clientpositive/serde_typedbytes3.q.out Tue Apr  6 23:04:06 2010
@@ -81,7 +81,7 @@ STAGE PLANS:
     Move Operator
       files:
           hdfs directory: true
-          destination: file:/data/users/zshao/hadoop_hive/trunk/build/contrib/scratchdir/1218273829/10000
+          destination: file:/data/users/athusoo/apache_workspaces/hive_trunk_ws1/.ptest_3/build/contrib/scratchdir/hive_2010-04-05_18-26-00_389_1203185612969029629/10000
 
   Stage: Stage-0
     Move Operator
@@ -96,7 +96,7 @@ STAGE PLANS:
   Stage: Stage-2
     Map Reduce
       Alias -> Map Operator Tree:
-        file:/data/users/zshao/hadoop_hive/trunk/build/contrib/scratchdir/230578366/10002 
+        file:/data/users/athusoo/apache_workspaces/hive_trunk_ws1/.ptest_3/build/contrib/scratchdir/hive_2010-04-05_18-26-00_389_1203185612969029629/10002 
             Reduce Output Operator
               sort order: 
               Map-reduce partition columns:
@@ -144,14 +144,18 @@ INSERT OVERWRITE TABLE dest1 SELECT tkey
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.key SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
 PREHOOK: query: SELECT dest1.* FROM dest1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dest1
-PREHOOK: Output: file:/data/users/zshao/hadoop_hive/trunk/build/contrib/scratchdir/2038033327/10000
+PREHOOK: Output: file:/data/users/athusoo/apache_workspaces/hive_trunk_ws1/.ptest_3/build/contrib/scratchdir/hive_2010-04-05_18-26-07_272_5588477151591042367/10000
 POSTHOOK: query: SELECT dest1.* FROM dest1
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@dest1
-POSTHOOK: Output: file:/data/users/zshao/hadoop_hive/trunk/build/contrib/scratchdir/2038033327/10000
+POSTHOOK: Output: file:/data/users/athusoo/apache_workspaces/hive_trunk_ws1/.ptest_3/build/contrib/scratchdir/hive_2010-04-05_18-26-07_272_5588477151591042367/10000
+POSTHOOK: Lineage: dest1.key SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
 238	val_238
 86	val_86
 311	val_311
@@ -657,3 +661,5 @@ PREHOOK: type: DROPTABLE
 POSTHOOK: query: drop table dest1
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.key SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]

Modified: hadoop/hive/trunk/contrib/src/test/results/clientpositive/serde_typedbytes4.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/contrib/src/test/results/clientpositive/serde_typedbytes4.q.out?rev=931363&r1=931362&r2=931363&view=diff
==============================================================================
--- hadoop/hive/trunk/contrib/src/test/results/clientpositive/serde_typedbytes4.q.out (original)
+++ hadoop/hive/trunk/contrib/src/test/results/clientpositive/serde_typedbytes4.q.out Tue Apr  6 23:04:06 2010
@@ -133,14 +133,18 @@ INSERT OVERWRITE TABLE dest1 SELECT tkey
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.key SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
 PREHOOK: query: SELECT dest1.* FROM dest1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dest1
-PREHOOK: Output: file:/data/users/njain/hive1/hive1/build/ql/tmp/1776839815/10000
+PREHOOK: Output: file:/data/users/athusoo/apache_workspaces/hive_trunk_ws1/.ptest_3/build/contrib/scratchdir/hive_2010-04-05_18-26-14_856_1577877725443660966/10000
 POSTHOOK: query: SELECT dest1.* FROM dest1
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@dest1
-POSTHOOK: Output: file:/data/users/njain/hive1/hive1/build/ql/tmp/1776839815/10000
+POSTHOOK: Output: file:/data/users/athusoo/apache_workspaces/hive_trunk_ws1/.ptest_3/build/contrib/scratchdir/hive_2010-04-05_18-26-14_856_1577877725443660966/10000
+POSTHOOK: Lineage: dest1.key SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
 0	val_0
 0	val_0
 0	val_0
@@ -230,3 +234,5 @@ PREHOOK: type: DROPTABLE
 POSTHOOK: query: drop table dest1
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.key SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]

Modified: hadoop/hive/trunk/contrib/src/test/results/clientpositive/serde_typedbytes5.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/contrib/src/test/results/clientpositive/serde_typedbytes5.q.out?rev=931363&r1=931362&r2=931363&view=diff
==============================================================================
--- hadoop/hive/trunk/contrib/src/test/results/clientpositive/serde_typedbytes5.q.out (original)
+++ hadoop/hive/trunk/contrib/src/test/results/clientpositive/serde_typedbytes5.q.out Tue Apr  6 23:04:06 2010
@@ -88,7 +88,7 @@ STAGE PLANS:
     Move Operator
       files:
           hdfs directory: true
-          destination: file:/data/users/njain/hive1/hive1/build/contrib/scratchdir/hive_2010-02-26_16-46-00_966_8770926542722067748/10000
+          destination: file:/data/users/athusoo/apache_workspaces/hive_trunk_ws1/.ptest_3/build/contrib/scratchdir/hive_2010-04-05_18-26-17_395_398422767039417691/10000
 
   Stage: Stage-0
     Move Operator
@@ -103,7 +103,7 @@ STAGE PLANS:
   Stage: Stage-2
     Map Reduce
       Alias -> Map Operator Tree:
-        file:/data/users/njain/hive1/hive1/build/contrib/scratchdir/hive_2010-02-26_16-46-00_966_8770926542722067748/10002 
+        file:/data/users/athusoo/apache_workspaces/hive_trunk_ws1/.ptest_3/build/contrib/scratchdir/hive_2010-04-05_18-26-17_395_398422767039417691/10002 
             Reduce Output Operator
               sort order: 
               Map-reduce partition columns:
@@ -151,14 +151,18 @@ INSERT OVERWRITE TABLE dest1 SELECT tkey
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.key SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
 PREHOOK: query: SELECT dest1.* FROM dest1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dest1
-PREHOOK: Output: file:/data/users/njain/hive1/hive1/build/contrib/scratchdir/hive_2010-02-26_16-46-05_557_2000233677894920906/10000
+PREHOOK: Output: file:/data/users/athusoo/apache_workspaces/hive_trunk_ws1/.ptest_3/build/contrib/scratchdir/hive_2010-04-05_18-26-21_626_1802489414598698423/10000
 POSTHOOK: query: SELECT dest1.* FROM dest1
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@dest1
-POSTHOOK: Output: file:/data/users/njain/hive1/hive1/build/contrib/scratchdir/hive_2010-02-26_16-46-05_557_2000233677894920906/10000
+POSTHOOK: Output: file:/data/users/athusoo/apache_workspaces/hive_trunk_ws1/.ptest_3/build/contrib/scratchdir/hive_2010-04-05_18-26-21_626_1802489414598698423/10000
+POSTHOOK: Lineage: dest1.key SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
 238	val_238
 86	val_86
 311	val_311
@@ -664,3 +668,5 @@ PREHOOK: type: DROPTABLE
 POSTHOOK: query: drop table dest1
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.key SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Driver.java?rev=931363&r1=931362&r2=931363&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Driver.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Driver.java Tue Apr  6 23:04:06 2010
@@ -162,7 +162,7 @@ public class Driver implements CommandPr
       List<FieldSchema> lst = sem.getResultSchema();
       schema = new Schema(lst, null);
     } else if (sem.getFetchTask() != null) {
-      FetchTask ft = (FetchTask) sem.getFetchTask();
+      FetchTask ft = sem.getFetchTask();
       TableDesc td = ft.getTblDesc();
       // partitioned tables don't have tableDesc set on the FetchTask. Instead
       // they have a list of PartitionDesc objects, each with a table desc.
@@ -294,7 +294,7 @@ public class Driver implements CommandPr
 
     try {
       ctx = new Context(conf);
-      
+
       ParseDriver pd = new ParseDriver();
       ASTNode tree = pd.parse(command, ctx);
       tree = ParseUtils.findRootNonNullToken(tree);
@@ -317,25 +317,25 @@ public class Driver implements CommandPr
       schema = getSchema(sem, conf);
 
       // Serialize the query plan
-      //   get temp file name and remove file: 
+      //   get temp file name and remove file:
       String queryPlanFileName = ctx.getLocalScratchDir() + Path.SEPARATOR_CHAR
           + "queryplan.xml";
       LOG.info("query plan = " + queryPlanFileName);
       queryPlanFileName = new Path(queryPlanFileName).toUri().getPath();
-      
-      //   serialize the queryPlan 
+
+      //   serialize the queryPlan
       FileOutputStream fos = new FileOutputStream(queryPlanFileName);
       Utilities.serializeQueryPlan(plan, fos);
       fos.close();
-      
-      //   deserialize the queryPlan 
+
+      //   deserialize the queryPlan
       FileInputStream fis = new FileInputStream(queryPlanFileName);
       QueryPlan newPlan = Utilities.deserializeQueryPlan(fis, conf);
       fis.close();
-      
+
       // Use the deserialized plan
       plan = newPlan;
-      
+
       // initialize FetchTask right here
       if (plan.getFetchTask() != null) {
         plan.getFetchTask().initialize(conf, plan, null);
@@ -540,6 +540,7 @@ public class Driver implements CommandPr
       // Get all the post execution hooks and execute them.
       for (PostExecute peh : getPostExecHooks()) {
         peh.run(SessionState.get(), plan.getInputs(), plan.getOutputs(),
+            (SessionState.get() != null ? SessionState.get().getLineageState().getLineageInfo() : null),
             UnixUserGroupInformation.readFromConf(conf,
                 UnixUserGroupInformation.UGI_PROPERTY_NAME));
       }
@@ -676,7 +677,7 @@ public class Driver implements CommandPr
 
   public boolean getResults(ArrayList<String> res) throws IOException {
     if (plan != null && plan.getFetchTask() != null) {
-      FetchTask ft = (FetchTask) plan.getFetchTask();
+      FetchTask ft = plan.getFetchTask();
       ft.setMaxRows(maxRows);
       return ft.fetch(res);
     }

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java?rev=931363&r1=931362&r2=931363&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java Tue Apr  6 23:04:06 2010
@@ -39,6 +39,7 @@ import org.apache.hadoop.hive.ql.exec.Ex
 import org.apache.hadoop.hive.ql.exec.FetchTask;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.Task;
+import org.apache.hadoop.hive.ql.hooks.LineageInfo;
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
 import org.apache.hadoop.hive.ql.hooks.WriteEntity;
 import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
@@ -73,6 +74,10 @@ public class QueryPlan implements Serial
    * to the outputs here. 
    */
   private HashSet<WriteEntity> outputs;
+  /**
+   * Lineage information for the query.
+   */
+  protected LineageInfo linfo;
 
   private HashMap<String, String> idToTableNameMap;
 
@@ -94,6 +99,7 @@ public class QueryPlan implements Serial
     // Note that inputs and outputs can be changed when the query gets executed
     inputs = sem.getInputs();
     outputs = sem.getOutputs();
+    linfo = sem.getLineageInfo();
     idToTableNameMap = new HashMap<String, String>(sem.getIdToTableNameMap());
 
     queryId = makeQueryId();
@@ -711,4 +717,21 @@ public class QueryPlan implements Serial
     this.started = started;
   }
 
+  /**
+   * Gets the lineage information.
+   * 
+   * @return LineageInfo associated with the query.
+   */
+  public LineageInfo getLineageInfo() {
+    return linfo;
+  }
+  
+  /**
+   * Sets the lineage information.
+   * 
+   * @param linfo The LineageInfo structure that is set in the optimization phase.
+   */
+  public void setLineageInfo(LineageInfo linfo) {
+    this.linfo = linfo;
+  }  
 }
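
For code that already holds a compiled QueryPlan rather than implementing a
hook, the accessor added above exposes the same structure directly; a tiny
hedged sketch, with the wrapper class being illustrative only:

    import org.apache.hadoop.hive.ql.QueryPlan;
    import org.apache.hadoop.hive.ql.hooks.LineageInfo;

    public final class PlanLineage {
      private PlanLineage() {
      }

      /**
       * Returns the lineage the optimizer attached to the plan, or null when
       * none was generated; the QueryPlan constructor above copies the field
       * straight from the semantic analyzer.
       */
      public static LineageInfo lineageOf(QueryPlan plan) {
        return (plan == null) ? null : plan.getLineageInfo();
      }
    }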

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/LateralViewJoinOperator.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/LateralViewJoinOperator.java?rev=931363&r1=931362&r2=931363&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/LateralViewJoinOperator.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/LateralViewJoinOperator.java Tue Apr  6 23:04:06 2010
@@ -121,4 +121,9 @@ public class LateralViewJoinOperator ext
 
   }
 
+  @Override
+  public String getName() {
+    return "LVJ";
+  }
+
 }

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java?rev=931363&r1=931362&r2=931363&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java Tue Apr  6 23:04:06 2010
@@ -31,6 +31,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.MetaStoreUtils;
 import org.apache.hadoop.hive.ql.hooks.WriteEntity;
+import org.apache.hadoop.hive.ql.hooks.LineageInfo.DataContainer;
 import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.Partition;
@@ -39,6 +40,7 @@ import org.apache.hadoop.hive.ql.plan.Lo
 import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
 import org.apache.hadoop.hive.ql.plan.MoveWork;
 import org.apache.hadoop.hive.ql.plan.api.StageType;
+import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.util.StringUtils;
 
 /**
@@ -151,7 +153,10 @@ public class MoveTask extends Task<MoveW
           }
         }
 
+        // Create a data container
+        DataContainer dc = null;
         if (tbd.getPartitionSpec().size() == 0) {
+          dc = new DataContainer(table.getTTable());
           db.loadTable(new Path(tbd.getSourceDir()), tbd.getTable()
               .getTableName(), tbd.getReplace(), new Path(tbd.getTmpDir()));
           if (work.getOutputs() != null) {
@@ -164,10 +169,16 @@ public class MoveTask extends Task<MoveW
               new Path(tbd.getTmpDir()));
           Partition partn = db.getPartition(table, tbd.getPartitionSpec(),
               false);
+          dc = new DataContainer(table.getTTable(), partn.getTPartition());
           if (work.getOutputs() != null) {
             work.getOutputs().add(new WriteEntity(partn));
           }
         }
+
+        if (SessionState.get() != null) {
+          SessionState.get().getLineageState()
+            .setLineage(tbd.getSourceDir(), dc, table.getCols());
+        }
       }
 
       return 0;
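
The branch above records, for each load, whether the rows landed in a bare
table or in one partition of it; the DataContainer class added in
LineageInfo.java below folds the two cases into a single handle. A small
illustration using only the constructors and isPartition() shown in that
file (the wrapper class and method are hypothetical):

    import org.apache.hadoop.hive.metastore.api.Partition;
    import org.apache.hadoop.hive.metastore.api.Table;
    import org.apache.hadoop.hive.ql.hooks.LineageInfo.DataContainer;

    public final class LoadTargets {
      private LoadTargets() {
      }

      /** Wraps a load target the same way MoveTask does above. */
      public static DataContainer targetOf(Table table, Partition partition) {
        // A null partition corresponds to loading an unpartitioned table.
        DataContainer dc = (partition == null)
            ? new DataContainer(table)
            : new DataContainer(table, partition);
        // Downstream consumers use isPartition() to tell the two cases apart.
        assert dc.isPartition() == (partition != null);
        return dc;
      }
    }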

Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/hooks/LineageInfo.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/hooks/LineageInfo.java?rev=931363&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/hooks/LineageInfo.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/hooks/LineageInfo.java Tue Apr  6 23:04:06 2010
@@ -0,0 +1,403 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.hooks;
+
+import java.io.Serializable;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.Partition;
+import org.apache.hadoop.hive.metastore.api.Table;
+
+/**
+ * This class contains the lineage information that is passed
+ * to the PreExecution hook.
+ */
+public class LineageInfo implements Serializable {
+
+  /**
+   * Serial version id.
+   */
+  private static final long serialVersionUID = 1L;
+
+  /**
+   * Enum to track dependency. This enum has the following values:
+   * 1. SIMPLE - Indicates that the column is derived from another table column
+   *             with no transformations e.g. T2.c1 = T1.c1.
+   * 2. EXPRESSION - Indicates that the column is derived from a UDF, UDAF, UDTF or
+   *                 set operations like union on columns on other tables
+   *                 e.g. T2.c1 = T1.c1 + T3.c1.
+   * 3. SCRIPT - Indicates that the column is derived from the output
+   *             of a user script through a TRANSFORM, MAP or REDUCE syntax.
+   */
+  public static enum DependencyType {
+    SIMPLE, EXPRESSION, SCRIPT
+  }
+
+  /**
+   * Table or Partition data container. We need this class because the output
+   * of the query can either go to a table or a partition within a table. The
+   * data container class subsumes both of these.
+   */
+  public static class DataContainer implements Serializable {
+
+    /**
+     * Serial version id.
+     */
+    private static final long serialVersionUID = 1L;
+
+    /**
+     * The table in case this container is a table.
+     */
+    private final Table tab;
+
+    /**
+     * The partition in case this container is a partition.
+     */
+    private final Partition part;
+
+    /**
+     * Constructor for non partitioned tables.
+     *
+     * @param tab The associated table.
+     */
+    public DataContainer(Table tab) {
+      this.tab = tab;
+      this.part = null;
+    }
+
+    /**
+     * Constructor for a partitioned table's partition.
+     *
+     * @param part The associated partition.
+     */
+    public DataContainer(Table tab, Partition part) {
+      this.tab = tab;
+      this.part = part;
+    }
+
+    /**
+     * Returns true in case this data container is a partition.
+     *
+     * @return boolean TRUE if the container is a table partition.
+     */
+    public boolean isPartition() {
+      return (part != null);
+    }
+
+    public Table getTable() {
+      return this.tab;
+    }
+
+    public Partition getPartition() {
+      return this.part;
+    }
+  }
+
+  /**
+   * Class that captures the lookup key for the dependency. The dependency
+   * is from (DataContainer, FieldSchema) to a Dependency structure. This
+   * class captures the (DataContainer, FieldSchema) tuple.
+   */
+  public static class DependencyKey implements Serializable {
+
+    /**
+     * Serial version id.
+     */
+    private static final long serialVersionUID = 1L;
+
+    /**
+     * The data container for this key.
+     */
+    private final DataContainer dc;
+
+    /**
+     * The field schema for this key.
+     */
+    private final FieldSchema fld;
+
+    /**
+     * Constructor.
+     *
+     * @param dc The associated data container.
+     * @param fld The associated field schema.
+     */
+    public DependencyKey(DataContainer dc, FieldSchema fld) {
+      this.dc = dc;
+      this.fld = fld;
+    }
+
+    public DataContainer getDataContainer() {
+      return this.dc;
+    }
+
+    public FieldSchema getFieldSchema() {
+      return this.fld;
+    }
+
+    /* (non-Javadoc)
+     * @see java.lang.Object#hashCode()
+     */
+    @Override
+    public int hashCode() {
+      final int prime = 31;
+      int result = 1;
+      result = prime * result + ((dc == null) ? 0 : dc.hashCode());
+      result = prime * result + ((fld == null) ? 0 : fld.hashCode());
+      return result;
+    }
+
+    /* (non-Javadoc)
+     * @see java.lang.Object#equals(java.lang.Object)
+     */
+    @Override
+    public boolean equals(Object obj) {
+      if (this == obj) {
+        return true;
+      }
+      if (obj == null) {
+        return false;
+      }
+      if (getClass() != obj.getClass()) {
+        return false;
+      }
+      DependencyKey other = (DependencyKey) obj;
+      if (dc != other.dc) {
+        return false;
+      }
+      if (fld != other.fld) {
+        return false;
+      }
+      return true;
+    }
+  }
+
+  /**
+   * Base Column information.
+   */
+  public static class BaseColumnInfo implements Serializable {
+
+    /**
+     * Serial version id.
+     */
+    private static final long serialVersionUID = 1L;
+
+    /**
+     * The table and alias info encapsulated in a different class.
+     */
+    private TableAliasInfo tabAlias;
+
+    /**
+     * The metastore column information. The column can be null,
+     * which denotes that the expression depends on the row of the
+     * table as a whole rather than on a particular column. This can
+     * happen in the case of count(1).
+     */
+    private FieldSchema column;
+
+    /**
+     * @return the tabAlias
+     */
+    public TableAliasInfo getTabAlias() {
+      return tabAlias;
+    }
+
+    /**
+     * @param tabAlias the tabAlias to set
+     */
+    public void setTabAlias(TableAliasInfo tabAlias) {
+      this.tabAlias = tabAlias;
+    }
+
+    /**
+     * @return the column
+     */
+    public FieldSchema getColumn() {
+      return column;
+    }
+
+    /**
+     * @param column the column to set
+     */
+    public void setColumn(FieldSchema column) {
+      this.column = column;
+    }
+  }
+
+  public static class TableAliasInfo implements Serializable {
+
+    /**
+     * Serial version id.
+     */
+    private static final long serialVersionUID = 1L;
+
+    /**
+     * The alias for the table.
+     */
+    private String alias;
+
+    /**
+     * The metastore table information.
+     */
+    private Table table;
+
+    /**
+     * @return the alias
+     */
+    public String getAlias() {
+      return alias;
+    }
+
+    /**
+     * @param alias the alias to set
+     */
+    public void setAlias(String alias) {
+      this.alias = alias;
+    }
+
+    /**
+     * @return the table
+     */
+    public Table getTable() {
+      return table;
+    }
+
+    /**
+     * @param table the table to set
+     */
+    public void setTable(Table table) {
+      this.table = table;
+    }
+  }
+
+  /**
+   * This class tracks the dependency information for the base column.
+   */
+  public static class Dependency implements Serializable {
+
+    /**
+     * Serial version id.
+     */
+    private static final long serialVersionUID = 1L;
+
+    /**
+     * The type of dependency.
+     */
+    private DependencyType type;
+
+    /**
+     * Expression string for the dependency.
+     */
+    private String expr;
+
+    /**
+     * The list of base columns that the particular column depends on.
+     */
+    private List<BaseColumnInfo> baseCols;
+
+    /**
+     * @return the type
+     */
+    public DependencyType getType() {
+      return type;
+    }
+
+    /**
+     * @param type the type to set
+     */
+    public void setType(DependencyType type) {
+      this.type = type;
+    }
+
+    /**
+     * @return the expr
+     */
+    public String getExpr() {
+      return expr;
+    }
+
+    /**
+     * @param expr the expr to set
+     */
+    public void setExpr(String expr) {
+      this.expr = expr;
+    }
+
+    /**
+     * @return the baseCols
+     */
+    public List<BaseColumnInfo> getBaseCols() {
+      return baseCols;
+    }
+
+    /**
+     * @param baseCols the baseCols to set
+     */
+    public void setBaseCols(List<BaseColumnInfo> baseCols) {
+      this.baseCols = baseCols;
+    }
+  }
+
+  /**
+   * The map is an index from each (DataContainer, FieldSchema) tuple to the
+   * dependency for that tuple. This is used to generate the dependency
+   * information during the walk of the operator tree.
+   */
+  protected Map<DependencyKey, Dependency> index;
+
+  /**
+   * Constructor.
+   */
+  public LineageInfo() {
+    index = new LinkedHashMap<DependencyKey, Dependency>();
+  }
+
+  /**
+   * Gets the dependency for a (data container, column) tuple.
+   * @param dc The data container of the column whose dependency is being inspected.
+   * @param col The column whose dependency is being inspected.
+   * @return Dependency for that particular (data container, column) tuple.
+   *         null if no dependency is found.
+   */
+  public Dependency getDependency(DataContainer dc, FieldSchema col) {
+    return index.get(new DependencyKey(dc, col));
+  }
+
+  /**
+   * Puts the dependency for a (data container, column) tuple.
+   * @param dc The data container whose dependency is being inserted.
+   * @param col The column whose dependency is being inserted.
+   * @param dep The dependency.
+   */
+  public void putDependency(DataContainer dc, FieldSchema col, Dependency dep) {
+    index.put(new DependencyKey(dc, col), dep);
+  }
+
+  /**
+   * Gets the entry set on this structure.
+   *
+   * @return LineageInfo entry set
+   */
+  public Set<Map.Entry<DependencyKey, Dependency>> entrySet() {
+    return index.entrySet();
+  }
+}

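As a rough illustration of how the LineageInfo structures above fit together, the
following sketch records and then reads back the lineage of one destination column.
It is not part of the patch: destTable, destCol, srcAliasInfo and srcCol are assumed
to have been obtained from the metastore, and imports (java.util.ArrayList and the
LineageInfo nested classes) are elided.

    // Sketch only: populate the index for one destination column and read it back.
    LineageInfo lineage = new LineageInfo();

    // The target of the insert; the single-argument constructor is for
    // non-partitioned tables.
    LineageInfo.DataContainer dc = new LineageInfo.DataContainer(destTable);

    // Describe the source column the destination column derives from.
    LineageInfo.BaseColumnInfo baseCol = new LineageInfo.BaseColumnInfo();
    baseCol.setTabAlias(srcAliasInfo);   // TableAliasInfo of the source table
    baseCol.setColumn(srcCol);           // FieldSchema of the source column

    LineageInfo.Dependency dep = new LineageInfo.Dependency();
    dep.setType(LineageInfo.DependencyType.SIMPLE);
    dep.setExpr(null);                   // no transforming expression
    dep.setBaseCols(new java.util.ArrayList<LineageInfo.BaseColumnInfo>());
    dep.getBaseCols().add(baseCol);

    lineage.putDependency(dc, destCol, dep);

    // Later, e.g. from a post-execution hook:
    LineageInfo.Dependency d = lineage.getDependency(dc, destCol);
    for (LineageInfo.BaseColumnInfo bci : d.getBaseCols()) {
      System.out.println(destCol.getName() + " <- "
          + bci.getTabAlias().getTable().getTableName() + "."
          + (bci.getColumn() == null ? "*" : bci.getColumn().getName()));
    }

Note that DataContainer does not override equals/hashCode, so the read-back relies
on reusing the same DataContainer instance that was used when the dependency was
recorded.
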
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecute.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecute.java?rev=931363&r1=931362&r2=931363&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecute.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecute.java Tue Apr  6 23:04:06 2010
@@ -31,17 +31,20 @@ public interface PostExecute {
 
   /**
    * The run command that is called just before the execution of the query.
-   * 
+   *
    * @param sess
    *          The session state.
    * @param inputs
    *          The set of input tables and partitions.
    * @param outputs
    *          The set of output tables, partitions, local and hdfs directories.
+   * @param lInfo
+   *          The column-level lineage information.
    * @param ugi
    *          The user group security information.
    */
   void run(SessionState sess, Set<ReadEntity> inputs,
-      Set<WriteEntity> outputs, UserGroupInformation ugi) throws Exception;
+      Set<WriteEntity> outputs, LineageInfo lInfo,
+      UserGroupInformation ugi) throws Exception;
 
 }

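A minimal sketch of a post-execution hook written against the revised interface. The
class name and the logging are illustrative only, not part of this patch; ReadEntity
and WriteEntity need no imports because the sketch is placed in the hooks package
itself.

    package org.apache.hadoop.hive.ql.hooks;

    import java.util.Map;
    import java.util.Set;

    import org.apache.hadoop.hive.ql.hooks.LineageInfo.Dependency;
    import org.apache.hadoop.hive.ql.hooks.LineageInfo.DependencyKey;
    import org.apache.hadoop.hive.ql.session.SessionState;
    import org.apache.hadoop.security.UserGroupInformation;

    public class LineageLoggingHook implements PostExecute {

      public void run(SessionState sess, Set<ReadEntity> inputs,
          Set<WriteEntity> outputs, LineageInfo lInfo,
          UserGroupInformation ugi) throws Exception {
        if (lInfo == null) {
          return;   // no column lineage was computed for this query
        }
        for (Map.Entry<DependencyKey, Dependency> e : lInfo.entrySet()) {
          DependencyKey key = e.getKey();
          System.err.println(key.getDataContainer().getTable().getTableName()
              + "." + key.getFieldSchema().getName()
              + " <- [" + e.getValue().getExpr() + "]");
        }
      }
    }

Such a hook would typically be enabled through the hive.exec.post.hooks
configuration property.
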
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/hooks/PreExecute.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/hooks/PreExecute.java?rev=931363&r1=931362&r2=931363&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/hooks/PreExecute.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/hooks/PreExecute.java Tue Apr  6 23:04:06 2010
@@ -31,7 +31,7 @@ public interface PreExecute {
 
   /**
    * The run command that is called just before the execution of the query.
-   * 
+   *
    * @param sess
    *          The session state.
    * @param inputs
@@ -41,7 +41,8 @@ public interface PreExecute {
    * @param ugi
    *          The user group security information.
    */
-  void run(SessionState sess, Set<ReadEntity> inputs,
-      Set<WriteEntity> outputs, UserGroupInformation ugi) throws Exception;
+  public void run(SessionState sess, Set<ReadEntity> inputs,
+      Set<WriteEntity> outputs, UserGroupInformation ugi)
+    throws Exception;
 
 }

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java?rev=931363&r1=931362&r2=931363&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java Tue Apr  6 23:04:06 2010
@@ -38,7 +38,6 @@ public class DefaultGraphWalker implemen
 
   protected Stack<Node> opStack;
   private final List<Node> toWalk = new ArrayList<Node>();
-  private final Set<Node> seenList = new HashSet<Node>();
   private final HashMap<Node, Object> retMap = new HashMap<Node, Object>();
   private final Dispatcher dispatcher;
 

Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/lib/Utils.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/lib/Utils.java?rev=931363&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/lib/Utils.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/lib/Utils.java Tue Apr  6 23:04:06 2010
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.lib;
+
+import java.util.Stack;
+
+/**
+ * Contains common utility functions to manipulate nodes, walkers etc.
+ */
+public class Utils {
+
+  /**
+   * Gets the nth ancestor (the parent being the 1st ancestor) in the traversal
+   * path. n=0 returns the currently visited node.
+   * 
+   * @param st The stack that encodes the traversal path.
+   * @param n The value of n (n=0 is the currently visited node).
+   * 
+   * @return Node The Nth ancestor in the path with respect to the current node.
+   */
+  public static Node getNthAncestor(Stack<Node> st, int n) {
+    assert (st.size() - 1 >= n);
+
+    Stack<Node> tmpStack = new Stack<Node>();
+    for (int i = 0; i <= n; i++) {
+      tmpStack.push(st.pop());
+    }
+    Node ret_nd = tmpStack.peek();
+
+    for (int i = 0; i <= n; i++) {
+      st.push(tmpStack.pop());
+    }
+    assert (tmpStack.isEmpty());
+
+    return ret_nd;
+  }
+}

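For illustration, getNthAncestor is meant to be called from inside a NodeProcessor,
where the graph walker supplies the traversal path. The sketch below is hypothetical
and assumes the visited node is not a root of the walk, so a parent exists.

    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      // The node being visited is on top of the stack, so n=0 is nd itself
      // and n=1 is its parent on the traversal path.
      Node self = Utils.getNthAncestor(stack, 0);
      Node parent = Utils.getNthAncestor(stack, 1);
      assert self == nd;
      // ... decide how to propagate information based on 'parent' ...
      return null;
    }
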
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java?rev=931363&r1=931362&r2=931363&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java Tue Apr  6 23:04:06 2010
@@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.optimiz
 import org.apache.hadoop.hive.ql.parse.ParseContext;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.ppd.PredicatePushDown;
+import org.apache.hadoop.hive.ql.optimizer.lineage.Generator;
 
 /**
  * Implementation of the optimizer.
@@ -42,6 +43,8 @@ public class Optimizer {
    */
   public void initialize(HiveConf hiveConf) {
     transformations = new ArrayList<Transform>();
+    // Add the transformation that computes the lineage information.
+    transformations.add(new Generator());
     if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCP)) {
       transformations.add(new ColumnPruner());
     }

Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/ExprProcCtx.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/ExprProcCtx.java?rev=931363&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/ExprProcCtx.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/ExprProcCtx.java Tue Apr  6 23:04:06 2010
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.lineage;
+
+import java.io.Serializable;
+
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+
+/**
+ * The processor context for the lineage information. This contains the
+ * lineage context and the input operator for the expression that is
+ * currently being processed.
+ */
+public class ExprProcCtx implements NodeProcessorCtx {
+
+  /**
+   * The lineage context that is being populated.
+   */
+  private LineageCtx lctx;
+  
+  /**
+   * The input operator in case the current operator is not a leaf.
+   */
+  private Operator<? extends Serializable> inpOp;
+  
+  /**
+   * Constructor.
+   * 
+   * @param lctx The lineage context that contains the dependencies for the inputs.
+   * @param inpOp The input operator to the current operator.
+   */
+  public ExprProcCtx(LineageCtx lctx,
+      Operator<? extends Serializable> inpOp) {
+    this.lctx = lctx;
+    this.inpOp = inpOp;
+  }
+  
+  /**
+   * Gets the lineage context.
+   * 
+   * @return LineageCtx The lineage context.
+   */
+  public LineageCtx getLineageCtx() {
+    return lctx;
+  }
+ 
+  /**
+   * Gets the input operator.
+   * 
+   * @return Operator The input operator - this is null in case the current 
+   * operator is a leaf.
+   */
+  public Operator<? extends Serializable> getInputOperator() {
+    return inpOp;
+  }
+}

Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/ExprProcFactory.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/ExprProcFactory.java?rev=931363&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/ExprProcFactory.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/ExprProcFactory.java Tue Apr  6 23:04:06 2010
@@ -0,0 +1,208 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.lineage;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Stack;
+
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.hooks.LineageInfo;
+import org.apache.hadoop.hive.ql.hooks.LineageInfo.BaseColumnInfo;
+import org.apache.hadoop.hive.ql.hooks.LineageInfo.Dependency;
+import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
+import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
+import org.apache.hadoop.hive.ql.lib.Dispatcher;
+import org.apache.hadoop.hive.ql.lib.GraphWalker;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.lib.Rule;
+import org.apache.hadoop.hive.ql.lib.RuleRegExp;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc;
+
+/**
+ * Expression processor factory for lineage. Each processor is responsible to
+ * create the leaf level column info objects that the expression depends upon
+ * and also generates a string representation of the expression.
+ */
+public class ExprProcFactory {
+
+  /**
+   * Processor for column expressions.
+   */
+  public static class ColumnExprProcessor implements NodeProcessor {
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+
+      ExprNodeColumnDesc cd = (ExprNodeColumnDesc) nd;
+      ExprProcCtx epc = (ExprProcCtx) procCtx;
+
+      // Assert that the input operator is not null: table scans have no
+      // expressions, so a column expression always has an input operator.
+      assert (epc.getInputOperator() != null);
+
+      ColumnInfo inp_ci = null;
+      for (ColumnInfo tmp_ci : epc.getInputOperator().getSchema()
+          .getSignature()) {
+        if (tmp_ci.getInternalName().equals(cd.getColumn())) {
+          inp_ci = tmp_ci;
+          break;
+        }
+      }
+
+      // Look up the dependency of inp_ci in the lineage index and return it.
+      LineageCtx lc = epc.getLineageCtx();
+      Dependency dep = lc.getIndex().getDependency(epc.getInputOperator(), inp_ci);
+
+      return dep;
+    }
+
+  }
+
+  /**
+   * Processor for any function or field expression.
+   */
+  public static class GenericExprProcessor implements NodeProcessor {
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+
+      assert (nd instanceof ExprNodeGenericFuncDesc || nd instanceof ExprNodeFieldDesc);
+
+      // Concatenate the dependencies of all the children to compute the new
+      // dependency.
+      Dependency dep = new Dependency();
+
+      LinkedHashSet<BaseColumnInfo> bci_set = new LinkedHashSet<BaseColumnInfo>();
+      LineageInfo.DependencyType new_type = LineageInfo.DependencyType.EXPRESSION;
+
+      for (Object child : nodeOutputs) {
+        if (child == null) {
+          continue;
+        }
+
+        Dependency child_dep = (Dependency) child;
+        new_type = LineageCtx.getNewDependencyType(child_dep.getType(), new_type);
+        bci_set.addAll(child_dep.getBaseCols());
+      }
+
+      dep.setBaseCols(new ArrayList<BaseColumnInfo>(bci_set));
+      dep.setType(new_type);
+
+      return dep;
+    }
+
+  }
+
+  /**
+   * Processor for constants and null expressions. For such expressions the
+   * processor simply returns a null dependency vector.
+   */
+  public static class DefaultExprProcessor implements NodeProcessor {
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+      assert (nd instanceof ExprNodeConstantDesc || nd instanceof ExprNodeNullDesc);
+
+      // Create a dependency that has no basecols
+      Dependency dep = new Dependency();
+      dep.setType(LineageInfo.DependencyType.SIMPLE);
+      dep.setBaseCols(new ArrayList<BaseColumnInfo>());
+      return dep;
+    }
+  }
+
+  public static NodeProcessor getDefaultExprProcessor() {
+    return new DefaultExprProcessor();
+  }
+
+  public static NodeProcessor getGenericFuncProcessor() {
+    return new GenericExprProcessor();
+  }
+
+  public static NodeProcessor getFieldProcessor() {
+    return new GenericExprProcessor();
+  }
+
+  public static NodeProcessor getColumnProcessor() {
+    return new ColumnExprProcessor();
+  }
+
+  /**
+   * Gets the expression dependencies for the expression.
+   *
+   * @param lctx
+   *          The lineage context containing the input operators dependencies.
+   * @param inpOp
+   *          The input operator to the current operator.
+   * @param expr
+   *          The expression that is being processed.
+   * @throws SemanticException
+   */
+  public static Dependency getExprDependency(LineageCtx lctx,
+      Operator<? extends Serializable> inpOp, ExprNodeDesc expr)
+      throws SemanticException {
+
+    // Create the walker, the rules dispatcher and the context.
+    ExprProcCtx exprCtx = new ExprProcCtx(lctx, inpOp);
+
+    // Create a walker that walks the expression tree in a DFS manner while
+    // maintaining the node stack, and rules that map each expression node
+    // type to the processor that computes its dependency.
+    Map<Rule, NodeProcessor> exprRules = new LinkedHashMap<Rule, NodeProcessor>();
+    exprRules.put(
+        new RuleRegExp("R1", ExprNodeColumnDesc.class.getName() + "%"),
+        getColumnProcessor());
+    exprRules.put(
+        new RuleRegExp("R2", ExprNodeFieldDesc.class.getName() + "%"),
+        getFieldProcessor());
+    exprRules.put(new RuleRegExp("R3", ExprNodeGenericFuncDesc.class.getName()
+        + "%"), getGenericFuncProcessor());
+
+    // The dispatcher fires the processor corresponding to the closest matching
+    // rule and passes the context along
+    Dispatcher disp = new DefaultRuleDispatcher(getDefaultExprProcessor(),
+        exprRules, exprCtx);
+    GraphWalker egw = new DefaultGraphWalker(disp);
+
+    List<Node> startNodes = new ArrayList<Node>();
+    startNodes.add(expr);
+
+    HashMap<Node, Object> outputMap = new HashMap<Node, Object>();
+    egw.startWalking(startNodes, outputMap);
+    return (Dependency)outputMap.get(expr);
+  }
+}

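An illustrative call to the helper above, assuming a LineageCtx (lctx), the operator
feeding the expression (parentOp) and an output expression (expr) are already in
hand; in the patch itself the operator-level processors are presumably the callers
of this method.

    // Sketch only: compute the dependency of one output expression and
    // inspect the base columns it resolves to.
    Dependency dep = ExprProcFactory.getExprDependency(lctx, parentOp, expr);
    if (dep != null && dep.getBaseCols() != null) {
      System.out.println("expr depends on " + dep.getBaseCols().size()
          + " base column(s), dependency type " + dep.getType());
    }
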
Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/Generator.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/Generator.java?rev=931363&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/Generator.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/Generator.java Tue Apr  6 23:04:06 2010
@@ -0,0 +1,82 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.lineage;
+
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
+import org.apache.hadoop.hive.ql.lib.Dispatcher;
+import org.apache.hadoop.hive.ql.lib.GraphWalker;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.PreOrderWalker;
+import org.apache.hadoop.hive.ql.lib.Rule;
+import org.apache.hadoop.hive.ql.lib.RuleRegExp;
+import org.apache.hadoop.hive.ql.optimizer.Transform;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.session.SessionState;
+
+/**
+ * This class generates the lineage information for the columns
+ * and tables from the plan before it goes through other
+ * optimization phases.
+ */
+public class Generator implements Transform {
+
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.hive.ql.optimizer.Transform#transform(org.apache.hadoop.hive.ql.parse.ParseContext)
+   */
+  @Override
+  public ParseContext transform(ParseContext pctx) throws SemanticException {
+
+    // Create the lineage context
+    LineageCtx lCtx = new LineageCtx(pctx);
+
+    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
+    opRules.put(new RuleRegExp("R1", "TS%"), OpProcFactory.getTSProc());
+    opRules.put(new RuleRegExp("R2", "SCR%"), OpProcFactory.getTransformProc());
+    opRules.put(new RuleRegExp("R3", "UDTF%"), OpProcFactory.getTransformProc());
+    opRules.put(new RuleRegExp("R4", "SEL%"), OpProcFactory.getSelProc());
+    opRules.put(new RuleRegExp("R5", "GBY%"), OpProcFactory.getGroupByProc());
+    opRules.put(new RuleRegExp("R6", "UNION%"), OpProcFactory.getUnionProc());
+    opRules.put(new RuleRegExp("R7", "JOIN%|MAPJOIN%"), OpProcFactory.getJoinProc());
+    opRules.put(new RuleRegExp("R8", "RS%"), OpProcFactory.getReduceSinkProc());
+    opRules.put(new RuleRegExp("R9", "LVJ%"), OpProcFactory.getLateralViewJoinProc());
+
+    // The dispatcher fires the processor corresponding to the closest matching rule and passes the context along
+    Dispatcher disp = new DefaultRuleDispatcher(OpProcFactory.getDefaultProc(), opRules, lCtx);
+    GraphWalker ogw = new PreOrderWalker(disp);
+
+    // Create a list of topop nodes
+    ArrayList<Node> topNodes = new ArrayList<Node>();
+    topNodes.addAll(pctx.getTopOps().values());
+    ogw.startWalking(topNodes, null);
+
+    // Transfer the index from the lineage context to the session state.
+    if (SessionState.get() != null) {
+      SessionState.get().getLineageState().setIndex(lCtx.getIndex());
+    }
+
+    return pctx;
+  }
+
+}
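The Generator ties the pieces together: each rule maps an operator-name pattern to a
processor, a pre-order walk from the top operators drives the dispatch, and unmatched
operators fall through to OpProcFactory.getDefaultProc(). As a sketch of how a further
rule could be registered inside transform(), the "FIL%" pattern and the no-op
processor below are assumptions, not part of this patch (java.util.Stack and
NodeProcessorCtx would also need to be imported):

    opRules.put(new RuleRegExp("R10", "FIL%"),
        new NodeProcessor() {
          public Object process(Node nd, Stack<Node> stack,
              NodeProcessorCtx procCtx, Object... nodeOutputs)
              throws SemanticException {
            // A filter projects no new columns, so nothing extra is recorded
            // here; lineage simply flows through from the parent operator.
            return null;
          }
        });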