Posted to commits@hive.apache.org by jv...@apache.org on 2011/07/13 18:49:42 UTC

svn commit: r1146129 [1/41] - in /hive/trunk: common/src/java/org/apache/hadoop/hive/conf/ contrib/src/test/results/clientpositive/ hbase-handler/src/test/results/ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/java/org/apache/hadoop/hive/ql/ppd/ q...

Author: jvs
Date: Wed Jul 13 16:49:22 2011
New Revision: 1146129

URL: http://svn.apache.org/viewvc?rev=1146129&view=rev
Log:
HIVE-1538. FilterOperator is applied twice with ppd on.
(Amareshwari Sriramadasu via jvs)


Modified:
    hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    hive/trunk/contrib/src/test/results/clientpositive/dboutput.q.out
    hive/trunk/contrib/src/test/results/clientpositive/serde_typedbytes4.q.out
    hive/trunk/hbase-handler/src/test/results/hbase_pushdown.q.out
    hive/trunk/hbase-handler/src/test/results/hbase_queries.q.out
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpWalkerInfo.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java
    hive/trunk/ql/src/test/queries/clientpositive/ppd1.q
    hive/trunk/ql/src/test/queries/clientpositive/ppd_clusterby.q
    hive/trunk/ql/src/test/queries/clientpositive/ppd_constant_expr.q
    hive/trunk/ql/src/test/queries/clientpositive/ppd_gby.q
    hive/trunk/ql/src/test/queries/clientpositive/ppd_gby2.q
    hive/trunk/ql/src/test/queries/clientpositive/ppd_gby_join.q
    hive/trunk/ql/src/test/queries/clientpositive/ppd_join.q
    hive/trunk/ql/src/test/queries/clientpositive/ppd_join2.q
    hive/trunk/ql/src/test/queries/clientpositive/ppd_join3.q
    hive/trunk/ql/src/test/queries/clientpositive/ppd_multi_insert.q
    hive/trunk/ql/src/test/queries/clientpositive/ppd_outer_join1.q
    hive/trunk/ql/src/test/queries/clientpositive/ppd_outer_join2.q
    hive/trunk/ql/src/test/queries/clientpositive/ppd_outer_join3.q
    hive/trunk/ql/src/test/queries/clientpositive/ppd_outer_join4.q
    hive/trunk/ql/src/test/queries/clientpositive/ppd_random.q
    hive/trunk/ql/src/test/queries/clientpositive/ppd_transform.q
    hive/trunk/ql/src/test/queries/clientpositive/ppd_udf_case.q
    hive/trunk/ql/src/test/queries/clientpositive/ppd_union.q
    hive/trunk/ql/src/test/results/clientpositive/auto_join0.q.out
    hive/trunk/ql/src/test/results/clientpositive/auto_join11.q.out
    hive/trunk/ql/src/test/results/clientpositive/auto_join12.q.out
    hive/trunk/ql/src/test/results/clientpositive/auto_join13.q.out
    hive/trunk/ql/src/test/results/clientpositive/auto_join14.q.out
    hive/trunk/ql/src/test/results/clientpositive/auto_join16.q.out
    hive/trunk/ql/src/test/results/clientpositive/auto_join19.q.out
    hive/trunk/ql/src/test/results/clientpositive/auto_join20.q.out
    hive/trunk/ql/src/test/results/clientpositive/auto_join21.q.out
    hive/trunk/ql/src/test/results/clientpositive/auto_join23.q.out
    hive/trunk/ql/src/test/results/clientpositive/auto_join27.q.out
    hive/trunk/ql/src/test/results/clientpositive/auto_join28.q.out
    hive/trunk/ql/src/test/results/clientpositive/auto_join29.q.out
    hive/trunk/ql/src/test/results/clientpositive/auto_join4.q.out
    hive/trunk/ql/src/test/results/clientpositive/auto_join5.q.out
    hive/trunk/ql/src/test/results/clientpositive/auto_join6.q.out
    hive/trunk/ql/src/test/results/clientpositive/auto_join7.q.out
    hive/trunk/ql/src/test/results/clientpositive/auto_join8.q.out
    hive/trunk/ql/src/test/results/clientpositive/auto_join9.q.out
    hive/trunk/ql/src/test/results/clientpositive/bucket2.q.out
    hive/trunk/ql/src/test/results/clientpositive/bucket3.q.out
    hive/trunk/ql/src/test/results/clientpositive/bucket4.q.out
    hive/trunk/ql/src/test/results/clientpositive/bucket_groupby.q.out
    hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin1.q.out
    hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin2.q.out
    hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin3.q.out
    hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out
    hive/trunk/ql/src/test/results/clientpositive/case_sensitivity.q.out
    hive/trunk/ql/src/test/results/clientpositive/cast1.q.out
    hive/trunk/ql/src/test/results/clientpositive/cluster.q.out
    hive/trunk/ql/src/test/results/clientpositive/combine2.q.out
    hive/trunk/ql/src/test/results/clientpositive/create_view.q.out
    hive/trunk/ql/src/test/results/clientpositive/disable_merge_for_bucketing.q.out
    hive/trunk/ql/src/test/results/clientpositive/filter_join_breaktask.q.out
    hive/trunk/ql/src/test/results/clientpositive/groupby_map_ppr.q.out
    hive/trunk/ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out
    hive/trunk/ql/src/test/results/clientpositive/groupby_ppr.q.out
    hive/trunk/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out
    hive/trunk/ql/src/test/results/clientpositive/having.q.out
    hive/trunk/ql/src/test/results/clientpositive/implicit_cast1.q.out
    hive/trunk/ql/src/test/results/clientpositive/index_auto.q.out
    hive/trunk/ql/src/test/results/clientpositive/index_auto_file_format.q.out
    hive/trunk/ql/src/test/results/clientpositive/index_auto_multiple.q.out
    hive/trunk/ql/src/test/results/clientpositive/index_auto_partitioned.q.out
    hive/trunk/ql/src/test/results/clientpositive/index_auto_unused.q.out
    hive/trunk/ql/src/test/results/clientpositive/index_bitmap3.q.out
    hive/trunk/ql/src/test/results/clientpositive/index_bitmap_auto.q.out
    hive/trunk/ql/src/test/results/clientpositive/index_bitmap_auto_partitioned.q.out
    hive/trunk/ql/src/test/results/clientpositive/input11.q.out
    hive/trunk/ql/src/test/results/clientpositive/input11_limit.q.out
    hive/trunk/ql/src/test/results/clientpositive/input14.q.out
    hive/trunk/ql/src/test/results/clientpositive/input18.q.out
    hive/trunk/ql/src/test/results/clientpositive/input23.q.out
    hive/trunk/ql/src/test/results/clientpositive/input24.q.out
    hive/trunk/ql/src/test/results/clientpositive/input25.q.out
    hive/trunk/ql/src/test/results/clientpositive/input26.q.out
    hive/trunk/ql/src/test/results/clientpositive/input2_limit.q.out
    hive/trunk/ql/src/test/results/clientpositive/input31.q.out
    hive/trunk/ql/src/test/results/clientpositive/input39.q.out
    hive/trunk/ql/src/test/results/clientpositive/input42.q.out
    hive/trunk/ql/src/test/results/clientpositive/input6.q.out
    hive/trunk/ql/src/test/results/clientpositive/input9.q.out
    hive/trunk/ql/src/test/results/clientpositive/input_part1.q.out
    hive/trunk/ql/src/test/results/clientpositive/input_part5.q.out
    hive/trunk/ql/src/test/results/clientpositive/input_part6.q.out
    hive/trunk/ql/src/test/results/clientpositive/input_part7.q.out
    hive/trunk/ql/src/test/results/clientpositive/input_part9.q.out
    hive/trunk/ql/src/test/results/clientpositive/input_testxpath2.q.out
    hive/trunk/ql/src/test/results/clientpositive/input_testxpath4.q.out
    hive/trunk/ql/src/test/results/clientpositive/join0.q.out
    hive/trunk/ql/src/test/results/clientpositive/join11.q.out
    hive/trunk/ql/src/test/results/clientpositive/join12.q.out
    hive/trunk/ql/src/test/results/clientpositive/join13.q.out
    hive/trunk/ql/src/test/results/clientpositive/join14.q.out
    hive/trunk/ql/src/test/results/clientpositive/join16.q.out
    hive/trunk/ql/src/test/results/clientpositive/join19.q.out
    hive/trunk/ql/src/test/results/clientpositive/join20.q.out
    hive/trunk/ql/src/test/results/clientpositive/join21.q.out
    hive/trunk/ql/src/test/results/clientpositive/join23.q.out
    hive/trunk/ql/src/test/results/clientpositive/join26.q.out
    hive/trunk/ql/src/test/results/clientpositive/join28.q.out
    hive/trunk/ql/src/test/results/clientpositive/join32.q.out
    hive/trunk/ql/src/test/results/clientpositive/join33.q.out
    hive/trunk/ql/src/test/results/clientpositive/join34.q.out
    hive/trunk/ql/src/test/results/clientpositive/join35.q.out
    hive/trunk/ql/src/test/results/clientpositive/join38.q.out
    hive/trunk/ql/src/test/results/clientpositive/join39.q.out
    hive/trunk/ql/src/test/results/clientpositive/join4.q.out
    hive/trunk/ql/src/test/results/clientpositive/join40.q.out
    hive/trunk/ql/src/test/results/clientpositive/join5.q.out
    hive/trunk/ql/src/test/results/clientpositive/join6.q.out
    hive/trunk/ql/src/test/results/clientpositive/join7.q.out
    hive/trunk/ql/src/test/results/clientpositive/join8.q.out
    hive/trunk/ql/src/test/results/clientpositive/join9.q.out
    hive/trunk/ql/src/test/results/clientpositive/join_map_ppr.q.out
    hive/trunk/ql/src/test/results/clientpositive/lateral_view_ppd.q.out
    hive/trunk/ql/src/test/results/clientpositive/load_dyn_part10.q.out
    hive/trunk/ql/src/test/results/clientpositive/load_dyn_part13.q.out
    hive/trunk/ql/src/test/results/clientpositive/load_dyn_part2.q.out
    hive/trunk/ql/src/test/results/clientpositive/load_dyn_part3.q.out
    hive/trunk/ql/src/test/results/clientpositive/load_dyn_part4.q.out
    hive/trunk/ql/src/test/results/clientpositive/load_dyn_part9.q.out
    hive/trunk/ql/src/test/results/clientpositive/louter_join_ppr.q.out
    hive/trunk/ql/src/test/results/clientpositive/mapjoin_distinct.q.out
    hive/trunk/ql/src/test/results/clientpositive/mapjoin_subquery.q.out
    hive/trunk/ql/src/test/results/clientpositive/merge3.q.out
    hive/trunk/ql/src/test/results/clientpositive/merge4.q.out
    hive/trunk/ql/src/test/results/clientpositive/merge_dynamic_partition.q.out
    hive/trunk/ql/src/test/results/clientpositive/merge_dynamic_partition2.q.out
    hive/trunk/ql/src/test/results/clientpositive/merge_dynamic_partition3.q.out
    hive/trunk/ql/src/test/results/clientpositive/no_hooks.q.out
    hive/trunk/ql/src/test/results/clientpositive/noalias_subq1.q.out
    hive/trunk/ql/src/test/results/clientpositive/notable_alias1.q.out
    hive/trunk/ql/src/test/results/clientpositive/notable_alias2.q.out
    hive/trunk/ql/src/test/results/clientpositive/nullgroup.q.out
    hive/trunk/ql/src/test/results/clientpositive/nullgroup2.q.out
    hive/trunk/ql/src/test/results/clientpositive/nullgroup4.q.out
    hive/trunk/ql/src/test/results/clientpositive/nullgroup4_multi_distinct.q.out
    hive/trunk/ql/src/test/results/clientpositive/nullgroup5.q.out
    hive/trunk/ql/src/test/results/clientpositive/outer_join_ppr.q.out
    hive/trunk/ql/src/test/results/clientpositive/pcr.q.out
    hive/trunk/ql/src/test/results/clientpositive/ppd1.q.out
    hive/trunk/ql/src/test/results/clientpositive/ppd_clusterby.q.out
    hive/trunk/ql/src/test/results/clientpositive/ppd_constant_expr.q.out
    hive/trunk/ql/src/test/results/clientpositive/ppd_gby.q.out
    hive/trunk/ql/src/test/results/clientpositive/ppd_gby2.q.out
    hive/trunk/ql/src/test/results/clientpositive/ppd_gby_join.q.out
    hive/trunk/ql/src/test/results/clientpositive/ppd_join.q.out
    hive/trunk/ql/src/test/results/clientpositive/ppd_join2.q.out
    hive/trunk/ql/src/test/results/clientpositive/ppd_join3.q.out
    hive/trunk/ql/src/test/results/clientpositive/ppd_multi_insert.q.out
    hive/trunk/ql/src/test/results/clientpositive/ppd_outer_join1.q.out
    hive/trunk/ql/src/test/results/clientpositive/ppd_outer_join2.q.out
    hive/trunk/ql/src/test/results/clientpositive/ppd_outer_join3.q.out
    hive/trunk/ql/src/test/results/clientpositive/ppd_outer_join4.q.out
    hive/trunk/ql/src/test/results/clientpositive/ppd_random.q.out
    hive/trunk/ql/src/test/results/clientpositive/ppd_transform.q.out
    hive/trunk/ql/src/test/results/clientpositive/ppd_udf_case.q.out
    hive/trunk/ql/src/test/results/clientpositive/ppd_union.q.out
    hive/trunk/ql/src/test/results/clientpositive/ppr_pushdown3.q.out
    hive/trunk/ql/src/test/results/clientpositive/query_result_fileformat.q.out
    hive/trunk/ql/src/test/results/clientpositive/quote1.q.out
    hive/trunk/ql/src/test/results/clientpositive/rand_partitionpruner3.q.out
    hive/trunk/ql/src/test/results/clientpositive/rcfile_null_value.q.out
    hive/trunk/ql/src/test/results/clientpositive/reduce_deduplicate.q.out
    hive/trunk/ql/src/test/results/clientpositive/regex_col.q.out
    hive/trunk/ql/src/test/results/clientpositive/regexp_extract.q.out
    hive/trunk/ql/src/test/results/clientpositive/router_join_ppr.q.out
    hive/trunk/ql/src/test/results/clientpositive/sample1.q.out
    hive/trunk/ql/src/test/results/clientpositive/sample10.q.out
    hive/trunk/ql/src/test/results/clientpositive/sample2.q.out
    hive/trunk/ql/src/test/results/clientpositive/sample3.q.out
    hive/trunk/ql/src/test/results/clientpositive/sample4.q.out
    hive/trunk/ql/src/test/results/clientpositive/sample5.q.out
    hive/trunk/ql/src/test/results/clientpositive/sample6.q.out
    hive/trunk/ql/src/test/results/clientpositive/sample7.q.out
    hive/trunk/ql/src/test/results/clientpositive/sample8.q.out
    hive/trunk/ql/src/test/results/clientpositive/sample9.q.out
    hive/trunk/ql/src/test/results/clientpositive/semijoin.q.out
    hive/trunk/ql/src/test/results/clientpositive/set_processor_namespaces.q.out
    hive/trunk/ql/src/test/results/clientpositive/skewjoin.q.out
    hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin9.q.out
    hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin_6.q.out
    hive/trunk/ql/src/test/results/clientpositive/split_sample.q.out
    hive/trunk/ql/src/test/results/clientpositive/stats11.q.out
    hive/trunk/ql/src/test/results/clientpositive/stats2.q.out
    hive/trunk/ql/src/test/results/clientpositive/subq.q.out
    hive/trunk/ql/src/test/results/clientpositive/subq2.q.out
    hive/trunk/ql/src/test/results/clientpositive/transform_ppr1.q.out
    hive/trunk/ql/src/test/results/clientpositive/transform_ppr2.q.out
    hive/trunk/ql/src/test/results/clientpositive/udf1.q.out
    hive/trunk/ql/src/test/results/clientpositive/udf9.q.out
    hive/trunk/ql/src/test/results/clientpositive/udf_10_trims.q.out
    hive/trunk/ql/src/test/results/clientpositive/udf_hour.q.out
    hive/trunk/ql/src/test/results/clientpositive/udf_isnull_isnotnull.q.out
    hive/trunk/ql/src/test/results/clientpositive/udf_like.q.out
    hive/trunk/ql/src/test/results/clientpositive/udf_lower.q.out
    hive/trunk/ql/src/test/results/clientpositive/udf_minute.q.out
    hive/trunk/ql/src/test/results/clientpositive/udf_notequal.q.out
    hive/trunk/ql/src/test/results/clientpositive/udf_parse_url.q.out
    hive/trunk/ql/src/test/results/clientpositive/udf_second.q.out
    hive/trunk/ql/src/test/results/clientpositive/udf_size.q.out
    hive/trunk/ql/src/test/results/clientpositive/union.q.out
    hive/trunk/ql/src/test/results/clientpositive/union20.q.out
    hive/trunk/ql/src/test/results/clientpositive/union22.q.out
    hive/trunk/ql/src/test/results/clientpositive/union_ppr.q.out
    hive/trunk/ql/src/test/results/compiler/plan/case_sensitivity.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/cast1.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/groupby1.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/groupby2.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/groupby3.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/groupby4.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/groupby5.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/groupby6.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/input1.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/input2.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/input20.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/input3.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/input4.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/input5.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/input6.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/input7.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/input8.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/input9.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/input_part1.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/input_testsequencefile.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/input_testxpath.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/input_testxpath2.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/join1.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/join2.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/join3.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/join4.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/join5.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/join6.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/join7.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/join8.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/sample1.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/sample2.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/sample3.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/sample4.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/sample5.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/sample6.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/sample7.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/subq.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/udf1.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/udf4.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/udf6.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/udf_case.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/udf_when.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/union.q.xml

Modified: hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1146129&r1=1146128&r2=1146129&view=diff
==============================================================================
--- hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Wed Jul 13 16:49:22 2011
@@ -368,6 +368,7 @@ public class HiveConf extends Configurat
     HIVEOPTCP("hive.optimize.cp", true), // column pruner
     HIVEOPTINDEXFILTER("hive.optimize.index.filter", false), // automatically use indexes
     HIVEOPTPPD("hive.optimize.ppd", true), // predicate pushdown
+    HIVEPPDREMOVEDUPLICATEFILTERS("hive.ppd.remove.duplicatefilters", true),
     // push predicates down to storage handlers
     HIVEOPTPPD_STORAGE("hive.optimize.ppd.storage", true),
     HIVEOPTGROUPBY("hive.optimize.groupby", true), // optimize group by
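
The new flag defaults to true, so duplicate-filter removal is active unless explicitly disabled. A minimal sketch of how the rest of this patch consults it (the ConfVars name and getBoolVar call are taken from the hunks below; the surrounding variable and branch are hypothetical):

    // Sketch only: reading the new flag the way OpProcFactory does further down.
    // 'conf' is assumed to be the HiveConf obtained from the current ParseContext.
    boolean removeDuplicateFilters =
        HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEPPDREMOVEDUPLICATEFILTERS);
    if (removeDuplicateFilters) {
      // prune FilterOperators whose predicate was fully pushed down
    }
    // Users can opt out with: set hive.ppd.remove.duplicatefilters=false;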

Modified: hive/trunk/contrib/src/test/results/clientpositive/dboutput.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/contrib/src/test/results/clientpositive/dboutput.q.out?rev=1146129&r1=1146128&r2=1146129&view=diff
==============================================================================
--- hive/trunk/contrib/src/test/results/clientpositive/dboutput.q.out (original)
+++ hive/trunk/contrib/src/test/results/clientpositive/dboutput.q.out Wed Jul 13 16:49:22 2011
@@ -143,21 +143,17 @@ STAGE PLANS:
               predicate:
                   expr: (key < 10)
                   type: boolean
-              Filter Operator
-                predicate:
-                    expr: (key < 10)
-                    type: boolean
-                Select Operator
-                  expressions:
-                        expr: dboutput('jdbc:derby:../build/test_dboutput_db','','','INSERT INTO app_info (kkey,vvalue) VALUES (?,?)',key,value)
-                        type: int
-                  outputColumnNames: _col0
-                  File Output Operator
-                    compressed: false
-                    GlobalTableId: 0
-                    table:
-                        input format: org.apache.hadoop.mapred.TextInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              Select Operator
+                expressions:
+                      expr: dboutput('jdbc:derby:../build/test_dboutput_db','','','INSERT INTO app_info (kkey,vvalue) VALUES (?,?)',key,value)
+                      type: int
+                outputColumnNames: _col0
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 
   Stage: Stage-0
     Fetch Operator

Modified: hive/trunk/contrib/src/test/results/clientpositive/serde_typedbytes4.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/contrib/src/test/results/clientpositive/serde_typedbytes4.q.out?rev=1146129&r1=1146128&r2=1146129&view=diff
==============================================================================
--- hive/trunk/contrib/src/test/results/clientpositive/serde_typedbytes4.q.out (original)
+++ hive/trunk/contrib/src/test/results/clientpositive/serde_typedbytes4.q.out Wed Jul 13 16:49:22 2011
@@ -50,42 +50,38 @@ STAGE PLANS:
               predicate:
                   expr: (key < 100)
                   type: boolean
-              Filter Operator
-                predicate:
-                    expr: (key < 100)
-                    type: boolean
-                Select Operator
-                  expressions:
-                        expr: UDFToByte(key)
-                        type: tinyint
-                        expr: value
-                        type: string
-                  outputColumnNames: _col0, _col1
-                  Transform Operator
-                    command: /bin/cat
-                    output info:
-                        input format: org.apache.hadoop.mapred.TextInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    Select Operator
-                      expressions:
+              Select Operator
+                expressions:
+                      expr: UDFToByte(key)
+                      type: tinyint
+                      expr: value
+                      type: string
+                outputColumnNames: _col0, _col1
+                Transform Operator
+                  command: /bin/cat
+                  output info:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  Select Operator
+                    expressions:
+                          expr: _col0
+                          type: string
+                          expr: _col1
+                          type: string
+                    outputColumnNames: _col0, _col1
+                    Reduce Output Operator
+                      key expressions:
+                            expr: _col0
+                            type: string
+                            expr: _col1
+                            type: string
+                      sort order: ++
+                      tag: -1
+                      value expressions:
                             expr: _col0
                             type: string
                             expr: _col1
                             type: string
-                      outputColumnNames: _col0, _col1
-                      Reduce Output Operator
-                        key expressions:
-                              expr: _col0
-                              type: string
-                              expr: _col1
-                              type: string
-                        sort order: ++
-                        tag: -1
-                        value expressions:
-                              expr: _col0
-                              type: string
-                              expr: _col1
-                              type: string
       Reduce Operator Tree:
         Extract
           File Output Operator

Modified: hive/trunk/hbase-handler/src/test/results/hbase_pushdown.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/test/results/hbase_pushdown.q.out?rev=1146129&r1=1146128&r2=1146129&view=diff
==============================================================================
--- hive/trunk/hbase-handler/src/test/results/hbase_pushdown.q.out (original)
+++ hive/trunk/hbase-handler/src/test/results/hbase_pushdown.q.out Wed Jul 13 16:49:22 2011
@@ -103,23 +103,19 @@ STAGE PLANS:
               predicate:
                   expr: (value like '%90%')
                   type: boolean
-              Filter Operator
-                predicate:
-                    expr: ((key = 90) and (value like '%90%'))
-                    type: boolean
-                Select Operator
-                  expressions:
-                        expr: key
-                        type: int
-                        expr: value
-                        type: string
-                  outputColumnNames: _col0, _col1
-                  File Output Operator
-                    compressed: false
-                    GlobalTableId: 0
-                    table:
-                        input format: org.apache.hadoop.mapred.TextInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              Select Operator
+                expressions:
+                      expr: key
+                      type: int
+                      expr: value
+                      type: string
+                outputColumnNames: _col0, _col1
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 
   Stage: Stage-0
     Fetch Operator
@@ -166,23 +162,19 @@ STAGE PLANS:
               predicate:
                   expr: ((value like '%90%') and (key = UDFToInteger(value)))
                   type: boolean
-              Filter Operator
-                predicate:
-                    expr: (((key = 90) and (value like '%90%')) and (key = UDFToInteger(value)))
-                    type: boolean
-                Select Operator
-                  expressions:
-                        expr: key
-                        type: int
-                        expr: value
-                        type: string
-                  outputColumnNames: _col0, _col1
-                  File Output Operator
-                    compressed: false
-                    GlobalTableId: 0
-                    table:
-                        input format: org.apache.hadoop.mapred.TextInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              Select Operator
+                expressions:
+                      expr: key
+                      type: int
+                      expr: value
+                      type: string
+                outputColumnNames: _col0, _col1
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 
   Stage: Stage-0
     Fetch Operator
@@ -217,23 +209,19 @@ STAGE PLANS:
               predicate:
                   expr: (((key = 80) and (key = 90)) and (value like '%90%'))
                   type: boolean
-              Filter Operator
-                predicate:
-                    expr: (((key = 80) and (key = 90)) and (value like '%90%'))
-                    type: boolean
-                Select Operator
-                  expressions:
-                        expr: key
-                        type: int
-                        expr: value
-                        type: string
-                  outputColumnNames: _col0, _col1
-                  File Output Operator
-                    compressed: false
-                    GlobalTableId: 0
-                    table:
-                        input format: org.apache.hadoop.mapred.TextInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              Select Operator
+                expressions:
+                      expr: key
+                      type: int
+                      expr: value
+                      type: string
+                outputColumnNames: _col0, _col1
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 
   Stage: Stage-0
     Fetch Operator
@@ -300,23 +288,19 @@ STAGE PLANS:
               predicate:
                   expr: (CASE WHEN ((key = 90)) THEN (2) ELSE (4) END > 3)
                   type: boolean
-              Filter Operator
-                predicate:
-                    expr: (CASE WHEN ((key = 90)) THEN (2) ELSE (4) END > 3)
-                    type: boolean
-                Select Operator
-                  expressions:
-                        expr: key
-                        type: int
-                        expr: value
-                        type: string
-                  outputColumnNames: _col0, _col1
-                  File Output Operator
-                    compressed: false
-                    GlobalTableId: 0
-                    table:
-                        input format: org.apache.hadoop.mapred.TextInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              Select Operator
+                expressions:
+                      expr: key
+                      type: int
+                      expr: value
+                      type: string
+                outputColumnNames: _col0, _col1
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 
   Stage: Stage-0
     Fetch Operator
@@ -353,23 +337,19 @@ STAGE PLANS:
               predicate:
                   expr: ((key = 80) or (value like '%90%'))
                   type: boolean
-              Filter Operator
-                predicate:
-                    expr: ((key = 80) or (value like '%90%'))
-                    type: boolean
-                Select Operator
-                  expressions:
-                        expr: key
-                        type: int
-                        expr: value
-                        type: string
-                  outputColumnNames: _col0, _col1
-                  File Output Operator
-                    compressed: false
-                    GlobalTableId: 0
-                    table:
-                        input format: org.apache.hadoop.mapred.TextInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              Select Operator
+                expressions:
+                      expr: key
+                      type: int
+                      expr: value
+                      type: string
+                outputColumnNames: _col0, _col1
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 
   Stage: Stage-0
     Fetch Operator
@@ -402,23 +382,19 @@ STAGE PLANS:
               predicate:
                   expr: (key = 90)
                   type: boolean
-              Filter Operator
-                predicate:
-                    expr: (key = 90)
-                    type: boolean
-                Select Operator
-                  expressions:
-                        expr: key
-                        type: int
-                        expr: value
-                        type: string
-                  outputColumnNames: _col0, _col1
-                  File Output Operator
-                    compressed: false
-                    GlobalTableId: 0
-                    table:
-                        input format: org.apache.hadoop.mapred.TextInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              Select Operator
+                expressions:
+                      expr: key
+                      type: int
+                      expr: value
+                      type: string
+                outputColumnNames: _col0, _col1
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 
   Stage: Stage-0
     Fetch Operator

Modified: hive/trunk/hbase-handler/src/test/results/hbase_queries.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/test/results/hbase_queries.q.out?rev=1146129&r1=1146128&r2=1146129&view=diff
==============================================================================
--- hive/trunk/hbase-handler/src/test/results/hbase_queries.q.out (original)
+++ hive/trunk/hbase-handler/src/test/results/hbase_queries.q.out Wed Jul 13 16:49:22 2011
@@ -50,32 +50,28 @@ STAGE PLANS:
               predicate:
                   expr: ((key % 2) = 0)
                   type: boolean
-              Filter Operator
-                predicate:
-                    expr: ((key % 2) = 0)
-                    type: boolean
+              Select Operator
+                expressions:
+                      expr: key
+                      type: string
+                      expr: value
+                      type: string
+                outputColumnNames: _col0, _col1
                 Select Operator
                   expressions:
-                        expr: key
-                        type: string
-                        expr: value
+                        expr: UDFToInteger(_col0)
+                        type: int
+                        expr: _col1
                         type: string
                   outputColumnNames: _col0, _col1
-                  Select Operator
-                    expressions:
-                          expr: UDFToInteger(_col0)
-                          type: int
-                          expr: _col1
-                          type: string
-                    outputColumnNames: _col0, _col1
-                    File Output Operator
-                      compressed: false
-                      GlobalTableId: 1
-                      table:
-                          input format: org.apache.hadoop.hive.hbase.HiveHBaseTableInputFormat
-                          output format: org.apache.hadoop.hive.hbase.HiveHBaseTableOutputFormat
-                          serde: org.apache.hadoop.hive.hbase.HBaseSerDe
-                          name: default.hbase_table_1
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 1
+                    table:
+                        input format: org.apache.hadoop.hive.hbase.HiveHBaseTableInputFormat
+                        output format: org.apache.hadoop.hive.hbase.HiveHBaseTableOutputFormat
+                        serde: org.apache.hadoop.hive.hbase.HBaseSerDe
+                        name: default.hbase_table_1
 
 
 PREHOOK: query: FROM src INSERT OVERWRITE TABLE hbase_table_1 SELECT * WHERE (key%2)=0
@@ -306,24 +302,20 @@ STAGE PLANS:
               predicate:
                   expr: (key > 100)
                   type: boolean
-              Filter Operator
-                predicate:
-                    expr: (key > 100)
-                    type: boolean
-                Select Operator
-                  expressions:
-                        expr: key
+              Select Operator
+                expressions:
+                      expr: key
+                      type: int
+                outputColumnNames: _col0
+                Reduce Output Operator
+                  key expressions:
+                        expr: _col0
+                        type: int
+                  sort order: +
+                  Map-reduce partition columns:
+                        expr: _col0
                         type: int
-                  outputColumnNames: _col0
-                  Reduce Output Operator
-                    key expressions:
-                          expr: _col0
-                          type: int
-                    sort order: +
-                    Map-reduce partition columns:
-                          expr: _col0
-                          type: int
-                    tag: 0
+                  tag: 0
         y:hbase_table_2 
           TableScan
             alias: hbase_table_2
@@ -331,31 +323,27 @@ STAGE PLANS:
               predicate:
                   expr: (key < 120)
                   type: boolean
-              Filter Operator
-                predicate:
-                    expr: (key < 120)
-                    type: boolean
-                Select Operator
-                  expressions:
-                        expr: key
+              Select Operator
+                expressions:
+                      expr: key
+                      type: int
+                      expr: value
+                      type: string
+                outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions:
+                        expr: _col0
                         type: int
-                        expr: value
+                  sort order: +
+                  Map-reduce partition columns:
+                        expr: _col0
+                        type: int
+                  tag: 1
+                  value expressions:
+                        expr: _col0
+                        type: int
+                        expr: _col1
                         type: string
-                  outputColumnNames: _col0, _col1
-                  Reduce Output Operator
-                    key expressions:
-                          expr: _col0
-                          type: int
-                    sort order: +
-                    Map-reduce partition columns:
-                          expr: _col0
-                          type: int
-                    tag: 1
-                    value expressions:
-                          expr: _col0
-                          type: int
-                          expr: _col1
-                          type: string
       Reduce Operator Tree:
         Join Operator
           condition map:

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java?rev=1146129&r1=1146128&r2=1146129&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java Wed Jul 13 16:49:22 2011
@@ -675,6 +675,24 @@ public abstract class Operator<T extends
     }
   }
 
+  public void removeParent(Operator<? extends Serializable> parent) {
+    int parentIndex = parentOperators.indexOf(parent);
+    assert parentIndex != -1;
+    if (parentOperators.size() == 1) {
+      parentOperators = null;
+    } else {
+      parentOperators.remove(parentIndex);
+    }
+
+    int childIndex = parent.getChildOperators().indexOf(this);
+    assert childIndex != -1;
+    if (parent.getChildOperators().size() == 1) {
+      parent.setChildOperators(null);
+    } else {
+      parent.getChildOperators().remove(childIndex);
+    }
+  }
+
   /**
    * Replace one parent with another at the same position. Chilren of the new
    * parent are not updated
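
The new removeParent() is the mirror image of the existing removeChild(): it unlinks the receiver from the given parent and the parent from the receiver, and, per the size checks above, nulls a list out entirely once its last entry is gone rather than leaving it empty. A rough sketch of the resulting invariant, assuming two already-wired operators a and b where b is a's only child (names are illustrative only):

    // Hypothetical operators a and b, with a -> b the only edge between them.
    b.removeParent(a);
    // Afterwards both sides of the link are gone:
    //   b.getParentOperators() == null   (a was b's only parent)
    //   a.getChildOperators()  == null   (b was a's only child)
    // With more than one parent/child, only the single matching entry is removed.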

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java?rev=1146129&r1=1146128&r2=1146129&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java Wed Jul 13 16:49:22 2011
@@ -66,21 +66,30 @@ public class ExprWalkerInfo implements N
   private RowResolver toRR = null;
 
   /**
-   * this map contains a expr infos. Each key is a node in the expression tree
-   * and the information for each node is the value which is used while walking
-   * the tree by its parent.
+   * Values the expression sub-trees (predicates) that can be pushed down for
+   * root expression tree. Since there can be more than one alias in an
+   * expression tree, this is a map from the alias to predicates.
    */
   private final Map<String, List<ExprNodeDesc>> pushdownPreds;
+
   /**
-   * Values the expression sub-trees (predicates) that can be pushed down for
+   * Values the expression sub-trees (predicates) that can not be pushed down for
    * root expression tree. Since there can be more than one alias in an
    * expression tree, this is a map from the alias to predicates.
    */
+  private final Map<String, List<ExprNodeDesc>> nonFinalPreds;
+
+  /**
+   * this map contains a expr infos. Each key is a node in the expression tree
+   * and the information for each node is the value which is used while walking
+   * the tree by its parent.
+   */
   private final Map<ExprNodeDesc, ExprInfo> exprInfoMap;
   private boolean isDeterministic = true;
 
   public ExprWalkerInfo() {
     pushdownPreds = new HashMap<String, List<ExprNodeDesc>>();
+    nonFinalPreds = new HashMap<String, List<ExprNodeDesc>>();
     exprInfoMap = new HashMap<ExprNodeDesc, ExprInfo>();
   }
 
@@ -91,6 +100,7 @@ public class ExprWalkerInfo implements N
 
     pushdownPreds = new HashMap<String, List<ExprNodeDesc>>();
     exprInfoMap = new HashMap<ExprNodeDesc, ExprInfo>();
+    nonFinalPreds = new HashMap<String, List<ExprNodeDesc>>();
   }
 
   /**
@@ -214,6 +224,19 @@ public class ExprWalkerInfo implements N
   }
 
   /**
+   * Adds the passed list of pushDowns for the alias.
+   *
+   * @param alias
+   * @param pushDowns
+   */
+  public void addPushDowns(String alias, List<ExprNodeDesc> pushDowns) {
+    if (pushdownPreds.get(alias) == null) {
+      pushdownPreds.put(alias, new ArrayList<ExprNodeDesc>());
+    }
+    pushdownPreds.get(alias).addAll(pushDowns);
+  }
+
+  /**
    * Returns the list of pushdown expressions for each alias that appear in the
    * current operator's RowResolver. The exprs in each list can be combined
    * using conjunction (AND).
@@ -225,6 +248,28 @@ public class ExprWalkerInfo implements N
   }
 
   /**
+   * Adds the specified expr as a non-final candidate
+   *
+   * @param expr
+   */
+  public void addNonFinalCandidate(ExprNodeDesc expr) {
+    String alias = getAlias(expr);
+    if (nonFinalPreds.get(alias) == null) {
+      nonFinalPreds.put(alias, new ArrayList<ExprNodeDesc>());
+    }
+    nonFinalPreds.get(alias).add(expr.clone());
+  }
+
+  /**
+   * Returns list of non-final candidate predicate for each map.
+   *
+   * @return
+   */
+  public Map<String, List<ExprNodeDesc>> getNonFinalCandidates() {
+    return nonFinalPreds;
+  }
+
+  /**
    * Merges the specified pushdown predicates with the current class.
    * 
    * @param ewi
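
Both maps are keyed by table alias, as the reshuffled Javadoc above explains: pushdownPreds holds the sub-predicates that can move below the operator, while the new nonFinalPreds holds the ones that must stay behind. A hedged sketch of how the new accessors fit together (the ExprNodeDesc values keyLessThan10 and nonPushable, the alias "a", and the local variable names are illustrative only):

    ExprWalkerInfo ewi = new ExprWalkerInfo();
    // 'keyLessThan10' is a hypothetical ExprNodeDesc, e.g. (key < 10) on alias "a".
    ewi.addPushDowns("a", Collections.singletonList(keyLessThan10));
    // 'nonPushable' is a hypothetical ExprNodeDesc that cannot move past the operator;
    // addNonFinalCandidate() clones it and files it under its own alias.
    ewi.addNonFinalCandidate(nonPushable);
    Map<String, List<ExprNodeDesc>> pushable   = ewi.getFinalCandidates();
    Map<String, List<ExprNodeDesc>> leftBehind = ewi.getNonFinalCandidates();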

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java?rev=1146129&r1=1146128&r2=1146129&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java Wed Jul 13 16:49:22 2011
@@ -26,6 +26,7 @@ import java.util.Stack;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
@@ -277,9 +278,10 @@ public final class ExprWalkerProcFactory
 
     egw.startWalking(startNodes, null);
 
+    HiveConf conf = opContext.getParseContext().getConf();
     // check the root expression for final candidates
     for (ExprNodeDesc pred : clonedPreds) {
-      extractFinalCandidates(pred, exprContext);
+      extractFinalCandidates(pred, exprContext, conf);
     }
     return exprContext;
   }
@@ -289,17 +291,20 @@ public final class ExprWalkerProcFactory
    * candidates.
    */
   private static void extractFinalCandidates(ExprNodeDesc expr,
-      ExprWalkerInfo ctx) {
+      ExprWalkerInfo ctx, HiveConf conf) {
     if (ctx.isCandidate(expr)) {
       ctx.addFinalCandidate(expr);
       return;
+    } else if (!FunctionRegistry.isOpAnd(expr) &&
+        HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEPPDREMOVEDUPLICATEFILTERS)) {
+      ctx.addNonFinalCandidate(expr);
     }
 
     if (FunctionRegistry.isOpAnd(expr)) {
       // If the operator is AND, we need to determine if any of the children are
       // final candidates.
       for (Node ch : expr.getChildren()) {
-        extractFinalCandidates((ExprNodeDesc) ch, ctx);
+        extractFinalCandidates((ExprNodeDesc) ch, ctx, conf);
       }
     }
   }
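
To see what the extra HiveConf parameter buys, consider a root predicate of the form (p1 AND p2) where only p1 is a pushdown candidate; p1, p2 and the alias are hypothetical here, but the control flow follows the method above:

    // extractFinalCandidates((p1 AND p2), ctx, conf):
    //   the conjunction itself is not a candidate, but it IS an AND, so it is not
    //   recorded as non-final; instead the method recurses into its children:
    //     p1: ctx.isCandidate(p1) == true  -> ctx.addFinalCandidate(p1)
    //     p2: not a candidate, not an AND  -> ctx.addNonFinalCandidate(p2)
    //         (only when hive.ppd.remove.duplicatefilters is true)
    // FilterPPD below marks the original FilterOperator for deletion only if no
    // non-final candidates were recorded, i.e. its whole predicate was pushed.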

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java?rev=1146129&r1=1146128&r2=1146129&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java Wed Jul 13 16:49:22 2011
@@ -88,6 +88,26 @@ public final class OpProcFactory {
       // script operator is a black-box to hive so no optimization here
       // assuming that nothing can be pushed above the script op
       // same with LIMIT op
+      // create a filter with all children predicates
+      OpWalkerInfo owi = (OpWalkerInfo) procCtx;
+      if (HiveConf.getBoolVar(owi.getParseContext().getConf(),
+          HiveConf.ConfVars.HIVEPPDREMOVEDUPLICATEFILTERS)) {
+        ExprWalkerInfo unpushedPreds = mergeChildrenPred(nd, owi, null, false);
+        return createFilter((Operator)nd, unpushedPreds, owi);
+      }
+      return null;
+    }
+
+  }
+
+  public static class UDTFPPD extends DefaultPPD implements NodeProcessor {
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+      LOG.info("Processing for " + nd.getName() + "("
+          + ((Operator) nd).getIdentifier() + ")");
+      //Predicates for UDTF wont be candidates for its children. So, nothing to
+      //optimize here. See lateral_view_ppd.q for example.
       return null;
     }
 
@@ -102,6 +122,10 @@ public final class OpProcFactory {
           + ((Operator) nd).getIdentifier() + ")");
       OpWalkerInfo owi = (OpWalkerInfo) procCtx;
 
+      // The lateral view forward operator has 2 children, a SELECT(*) and
+      // a SELECT(cols) (for the UDTF operator) The child at index 0 is the
+      // SELECT(*) because that's the way that the DAG was constructed. We
+      // only want to get the predicates from the SELECT(*).
       ExprWalkerInfo childPreds = owi
       .getPrunedPreds((Operator<? extends Serializable>) nd.getChildren()
       .get(0));
@@ -146,22 +170,32 @@ public final class OpProcFactory {
       OpWalkerInfo owi = (OpWalkerInfo) procCtx;
       Operator<? extends Serializable> op = (Operator<? extends Serializable>) nd;
       ExprNodeDesc predicate = (((FilterOperator) nd).getConf()).getPredicate();
-      // get pushdown predicates for this operator's predicate
-      ExprWalkerInfo ewi = ExprWalkerProcFactory.extractPushdownPreds(owi, op,
-          predicate);
-      if (!ewi.isDeterministic()) {
-        /* predicate is not deterministic */
-        if (op.getChildren() != null && op.getChildren().size() == 1) {
-          createFilter(op, owi
-              .getPrunedPreds((Operator<? extends Serializable>) (op
-              .getChildren().get(0))), owi);
+      ExprWalkerInfo ewi = new ExprWalkerInfo();
+      // Don't push a sampling predicate since createFilter() always creates filter
+      // with isSamplePred = false. Also, the filterop with sampling pred is always
+      // a child of TableScan, so there is no need to push this predicate.
+      if (!((FilterOperator)op).getConf().getIsSamplingPred()) {
+        // get pushdown predicates for this operator's predicate
+        ewi = ExprWalkerProcFactory.extractPushdownPreds(owi, op, predicate);
+        if (!ewi.isDeterministic()) {
+          /* predicate is not deterministic */
+          if (op.getChildren() != null && op.getChildren().size() == 1) {
+            createFilter(op, owi
+                .getPrunedPreds((Operator<? extends Serializable>) (op
+                .getChildren().get(0))), owi);
+          }
+          return null;
         }
-
-        return null;
+        if (HiveConf.getBoolVar(owi.getParseContext().getConf(),
+            HiveConf.ConfVars.HIVEPPDREMOVEDUPLICATEFILTERS)) {
+          // add this filter for deletion, if it does not have non-final candidates
+          if (ewi.getNonFinalCandidates().values().isEmpty()) {
+            owi.addCandidateFilterOp((FilterOperator)op);
+          }
+        }
+        logExpr(nd, ewi);
+        owi.putPrunedPreds((Operator<? extends Serializable>) nd, ewi);
       }
-
-      logExpr(nd, ewi);
-      owi.putPrunedPreds(op, ewi);
       // merge it with children predicates
       mergeWithChildrenPred(op, owi, ewi, null, false);
 
@@ -182,7 +216,15 @@ public final class OpProcFactory {
       OpWalkerInfo owi = (OpWalkerInfo) procCtx;
       Set<String> aliases = getQualifiedAliases((JoinOperator) nd, owi
           .getRowResolver(nd));
-      mergeWithChildrenPred(nd, owi, null, aliases, false);
+      boolean hasUnpushedPredicates = mergeWithChildrenPred(nd, owi, null, aliases, false);
+      if (HiveConf.getBoolVar(owi.getParseContext().getConf(),
+          HiveConf.ConfVars.HIVEPPDREMOVEDUPLICATEFILTERS)) {
+        if (hasUnpushedPredicates) {
+          aliases = null;
+        }
+        ExprWalkerInfo unpushedPreds = mergeChildrenPred(nd, owi, aliases, false);
+        return createFilter((Operator)nd, unpushedPreds, owi);
+      }
       return null;
     }
 
@@ -283,7 +325,15 @@ public final class OpProcFactory {
         Object... nodeOutputs) throws SemanticException {
       LOG.info("Processing for " + nd.getName() + "("
           + ((Operator) nd).getIdentifier() + ")");
-      mergeWithChildrenPred(nd, (OpWalkerInfo) procCtx, null, null, false);
+      OpWalkerInfo owi = (OpWalkerInfo) procCtx;
+      boolean hasUnpushedPredicates = mergeWithChildrenPred(nd, owi, null, null, false);
+      if (HiveConf.getBoolVar(owi.getParseContext().getConf(),
+          HiveConf.ConfVars.HIVEPPDREMOVEDUPLICATEFILTERS)) {
+        if (hasUnpushedPredicates) {
+          ExprWalkerInfo unpushedPreds = mergeChildrenPred(nd, owi, null, false);
+          return createFilter((Operator)nd, unpushedPreds, owi);
+        }
+      }
       return null;
     }
 
@@ -318,20 +368,21 @@ public final class OpProcFactory {
      * @param ignoreAliases
      * @throws SemanticException
      */
-    protected void mergeWithChildrenPred(Node nd, OpWalkerInfo owi,
+    protected boolean mergeWithChildrenPred(Node nd, OpWalkerInfo owi,
         ExprWalkerInfo ewi, Set<String> aliases, boolean ignoreAliases)
         throws SemanticException {
+      boolean hasUnpushedPredicates = false;
       if (nd.getChildren() == null || nd.getChildren().size() > 1) {
         // ppd for multi-insert query is not yet implemented
         // no-op for leafs
-        return;
+        return hasUnpushedPredicates;
       }
       Operator<? extends Serializable> op = (Operator<? extends Serializable>) nd;
       ExprWalkerInfo childPreds = owi
           .getPrunedPreds((Operator<? extends Serializable>) nd.getChildren()
           .get(0));
       if (childPreds == null) {
-        return;
+        return hasUnpushedPredicates;
       }
       if (ewi == null) {
         ewi = new ExprWalkerInfo();
@@ -344,11 +395,40 @@ public final class OpProcFactory {
           // input8.q
           ExprWalkerInfo extractPushdownPreds = ExprWalkerProcFactory
               .extractPushdownPreds(owi, op, e.getValue());
+          if (!extractPushdownPreds.getNonFinalCandidates().isEmpty()) {
+            hasUnpushedPredicates = true;
+          }
           ewi.merge(extractPushdownPreds);
           logExpr(nd, extractPushdownPreds);
         }
       }
       owi.putPrunedPreds((Operator<? extends Serializable>) nd, ewi);
+      return hasUnpushedPredicates;
+    }
+
+    protected ExprWalkerInfo mergeChildrenPred(Node nd, OpWalkerInfo owi,
+        Set<String> excludedAliases, boolean ignoreAliases)
+        throws SemanticException {
+      if (nd.getChildren() == null) {
+        return null;
+      }
+      Operator<? extends Serializable> op = (Operator<? extends Serializable>) nd;
+      ExprWalkerInfo ewi = new ExprWalkerInfo();
+      for (Operator<? extends Serializable> child : op.getChildOperators()) {
+        ExprWalkerInfo childPreds = owi.getPrunedPreds(child);
+        if (childPreds == null) {
+          continue;
+        }
+        for (Entry<String, List<ExprNodeDesc>> e : childPreds
+            .getFinalCandidates().entrySet()) {
+          if (ignoreAliases || excludedAliases == null ||
+              !excludedAliases.contains(e.getKey()) || e.getKey() == null) {
+            ewi.addPushDowns(e.getKey(), e.getValue());
+            logExpr(nd, ewi);
+          }
+        }
+      }
+      return ewi;
     }
   }
 
@@ -386,7 +466,7 @@ public final class OpProcFactory {
     if (condn == null) {
       return null;
     }
-    
+
     if (op instanceof TableScanOperator) {
       boolean pushFilterToStorage;
       HiveConf hiveConf = owi.getParseContext().getConf();
@@ -423,6 +503,24 @@ public final class OpProcFactory {
     }
     OpParseContext ctx = new OpParseContext(inputRR);
     owi.put(output, ctx);
+
+    if (HiveConf.getBoolVar(owi.getParseContext().getConf(),
+        HiveConf.ConfVars.HIVEPPDREMOVEDUPLICATEFILTERS)) {
+      // remove the candidate filter ops
+      for (FilterOperator fop : owi.getCandidateFilterOps()) {
+        List<Operator<? extends Serializable>> children = fop.getChildOperators();
+        List<Operator<? extends Serializable>> parents = fop.getParentOperators();
+        for (Operator<? extends Serializable> parent : parents) {
+          parent.getChildOperators().addAll(children);
+          parent.removeChild(fop);
+        }
+        for (Operator<? extends Serializable> child : children) {
+          child.getParentOperators().addAll(parents);
+          child.removeParent(fop);
+        }
+      }
+      owi.getCandidateFilterOps().clear();
+    }
     return output;
   }
 
@@ -506,7 +604,7 @@ public final class OpProcFactory {
     tableScanDesc.setFilterExpr(decomposed.pushedPredicate);
     return decomposed.residualPredicate;
   }
-  
+
   public static NodeProcessor getFilterProc() {
     return new FilterPPD();
   }
@@ -536,7 +634,7 @@ public final class OpProcFactory {
   }
 
   public static NodeProcessor getUDTFProc() {
-    return new ScriptPPD();
+    return new UDTFPPD();
   }
 
   public static NodeProcessor getLVFProc() {

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpWalkerInfo.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpWalkerInfo.java?rev=1146129&r1=1146128&r2=1146129&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpWalkerInfo.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpWalkerInfo.java Wed Jul 13 16:49:22 2011
@@ -18,9 +18,12 @@
 package org.apache.hadoop.hive.ql.ppd;
 
 import java.io.Serializable;
+import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
@@ -39,11 +42,13 @@ public class OpWalkerInfo implements Nod
   private final HashMap<Operator<? extends Serializable>, ExprWalkerInfo> opToPushdownPredMap;
   private final Map<Operator<? extends Serializable>, OpParseContext> opToParseCtxMap;
   private final ParseContext pGraphContext;
+  private final List<FilterOperator> candidateFilterOps;
 
   public OpWalkerInfo(ParseContext pGraphContext) {
     this.pGraphContext = pGraphContext;
     opToParseCtxMap = pGraphContext.getOpParseCtx();
     opToPushdownPredMap = new HashMap<Operator<? extends Serializable>, ExprWalkerInfo>();
+    candidateFilterOps = new ArrayList<FilterOperator>();
   }
 
   public ExprWalkerInfo getPrunedPreds(Operator<? extends Serializable> op) {
@@ -67,4 +72,13 @@ public class OpWalkerInfo implements Nod
   public ParseContext getParseContext() {
     return pGraphContext;
   }
+
+  public List<FilterOperator> getCandidateFilterOps() {
+    return candidateFilterOps;
+  }
+
+  public void addCandidateFilterOp(FilterOperator fop) {
+    candidateFilterOps.add(fop);
+  }
+
 }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java?rev=1146129&r1=1146128&r2=1146129&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java Wed Jul 13 16:49:22 2011
@@ -17,13 +17,10 @@
  */
 package org.apache.hadoop.hive.ql.ppd;
 
-import java.io.Serializable;
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.util.Map;
 
-import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
 import org.apache.hadoop.hive.ql.lib.Dispatcher;
@@ -33,7 +30,6 @@ import org.apache.hadoop.hive.ql.lib.Nod
 import org.apache.hadoop.hive.ql.lib.Rule;
 import org.apache.hadoop.hive.ql.lib.RuleRegExp;
 import org.apache.hadoop.hive.ql.optimizer.Transform;
-import org.apache.hadoop.hive.ql.parse.OpParseContext;
 import org.apache.hadoop.hive.ql.parse.ParseContext;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 
@@ -73,12 +69,10 @@ import org.apache.hadoop.hive.ql.parse.S
 public class PredicatePushDown implements Transform {
 
   private ParseContext pGraphContext;
-  private HashMap<Operator<? extends Serializable>, OpParseContext> opToParseCtxMap;
 
   @Override
   public ParseContext transform(ParseContext pctx) throws SemanticException {
     pGraphContext = pctx;
-    opToParseCtxMap = pGraphContext.getOpParseCtx();
 
     // create the context for walking operators
     OpWalkerInfo opWalkerInfo = new OpWalkerInfo(pGraphContext);

Modified: hive/trunk/ql/src/test/queries/clientpositive/ppd1.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/ppd1.q?rev=1146129&r1=1146128&r2=1146129&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/ppd1.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/ppd1.q Wed Jul 13 16:49:22 2011
@@ -1,7 +1,14 @@
 set hive.optimize.ppd=true;
+set hive.ppd.remove.duplicatefilters=false;
 
 EXPLAIN
 SELECT src.key as c3 from src where src.key > '2';
 
 SELECT src.key as c3 from src where src.key > '2';
 
+set hive.ppd.remove.duplicatefilters=true;
+
+EXPLAIN
+SELECT src.key as c3 from src where src.key > '2';
+
+SELECT src.key as c3 from src where src.key > '2';

Modified: hive/trunk/ql/src/test/queries/clientpositive/ppd_clusterby.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/ppd_clusterby.q?rev=1146129&r1=1146128&r2=1146129&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/ppd_clusterby.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/ppd_clusterby.q Wed Jul 13 16:49:22 2011
@@ -1,4 +1,15 @@
 set hive.optimize.ppd=true;
+set hive.ppd.remove.duplicatefilters=false;
+
+EXPLAIN
+SELECT * FROM SRC x where x.key = 10 CLUSTER BY x.key;
+SELECT * FROM SRC x where x.key = 10 CLUSTER BY x.key;
+
+EXPLAIN 
+SELECT x.key, x.value as v1, y.key  FROM SRC x JOIN SRC y ON (x.key = y.key)  where x.key = 20 CLUSTER BY v1;
+SELECT x.key, x.value as v1, y.key  FROM SRC x JOIN SRC y ON (x.key = y.key) where x.key = 20 CLUSTER BY v1;
+
+set hive.ppd.remove.duplicatefilters=true;
 
 EXPLAIN
 SELECT * FROM SRC x where x.key = 10 CLUSTER BY x.key;

Modified: hive/trunk/ql/src/test/queries/clientpositive/ppd_constant_expr.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/ppd_constant_expr.q?rev=1146129&r1=1146128&r2=1146129&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/ppd_constant_expr.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/ppd_constant_expr.q Wed Jul 13 16:49:22 2011
@@ -1,4 +1,6 @@
 set hive.optimize.ppd=true;
+set hive.ppd.remove.duplicatefilters=false;
+
 CREATE TABLE ppd_constant_expr(c1 STRING, c2 INT, c3 DOUBLE) STORED AS TEXTFILE;
 
 EXPLAIN
@@ -10,4 +12,13 @@ INSERT OVERWRITE TABLE ppd_constant_expr
 
 SELECT ppd_constant_expr.* FROM ppd_constant_expr;
 
+set hive.ppd.remove.duplicatefilters=true;
+
+EXPLAIN
+FROM src1 
+INSERT OVERWRITE TABLE ppd_constant_expr SELECT 4 + NULL, src1.key - NULL, NULL + NULL;
 
+FROM src1 
+INSERT OVERWRITE TABLE ppd_constant_expr SELECT 4 + NULL, src1.key - NULL, NULL + NULL;
+
+SELECT ppd_constant_expr.* FROM ppd_constant_expr;

Modified: hive/trunk/ql/src/test/queries/clientpositive/ppd_gby.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/ppd_gby.q?rev=1146129&r1=1146128&r2=1146129&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/ppd_gby.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/ppd_gby.q Wed Jul 13 16:49:22 2011
@@ -1,4 +1,18 @@
 set hive.optimize.ppd=true;
+set hive.ppd.remove.duplicatefilters=false;
+
+EXPLAIN
+SELECT src1.c1 
+FROM
+(SELECT src.value as c1, count(src.key) as c2 from src where src.value > 'val_10' group by src.value) src1
+WHERE src1.c1 > 'val_200' and (src1.c2 > 30 or src1.c1 < 'val_400'); 
+
+SELECT src1.c1 
+FROM
+(SELECT src.value as c1, count(src.key) as c2 from src where src.value > 'val_10' group by src.value) src1
+WHERE src1.c1 > 'val_200' and (src1.c2 > 30 or src1.c1 < 'val_400'); 
+
+set hive.ppd.remove.duplicatefilters=true;
 
 EXPLAIN
 SELECT src1.c1 

Modified: hive/trunk/ql/src/test/queries/clientpositive/ppd_gby2.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/ppd_gby2.q?rev=1146129&r1=1146128&r2=1146129&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/ppd_gby2.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/ppd_gby2.q Wed Jul 13 16:49:22 2011
@@ -1,4 +1,20 @@
 set hive.optimize.ppd=true;
+set hive.ppd.remove.duplicatefilters=false;
+
+EXPLAIN
+SELECT max(src1.c1), src1.c2 
+FROM
+(SELECT src.value AS c1, count(src.key) AS c2 FROM src WHERE src.value > 'val_10' GROUP BY src.value) src1
+WHERE src1.c1 > 'val_200' AND (src1.c2 > 30 OR src1.c1 < 'val_400')
+GROUP BY src1.c2; 
+
+SELECT max(src1.c1), src1.c2 
+FROM
+(SELECT src.value AS c1, count(src.key) AS c2 FROM src WHERE src.value > 'val_10' GROUP BY src.value) src1
+WHERE src1.c1 > 'val_200' AND (src1.c2 > 30 OR src1.c1 < 'val_400')
+GROUP BY src1.c2; 
+
+set hive.ppd.remove.duplicatefilters=true;
 
 EXPLAIN
 SELECT max(src1.c1), src1.c2 

Modified: hive/trunk/ql/src/test/queries/clientpositive/ppd_gby_join.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/ppd_gby_join.q?rev=1146129&r1=1146128&r2=1146129&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/ppd_gby_join.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/ppd_gby_join.q Wed Jul 13 16:49:22 2011
@@ -1,4 +1,5 @@
 set hive.optimize.ppd=true;
+set hive.ppd.remove.duplicatefilters=false;
 
 EXPLAIN
 SELECT src1.c1, count(1) 
@@ -10,3 +11,14 @@ ON src1.c1 = src2.c3 AND src1.c1 < '400'
 WHERE src1.c1 > '20' AND (src1.c2 < 'val_50' OR src1.c1 > '2') AND (src2.c3 > '50' OR src1.c1 < '50') AND (src2.c3 <> '4')
 GROUP BY src1.c1;
 
+set hive.ppd.remove.duplicatefilters=true;
+
+EXPLAIN
+SELECT src1.c1, count(1) 
+FROM
+(SELECT src.key AS c1, src.value AS c2 from src where src.key > '1' ) src1
+JOIN
+(SELECT src.key AS c3, src.value AS c4 from src where src.key > '2' ) src2
+ON src1.c1 = src2.c3 AND src1.c1 < '400'
+WHERE src1.c1 > '20' AND (src1.c2 < 'val_50' OR src1.c1 > '2') AND (src2.c3 > '50' OR src1.c1 < '50') AND (src2.c3 <> '4')
+GROUP BY src1.c1;

Modified: hive/trunk/ql/src/test/queries/clientpositive/ppd_join.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/ppd_join.q?rev=1146129&r1=1146128&r2=1146129&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/ppd_join.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/ppd_join.q Wed Jul 13 16:49:22 2011
@@ -1,4 +1,24 @@
 set hive.optimize.ppd=true;
+set hive.ppd.remove.duplicatefilters=false;
+
+EXPLAIN
+SELECT src1.c1, src2.c4 
+FROM
+(SELECT src.key as c1, src.value as c2 from src where src.key > '1' ) src1
+JOIN
+(SELECT src.key as c3, src.value as c4 from src where src.key > '2' ) src2
+ON src1.c1 = src2.c3 AND src1.c1 < '400'
+WHERE src1.c1 > '20' and (src1.c2 < 'val_50' or src1.c1 > '2') and (src2.c3 > '50' or src1.c1 < '50') and (src2.c3 <> '4');
+
+SELECT src1.c1, src2.c4 
+FROM
+(SELECT src.key as c1, src.value as c2 from src where src.key > '1' ) src1
+JOIN
+(SELECT src.key as c3, src.value as c4 from src where src.key > '2' ) src2
+ON src1.c1 = src2.c3 AND src1.c1 < '400'
+WHERE src1.c1 > '20' and (src1.c2 < 'val_50' or src1.c1 > '2') and (src2.c3 > '50' or src1.c1 < '50') and (src2.c3 <> '4');
+
+set hive.ppd.remove.duplicatefilters=true;
 
 EXPLAIN
 SELECT src1.c1, src2.c4 

Modified: hive/trunk/ql/src/test/queries/clientpositive/ppd_join2.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/ppd_join2.q?rev=1146129&r1=1146128&r2=1146129&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/ppd_join2.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/ppd_join2.q Wed Jul 13 16:49:22 2011
@@ -1,4 +1,30 @@
 set hive.optimize.ppd=true;
+set hive.ppd.remove.duplicatefilters=false;
+
+EXPLAIN
+SELECT src1.c1, src2.c4 
+FROM
+(SELECT src.key as c1, src.value as c2 from src where src.key <> '302' ) src1
+JOIN
+(SELECT src.key as c3, src.value as c4 from src where src.key <> '305' ) src2
+ON src1.c1 = src2.c3 AND src1.c1 < '400'
+JOIN
+(SELECT src.key as c5, src.value as c6 from src where src.key <> '306' ) src3
+ON src1.c2 = src3.c6
+WHERE src1.c1 <> '311' and (src1.c2 <> 'val_50' or src1.c1 > '1') and (src2.c3 <> '10' or src1.c1 <> '10') and (src2.c3 <> '14') and (sqrt(src3.c5) <> 13);
+
+SELECT src1.c1, src2.c4 
+FROM
+(SELECT src.key as c1, src.value as c2 from src where src.key <> '302' ) src1
+JOIN
+(SELECT src.key as c3, src.value as c4 from src where src.key <> '305' ) src2
+ON src1.c1 = src2.c3 AND src1.c1 < '400'
+JOIN
+(SELECT src.key as c5, src.value as c6 from src where src.key <> '306' ) src3
+ON src1.c2 = src3.c6
+WHERE src1.c1 <> '311' and (src1.c2 <> 'val_50' or src1.c1 > '1') and (src2.c3 <> '10' or src1.c1 <> '10') and (src2.c3 <> '14') and (sqrt(src3.c5) <> 13);
+
+set hive.ppd.remove.duplicatefilters=true;
 
 EXPLAIN
 SELECT src1.c1, src2.c4 

Modified: hive/trunk/ql/src/test/queries/clientpositive/ppd_join3.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/ppd_join3.q?rev=1146129&r1=1146128&r2=1146129&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/ppd_join3.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/ppd_join3.q Wed Jul 13 16:49:22 2011
@@ -1,4 +1,30 @@
 set hive.optimize.ppd=true;
+set hive.ppd.remove.duplicatefilters=false;
+
+EXPLAIN
+SELECT src1.c1, src2.c4 
+FROM
+(SELECT src.key as c1, src.value as c2 from src where src.key <> '11' ) src1
+JOIN
+(SELECT src.key as c3, src.value as c4 from src where src.key <> '12' ) src2
+ON src1.c1 = src2.c3 AND src1.c1 < '400'
+JOIN
+(SELECT src.key as c5, src.value as c6 from src where src.key <> '13' ) src3
+ON src1.c1 = src3.c5
+WHERE src1.c1 > '0' and (src1.c2 <> 'val_500' or src1.c1 > '1') and (src2.c3 > '10' or src1.c1 <> '10') and (src2.c3 <> '4') and (src3.c5 <> '1');
+
+SELECT src1.c1, src2.c4 
+FROM
+(SELECT src.key as c1, src.value as c2 from src where src.key <> '11' ) src1
+JOIN
+(SELECT src.key as c3, src.value as c4 from src where src.key <> '12' ) src2
+ON src1.c1 = src2.c3 AND src1.c1 < '400'
+JOIN
+(SELECT src.key as c5, src.value as c6 from src where src.key <> '13' ) src3
+ON src1.c1 = src3.c5
+WHERE src1.c1 > '0' and (src1.c2 <> 'val_500' or src1.c1 > '1') and (src2.c3 > '10' or src1.c1 <> '10') and (src2.c3 <> '4') and (src3.c5 <> '1');
+
+set hive.ppd.remove.duplicatefilters=true;
 
 EXPLAIN
 SELECT src1.c1, src2.c4 

Modified: hive/trunk/ql/src/test/queries/clientpositive/ppd_multi_insert.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/ppd_multi_insert.q?rev=1146129&r1=1146128&r2=1146129&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/ppd_multi_insert.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/ppd_multi_insert.q Wed Jul 13 16:49:22 2011
@@ -1,6 +1,5 @@
 set hive.optimize.ppd=true;
-
-
+set hive.ppd.remove.duplicatefilters=false;
 
 CREATE TABLE mi1(key INT, value STRING) STORED AS TEXTFILE;
 CREATE TABLE mi2(key INT, value STRING) STORED AS TEXTFILE;
@@ -25,5 +24,22 @@ SELECT mi3.* FROM mi3;
 dfs -cat ../build/ql/test/data/warehouse/mi4.out/*;
 
 
+set hive.ppd.remove.duplicatefilters=true;
+
+EXPLAIN
+FROM src a JOIN src b ON (a.key = b.key)
+INSERT OVERWRITE TABLE mi1 SELECT a.* WHERE a.key < 100
+INSERT OVERWRITE TABLE mi2 SELECT a.key, a.value WHERE a.key >= 100 and a.key < 200
+INSERT OVERWRITE TABLE mi3 PARTITION(ds='2008-04-08', hr='12') SELECT a.key WHERE a.key >= 200 and a.key < 300
+INSERT OVERWRITE DIRECTORY '../build/ql/test/data/warehouse/mi4.out' SELECT a.value WHERE a.key >= 300;
 
+FROM src a JOIN src b ON (a.key = b.key)
+INSERT OVERWRITE TABLE mi1 SELECT a.* WHERE a.key < 100
+INSERT OVERWRITE TABLE mi2 SELECT a.key, a.value WHERE a.key >= 100 and a.key < 200
+INSERT OVERWRITE TABLE mi3 PARTITION(ds='2008-04-08', hr='12') SELECT a.key WHERE a.key >= 200 and a.key < 300
+INSERT OVERWRITE DIRECTORY '../build/ql/test/data/warehouse/mi4.out' SELECT a.value WHERE a.key >= 300;
 
+SELECT mi1.* FROM mi1;
+SELECT mi2.* FROM mi2;
+SELECT mi3.* FROM mi3;
+dfs -cat ../build/ql/test/data/warehouse/mi4.out/*;

Modified: hive/trunk/ql/src/test/queries/clientpositive/ppd_outer_join1.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/ppd_outer_join1.q?rev=1146129&r1=1146128&r2=1146129&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/ppd_outer_join1.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/ppd_outer_join1.q Wed Jul 13 16:49:22 2011
@@ -1,4 +1,5 @@
 set hive.optimize.ppd=true;
+set hive.ppd.remove.duplicatefilters=false;
 
 EXPLAIN
  FROM 
@@ -17,3 +18,21 @@ EXPLAIN
  SELECT a.key, a.value, b.key, b.value
  WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25;
 
+set hive.ppd.remove.duplicatefilters=true;
+
+EXPLAIN
+ FROM 
+  src a
+ LEFT OUTER JOIN 
+  src b 
+ ON (a.key = b.key)
+ SELECT a.key, a.value, b.key, b.value
+ WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25;
+
+ FROM 
+  src a
+ LEFT OUTER JOIN 
+  src b 
+ ON (a.key = b.key)
+ SELECT a.key, a.value, b.key, b.value
+ WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25;

Modified: hive/trunk/ql/src/test/queries/clientpositive/ppd_outer_join2.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/ppd_outer_join2.q?rev=1146129&r1=1146128&r2=1146129&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/ppd_outer_join2.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/ppd_outer_join2.q Wed Jul 13 16:49:22 2011
@@ -1,4 +1,5 @@
 set hive.optimize.ppd=true;
+set hive.ppd.remove.duplicatefilters=false;
 
 EXPLAIN
  FROM 
@@ -17,3 +18,21 @@ EXPLAIN
  SELECT a.key, a.value, b.key, b.value
  WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25';
 
+set hive.ppd.remove.duplicatefilters=true;
+
+EXPLAIN
+ FROM 
+  src a
+ RIGHT OUTER JOIN 
+  src b 
+ ON (a.key = b.key)
+ SELECT a.key, a.value, b.key, b.value
+ WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25';
+
+ FROM 
+  src a
+ RIGHT OUTER JOIN 
+  src b 
+ ON (a.key = b.key)
+ SELECT a.key, a.value, b.key, b.value
+ WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25';

Modified: hive/trunk/ql/src/test/queries/clientpositive/ppd_outer_join3.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/ppd_outer_join3.q?rev=1146129&r1=1146128&r2=1146129&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/ppd_outer_join3.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/ppd_outer_join3.q Wed Jul 13 16:49:22 2011
@@ -1,4 +1,5 @@
 set hive.optimize.ppd=true;
+set hive.ppd.remove.duplicatefilters=false;
 
 EXPLAIN
  FROM 
@@ -17,5 +18,21 @@ EXPLAIN
  SELECT a.key, a.value, b.key, b.value
  WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25';
 
+set hive.ppd.remove.duplicatefilters=true;
 
+EXPLAIN
+ FROM 
+  src a
+ FULL OUTER JOIN 
+  src b 
+ ON (a.key = b.key)
+ SELECT a.key, a.value, b.key, b.value
+ WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25';
 
+ FROM 
+  src a
+ FULL OUTER JOIN 
+  src b 
+ ON (a.key = b.key)
+ SELECT a.key, a.value, b.key, b.value
+ WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25';

Modified: hive/trunk/ql/src/test/queries/clientpositive/ppd_outer_join4.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/ppd_outer_join4.q?rev=1146129&r1=1146128&r2=1146129&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/ppd_outer_join4.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/ppd_outer_join4.q Wed Jul 13 16:49:22 2011
@@ -1,4 +1,5 @@
 set hive.optimize.ppd=true;
+set hive.ppd.remove.duplicatefilters=false;
 
 EXPLAIN
  FROM 
@@ -23,3 +24,27 @@ EXPLAIN
  SELECT a.key, a.value, b.key, b.value, c.key
  WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' AND sqrt(c.key) <> 13 ;
 
+set hive.ppd.remove.duplicatefilters=true;
+
+EXPLAIN
+ FROM 
+  src a
+ LEFT OUTER JOIN
+  src b
+ ON (a.key = b.key)
+ RIGHT OUTER JOIN 
+  src c 
+ ON (a.key = c.key)
+ SELECT a.key, a.value, b.key, b.value, c.key
+ WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' AND sqrt(c.key) <> 13 ;
+
+ FROM 
+  src a
+ LEFT OUTER JOIN
+  src b
+ ON (a.key = b.key)
+ RIGHT OUTER JOIN 
+  src c 
+ ON (a.key = c.key)
+ SELECT a.key, a.value, b.key, b.value, c.key
+ WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' AND sqrt(c.key) <> 13 ;

Modified: hive/trunk/ql/src/test/queries/clientpositive/ppd_random.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/ppd_random.q?rev=1146129&r1=1146128&r2=1146129&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/ppd_random.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/ppd_random.q Wed Jul 13 16:49:22 2011
@@ -1,4 +1,16 @@
 set hive.optimize.ppd=true;
+set hive.ppd.remove.duplicatefilters=false;
+
+EXPLAIN
+SELECT src1.c1, src2.c4 
+FROM
+(SELECT src.key as c1, src.value as c2 from src ) src1
+JOIN
+(SELECT src.key as c3, src.value as c4 from src where src.key > '2' ) src2
+ON src1.c1 = src2.c3
+WHERE rand() > 0.5;
+
+set hive.ppd.remove.duplicatefilters=true;
 
 EXPLAIN
 SELECT src1.c1, src2.c4 

Modified: hive/trunk/ql/src/test/queries/clientpositive/ppd_transform.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/ppd_transform.q?rev=1146129&r1=1146128&r2=1146129&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/ppd_transform.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/ppd_transform.q Wed Jul 13 16:49:22 2011
@@ -1,4 +1,5 @@
 set hive.optimize.ppd=true;
+set hive.ppd.remove.duplicatefilters=false;
 
 EXPLAIN
 FROM (
@@ -17,3 +18,21 @@ FROM (
 ) tmap
 SELECT tmap.tkey, tmap.tvalue WHERE tmap.tkey < 100;
 
+set hive.ppd.remove.duplicatefilters=true;
+
+EXPLAIN
+FROM (
+  FROM src
+  SELECT TRANSFORM(src.key, src.value)
+         USING '/bin/cat' AS (tkey, tvalue) 
+  CLUSTER BY tkey 
+) tmap
+SELECT tmap.tkey, tmap.tvalue WHERE tmap.tkey < 100;
+
+FROM (
+  FROM src
+  SELECT TRANSFORM(src.key, src.value)
+         USING '/bin/cat' AS (tkey, tvalue) 
+  CLUSTER BY tkey 
+) tmap
+SELECT tmap.tkey, tmap.tvalue WHERE tmap.tkey < 100;

Modified: hive/trunk/ql/src/test/queries/clientpositive/ppd_udf_case.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/ppd_udf_case.q?rev=1146129&r1=1146128&r2=1146129&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/ppd_udf_case.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/ppd_udf_case.q Wed Jul 13 16:49:22 2011
@@ -1,4 +1,32 @@
 set hive.optimize.ppd=true;
+set hive.ppd.remove.duplicatefilters=false;
+
+EXPLAIN
+SELECT *
+FROM srcpart a JOIN srcpart b
+ON a.key = b.key
+WHERE a.ds = '2008-04-08' AND
+      b.ds = '2008-04-08' AND
+      CASE a.key
+        WHEN '27' THEN TRUE
+        WHEN '38' THEN FALSE
+        ELSE NULL
+       END
+ORDER BY a.key, a.value, a.ds, a.hr, b.key, b.value, b.ds, b.hr;
+
+SELECT *
+FROM srcpart a JOIN srcpart b
+ON a.key = b.key
+WHERE a.ds = '2008-04-08' AND
+      b.ds = '2008-04-08' AND
+      CASE a.key
+        WHEN '27' THEN TRUE
+        WHEN '38' THEN FALSE
+        ELSE NULL
+       END
+ORDER BY a.key, a.value, a.ds, a.hr, b.key, b.value, b.ds, b.hr;
+
+set hive.ppd.remove.duplicatefilters=true;
 
 EXPLAIN
 SELECT *

Modified: hive/trunk/ql/src/test/queries/clientpositive/ppd_union.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/ppd_union.q?rev=1146129&r1=1146128&r2=1146129&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/ppd_union.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/ppd_union.q Wed Jul 13 16:49:22 2011
@@ -1,4 +1,5 @@
 set hive.optimize.ppd=true;
+set hive.ppd.remove.duplicatefilters=false;
 
 EXPLAIN
 FROM (
@@ -17,4 +18,21 @@ FROM (
 SELECT unioned_query.*
   WHERE key > '4' and value > 'val_4';
 
+set hive.ppd.remove.duplicatefilters=true;
 
+EXPLAIN
+FROM (
+  FROM src select src.key, src.value WHERE src.key < '100'
+    UNION ALL
+  FROM src SELECT src.* WHERE src.key > '150'
+) unioned_query
+SELECT unioned_query.*
+  WHERE key > '4' and value > 'val_4';
+
+FROM (
+  FROM src select src.key, src.value WHERE src.key < '100'
+    UNION ALL
+  FROM src SELECT src.* WHERE src.key > '150'
+) unioned_query
+SELECT unioned_query.*
+  WHERE key > '4' and value > 'val_4';