Posted to commits@hive.apache.org by jc...@apache.org on 2015/08/27 09:51:00 UTC

[1/2] hive git commit: HIVE-11573: PointLookupOptimizer can be pessimistic at a low nDV (Gopal V, reviewed by Jesus Camacho Rodriguez)

Repository: hive
Updated Branches:
  refs/heads/master 037fb02a8 -> b247cac4f
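
Context for the golden file below: the PointLookupOptimizer rewrites a disjunction of
equality predicates into a single IN clause over a struct of the compared columns, and
HIVE-11573 makes that rewrite less pessimistic when the disjunction has a low nDV (few
OR branches). A minimal HiveQL sketch of the rewrite recorded in the first EXPLAIN plan
below; the threshold knob named in the comment is an assumption, not part of this diff:

    -- Low-nDV disjunction over the partition column and key:
    select key, value, ds
    from pcr_t1
    where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2);

    -- After the optimizer fires, the Filter Operator predicate becomes:
    --   (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09'))
    -- Assumed configuration governing when the rewrite applies (later Hive releases):
    -- set hive.optimize.point.lookup.min=31;  -- minimum number of OR clauses to transform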


http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/pointlookup2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/pointlookup2.q.out b/ql/src/test/results/clientpositive/pointlookup2.q.out
new file mode 100644
index 0000000..55edd90
--- /dev/null
+++ b/ql/src/test/results/clientpositive/pointlookup2.q.out
@@ -0,0 +1,1647 @@
+PREHOOK: query: drop table pcr_t1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table pcr_t1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table pcr_t2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table pcr_t2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table pcr_t3
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table pcr_t3
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table pcr_t1 (key int, value string) partitioned by (ds string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@pcr_t1
+POSTHOOK: query: create table pcr_t1 (key int, value string) partitioned by (ds string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@pcr_t1
+PREHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-08') select * from src where key < 20 order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@pcr_t1@ds=2000-04-08
+POSTHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-08') select * from src where key < 20 order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@pcr_t1@ds=2000-04-08
+POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-08).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-08).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-09') select * from src where key < 20 order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@pcr_t1@ds=2000-04-09
+POSTHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-09') select * from src where key < 20 order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@pcr_t1@ds=2000-04-09
+POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-09).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-09).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-10') select * from src where key < 20 order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@pcr_t1@ds=2000-04-10
+POSTHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-10') select * from src where key < 20 order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@pcr_t1@ds=2000-04-10
+POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-10).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-10).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: create table pcr_t2 (ds string, key int, value string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@pcr_t2
+POSTHOOK: query: create table pcr_t2 (ds string, key int, value string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@pcr_t2
+PREHOOK: query: from pcr_t1
+insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@pcr_t1
+PREHOOK: Input: default@pcr_t1@ds=2000-04-08
+PREHOOK: Output: default@pcr_t2
+POSTHOOK: query: from pcr_t1
+insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@pcr_t1
+POSTHOOK: Input: default@pcr_t1@ds=2000-04-08
+POSTHOOK: Output: default@pcr_t2
+POSTHOOK: Lineage: pcr_t2.ds SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:ds, type:string, comment:null), ]
+POSTHOOK: Lineage: pcr_t2.key SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: pcr_t2.value SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: from pcr_t1
+insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08' and key=2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@pcr_t1
+PREHOOK: Input: default@pcr_t1@ds=2000-04-08
+PREHOOK: Output: default@pcr_t2
+POSTHOOK: query: from pcr_t1
+insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08' and key=2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@pcr_t1
+POSTHOOK: Input: default@pcr_t1@ds=2000-04-08
+POSTHOOK: Output: default@pcr_t2
+POSTHOOK: Lineage: pcr_t2.ds SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:ds, type:string, comment:null), ]
+POSTHOOK: Lineage: pcr_t2.key SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: pcr_t2.value SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: explain extended
+select key, value, ds
+from pcr_t1
+where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2)
+order by key, value, ds
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select key, value, ds
+from pcr_t1
+where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2)
+order by key, value, ds
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  
+TOK_QUERY
+   TOK_FROM
+      TOK_TABREF
+         TOK_TABNAME
+            pcr_t1
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               key
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               value
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               ds
+      TOK_WHERE
+         or
+            and
+               =
+                  TOK_TABLE_OR_COL
+                     ds
+                  '2000-04-08'
+               =
+                  TOK_TABLE_OR_COL
+                     key
+                  1
+            and
+               =
+                  TOK_TABLE_OR_COL
+                     ds
+                  '2000-04-09'
+               =
+                  TOK_TABLE_OR_COL
+                     key
+                  2
+      TOK_ORDERBY
+         TOK_TABSORTCOLNAMEASC
+            TOK_TABLE_OR_COL
+               key
+         TOK_TABSORTCOLNAMEASC
+            TOK_TABLE_OR_COL
+               value
+         TOK_TABSORTCOLNAMEASC
+            TOK_TABLE_OR_COL
+               ds
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: pcr_t1
+            Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
+              Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: int), value (type: string), ds (type: string)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+                  sort order: +++
+                  Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                  tag: -1
+                  auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2000-04-08
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2000-04-08
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcr_t1
+              numFiles 1
+              numRows 20
+              partition_columns ds
+              partition_columns.types string
+              rawDataSize 160
+              serialization.ddl struct pcr_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                partition_columns ds
+                partition_columns.types string
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t1
+            name: default.pcr_t1
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2000-04-09
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2000-04-09
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcr_t1
+              numFiles 1
+              numRows 20
+              partition_columns ds
+              partition_columns.types string
+              rawDataSize 160
+              serialization.ddl struct pcr_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                partition_columns ds
+                partition_columns.types string
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t1
+            name: default.pcr_t1
+      Truncated Path -> Alias:
+        /pcr_t1/ds=2000-04-08 [pcr_t1]
+        /pcr_t1/ds=2000-04-09 [pcr_t1]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  columns _col0,_col1,_col2
+                  columns.types int:string:string
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain extended
+select *
+from pcr_t1 t1 join pcr_t1 t2
+on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-08'
+order by t1.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select *
+from pcr_t1 t1 join pcr_t1 t2
+on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-08'
+order by t1.key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  
+TOK_QUERY
+   TOK_FROM
+      TOK_JOIN
+         TOK_TABREF
+            TOK_TABNAME
+               pcr_t1
+            t1
+         TOK_TABREF
+            TOK_TABNAME
+               pcr_t1
+            t2
+         and
+            and
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t1
+                     key
+                  .
+                     TOK_TABLE_OR_COL
+                        t2
+                     key
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t1
+                     ds
+                  '2000-04-08'
+            =
+               .
+                  TOK_TABLE_OR_COL
+                     t2
+                  ds
+               '2000-04-08'
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_ALLCOLREF
+      TOK_ORDERBY
+         TOK_TABSORTCOLNAMEASC
+            .
+               TOK_TABLE_OR_COL
+                  t1
+               key
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t1
+            Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: key (type: int)
+                sort order: +
+                Map-reduce partition columns: key (type: int)
+                Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+                tag: 0
+                value expressions: value (type: string)
+                auto parallelism: false
+          TableScan
+            alias: t2
+            Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: key (type: int)
+                sort order: +
+                Map-reduce partition columns: key (type: int)
+                Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+                tag: 1
+                value expressions: value (type: string)
+                auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2000-04-08
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2000-04-08
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcr_t1
+              numFiles 1
+              numRows 20
+              partition_columns ds
+              partition_columns.types string
+              rawDataSize 160
+              serialization.ddl struct pcr_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                partition_columns ds
+                partition_columns.types string
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t1
+            name: default.pcr_t1
+      Truncated Path -> Alias:
+        /pcr_t1/ds=2000-04-08 [t1, t2]
+      Needs Tagging: true
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 key (type: int)
+            1 key (type: int)
+          outputColumnNames: _col0, _col1, _col6, _col7
+          Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: string)
+            outputColumnNames: _col0, _col1, _col3, _col4
+            Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+#### A masked pattern was here ####
+              NumFilesPerFileSink: 1
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  properties:
+                    columns _col0,_col1,_col3,_col4
+                    columns.types int,string,int,string
+                    escape.delim \
+                    serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              key expressions: _col0 (type: int)
+              sort order: +
+              Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col1 (type: string), _col3 (type: int), _col4 (type: string)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10003
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              columns _col0,_col1,_col3,_col4
+              columns.types int,string,int,string
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                columns _col0,_col1,_col3,_col4
+                columns.types int,string,int,string
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), '2000-04-08' (type: string), VALUE._col2 (type: int), VALUE._col3 (type: string), '2000-04-08' (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+          Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  columns _col0,_col1,_col2,_col3,_col4,_col5
+                  columns.types int:string:string:int:string:string
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain extended
+select *
+from pcr_t1 t1 join pcr_t1 t2
+on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-09'
+order by t1.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select *
+from pcr_t1 t1 join pcr_t1 t2
+on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-09'
+order by t1.key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  
+TOK_QUERY
+   TOK_FROM
+      TOK_JOIN
+         TOK_TABREF
+            TOK_TABNAME
+               pcr_t1
+            t1
+         TOK_TABREF
+            TOK_TABNAME
+               pcr_t1
+            t2
+         and
+            and
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t1
+                     key
+                  .
+                     TOK_TABLE_OR_COL
+                        t2
+                     key
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t1
+                     ds
+                  '2000-04-08'
+            =
+               .
+                  TOK_TABLE_OR_COL
+                     t2
+                  ds
+               '2000-04-09'
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_ALLCOLREF
+      TOK_ORDERBY
+         TOK_TABSORTCOLNAMEASC
+            .
+               TOK_TABLE_OR_COL
+                  t1
+               key
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t1
+            Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: key (type: int)
+                sort order: +
+                Map-reduce partition columns: key (type: int)
+                Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+                tag: 0
+                value expressions: value (type: string)
+                auto parallelism: false
+          TableScan
+            alias: t2
+            Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: key (type: int)
+                sort order: +
+                Map-reduce partition columns: key (type: int)
+                Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+                tag: 1
+                value expressions: value (type: string)
+                auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2000-04-08
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2000-04-08
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcr_t1
+              numFiles 1
+              numRows 20
+              partition_columns ds
+              partition_columns.types string
+              rawDataSize 160
+              serialization.ddl struct pcr_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                partition_columns ds
+                partition_columns.types string
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t1
+            name: default.pcr_t1
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2000-04-09
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2000-04-09
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcr_t1
+              numFiles 1
+              numRows 20
+              partition_columns ds
+              partition_columns.types string
+              rawDataSize 160
+              serialization.ddl struct pcr_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                partition_columns ds
+                partition_columns.types string
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t1
+            name: default.pcr_t1
+      Truncated Path -> Alias:
+        /pcr_t1/ds=2000-04-08 [t1]
+        /pcr_t1/ds=2000-04-09 [t2]
+      Needs Tagging: true
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 key (type: int)
+            1 key (type: int)
+          outputColumnNames: _col0, _col1, _col6, _col7
+          Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: string)
+            outputColumnNames: _col0, _col1, _col3, _col4
+            Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+#### A masked pattern was here ####
+              NumFilesPerFileSink: 1
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  properties:
+                    columns _col0,_col1,_col3,_col4
+                    columns.types int,string,int,string
+                    escape.delim \
+                    serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              key expressions: _col0 (type: int)
+              sort order: +
+              Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col1 (type: string), _col3 (type: int), _col4 (type: string)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10003
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              columns _col0,_col1,_col3,_col4
+              columns.types int,string,int,string
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                columns _col0,_col1,_col3,_col4
+                columns.types int,string,int,string
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), '2000-04-08' (type: string), VALUE._col2 (type: int), VALUE._col3 (type: string), '2000-04-09' (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+          Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  columns _col0,_col1,_col2,_col3,_col4,_col5
+                  columns.types int:string:string:int:string:string
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+Warning: Shuffle Join JOIN[4][tables = [t1, t2]] in Stage 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: explain extended
+select *
+from pcr_t1 t1 join pcr_t2 t2
+where (t1.ds='2000-04-08' and t2.key=1) or (t1.ds='2000-04-09' and t2.key=2)
+order by t2.key, t2.value, t1.ds
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select *
+from pcr_t1 t1 join pcr_t2 t2
+where (t1.ds='2000-04-08' and t2.key=1) or (t1.ds='2000-04-09' and t2.key=2)
+order by t2.key, t2.value, t1.ds
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  
+TOK_QUERY
+   TOK_FROM
+      TOK_JOIN
+         TOK_TABREF
+            TOK_TABNAME
+               pcr_t1
+            t1
+         TOK_TABREF
+            TOK_TABNAME
+               pcr_t2
+            t2
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_ALLCOLREF
+      TOK_WHERE
+         or
+            and
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t1
+                     ds
+                  '2000-04-08'
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t2
+                     key
+                  1
+            and
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t1
+                     ds
+                  '2000-04-09'
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t2
+                     key
+                  2
+      TOK_ORDERBY
+         TOK_TABSORTCOLNAMEASC
+            .
+               TOK_TABLE_OR_COL
+                  t2
+               key
+         TOK_TABSORTCOLNAMEASC
+            .
+               TOK_TABLE_OR_COL
+                  t2
+               value
+         TOK_TABSORTCOLNAMEASC
+            .
+               TOK_TABLE_OR_COL
+                  t1
+               ds
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t1
+            Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+              tag: 0
+              value expressions: key (type: int), value (type: string), ds (type: string)
+              auto parallelism: false
+          TableScan
+            alias: t2
+            Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+              tag: 1
+              value expressions: ds (type: string), key (type: int), value (type: string)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2000-04-08
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2000-04-08
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcr_t1
+              numFiles 1
+              numRows 20
+              partition_columns ds
+              partition_columns.types string
+              rawDataSize 160
+              serialization.ddl struct pcr_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                partition_columns ds
+                partition_columns.types string
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t1
+            name: default.pcr_t1
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2000-04-09
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2000-04-09
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcr_t1
+              numFiles 1
+              numRows 20
+              partition_columns ds
+              partition_columns.types string
+              rawDataSize 160
+              serialization.ddl struct pcr_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                partition_columns ds
+                partition_columns.types string
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t1
+            name: default.pcr_t1
+#### A masked pattern was here ####
+          Partition
+            base file name: pcr_t2
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns ds,key,value
+              columns.comments 
+              columns.types string:int:string
+#### A masked pattern was here ####
+              name default.pcr_t2
+              numFiles 1
+              numRows 1
+              rawDataSize 18
+              serialization.ddl struct pcr_t2 { string ds, i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 19
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                COLUMN_STATS_ACCURATE true
+                bucket_count -1
+                columns ds,key,value
+                columns.comments 
+                columns.types string:int:string
+#### A masked pattern was here ####
+                name default.pcr_t2
+                numFiles 1
+                numRows 1
+                rawDataSize 18
+                serialization.ddl struct pcr_t2 { string ds, i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 19
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t2
+            name: default.pcr_t2
+      Truncated Path -> Alias:
+        /pcr_t1/ds=2000-04-08 [t1]
+        /pcr_t1/ds=2000-04-09 [t1]
+        /pcr_t2 [t2]
+      Needs Tagging: true
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 
+            1 
+          outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col8
+          Statistics: Num rows: 44 Data size: 352 Basic stats: COMPLETE Column stats: NONE
+          Filter Operator
+            isSamplingPred: false
+            predicate: ((_col2) IN ('2000-04-08', '2000-04-09') and (struct(_col7,_col2)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09'))) (type: boolean)
+            Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: string)
+              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+              Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    properties:
+                      columns _col0,_col1,_col2,_col3,_col4,_col5
+                      columns.types int,string,string,string,int,string
+                      escape.delim \
+                      serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              key expressions: _col4 (type: int), _col5 (type: string), _col2 (type: string)
+              sort order: +++
+              Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10003
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              columns _col0,_col1,_col2,_col3,_col4,_col5
+              columns.types int,string,string,string,int,string
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                columns _col0,_col1,_col2,_col3,_col4,_col5
+                columns.types int,string,string,string,int,string
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+          Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  columns _col0,_col1,_col2,_col3,_col4,_col5
+                  columns.types int:string:string:string:int:string
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+Warning: Shuffle Join JOIN[4][tables = [t1, t2]] in Stage 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: explain extended
+select *
+from pcr_t1 t1 join pcr_t2 t2
+where (t2.ds='2000-04-08' and t1.key=1) or (t2.ds='2000-04-09' and t1.key=2)
+order by t1.key, t1.value, t2.ds
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select *
+from pcr_t1 t1 join pcr_t2 t2
+where (t2.ds='2000-04-08' and t1.key=1) or (t2.ds='2000-04-09' and t1.key=2)
+order by t1.key, t1.value, t2.ds
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  
+TOK_QUERY
+   TOK_FROM
+      TOK_JOIN
+         TOK_TABREF
+            TOK_TABNAME
+               pcr_t1
+            t1
+         TOK_TABREF
+            TOK_TABNAME
+               pcr_t2
+            t2
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_ALLCOLREF
+      TOK_WHERE
+         or
+            and
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t2
+                     ds
+                  '2000-04-08'
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t1
+                     key
+                  1
+            and
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t2
+                     ds
+                  '2000-04-09'
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t1
+                     key
+                  2
+      TOK_ORDERBY
+         TOK_TABSORTCOLNAMEASC
+            .
+               TOK_TABLE_OR_COL
+                  t1
+               key
+         TOK_TABSORTCOLNAMEASC
+            .
+               TOK_TABLE_OR_COL
+                  t1
+               value
+         TOK_TABSORTCOLNAMEASC
+            .
+               TOK_TABLE_OR_COL
+                  t2
+               ds
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t1
+            Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE
+              tag: 0
+              value expressions: key (type: int), value (type: string), ds (type: string)
+              auto parallelism: false
+          TableScan
+            alias: t2
+            Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+              tag: 1
+              value expressions: ds (type: string), key (type: int), value (type: string)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2000-04-08
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2000-04-08
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcr_t1
+              numFiles 1
+              numRows 20
+              partition_columns ds
+              partition_columns.types string
+              rawDataSize 160
+              serialization.ddl struct pcr_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                partition_columns ds
+                partition_columns.types string
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t1
+            name: default.pcr_t1
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2000-04-09
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2000-04-09
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcr_t1
+              numFiles 1
+              numRows 20
+              partition_columns ds
+              partition_columns.types string
+              rawDataSize 160
+              serialization.ddl struct pcr_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                partition_columns ds
+                partition_columns.types string
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t1
+            name: default.pcr_t1
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2000-04-10
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2000-04-10
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcr_t1
+              numFiles 1
+              numRows 20
+              partition_columns ds
+              partition_columns.types string
+              rawDataSize 160
+              serialization.ddl struct pcr_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                partition_columns ds
+                partition_columns.types string
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t1
+            name: default.pcr_t1
+#### A masked pattern was here ####
+          Partition
+            base file name: pcr_t2
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns ds,key,value
+              columns.comments 
+              columns.types string:int:string
+#### A masked pattern was here ####
+              name default.pcr_t2
+              numFiles 1
+              numRows 1
+              rawDataSize 18
+              serialization.ddl struct pcr_t2 { string ds, i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 19
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                COLUMN_STATS_ACCURATE true
+                bucket_count -1
+                columns ds,key,value
+                columns.comments 
+                columns.types string:int:string
+#### A masked pattern was here ####
+                name default.pcr_t2
+                numFiles 1
+                numRows 1
+                rawDataSize 18
+                serialization.ddl struct pcr_t2 { string ds, i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 19
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t2
+            name: default.pcr_t2
+      Truncated Path -> Alias:
+        /pcr_t1/ds=2000-04-08 [t1]
+        /pcr_t1/ds=2000-04-09 [t1]
+        /pcr_t1/ds=2000-04-10 [t1]
+        /pcr_t2 [t2]
+      Needs Tagging: true
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 
+            1 
+          outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col8
+          Statistics: Num rows: 66 Data size: 528 Basic stats: COMPLETE Column stats: NONE
+          Filter Operator
+            isSamplingPred: false
+            predicate: (struct(_col0,_col6)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
+            Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: string)
+              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+              Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    properties:
+                      columns _col0,_col1,_col2,_col3,_col4,_col5
+                      columns.types int,string,string,string,int,string
+                      escape.delim \
+                      serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              key expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string)
+              sort order: +++
+              Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col2 (type: string), _col4 (type: int), _col5 (type: string)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10003
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              columns _col0,_col1,_col2,_col3,_col4,_col5
+              columns.types int,string,string,string,int,string
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                columns _col0,_col1,_col2,_col3,_col4,_col5
+                columns.types int,string,string,string,int,string
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+          Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  columns _col0,_col1,_col2,_col3,_col4,_col5
+                  columns.types int:string:string:string:int:string
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: drop table pcr_t1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@pcr_t1
+PREHOOK: Output: default@pcr_t1
+POSTHOOK: query: drop table pcr_t1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@pcr_t1
+POSTHOOK: Output: default@pcr_t1
+PREHOOK: query: drop table pcr_t2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@pcr_t2
+PREHOOK: Output: default@pcr_t2
+POSTHOOK: query: drop table pcr_t2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@pcr_t2
+POSTHOOK: Output: default@pcr_t2
+PREHOOK: query: drop table pcr_t3
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table pcr_t3
+POSTHOOK: type: DROPTABLE
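
For context: the plan above, with its post-join predicate (struct(_col0,_col6)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')), appears to correspond to the last query in pointlookup2.q (quoted verbatim further down in this mail). Since the tuple mixes t1.key (_col0) and t2.ds (_col6), the rewritten IN cannot be pushed below the join and remains a Filter Operator in the Reduce Operator Tree:

    explain extended
    select *
    from pcr_t1 t1 join pcr_t2 t2
    where (t2.ds='2000-04-08' and t1.key=1) or (t2.ds='2000-04-09' and t1.key=2)
    order by t1.key, t1.value, t2.ds;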

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/ppd_transform.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/ppd_transform.q.out b/ql/src/test/results/clientpositive/ppd_transform.q.out
index f536767..17248e4 100644
--- a/ql/src/test/results/clientpositive/ppd_transform.q.out
+++ b/ql/src/test/results/clientpositive/ppd_transform.q.out
@@ -390,21 +390,21 @@ STAGE PLANS:
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                 Filter Operator
-                  predicate: (_col0) IN ('a', 'b') (type: boolean)
-                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  predicate: ((_col0 = 'a') or (_col0 = 'b')) (type: boolean)
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.TextInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 Filter Operator
-                  predicate: (_col0) IN ('c', 'd') (type: boolean)
-                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  predicate: ((_col0 = 'c') or (_col0 = 'd')) (type: boolean)
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.TextInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
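
The ppd_transform changes are the new threshold at work: with hive.optimize.point.lookup.min defaulting to 31, a two-branch disjunction is left as plain ORs instead of being rewritten to IN, and the Filter row estimates move accordingly. A minimal sketch, assuming you wanted the rewrite back for such small predicates in a session:

    -- hypothetical session override; the shipped default is 31
    set hive.optimize.point.lookup.min=2;
    -- with this, ((_col0 = 'a') or (_col0 = 'b')) would again become (_col0) IN ('a', 'b')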

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/spark/pcr.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/pcr.q.out b/ql/src/test/results/clientpositive/spark/pcr.q.out
index 5aa0df8..fb08f10 100644
--- a/ql/src/test/results/clientpositive/spark/pcr.q.out
+++ b/ql/src/test/results/clientpositive/spark/pcr.q.out
@@ -2534,16 +2534,16 @@ STAGE PLANS:
                   GatherStats: false
                   Filter Operator
                     isSamplingPred: false
-                    predicate: (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
-                    Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                    predicate: (((ds = '2000-04-08') and (key = 1)) or ((ds = '2000-04-09') and (key = 2))) (type: boolean)
+                    Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: int), value (type: string), ds (type: string)
                       outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
                         sort order: +++
-                        Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                         tag: -1
                         auto parallelism: false
             Path -> Alias:
@@ -2648,13 +2648,13 @@ STAGE PLANS:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string)
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
                   GlobalTableId: 0
 #### A masked pattern was here ####
                   NumFilesPerFileSink: 1
-                  Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
 #### A masked pattern was here ####
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/spark/ppd_transform.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/ppd_transform.q.out b/ql/src/test/results/clientpositive/spark/ppd_transform.q.out
index a6e6e38..52a847a 100644
--- a/ql/src/test/results/clientpositive/spark/ppd_transform.q.out
+++ b/ql/src/test/results/clientpositive/spark/ppd_transform.q.out
@@ -405,21 +405,21 @@ STAGE PLANS:
                           serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                       Filter Operator
-                        predicate: (_col0) IN ('a', 'b') (type: boolean)
-                        Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                        predicate: ((_col0 = 'a') or (_col0 = 'b')) (type: boolean)
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                         File Output Operator
                           compressed: false
-                          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                           table:
                               input format: org.apache.hadoop.mapred.TextInputFormat
                               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       Filter Operator
-                        predicate: (_col0) IN ('c', 'd') (type: boolean)
-                        Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                        predicate: ((_col0 = 'c') or (_col0 = 'd')) (type: boolean)
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                         File Output Operator
                           compressed: false
-                          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                           table:
                               input format: org.apache.hadoop.mapred.TextInputFormat
                               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out b/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
index 54003c3..c2250e6 100644
--- a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
@@ -45,7 +45,7 @@ STAGE PLANS:
         TableScan
           alias: alltypesorc
           Filter Operator
-            predicate: (csmallint) IN (418, 12205, 10583) (type: boolean)
+            predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
             Select Operator
               expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string)
               outputColumnNames: _col0, _col1, _col2

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
index e8a9786..9756b0c 100644
--- a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
@@ -2909,7 +2909,7 @@ Stage-0
       Select Operator [SEL_2]
          outputColumnNames:["_col0"]
          Filter Operator [FIL_4]
-            predicate:(c_int) IN (-6, 6) (type: boolean)
+            predicate:((c_int = -6) or (c_int = 6)) (type: boolean)
             TableScan [TS_0]
                alias:cbo_t1
 

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/tez/vectorized_case.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vectorized_case.q.out b/ql/src/test/results/clientpositive/tez/vectorized_case.q.out
index 54003c3..c2250e6 100644
--- a/ql/src/test/results/clientpositive/tez/vectorized_case.q.out
+++ b/ql/src/test/results/clientpositive/tez/vectorized_case.q.out
@@ -45,7 +45,7 @@ STAGE PLANS:
         TableScan
           alias: alltypesorc
           Filter Operator
-            predicate: (csmallint) IN (418, 12205, 10583) (type: boolean)
+            predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
             Select Operator
               expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string)
               outputColumnNames: _col0, _col1, _col2

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/vectorized_case.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vectorized_case.q.out b/ql/src/test/results/clientpositive/vectorized_case.q.out
index 9e47014..73bf12d 100644
--- a/ql/src/test/results/clientpositive/vectorized_case.q.out
+++ b/ql/src/test/results/clientpositive/vectorized_case.q.out
@@ -46,19 +46,20 @@ STAGE PLANS:
             alias: alltypesorc
             Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: (csmallint) IN (418, 12205, 10583) (type: boolean)
-              Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+              predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
+              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string)
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
 
   Stage: Stage-0
     Fetch Operator
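
Worth noting: once the predicate is back in OR form, the plan regains "Execution mode: vectorized", which suggests the struct-IN form was blocking vectorization here. The statement being explained, reconstructed from the plan's expressions (the .q file itself is not quoted in this mail):

    explain
    select csmallint,
           case when csmallint = 418 then 'a' when csmallint = 12205 then 'b' else 'c' end,
           case csmallint when 418 then 'a' when 12205 then 'b' else 'c' end
    from alltypesorc
    where (csmallint = 418) or (csmallint = 12205) or (csmallint = 10583);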


[2/2] hive git commit: HIVE-11573: PointLookupOptimizer can be pessimistic at a low nDV (Gopal V, reviewed by Jesus Camacho Rodriguez)

Posted by jc...@apache.org.
HIVE-11573: PointLookupOptimizer can be pessimistic at a low nDV (Gopal V, reviewed by Jesus Camacho Rodriguez)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b247cac4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b247cac4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b247cac4

Branch: refs/heads/master
Commit: b247cac4fc3814e422d4f5d5aad96a1c6e385a7b
Parents: 037fb02
Author: Gopal V <go...@apache.org>
Authored: Thu Aug 27 09:50:08 2015 +0200
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Thu Aug 27 09:50:08 2015 +0200

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/conf/HiveConf.java   |    4 +
 .../hadoop/hive/ql/optimizer/Optimizer.java     |   17 +-
 .../hive/ql/optimizer/PointLookupOptimizer.java |  102 +-
 .../queries/clientpositive/flatten_and_or.q     |    4 +-
 .../test/queries/clientpositive/pointlookup.q   |   59 +
 .../test/queries/clientpositive/pointlookup2.q  |   51 +
 .../alter_partition_coltype.q.out               |   12 +-
 .../clientpositive/annotate_stats_filter.q.out  |    8 +-
 .../results/clientpositive/flatten_and_or.q.out |    8 +-
 ql/src/test/results/clientpositive/pcr.q.out    |   12 +-
 .../results/clientpositive/pointlookup.q.out    |  198 +++
 .../results/clientpositive/pointlookup2.q.out   | 1647 ++++++++++++++++++
 .../results/clientpositive/ppd_transform.q.out  |   12 +-
 .../test/results/clientpositive/spark/pcr.q.out |   12 +-
 .../clientpositive/spark/ppd_transform.q.out    |   12 +-
 .../clientpositive/spark/vectorized_case.q.out  |    2 +-
 .../clientpositive/tez/explainuser_1.q.out      |    2 +-
 .../clientpositive/tez/vectorized_case.q.out    |    2 +-
 .../clientpositive/vectorized_case.q.out        |    9 +-
 19 files changed, 2118 insertions(+), 55 deletions(-)
----------------------------------------------------------------------
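
In short: the OR-to-IN rewrite is now gated behind a minimum branch count, can additionally extract per-column partial IN clauses, and runs earlier in the optimizer so its output is visible to predicate pushdown and partition pruning. The knobs, with the defaults set in the HiveConf change below:

    set hive.optimize.point.lookup=true;          -- rewrite OR chains into IN (pre-existing)
    set hive.optimize.point.lookup.min=31;        -- new: minimum OR branches before rewriting
    set hive.optimize.point.lookup.extract=true;  -- new: also emit partial per-column INs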


http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 8706a2d..8a00079 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1190,6 +1190,10 @@ public class HiveConf extends Configuration {
         "Whether to push predicates down into storage handlers.  Ignored when hive.optimize.ppd is false."),
     HIVEPOINTLOOKUPOPTIMIZER("hive.optimize.point.lookup", true,
          "Whether to transform OR clauses in Filter operators into IN clauses"),
+    HIVEPOINTLOOKUPOPTIMIZERMIN("hive.optimize.point.lookup.min", 31,
+             "Minimum number of OR clauses needed to transform into IN clauses"),
+    HIVEPOINTLOOKUPOPTIMIZEREXTRACT("hive.optimize.point.lookup.extract", true,
+                 "Extract partial expressions when optimizing point lookup IN clauses"),
     // Constant propagation optimizer
     HIVEOPTCONSTANTPROPAGATION("hive.optimize.constant.propagation", true, "Whether to enable constant propagation optimizer"),
     HIVEIDENTITYPROJECTREMOVER("hive.optimize.remove.identity.project", true, "Removes identity project from operator tree"),

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
index 14f362f..439f616 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
@@ -68,6 +68,18 @@ public class Optimizer {
 
     // Add the transformation that computes the lineage information.
     transformations.add(new Generator());
+
+    // Try to transform OR predicates in Filter into simpler IN clauses first
+    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER)) {
+      final int min = HiveConf.getIntVar(hiveConf,
+          HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZERMIN);
+      final boolean extract = HiveConf.getBoolVar(hiveConf,
+          HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZEREXTRACT);
+      final boolean testMode = HiveConf.getBoolVar(hiveConf,
+          HiveConf.ConfVars.HIVE_IN_TEST);
+      transformations.add(new PointLookupOptimizer(min, extract, testMode));
+    }
+
     if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTPPD)) {
       transformations.add(new PredicateTransitivePropagate());
       if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION)) {
@@ -82,11 +94,6 @@ public class Optimizer {
         transformations.add(new ConstantPropagate());
     }
 
-    // Try to transform OR predicates in Filter into IN clauses.
-    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER)) {
-      transformations.add(new PointLookupOptimizer());
-    }
-
     if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTPPD)) {
       transformations.add(new PartitionPruner());
       transformations.add(new PartitionConditionRemover());
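
Moving the pass ahead of PredicateTransitivePropagate and the PartitionPruner is what makes the extract option pay off: a partial IN over a partition column, generated before pruning runs, can prune partitions that the opaque struct-typed IN could not. pointlookup2.q (quoted later in this mail) exercises exactly this:

    set hive.optimize.point.lookup.min=2;
    set hive.optimize.point.lookup.extract=true;

    explain extended
    select key, value, ds
    from pcr_t1
    where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2)
    order by key, value, ds;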

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java
index 6a8acec..d83636d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java
@@ -18,10 +18,14 @@
 package org.apache.hadoop.hive.ql.optimizer;
 
 import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Comparator;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.Stack;
 
 import org.apache.calcite.util.Pair;
@@ -46,15 +50,18 @@ import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc.ExprNodeDescEqualityWrapper;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 
 import com.google.common.collect.ArrayListMultimap;
+import com.google.common.collect.ImmutableSortedSet;
 import com.google.common.collect.ListMultimap;
 
 /**
@@ -71,7 +78,49 @@ public class PointLookupOptimizer implements Transform {
           GenericUDFIn.class.getAnnotation(Description.class).name();
   private static final String STRUCT_UDF =
           GenericUDFStruct.class.getAnnotation(Description.class).name();
+  private static final String AND_UDF =
+      GenericUDFOPAnd.class.getAnnotation(Description.class).name();
+
+  // these are closure-bound for all the walkers in context
+  public final int minOrExpr;
+  public final boolean extract;
+  public final boolean testMode;
+
+  /*
+   * Pass in configs and pre-create a parse context
+   */
+  public PointLookupOptimizer(final int min, final boolean extract, final boolean testMode) {
+    this.minOrExpr = min;
+    this.extract = extract;
+    this.testMode = testMode;
+  }
+
+  // Hash Set iteration isn't ordered, but force string sorted order
+  // to get a consistent test run.
+  private Collection<ExprNodeDescEqualityWrapper> sortForTests(
+      Set<ExprNodeDescEqualityWrapper> valuesExpr) {
+    if (!testMode) {
+      // normal case - sorting is wasted for an IN()
+      return valuesExpr;
+    }
+    final Collection<ExprNodeDescEqualityWrapper> sortedValues;
+
+    sortedValues = ImmutableSortedSet.copyOf(
+        new Comparator<ExprNodeDescEqualityWrapper>() {
+          @Override
+          public int compare(ExprNodeDescEqualityWrapper w1,
+              ExprNodeDescEqualityWrapper w2) {
+            // fail if you find nulls (this is a test-code section)
+            if (w1.equals(w2)) {
+              return 0;
+            }
+            return w1.getExprNodeDesc().getExprString()
+                .compareTo(w2.getExprNodeDesc().getExprString());
+          }
+        }, valuesExpr);
 
+    return sortedValues;
+  }
 
   @Override
   public ParseContext transform(ParseContext pctx) throws SemanticException {
@@ -103,7 +152,9 @@ public class PointLookupOptimizer implements Transform {
         if (LOG.isDebugEnabled()) {
           LOG.debug("Generated new predicate with IN clause: " + newPredicate);
         }
-        filterOp.getConf().setOrigPredicate(predicate);
+        if (!extract) {
+          filterOp.getConf().setOrigPredicate(predicate);
+        }
         filterOp.getConf().setPredicate(newPredicate);
       }
 
@@ -140,8 +191,11 @@ public class PointLookupOptimizer implements Transform {
         return null;
       }
 
-      // 2. It is an OR operator
+      // 2. It is an OR operator with enough children
       List<ExprNodeDesc> children = fd.getChildren();
+      if (children.size() < minOrExpr) {
+        return null;
+      }
       ListMultimap<String,Pair<ExprNodeColumnDesc, ExprNodeConstantDesc>> columnConstantsMap =
               ArrayListMultimap.create();
       boolean modeAnd = false;
@@ -272,6 +326,50 @@ public class PointLookupOptimizer implements Transform {
       newPredicate = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
               FunctionRegistry.getFunctionInfo(IN_UDF).getGenericUDF(), newChildren);
 
+      if (extract && columns.size() > 1) {
+        final List<ExprNodeDesc> subExpr = new ArrayList<ExprNodeDesc>(columns.size()+1);
+
+        // extract pre-conditions for the tuple expressions
+        // (a,b) IN ((1,2),(2,3)) ->
+        //          ((a) IN (1,2) and b in (2,3)) and (a,b) IN ((1,2),(2,3))
+
+        for (String keyString : columnConstantsMap.keySet()) {
+          final Set<ExprNodeDescEqualityWrapper> valuesExpr = 
+              new HashSet<ExprNodeDescEqualityWrapper>(children.size());
+          final List<Pair<ExprNodeColumnDesc, ExprNodeConstantDesc>> partial = 
+              columnConstantsMap.get(keyString);
+          for (int i = 0; i < children.size(); i++) {
+            Pair<ExprNodeColumnDesc, ExprNodeConstantDesc> columnConstant = partial
+                .get(i);
+            valuesExpr
+                .add(new ExprNodeDescEqualityWrapper(columnConstant.right));
+          }
+          ExprNodeColumnDesc lookupCol = partial.get(0).left;
+          // generate a partial IN clause, if the column is a partition column
+          if (lookupCol.getIsPartitionColOrVirtualCol()
+              || valuesExpr.size() < children.size()) {
+            // optimize only nDV reductions
+            final List<ExprNodeDesc> inExpr = new ArrayList<ExprNodeDesc>();
+            inExpr.add(lookupCol);
+            for (ExprNodeDescEqualityWrapper value : sortForTests(valuesExpr)) {
+              inExpr.add(value.getExprNodeDesc());
+            }
+            subExpr.add(new ExprNodeGenericFuncDesc(
+                TypeInfoFactory.booleanTypeInfo, FunctionRegistry
+                    .getFunctionInfo(IN_UDF).getGenericUDF(), inExpr));
+          }
+        }
+        // loop complete, inspect the sub expressions generated
+        if (subExpr.size() > 0) {
+          // add the newPredicate to the end & produce an AND clause
+          subExpr.add(newPredicate);
+          newPredicate = new ExprNodeGenericFuncDesc(
+              TypeInfoFactory.booleanTypeInfo, FunctionRegistry
+                  .getFunctionInfo(AND_UDF).getGenericUDF(), subExpr);
+        }
+        // else, newPredicate is unmodified
+      }
+
       return newPredicate;
     }
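
Restating the extract rule in SQL terms: for each column of the tuple, a single-column IN over its distinct constants is prepended, but only when the column is a partition or virtual column, or when its distinct constant set is smaller than the number of OR branches (an nDV reduction); the full tuple IN is kept as the final conjunct for correctness. A hand-written equivalent, where table t and columns a, b are purely illustrative:

    -- illustrative only; t, a and b do not appear in this patch
    select * from t
    where (a) IN (1, 2)
      and (b) IN (2, 3)
      and (struct(a, b)) IN (struct(1, 2), struct(2, 3));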
 

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/queries/clientpositive/flatten_and_or.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/flatten_and_or.q b/ql/src/test/queries/clientpositive/flatten_and_or.q
index 6d65225..6c6e0f9 100644
--- a/ql/src/test/queries/clientpositive/flatten_and_or.q
+++ b/ql/src/test/queries/clientpositive/flatten_and_or.q
@@ -1,3 +1,5 @@
+set hive.optimize.point.lookup=false;
+
 explain
 SELECT key
 FROM src
@@ -14,4 +16,4 @@ WHERE
    AND value = '1') OR (key = '9'
    AND value = '1') OR (key = '10'
    AND value = '3'))
-;
\ No newline at end of file
+;
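
flatten_and_or.q now pins the point-lookup optimizer off so the test keeps exercising AND/OR flattening no matter what the point-lookup defaults become; its 11-branch OR sits below the shipped minimum of 31 anyway, so the explicit set mainly makes the test independent of that default:

    set hive.optimize.point.lookup=false;  -- as added at the top of the test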

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/queries/clientpositive/pointlookup.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/pointlookup.q b/ql/src/test/queries/clientpositive/pointlookup.q
new file mode 100644
index 0000000..1aef2ef
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/pointlookup.q
@@ -0,0 +1,59 @@
+explain
+SELECT key
+FROM src
+WHERE
+   ((key = '0'
+   AND value = '8') OR (key = '1'
+   AND value = '5') OR (key = '2'
+   AND value = '6') OR (key = '3'
+   AND value = '8') OR (key = '4'
+   AND value = '1') OR (key = '5'
+   AND value = '6') OR (key = '6'
+   AND value = '1') OR (key = '7'
+   AND value = '1') OR (key = '8'
+   AND value = '1') OR (key = '9'
+   AND value = '1') OR (key = '10'
+   AND value = '3'))
+;
+
+
+set hive.optimize.point.lookup.min=3;
+set hive.optimize.point.lookup.extract=false;
+
+explain
+SELECT key
+FROM src
+WHERE
+   ((key = '0'
+   AND value = '8') OR (key = '1'
+   AND value = '5') OR (key = '2'
+   AND value = '6') OR (key = '3'
+   AND value = '8') OR (key = '4'
+   AND value = '1') OR (key = '5'
+   AND value = '6') OR (key = '6'
+   AND value = '1') OR (key = '7'
+   AND value = '1') OR (key = '8'
+   AND value = '1') OR (key = '9'
+   AND value = '1') OR (key = '10'
+   AND value = '3'))
+;
+
+set hive.optimize.point.lookup.extract=true;
+
+explain
+SELECT key
+FROM src
+WHERE
+   ((key = '0'
+   AND value = '8') OR (key = '1'
+   AND value = '5') OR (key = '2'
+   AND value = '6') OR (key = '3'
+   AND value = '8') OR (key = '4'
+   AND value = '1') OR (key = '5'
+   AND value = '6') OR (key = '6'
+   AND value = '1') OR (key = '7'
+   AND value = '1') OR (key = '8'
+   AND value = '1') OR (key = '9'
+   AND value = '1') OR (key = '10'
+   AND value = '3'))
+;
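
The three identical EXPLAINs above walk the three configurations: the defaults (11 branches < 31, so no rewrite), min=3 with extract=false (struct IN only), and min=3 with extract=true (partial IN plus struct IN). The resulting Filter predicates, abbreviated from pointlookup.q.out further down:

    -- 1) defaults:             ((key = '0') and (value = '8')) or ... or ((key = '10') and (value = '3'))
    -- 2) min=3, extract=false: (struct(key,value)) IN (const struct('0','8'), ..., const struct('10','3'))
    -- 3) min=3, extract=true:  (value) IN ('1', '3', '5', '6', '8')
    --                          and (struct(key,value)) IN (const struct('0','8'), ..., const struct('10','3'))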

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/queries/clientpositive/pointlookup2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/pointlookup2.q b/ql/src/test/queries/clientpositive/pointlookup2.q
new file mode 100644
index 0000000..31bebbb
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/pointlookup2.q
@@ -0,0 +1,51 @@
+drop table pcr_t1;
+drop table pcr_t2;
+drop table pcr_t3;
+
+create table pcr_t1 (key int, value string) partitioned by (ds string);
+insert overwrite table pcr_t1 partition (ds='2000-04-08') select * from src where key < 20 order by key;
+insert overwrite table pcr_t1 partition (ds='2000-04-09') select * from src where key < 20 order by key;
+insert overwrite table pcr_t1 partition (ds='2000-04-10') select * from src where key < 20 order by key;
+
+create table pcr_t2 (ds string, key int, value string);
+from pcr_t1
+insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08';
+from pcr_t1
+insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08' and key=2;
+
+set hive.optimize.point.lookup.min=2;
+set hive.optimize.point.lookup.extract=true;
+
+explain extended
+select key, value, ds
+from pcr_t1
+where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2)
+order by key, value, ds;
+
+explain extended
+select *
+from pcr_t1 t1 join pcr_t1 t2
+on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-08'
+order by t1.key;
+
+explain extended
+select *
+from pcr_t1 t1 join pcr_t1 t2
+on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-09'
+order by t1.key;
+
+explain extended
+select *
+from pcr_t1 t1 join pcr_t2 t2
+where (t1.ds='2000-04-08' and t2.key=1) or (t1.ds='2000-04-09' and t2.key=2)
+order by t2.key, t2.value, t1.ds;
+
+explain extended
+select *
+from pcr_t1 t1 join pcr_t2 t2
+where (t2.ds='2000-04-08' and t1.key=1) or (t2.ds='2000-04-09' and t1.key=2)
+order by t1.key, t1.value, t2.ds;
+
+drop table pcr_t1;
+drop table pcr_t2;
+drop table pcr_t3;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/alter_partition_coltype.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/alter_partition_coltype.q.out b/ql/src/test/results/clientpositive/alter_partition_coltype.q.out
index 06515da..9fc3c8d 100644
--- a/ql/src/test/results/clientpositive/alter_partition_coltype.q.out
+++ b/ql/src/test/results/clientpositive/alter_partition_coltype.q.out
@@ -1134,15 +1134,11 @@ STAGE PLANS:
           alias: alterdynamic_part_table
           Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
           GatherStats: false
-          Filter Operator
-            isSamplingPred: false
-            predicate: (struct(partcol1,partcol2)) IN (const struct(2,'1'), const struct(1,'__HIVE_DEFAULT_PARTITION__')) (type: boolean)
+          Select Operator
+            expressions: intcol (type: string)
+            outputColumnNames: _col0
             Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: intcol (type: string)
-              outputColumnNames: _col0
-              Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
-              ListSink
+            ListSink
 
 PREHOOK: query: select intcol from pt.alterdynamic_part_table where (partcol1='2' and partcol2='1')or (partcol1='1' and partcol2='__HIVE_DEFAULT_PARTITION__')
 PREHOOK: type: QUERY
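
This looks like the cleanest payoff: the two-branch disjunction now stays below the rewrite threshold, so it is never turned into an opaque struct IN, and since both columns are partition columns the whole predicate is resolved at partition-pruning time; the residual Filter Operator disappears from the plan. The statement being explained, reconstructed from the PREHOOK line above:

    explain extended
    select intcol from pt.alterdynamic_part_table
    where (partcol1='2' and partcol2='1') or (partcol1='1' and partcol2='__HIVE_DEFAULT_PARTITION__');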

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
index 9e0e78a..054b573 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
@@ -678,15 +678,15 @@ STAGE PLANS:
             alias: loc_orc
             Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
             Filter Operator
-              predicate: (state) IN ('OH', 'CA') (type: boolean)
-              Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
+              predicate: ((state = 'OH') or (state = 'CA')) (type: boolean)
+              Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
               Select Operator
                 expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
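
annotate_stats_filter is the motivating case from the JIRA title: with COMPLETE column stats available, the struct-IN form fell back to a coarse fixed selectivity (8 -> 4 rows), while evaluating the equalities individually uses the column's nDV and lands on 2 rows. A rough reading of the numbers, with the nDV assumed rather than shown:

    -- rows = 8; assuming ndv(state) = 8 in loc_orc (illustrative, not from this mail)
    --   (state = 'OH') -> 8/8 = 1 row; (state = 'CA') -> 1 row; OR -> 2 rows  (new estimate)
    --   (state) IN ('OH', 'CA') -> flat 1/2 selectivity -> 4 rows             (old estimate)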

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/flatten_and_or.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/flatten_and_or.q.out b/ql/src/test/results/clientpositive/flatten_and_or.q.out
index 5f25daa..9c51ff3 100644
--- a/ql/src/test/results/clientpositive/flatten_and_or.q.out
+++ b/ql/src/test/results/clientpositive/flatten_and_or.q.out
@@ -44,15 +44,15 @@ STAGE PLANS:
             alias: src
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: (struct(key,value)) IN (const struct('0','8'), const struct('1','5'), const struct('2','6'), const struct('3','8'), const struct('4','1'), const struct('5','6'), const struct('6','1'), const struct('7','1'), const struct('8','1'), const struct('9','1'), const struct('10','3')) (type: boolean)
-              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              predicate: (((key = '0') and (value = '8')) or ((key = '1') and (value = '5')) or ((key = '2') and (value = '6')) or ((key = '3') and (value = '8')) or ((key = '4') and (value = '1')) or ((key = '5') and (value = '6')) or ((key = '6') and (value = '1')) or ((key = '7') and (value = '1')) or ((key = '8') and (value = '1')) or ((key = '9') and (value = '1')) or ((key = '10') and (value = '3'))) (type: boolean)
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string)
                 outputColumnNames: _col0
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/pcr.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/pcr.q.out b/ql/src/test/results/clientpositive/pcr.q.out
index 4c9ea77..d7c40a3 100644
--- a/ql/src/test/results/clientpositive/pcr.q.out
+++ b/ql/src/test/results/clientpositive/pcr.q.out
@@ -2475,16 +2475,16 @@ STAGE PLANS:
             GatherStats: false
             Filter Operator
               isSamplingPred: false
-              predicate: (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
-              Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+              predicate: (((ds = '2000-04-08') and (key = 1)) or ((ds = '2000-04-09') and (key = 2))) (type: boolean)
+              Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: int), value (type: string), ds (type: string)
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
                   sort order: +++
-                  Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                   tag: -1
                   auto parallelism: false
       Path -> Alias:
@@ -2588,13 +2588,13 @@ STAGE PLANS:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string)
           outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
             GlobalTableId: 0
 #### A masked pattern was here ####
             NumFilesPerFileSink: 1
-            Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
 #### A masked pattern was here ####
             table:
                 input format: org.apache.hadoop.mapred.TextInputFormat
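
pcr runs with basic stats only (Column stats: NONE), and there the trade goes the other way: the OR-of-ANDs form is estimated at 40 rows where the struct IN had been estimated at 20, so the min threshold is a heuristic trade-off rather than a strict win. The predicate and sort keys above correspond to a query of this shape (reconstructed; pcr.q itself is not quoted in this mail):

    select key, value, ds
    from pcr_t1
    where (ds = '2000-04-08' and key = 1) or (ds = '2000-04-09' and key = 2)
    order by key, value, ds;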

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/pointlookup.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/pointlookup.q.out b/ql/src/test/results/clientpositive/pointlookup.q.out
new file mode 100644
index 0000000..7e19be4
--- /dev/null
+++ b/ql/src/test/results/clientpositive/pointlookup.q.out
@@ -0,0 +1,198 @@
+PREHOOK: query: explain
+SELECT key
+FROM src
+WHERE
+   ((key = '0'
+   AND value = '8') OR (key = '1'
+   AND value = '5') OR (key = '2'
+   AND value = '6') OR (key = '3'
+   AND value = '8') OR (key = '4'
+   AND value = '1') OR (key = '5'
+   AND value = '6') OR (key = '6'
+   AND value = '1') OR (key = '7'
+   AND value = '1') OR (key = '8'
+   AND value = '1') OR (key = '9'
+   AND value = '1') OR (key = '10'
+   AND value = '3'))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+SELECT key
+FROM src
+WHERE
+   ((key = '0'
+   AND value = '8') OR (key = '1'
+   AND value = '5') OR (key = '2'
+   AND value = '6') OR (key = '3'
+   AND value = '8') OR (key = '4'
+   AND value = '1') OR (key = '5'
+   AND value = '6') OR (key = '6'
+   AND value = '1') OR (key = '7'
+   AND value = '1') OR (key = '8'
+   AND value = '1') OR (key = '9'
+   AND value = '1') OR (key = '10'
+   AND value = '3'))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (((key = '0') and (value = '8')) or ((key = '1') and (value = '5')) or ((key = '2') and (value = '6')) or ((key = '3') and (value = '8')) or ((key = '4') and (value = '1')) or ((key = '5') and (value = '6')) or ((key = '6') and (value = '1')) or ((key = '7') and (value = '1')) or ((key = '8') and (value = '1')) or ((key = '9') and (value = '1')) or ((key = '10') and (value = '3'))) (type: boolean)
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain
+SELECT key
+FROM src
+WHERE
+   ((key = '0'
+   AND value = '8') OR (key = '1'
+   AND value = '5') OR (key = '2'
+   AND value = '6') OR (key = '3'
+   AND value = '8') OR (key = '4'
+   AND value = '1') OR (key = '5'
+   AND value = '6') OR (key = '6'
+   AND value = '1') OR (key = '7'
+   AND value = '1') OR (key = '8'
+   AND value = '1') OR (key = '9'
+   AND value = '1') OR (key = '10'
+   AND value = '3'))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+SELECT key
+FROM src
+WHERE
+   ((key = '0'
+   AND value = '8') OR (key = '1'
+   AND value = '5') OR (key = '2'
+   AND value = '6') OR (key = '3'
+   AND value = '8') OR (key = '4'
+   AND value = '1') OR (key = '5'
+   AND value = '6') OR (key = '6'
+   AND value = '1') OR (key = '7'
+   AND value = '1') OR (key = '8'
+   AND value = '1') OR (key = '9'
+   AND value = '1') OR (key = '10'
+   AND value = '3'))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (struct(key,value)) IN (const struct('0','8'), const struct('1','5'), const struct('2','6'), const struct('3','8'), const struct('4','1'), const struct('5','6'), const struct('6','1'), const struct('7','1'), const struct('8','1'), const struct('9','1'), const struct('10','3')) (type: boolean)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain
+SELECT key
+FROM src
+WHERE
+   ((key = '0'
+   AND value = '8') OR (key = '1'
+   AND value = '5') OR (key = '2'
+   AND value = '6') OR (key = '3'
+   AND value = '8') OR (key = '4'
+   AND value = '1') OR (key = '5'
+   AND value = '6') OR (key = '6'
+   AND value = '1') OR (key = '7'
+   AND value = '1') OR (key = '8'
+   AND value = '1') OR (key = '9'
+   AND value = '1') OR (key = '10'
+   AND value = '3'))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+SELECT key
+FROM src
+WHERE
+   ((key = '0'
+   AND value = '8') OR (key = '1'
+   AND value = '5') OR (key = '2'
+   AND value = '6') OR (key = '3'
+   AND value = '8') OR (key = '4'
+   AND value = '1') OR (key = '5'
+   AND value = '6') OR (key = '6'
+   AND value = '1') OR (key = '7'
+   AND value = '1') OR (key = '8'
+   AND value = '1') OR (key = '9'
+   AND value = '1') OR (key = '10'
+   AND value = '3'))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: ((value) IN ('1', '3', '5', '6', '8') and (struct(key,value)) IN (const struct('0','8'), const struct('1','5'), const struct('2','6'), const struct('3','8'), const struct('4','1'), const struct('5','6'), const struct('6','1'), const struct('7','1'), const struct('8','1'), const struct('9','1'), const struct('10','3'))) (type: boolean)
+              Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
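
In the third plan, note which partial IN was extracted. Tallying the constants in the WHERE clause: value takes only 5 distinct constants across the 11 OR branches, a genuine nDV reduction, so (value) IN ('1', '3', '5', '6', '8') is emitted; key takes 11 distinct constants over 11 branches and is not a partition column, so it gets no partial IN and stays constrained only by the full struct IN. The sorted order of the value list is the deterministic test-mode ordering from sortForTests.

    -- per-column constants, from the WHERE clause above:
    --   key:   '0','1','2','3','4','5','6','7','8','9','10' -> 11 distinct / 11 branches: no reduction
    --   value: '8','5','6','1','3'                          ->  5 distinct / 11 branches: extracted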