Posted to commits@hive.apache.org by xu...@apache.org on 2015/09/09 09:08:31 UTC

[01/50] [abbrv] hive git commit: HIVE-11366: Avoid right leaning tree hashCode depth in ExprNodeDescEqualityWrapper hashmaps (Gopal V, reviewed by Ashutosh Chauhan)

Repository: hive
Updated Branches:
  refs/heads/beeline-cli b90a2ae5d -> fc53e5d7e


HIVE-11366: Avoid right leaning tree hashCode depth in ExprNodeDescEqualityWrapper hashmaps (Gopal V, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3e63fc42
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3e63fc42
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3e63fc42

Branch: refs/heads/beeline-cli
Commit: 3e63fc42054e64455d90ca74cb6ebe30ea0d40ae
Parents: 284859c
Author: Gopal V <go...@apache.org>
Authored: Tue Aug 25 14:14:08 2015 -0700
Committer: Gopal V <go...@apache.org>
Committed: Tue Aug 25 14:14:08 2015 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/3e63fc42/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java
index 0fe9eda..15267b9 100755
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java
@@ -98,21 +98,22 @@ public abstract class ExprNodeDesc implements Serializable, Node {
 
   // This wraps an instance of an ExprNodeDesc, and makes equals work like isSame, see comment on
   // isSame
-  public static class ExprNodeDescEqualityWrapper {
-    private ExprNodeDesc exprNodeDesc;
+  public final static class ExprNodeDescEqualityWrapper {
+    private final ExprNodeDesc exprNodeDesc;
+    // beware of any implementation whose hashcode is mutable by reference
+    // inserting into a Map and then changing the hashcode can make it 
+    // disappear out of the Map during lookups
+    private final int hashcode;
 
     public ExprNodeDescEqualityWrapper(ExprNodeDesc exprNodeDesc) {
       this.exprNodeDesc = exprNodeDesc;
+      this.hashcode = exprNodeDesc == null ? 0 : exprNodeDesc.hashCode();
     }
 
     public ExprNodeDesc getExprNodeDesc() {
       return exprNodeDesc;
     }
 
-    public void setExprNodeDesc(ExprNodeDesc exprNodeDesc) {
-      this.exprNodeDesc = exprNodeDesc;
-    }
-
     @Override
     public boolean equals(Object other) {
 
@@ -125,7 +126,7 @@ public abstract class ExprNodeDesc implements Serializable, Node {
 
     @Override
     public int hashCode() {
-      return exprNodeDesc == null ? 0 : exprNodeDesc.hashCode();
+      return hashcode;
     }
 
     /* helper function to allow Set()/Collection() operations with ExprNodeDesc */
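
For readers skimming the diff: the change caches the wrapped ExprNodeDesc's hashCode at construction time and removes the setter, making the wrapper immutable. The cached value means the deep, recursive hashCode of a right-leaning expression tree is computed once per wrapper instead of on every HashMap probe, and it sidesteps the hazard the new comment warns about: a key whose hashCode changes after insertion can no longer be found. A minimal standalone sketch of that hazard (plain Java, nothing Hive-specific; MutableKey and HashCodeDemo are illustrative names, not Hive classes):

import java.util.HashMap;
import java.util.Map;

// Hypothetical key whose hashCode depends on mutable state.
class MutableKey {
  int value;
  MutableKey(int value) { this.value = value; }
  @Override public int hashCode() { return value; }
  @Override public boolean equals(Object other) {
    return other instanceof MutableKey && ((MutableKey) other).value == value;
  }
}

public class HashCodeDemo {
  public static void main(String[] args) {
    Map<MutableKey, String> map = new HashMap<>();
    MutableKey key = new MutableKey(1);
    map.put(key, "present");
    System.out.println(map.get(key)); // "present"
    key.value = 2;                    // hashCode changes after insertion
    System.out.println(map.get(key)); // null: lookup probes the wrong bucket
  }
}

Because the wrapper now stores the hash computed at construction, its behavior as a HashMap or HashSet key stays consistent even if the underlying ExprNodeDesc were mutated afterward, which is exactly what the added comment cautions about.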


[50/50] [abbrv] hive git commit: HIVE-11769: Merge master to beeline-cli branch 09/09/2015

Posted by xu...@apache.org.
HIVE-11769: Merge master to beeline-cli branch 09/09/2015


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/fc53e5d7
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/fc53e5d7
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/fc53e5d7

Branch: refs/heads/beeline-cli
Commit: fc53e5d7edfed6dbdaeb43fb6b541ff36fb9a47b
Parents: b90a2ae d51c62a
Author: Ferdinand Xu <ch...@intel.com>
Authored: Wed Sep 9 03:02:24 2015 -0400
Committer: Ferdinand Xu <ch...@intel.com>
Committed: Wed Sep 9 03:02:24 2015 -0400

----------------------------------------------------------------------
 accumulo-handler/pom.xml                        |     4 -
 .../apache/hadoop/hive/ant/GenVectorCode.java   |   105 +
 .../java/org/apache/hive/beeline/BeeLine.java   |    13 +-
 .../org/apache/hive/beeline/BeeLineOpts.java    |    21 +-
 .../apache/hive/beeline/DatabaseConnection.java |     9 +
 .../org/apache/hive/beeline/HiveSchemaTool.java |    14 +-
 beeline/src/main/resources/BeeLine.properties   |     2 +
 .../src/main/resources/beeline-log4j.properties |    24 -
 beeline/src/main/resources/beeline-log4j2.xml   |    40 +
 bin/ext/beeline.sh                              |     2 +-
 bin/hive                                        |     3 +
 .../hadoop/hive/cli/TestOptionsProcessor.java   |     1 -
 common/pom.xml                                  |    27 +-
 .../apache/hadoop/hive/common/JavaUtils.java    |    11 +-
 .../org/apache/hadoop/hive/common/LogUtils.java |    18 +-
 .../hadoop/hive/common/ValidReadTxnList.java    |     2 +-
 .../hadoop/hive/common/type/HiveDecimal.java    |   306 -
 .../org/apache/hadoop/hive/conf/HiveConf.java   |   112 +-
 common/src/main/resources/hive-log4j.properties |    88 -
 common/src/main/resources/hive-log4j2.xml       |   111 +
 .../hadoop/hive/conf/TestHiveLogging.java       |     8 +-
 .../resources/hive-exec-log4j-test.properties   |    59 -
 .../test/resources/hive-exec-log4j2-test.xml    |    86 +
 .../test/resources/hive-log4j-test.properties   |    71 -
 common/src/test/resources/hive-log4j2-test.xml  |    95 +
 data/conf/hive-log4j-old.properties             |    82 -
 data/conf/hive-log4j.properties                 |    97 -
 data/conf/hive-log4j2.xml                       |   144 +
 data/conf/hive-site.xml                         |     6 -
 data/conf/spark/log4j.properties                |    24 -
 data/conf/spark/log4j2.xml                      |    74 +
 data/conf/tez/hive-site.xml                     |     9 +
 docs/xdocs/language_manual/cli.xml              |     2 +-
 errata.txt                                      |    10 +
 .../hadoop/hive/hbase/ColumnMappings.java       |     5 +
 .../apache/hadoop/hive/hbase/HBaseSerDe.java    |    19 +-
 .../hadoop/hive/hbase/HBaseSerDeParameters.java |     8 +-
 .../hadoop/hive/hbase/HBaseStorageHandler.java  |    13 +-
 .../hive/hbase/HiveHBaseInputFormatUtil.java    |    50 +-
 .../hadoop/hive/hbase/LazyHBaseCellMap.java     |    19 +-
 .../apache/hadoop/hive/hbase/LazyHBaseRow.java  |     5 +-
 .../hadoop/hive/hbase/LazyHBaseCellMapTest.java |    72 +
 .../positive/hbase_binary_map_queries_prefix.q  |    15 +-
 .../queries/positive/hbase_null_first_col.q     |    22 +
 .../hbase_binary_map_queries_prefix.q.out       |    40 +
 .../results/positive/hbase_null_first_col.q.out |   109 +
 .../test/results/positive/hbase_timestamp.q.out |     8 +-
 hcatalog/bin/hcat_server.sh                     |     2 +-
 hcatalog/bin/templeton.cmd                      |     4 +-
 .../mapreduce/DefaultOutputFormatContainer.java |     7 +-
 ...namicPartitionFileRecordWriterContainer.java |     3 +-
 .../mapreduce/FileOutputFormatContainer.java    |     3 +-
 .../hive/hcatalog/mapreduce/PartInfo.java       |    32 +-
 .../hive/hcatalog/mapreduce/SpecialCases.java   |     8 +-
 .../mapreduce/TestHCatMultiOutputFormat.java    |     6 +-
 hcatalog/scripts/hcat_server_start.sh           |     2 +-
 .../content/xdocs/configuration.xml             |     2 +-
 .../src/documentation/content/xdocs/install.xml |     2 +-
 .../deployers/config/hive/hive-log4j.properties |    88 -
 .../deployers/config/hive/hive-log4j2.xml       |   111 +
 .../deployers/config/hive/hive-site.mysql.xml   |    22 +
 .../templeton/deployers/start_hive_services.sh  |     2 +-
 .../hive/hcatalog/streaming/mutate/package.html |    31 +-
 .../mutate/worker/CreatePartitionHelper.java    |    83 -
 .../mutate/worker/MetaStorePartitionHelper.java |   102 +
 .../mutate/worker/MutatorCoordinator.java       |    21 +-
 .../worker/MutatorCoordinatorBuilder.java       |    41 +-
 .../mutate/worker/PartitionHelper.java          |    17 +
 .../mutate/worker/WarehousePartitionHelper.java |    69 +
 .../hive/hcatalog/streaming/TestStreaming.java  |    54 +-
 .../worker/TestMetaStorePartitionHelper.java    |   112 +
 .../mutate/worker/TestMutatorCoordinator.java   |    40 +-
 .../worker/TestWarehousePartitionHelper.java    |    57 +
 .../webhcat/svr/src/main/bin/webhcat_server.sh  |     4 +-
 .../src/main/config/webhcat-log4j.properties    |    45 -
 .../svr/src/main/config/webhcat-log4j2.xml      |    75 +
 .../antlr4/org/apache/hive/hplsql/Hplsql.g4     |   275 +-
 .../java/org/apache/hive/hplsql/Column.java     |    65 +
 .../main/java/org/apache/hive/hplsql/Conn.java  |    16 +-
 .../java/org/apache/hive/hplsql/Converter.java  |    41 +-
 .../main/java/org/apache/hive/hplsql/Exec.java  |   300 +-
 .../java/org/apache/hive/hplsql/Expression.java |    79 +-
 .../main/java/org/apache/hive/hplsql/Meta.java  |   216 +
 .../main/java/org/apache/hive/hplsql/Query.java |    71 +
 .../main/java/org/apache/hive/hplsql/Row.java   |    97 +
 .../java/org/apache/hive/hplsql/Select.java     |    63 +-
 .../java/org/apache/hive/hplsql/Signal.java     |     2 +-
 .../main/java/org/apache/hive/hplsql/Stmt.java  |   235 +-
 .../main/java/org/apache/hive/hplsql/Utils.java |     7 +
 .../main/java/org/apache/hive/hplsql/Var.java   |    85 +-
 .../apache/hive/hplsql/functions/Function.java  |    74 +-
 .../hive/hplsql/functions/FunctionDatetime.java |    14 +-
 .../hive/hplsql/functions/FunctionMisc.java     |    22 +-
 .../hive/hplsql/functions/FunctionOra.java      |    31 +-
 .../hive/hplsql/functions/FunctionString.java   |    46 +-
 hplsql/src/main/resources/hplsql-site.xml       |    95 +
 .../org/apache/hive/hplsql/TestHplsqlLocal.java |    34 +-
 .../apache/hive/hplsql/TestHplsqlOffline.java   |    76 +
 .../test/queries/db/create_procedure_mssql.sql  |    52 +
 .../db/create_procedure_return_cursor.sql       |    53 +
 .../db/create_procedure_return_cursor2.sql      |    59 +
 .../src/test/queries/db/cursor_attributes.sql   |    60 +
 hplsql/src/test/queries/db/map_object.sql       |     9 +
 .../src/test/queries/db/rowtype_attribute.sql   |    22 +
 hplsql/src/test/queries/db/select_into.sql      |    17 +
 .../src/test/queries/db/set_current_schema.sql  |     6 +
 hplsql/src/test/queries/db/sys_refcursor.sql    |    65 +
 hplsql/src/test/queries/db/type_attribute.sql   |     8 +
 hplsql/src/test/queries/db/use.sql              |     2 +
 .../local/create_procedure_no_params.sql        |    19 +
 hplsql/src/test/queries/local/exception2.sql    |    10 -
 hplsql/src/test/queries/local/exception3.sql    |     5 -
 hplsql/src/test/queries/local/exception4.sql    |     7 -
 hplsql/src/test/queries/local/exception5.sql    |    10 -
 .../queries/local/exception_divide_by_zero.sql  |    11 +
 .../test/queries/offline/create_table_mssql.sql |    43 +
 .../test/queries/offline/create_table_ora.sql   |    53 +
 .../results/db/create_procedure_mssql.out.txt   |    45 +
 .../db/create_procedure_return_cursor.out.txt   |   135 +
 .../db/create_procedure_return_cursor2.out.txt  |   139 +
 .../test/results/db/cursor_attributes.out.txt   |    33 +
 hplsql/src/test/results/db/map_object.out.txt   |    17 +
 .../test/results/db/rowtype_attribute.out.txt   |    42 +
 hplsql/src/test/results/db/select_into.out.txt  |    19 +
 .../test/results/db/set_current_schema.out.txt  |    12 +
 .../src/test/results/db/sys_refcursor.out.txt   |    36 +
 .../src/test/results/db/type_attribute.out.txt  |    15 +
 hplsql/src/test/results/db/use.out.txt          |     4 +
 .../test/results/local/create_function.out.txt  |     4 +-
 .../local/create_procedure_no_params.out.txt    |    26 +
 hplsql/src/test/results/local/declare.out.txt   |     4 +-
 .../local/exception_divide_by_zero.out.txt      |     8 +
 .../results/offline/create_table_mssql.out.txt  |    24 +
 .../results/offline/create_table_ora.out.txt    |    42 +
 .../vectorization/VectorizationBench.java       |    93 +
 .../hive/metastore/TestHiveMetaStore.java       |   153 +-
 .../org/apache/hive/jdbc/TestJdbcDriver2.java   |    16 +
 .../TestOperationLoggingAPIWithMr.java          |     2 -
 .../TestOperationLoggingAPIWithTez.java         |     2 -
 .../operation/TestOperationLoggingLayout.java   |   134 +
 itests/pom.xml                                  |     2 +-
 itests/qtest-spark/pom.xml                      |    24 +
 itests/qtest/pom.xml                            |    28 +-
 .../test/resources/testconfiguration.properties |    54 +-
 .../org/apache/hadoop/hive/ql/QTestUtil.java    |    62 +-
 jdbc/pom.xml                                    |     1 +
 .../org/apache/hive/jdbc/HiveConnection.java    |    34 +-
 .../apache/hive/jdbc/HivePreparedStatement.java |     2 +-
 .../org/apache/hive/jdbc/HiveStatement.java     |     2 +-
 jdbc/src/java/org/apache/hive/jdbc/Utils.java   |   113 +-
 .../hive/jdbc/ZooKeeperHiveClientHelper.java    |   104 +-
 metastore/if/hive_metastore.thrift              |     5 +
 .../gen/thrift/gen-cpp/ThriftHiveMetastore.cpp  |  5586 ++-
 .../gen/thrift/gen-cpp/ThriftHiveMetastore.h    |  7942 ++--
 .../ThriftHiveMetastore_server.skeleton.cpp     |     5 +
 .../thrift/gen-cpp/hive_metastore_constants.cpp |     2 +-
 .../thrift/gen-cpp/hive_metastore_constants.h   |     2 +-
 .../gen/thrift/gen-cpp/hive_metastore_types.cpp |  6204 ++-
 .../gen/thrift/gen-cpp/hive_metastore_types.h   |  2666 +-
 .../hive/metastore/api/AbortTxnRequest.java     |    24 +-
 .../metastore/api/AddDynamicPartitions.java     |    69 +-
 .../metastore/api/AddPartitionsRequest.java     |    80 +-
 .../hive/metastore/api/AddPartitionsResult.java |    48 +-
 .../hadoop/hive/metastore/api/AggrStats.java    |    54 +-
 .../metastore/api/AlreadyExistsException.java   |    24 +-
 .../metastore/api/BinaryColumnStatsData.java    |    40 +-
 .../metastore/api/BooleanColumnStatsData.java   |    40 +-
 .../hive/metastore/api/CheckLockRequest.java    |    24 +-
 .../hive/metastore/api/ColumnStatistics.java    |    54 +-
 .../metastore/api/ColumnStatisticsData.java     |    20 +-
 .../metastore/api/ColumnStatisticsDesc.java     |    58 +-
 .../hive/metastore/api/ColumnStatisticsObj.java |    40 +-
 .../hive/metastore/api/CommitTxnRequest.java    |    24 +-
 .../hive/metastore/api/CompactionRequest.java   |    62 +-
 .../hive/metastore/api/CompactionType.java      |     2 +-
 .../api/ConfigValSecurityException.java         |    24 +-
 .../api/CurrentNotificationEventId.java         |    24 +-
 .../hadoop/hive/metastore/api/Database.java     |   115 +-
 .../apache/hadoop/hive/metastore/api/Date.java  |    24 +-
 .../hive/metastore/api/DateColumnStatsData.java |    50 +-
 .../hadoop/hive/metastore/api/Decimal.java      |    41 +-
 .../metastore/api/DecimalColumnStatsData.java   |    50 +-
 .../metastore/api/DoubleColumnStatsData.java    |    50 +-
 .../hive/metastore/api/DropPartitionsExpr.java  |    43 +-
 .../metastore/api/DropPartitionsRequest.java    |    82 +-
 .../metastore/api/DropPartitionsResult.java     |    48 +-
 .../hive/metastore/api/EnvironmentContext.java  |    61 +-
 .../hive/metastore/api/EventRequestType.java    |     2 +-
 .../hadoop/hive/metastore/api/FieldSchema.java  |    58 +-
 .../hive/metastore/api/FireEventRequest.java    |    79 +-
 .../metastore/api/FireEventRequestData.java     |    20 +-
 .../hive/metastore/api/FireEventResponse.java   |    16 +-
 .../hadoop/hive/metastore/api/Function.java     |   110 +-
 .../hadoop/hive/metastore/api/FunctionType.java |     2 +-
 .../metastore/api/GetAllFunctionsResponse.java  |   447 +
 .../metastore/api/GetOpenTxnsInfoResponse.java  |    54 +-
 .../hive/metastore/api/GetOpenTxnsResponse.java |    53 +-
 .../api/GetPrincipalsInRoleRequest.java         |    24 +-
 .../api/GetPrincipalsInRoleResponse.java        |    46 +-
 .../api/GetRoleGrantsForPrincipalRequest.java   |    36 +-
 .../api/GetRoleGrantsForPrincipalResponse.java  |    46 +-
 .../api/GrantRevokePrivilegeRequest.java        |    46 +-
 .../api/GrantRevokePrivilegeResponse.java       |    26 +-
 .../metastore/api/GrantRevokeRoleRequest.java   |    86 +-
 .../metastore/api/GrantRevokeRoleResponse.java  |    26 +-
 .../hive/metastore/api/GrantRevokeType.java     |     2 +-
 .../hive/metastore/api/HeartbeatRequest.java    |    34 +-
 .../metastore/api/HeartbeatTxnRangeRequest.java |    32 +-
 .../api/HeartbeatTxnRangeResponse.java          |    74 +-
 .../hive/metastore/api/HiveObjectPrivilege.java |    52 +-
 .../hive/metastore/api/HiveObjectRef.java       |    81 +-
 .../hive/metastore/api/HiveObjectType.java      |     2 +-
 .../apache/hadoop/hive/metastore/api/Index.java |   133 +-
 .../api/IndexAlreadyExistsException.java        |    24 +-
 .../metastore/api/InsertEventRequestData.java   |    45 +-
 .../metastore/api/InvalidInputException.java    |    24 +-
 .../metastore/api/InvalidObjectException.java   |    24 +-
 .../api/InvalidOperationException.java          |    24 +-
 .../api/InvalidPartitionException.java          |    24 +-
 .../hive/metastore/api/LockComponent.java       |    66 +-
 .../hadoop/hive/metastore/api/LockLevel.java    |     2 +-
 .../hadoop/hive/metastore/api/LockRequest.java  |    72 +-
 .../hadoop/hive/metastore/api/LockResponse.java |    36 +-
 .../hadoop/hive/metastore/api/LockState.java    |     2 +-
 .../hadoop/hive/metastore/api/LockType.java     |     2 +-
 .../hive/metastore/api/LongColumnStatsData.java |    50 +-
 .../hive/metastore/api/MetaException.java       |    24 +-
 .../hive/metastore/api/NoSuchLockException.java |    24 +-
 .../metastore/api/NoSuchObjectException.java    |    24 +-
 .../hive/metastore/api/NoSuchTxnException.java  |    24 +-
 .../hive/metastore/api/NotificationEvent.java   |    66 +-
 .../metastore/api/NotificationEventRequest.java |    34 +-
 .../api/NotificationEventResponse.java          |    46 +-
 .../hive/metastore/api/OpenTxnRequest.java      |    40 +-
 .../hive/metastore/api/OpenTxnsResponse.java    |    45 +-
 .../apache/hadoop/hive/metastore/api/Order.java |    32 +-
 .../hadoop/hive/metastore/api/Partition.java    |   156 +-
 .../hive/metastore/api/PartitionEventType.java  |     2 +-
 .../api/PartitionListComposingSpec.java         |    46 +-
 .../hive/metastore/api/PartitionSpec.java       |    58 +-
 .../api/PartitionSpecWithSharedSD.java          |    54 +-
 .../hive/metastore/api/PartitionWithoutSD.java  |   124 +-
 .../metastore/api/PartitionsByExprRequest.java  |    67 +-
 .../metastore/api/PartitionsByExprResult.java   |    54 +-
 .../metastore/api/PartitionsStatsRequest.java   |    90 +-
 .../metastore/api/PartitionsStatsResult.java    |    72 +-
 .../metastore/api/PrincipalPrivilegeSet.java    |   184 +-
 .../hive/metastore/api/PrincipalType.java       |     2 +-
 .../hadoop/hive/metastore/api/PrivilegeBag.java |    46 +-
 .../hive/metastore/api/PrivilegeGrantInfo.java  |    60 +-
 .../hive/metastore/api/RequestPartsSpec.java    |    56 +-
 .../hadoop/hive/metastore/api/ResourceType.java |     2 +-
 .../hadoop/hive/metastore/api/ResourceUri.java  |    36 +-
 .../apache/hadoop/hive/metastore/api/Role.java  |    40 +-
 .../hive/metastore/api/RolePrincipalGrant.java  |    80 +-
 .../hadoop/hive/metastore/api/Schema.java       |    91 +-
 .../hadoop/hive/metastore/api/SerDeInfo.java    |    93 +-
 .../api/SetPartitionsStatsRequest.java          |    46 +-
 .../hive/metastore/api/ShowCompactRequest.java  |    16 +-
 .../hive/metastore/api/ShowCompactResponse.java |    46 +-
 .../api/ShowCompactResponseElement.java         |    86 +-
 .../hive/metastore/api/ShowLocksRequest.java    |    16 +-
 .../hive/metastore/api/ShowLocksResponse.java   |    46 +-
 .../metastore/api/ShowLocksResponseElement.java |   114 +-
 .../hadoop/hive/metastore/api/SkewedInfo.java   |   147 +-
 .../hive/metastore/api/StorageDescriptor.java   |   242 +-
 .../metastore/api/StringColumnStatsData.java    |    48 +-
 .../apache/hadoop/hive/metastore/api/Table.java |   189 +-
 .../hive/metastore/api/TableStatsRequest.java   |    61 +-
 .../hive/metastore/api/TableStatsResult.java    |    46 +-
 .../hive/metastore/api/ThriftHiveMetastore.java | 33417 +++++++++++------
 .../hive/metastore/api/TxnAbortedException.java |    24 +-
 .../hadoop/hive/metastore/api/TxnInfo.java      |    52 +-
 .../hive/metastore/api/TxnOpenException.java    |    24 +-
 .../hadoop/hive/metastore/api/TxnState.java     |     2 +-
 .../apache/hadoop/hive/metastore/api/Type.java  |    72 +-
 .../hive/metastore/api/UnknownDBException.java  |    24 +-
 .../api/UnknownPartitionException.java          |    24 +-
 .../metastore/api/UnknownTableException.java    |    24 +-
 .../hive/metastore/api/UnlockRequest.java       |    24 +-
 .../hadoop/hive/metastore/api/Version.java      |    32 +-
 .../metastore/api/hive_metastoreConstants.java  |     7 +-
 .../gen-php/metastore/ThriftHiveMetastore.php   |  4593 ++-
 .../src/gen/thrift/gen-php/metastore/Types.php  |  1184 +-
 .../hive_metastore/ThriftHiveMetastore-remote   |   616 +-
 .../hive_metastore/ThriftHiveMetastore.py       |  4601 ++-
 .../thrift/gen-py/hive_metastore/constants.py   |     2 +-
 .../gen/thrift/gen-py/hive_metastore/ttypes.py  |   856 +-
 .../thrift/gen-rb/hive_metastore_constants.rb   |     2 +-
 .../gen/thrift/gen-rb/hive_metastore_types.rb   |    18 +-
 .../gen/thrift/gen-rb/thrift_hive_metastore.rb  |    62 +-
 .../hadoop/hive/metastore/HiveAlterHandler.java |     2 +-
 .../hadoop/hive/metastore/HiveMetaStore.java    |   106 +-
 .../hive/metastore/HiveMetaStoreClient.java     |     7 +
 .../hive/metastore/HouseKeeperService.java      |    39 +
 .../hadoop/hive/metastore/IMetaStoreClient.java |     8 +-
 .../hive/metastore/MetaStoreDirectSql.java      |    69 +-
 .../hadoop/hive/metastore/MetaStoreUtils.java   |    17 +-
 .../hadoop/hive/metastore/ObjectStore.java      |    35 +-
 .../apache/hadoop/hive/metastore/RawStore.java  |     7 +
 .../hive/metastore/RetryingMetaStoreClient.java |    32 +-
 .../hadoop/hive/metastore/txn/TxnHandler.java   |   185 +-
 .../metastore/txn/ValidCompactorTxnList.java    |     2 +-
 .../DummyRawStoreControlledCommit.java          |     7 +
 .../DummyRawStoreForJdoConnection.java          |     6 +
 .../metastore/txn/TestCompactionTxnHandler.java |    40 +-
 .../hive/metastore/txn/TestTxnHandler.java      |    73 +-
 packaging/src/main/assembly/bin.xml             |    17 +-
 pom.xml                                         |    54 +-
 ql/if/queryplan.thrift                          |     1 +
 ql/pom.xml                                      |    24 +-
 .../gen/thrift/gen-cpp/queryplan_constants.cpp  |     2 +-
 ql/src/gen/thrift/gen-cpp/queryplan_constants.h |     2 +-
 ql/src/gen/thrift/gen-cpp/queryplan_types.cpp   |   796 +-
 ql/src/gen/thrift/gen-cpp/queryplan_types.h     |   294 +-
 .../hadoop/hive/ql/plan/api/Adjacency.java      |    65 +-
 .../hadoop/hive/ql/plan/api/AdjacencyType.java  |     2 +-
 .../apache/hadoop/hive/ql/plan/api/Graph.java   |    87 +-
 .../hadoop/hive/ql/plan/api/NodeType.java       |     2 +-
 .../hadoop/hive/ql/plan/api/Operator.java       |   142 +-
 .../hadoop/hive/ql/plan/api/OperatorType.java   |     7 +-
 .../apache/hadoop/hive/ql/plan/api/Query.java   |   176 +-
 .../hadoop/hive/ql/plan/api/QueryPlan.java      |    62 +-
 .../apache/hadoop/hive/ql/plan/api/Stage.java   |   172 +-
 .../hadoop/hive/ql/plan/api/StageType.java      |     2 +-
 .../apache/hadoop/hive/ql/plan/api/Task.java    |   182 +-
 .../hadoop/hive/ql/plan/api/TaskType.java       |     2 +-
 ql/src/gen/thrift/gen-php/Types.php             |   119 +-
 ql/src/gen/thrift/gen-py/queryplan/constants.py |     2 +-
 ql/src/gen/thrift/gen-py/queryplan/ttypes.py    |    87 +-
 ql/src/gen/thrift/gen-rb/queryplan_constants.rb |     2 +-
 ql/src/gen/thrift/gen-rb/queryplan_types.rb     |     7 +-
 ...tringGroupColumnCompareStringGroupColumn.txt |   112 +-
 ...gGroupColumnCompareStringGroupScalarBase.txt |    12 +-
 ...gGroupScalarCompareStringGroupColumnBase.txt |    12 +-
 ...tringGroupColumnCompareStringGroupColumn.txt |   112 +-
 ...gGroupColumnCompareStringGroupScalarBase.txt |    12 +-
 ...gGroupScalarCompareStringGroupColumnBase.txt |    12 +-
 .../UDAFTemplates/VectorUDAFMinMaxString.txt    |     3 +-
 .../java/org/apache/hadoop/hive/ql/Context.java |     1 -
 .../java/org/apache/hadoop/hive/ql/Driver.java  |   222 +-
 .../org/apache/hadoop/hive/ql/ErrorMsg.java     |    13 +-
 .../org/apache/hadoop/hive/ql/QueryPlan.java    |    18 +-
 .../apache/hadoop/hive/ql/exec/ExplainTask.java |     7 +-
 .../hadoop/hive/ql/exec/FetchOperator.java      |     5 +-
 .../hadoop/hive/ql/exec/FileSinkOperator.java   |     2 +-
 .../hadoop/hive/ql/exec/FunctionRegistry.java   |    66 +-
 .../hadoop/hive/ql/exec/FunctionTask.java       |     2 +-
 .../hive/ql/exec/HashTableSinkOperator.java     |     6 +-
 .../apache/hadoop/hive/ql/exec/JoinUtil.java    |    87 +-
 .../hadoop/hive/ql/exec/KeyWrapperFactory.java  |    18 +-
 .../hadoop/hive/ql/exec/MapJoinOperator.java    |    61 +-
 .../apache/hadoop/hive/ql/exec/MoveTask.java    |     4 +-
 .../apache/hadoop/hive/ql/exec/Operator.java    |    31 +-
 .../hadoop/hive/ql/exec/OperatorFactory.java    |    11 +
 .../hadoop/hive/ql/exec/ScriptOperator.java     |    85 +-
 .../ql/exec/SparkHashTableSinkOperator.java     |    17 +-
 .../apache/hadoop/hive/ql/exec/Utilities.java   |    38 +-
 .../hadoop/hive/ql/exec/mr/ExecDriver.java      |    30 +-
 .../hive/ql/exec/mr/HadoopJobExecHelper.java    |    20 +-
 .../persistence/HybridHashTableContainer.java   |    33 +
 .../persistence/MapJoinBytesTableContainer.java |     5 +
 .../exec/persistence/MapJoinTableContainer.java |     5 +
 .../persistence/MapJoinTableContainerSerDe.java |    63 +-
 .../hive/ql/exec/spark/HashTableLoader.java     |    26 +-
 .../ql/exec/spark/HiveSparkClientFactory.java   |    10 +-
 .../hive/ql/exec/spark/KryoSerializer.java      |     4 +
 .../ql/exec/spark/RemoteHiveSparkClient.java    |    57 +-
 .../exec/spark/SparkDynamicPartitionPruner.java |   268 +
 .../hadoop/hive/ql/exec/spark/SparkPlan.java    |     3 -
 .../hive/ql/exec/spark/SparkPlanGenerator.java  |    15 +-
 .../ql/exec/spark/SparkReduceRecordHandler.java |     2 -
 .../hadoop/hive/ql/exec/spark/SparkTask.java    |     1 +
 .../hive/ql/exec/spark/SparkUtilities.java      |    56 +
 .../spark/status/impl/LocalSparkJobStatus.java  |     2 +-
 .../spark/status/impl/RemoteSparkJobStatus.java |     2 +-
 .../hive/ql/exec/tez/KeyValuesAdapter.java      |    47 +
 .../hive/ql/exec/tez/KeyValuesFromKeyValue.java |    90 +
 .../ql/exec/tez/KeyValuesFromKeyValues.java     |    48 +
 .../hive/ql/exec/tez/ReduceRecordProcessor.java |    11 +-
 .../hive/ql/exec/tez/ReduceRecordSource.java    |    15 +-
 .../hive/ql/exec/tez/TezSessionState.java       |     2 +
 .../apache/hadoop/hive/ql/exec/tez/TezTask.java |    10 +-
 .../hive/ql/exec/vector/BytesColumnVector.java  |   333 -
 .../hive/ql/exec/vector/ColumnVector.java       |   170 -
 .../ql/exec/vector/DecimalColumnVector.java     |   113 -
 .../hive/ql/exec/vector/DoubleColumnVector.java |   149 -
 .../hive/ql/exec/vector/LongColumnVector.java   |   193 -
 .../ql/exec/vector/VectorHashKeyWrapper.java    |     2 +-
 .../ql/exec/vector/VectorMapJoinOperator.java   |     1 -
 .../ql/exec/vector/VectorSelectOperator.java    |     3 -
 .../VectorSparkHashTableSinkOperator.java       |   104 +
 ...VectorSparkPartitionPruningSinkOperator.java |    99 +
 .../ql/exec/vector/VectorizationContext.java    |    56 +-
 .../hive/ql/exec/vector/VectorizedRowBatch.java |   206 -
 .../BRoundWithNumDigitsDoubleToDouble.java      |    42 +
 .../expressions/CastStringGroupToString.java    |    40 +
 .../ql/exec/vector/expressions/ColAndCol.java   |    34 +-
 .../ql/exec/vector/expressions/ColOrCol.java    |    42 +-
 .../exec/vector/expressions/CuckooSetBytes.java |     2 +-
 .../ql/exec/vector/expressions/DecimalUtil.java |    18 +
 .../vector/expressions/FilterExprAndExpr.java   |     8 +-
 .../vector/expressions/FilterExprOrExpr.java    |   140 +-
 ...FuncBRoundWithNumDigitsDecimalToDecimal.java |    40 +
 .../FuncRoundWithNumDigitsDecimalToDecimal.java |    14 +-
 .../ql/exec/vector/expressions/MathExpr.java    |    22 +
 .../hive/ql/exec/vector/expressions/NotCol.java |    14 +-
 .../ql/exec/vector/expressions/StringExpr.java  |    51 +
 .../mapjoin/VectorMapJoinCommonOperator.java    |     1 +
 ...VectorMapJoinInnerBigOnlyStringOperator.java |     4 +-
 .../VectorMapJoinInnerStringOperator.java       |     4 +-
 .../VectorMapJoinLeftSemiStringOperator.java    |     4 +-
 .../VectorMapJoinOuterStringOperator.java       |     4 +-
 .../fast/VectorMapJoinFastHashTable.java        |     5 +
 .../fast/VectorMapJoinFastTableContainer.java   |     5 +
 .../hashtable/VectorMapJoinHashTable.java       |     4 +
 .../VectorMapJoinOptimizedHashTable.java        |     4 +
 .../hadoop/hive/ql/hooks/LineageInfo.java       |     9 +-
 .../hadoop/hive/ql/hooks/LineageLogger.java     |    48 +-
 .../hive/ql/hooks/PostExecOrcFileDump.java      |   120 +
 .../ql/hooks/PostExecTezSummaryPrinter.java     |    72 +
 .../org/apache/hadoop/hive/ql/io/AcidUtils.java |   183 +-
 .../hive/ql/io/CombineHiveInputFormat.java      |    98 +-
 .../hadoop/hive/ql/io/HiveInputFormat.java      |    46 +-
 .../apache/hadoop/hive/ql/io/orc/FileDump.java  |    52 +-
 .../hadoop/hive/ql/io/orc/MemoryManager.java    |     4 +-
 .../apache/hadoop/hive/ql/io/orc/OrcConf.java   |   191 +
 .../apache/hadoop/hive/ql/io/orc/OrcFile.java   |   161 +-
 .../hadoop/hive/ql/io/orc/OrcInputFormat.java   |   306 +-
 .../hadoop/hive/ql/io/orc/OrcOutputFormat.java  |    67 +-
 .../hive/ql/io/orc/OrcRawRecordMerger.java      |    20 +-
 .../apache/hadoop/hive/ql/io/orc/OrcSerde.java  |     6 +-
 .../apache/hadoop/hive/ql/io/orc/OrcSplit.java  |    25 +-
 .../apache/hadoop/hive/ql/io/orc/Reader.java    |    47 +-
 .../hadoop/hive/ql/io/orc/ReaderImpl.java       |   260 +-
 .../hadoop/hive/ql/io/orc/RecordReaderImpl.java |    83 +-
 .../hive/ql/io/orc/TreeReaderFactory.java       |    30 +-
 .../hadoop/hive/ql/io/orc/WriterImpl.java       |    27 +-
 .../hive/ql/io/parquet/LeafFilterFactory.java   |    43 +-
 .../parquet/read/DataWritableReadSupport.java   |    10 +-
 .../read/ParquetFilterPredicateConverter.java   |   145 +
 .../read/ParquetRecordReaderWrapper.java        |   125 +-
 .../ql/io/rcfile/stats/PartialScanTask.java     |    20 +-
 .../hive/ql/io/sarg/ConvertAstToSearchArg.java  |   436 +
 .../hive/ql/io/sarg/SearchArgumentFactory.java  |    56 -
 .../hive/ql/io/sarg/SearchArgumentImpl.java     |  1027 -
 .../hadoop/hive/ql/lib/DefaultGraphWalker.java  |    84 +-
 .../hadoop/hive/ql/lib/ForwardWalker.java       |    35 +-
 .../hadoop/hive/ql/lib/LevelOrderWalker.java    |   153 +
 .../hadoop/hive/ql/lib/PreOrderOnceWalker.java  |    44 +
 .../hadoop/hive/ql/lib/PreOrderWalker.java      |     2 +-
 .../hadoop/hive/ql/lib/RuleExactMatch.java      |    21 +-
 .../apache/hadoop/hive/ql/lib/RuleRegExp.java   |   197 +-
 .../hadoop/hive/ql/lockmgr/DbLockManager.java   |    12 +-
 .../hadoop/hive/ql/lockmgr/DbTxnManager.java    |    71 +-
 .../hadoop/hive/ql/lockmgr/DummyTxnManager.java |     8 +
 .../hadoop/hive/ql/lockmgr/HiveTxnManager.java  |    21 +
 .../hive/ql/lockmgr/HiveTxnManagerImpl.java     |    10 +
 .../hadoop/hive/ql/lockmgr/LockException.java   |     8 +-
 .../hadoop/hive/ql/log/HiveEventCounter.java    |   135 +
 .../apache/hadoop/hive/ql/log/NullAppender.java |    63 +
 .../ql/log/PidDailyRollingFileAppender.java     |    33 -
 .../hive/ql/log/PidFilePatternConverter.java    |    62 +
 .../apache/hadoop/hive/ql/metadata/Hive.java    |   153 +-
 .../hadoop/hive/ql/metadata/HiveException.java  |     3 +
 .../hadoop/hive/ql/metadata/Partition.java      |     2 +-
 .../ql/metadata/SessionHiveMetaStoreClient.java |     2 +-
 .../hadoop/hive/ql/metadata/TableIterable.java  |   104 +
 .../hadoop/hive/ql/optimizer/ColumnPruner.java  |    12 +-
 .../hive/ql/optimizer/ColumnPrunerProcCtx.java  |    95 +-
 .../ql/optimizer/ColumnPrunerProcFactory.java   |    42 +-
 .../hive/ql/optimizer/ConstantPropagate.java    |    12 +-
 .../optimizer/ConstantPropagateProcFactory.java |   311 +-
 .../hive/ql/optimizer/ConvertJoinMapJoin.java   |   213 +-
 .../DynamicPartitionPruningOptimization.java    |    44 +-
 .../hive/ql/optimizer/GenMapRedUtils.java       |    20 +-
 .../hive/ql/optimizer/GroupByOptimizer.java     |    58 +-
 .../ql/optimizer/IdentityProjectRemover.java    |    15 +
 .../hadoop/hive/ql/optimizer/IndexUtils.java    |    13 +-
 .../hive/ql/optimizer/MapJoinProcessor.java     |    44 +-
 .../ql/optimizer/OperatorComparatorFactory.java |   552 +
 .../hadoop/hive/ql/optimizer/Optimizer.java     |    15 +-
 .../hive/ql/optimizer/PointLookupOptimizer.java |   378 +
 .../hadoop/hive/ql/optimizer/PrunerUtils.java   |    14 +-
 .../ql/optimizer/ReduceSinkMapJoinProc.java     |    84 +-
 .../hive/ql/optimizer/SimpleFetchOptimizer.java |     5 +-
 .../SparkRemoveDynamicPruningBySize.java        |    73 +
 .../ql/optimizer/calcite/HiveRelOptUtil.java    |    23 -
 .../calcite/reloperators/HiveFilter.java        |     2 +-
 .../calcite/reloperators/HiveSort.java          |    29 +-
 .../rules/HiveJoinProjectTransposeRule.java     |    53 +-
 .../calcite/rules/HiveJoinToMultiJoinRule.java  |    82 +-
 .../calcite/translator/ExprNodeConverter.java   |    60 +-
 .../calcite/translator/HiveOpConverter.java     |   104 +-
 .../calcite/translator/JoinTypeCheckCtx.java    |     2 +-
 .../translator/PlanModifierForASTConv.java      |     2 +-
 .../translator/PlanModifierForReturnPath.java   |     6 +-
 .../calcite/translator/TypeConverter.java       |    13 +-
 .../correlation/AbstractCorrelationProcCtx.java |     7 +
 .../correlation/CorrelationUtilities.java       |    11 +-
 .../correlation/ReduceSinkDeDuplication.java    |     6 +-
 .../ql/optimizer/index/RewriteCanApplyCtx.java  |     8 +-
 .../ql/optimizer/lineage/ExprProcFactory.java   |     9 +-
 .../hive/ql/optimizer/lineage/Generator.java    |     4 +-
 .../hive/ql/optimizer/lineage/LineageCtx.java   |    34 +-
 .../ql/optimizer/lineage/OpProcFactory.java     |    10 +-
 .../annotation/AnnotateWithOpTraits.java        |     6 +-
 .../ql/optimizer/pcr/PcrExprProcFactory.java    |   154 +-
 .../BucketingSortingInferenceOptimizer.java     |     8 +-
 .../physical/GenSparkSkewJoinProcessor.java     |    14 +-
 .../physical/NullScanTaskDispatcher.java        |     6 +-
 .../hive/ql/optimizer/physical/Vectorizer.java  |    40 +-
 .../hive/ql/optimizer/ppr/OpProcFactory.java    |     3 +-
 .../hive/ql/optimizer/ppr/PartitionPruner.java  |    69 +-
 .../spark/CombineEquivalentWorkResolver.java    |   292 +
 .../spark/SparkPartitionPruningSinkDesc.java    |   100 +
 .../spark/SparkReduceSinkMapJoinProc.java       |     2 +-
 .../annotation/AnnotateWithStatistics.java      |     6 +-
 .../stats/annotation/StatsRulesProcFactory.java |    49 +-
 .../ql/optimizer/unionproc/UnionProcessor.java  |    10 +-
 .../apache/hadoop/hive/ql/parse/ASTNode.java    |   139 +-
 .../hive/ql/parse/BaseSemanticAnalyzer.java     |    13 +
 .../hadoop/hive/ql/parse/CalcitePlanner.java    |    49 +-
 .../ql/parse/ColumnStatsSemanticAnalyzer.java   |     6 +-
 .../hive/ql/parse/DDLSemanticAnalyzer.java      |    11 +
 .../hive/ql/parse/ExplainSemanticAnalyzer.java  |     2 +-
 .../hadoop/hive/ql/parse/GenMapRedWalker.java   |     2 +-
 .../hadoop/hive/ql/parse/GenTezProcContext.java |    12 +
 .../hadoop/hive/ql/parse/GenTezUtils.java       |    23 +-
 .../apache/hadoop/hive/ql/parse/GenTezWork.java |    81 +-
 .../hadoop/hive/ql/parse/GenTezWorkWalker.java  |     2 +-
 .../org/apache/hadoop/hive/ql/parse/HiveLexer.g |    11 +
 .../apache/hadoop/hive/ql/parse/HiveParser.g    |    70 +
 .../hadoop/hive/ql/parse/IdentifiersParser.g    |    75 +-
 .../hadoop/hive/ql/parse/LeadLagInfo.java       |     4 +-
 .../hive/ql/parse/LoadSemanticAnalyzer.java     |    38 +-
 .../apache/hadoop/hive/ql/parse/ParseUtils.java |    53 +-
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |   111 +-
 .../hive/ql/parse/SemanticAnalyzerFactory.java  |    12 +
 .../apache/hadoop/hive/ql/parse/TezWalker.java  |     2 +-
 .../hadoop/hive/ql/parse/TypeCheckCtx.java      |    15 +-
 .../hive/ql/parse/TypeCheckProcFactory.java     |    46 +-
 .../ql/parse/spark/GenSparkProcContext.java     |    14 +-
 .../hive/ql/parse/spark/GenSparkUtils.java      |   111 +-
 .../hive/ql/parse/spark/GenSparkWorkWalker.java |     2 +-
 .../parse/spark/OptimizeSparkProcContext.java   |    16 +-
 .../hive/ql/parse/spark/SparkCompiler.java      |   180 +-
 .../SparkPartitionPruningSinkOperator.java      |   142 +
 .../hive/ql/parse/spark/SplitOpTreeForDPP.java  |   151 +
 .../apache/hadoop/hive/ql/plan/BaseWork.java    |     2 +-
 .../hive/ql/plan/CommonMergeJoinDesc.java       |     4 +
 .../apache/hadoop/hive/ql/plan/ExplainWork.java |    19 +-
 .../hive/ql/plan/ExprNodeConstantDesc.java      |    29 +-
 .../hadoop/hive/ql/plan/ExprNodeDesc.java       |    23 +-
 .../hadoop/hive/ql/plan/ExprNodeDescUtils.java  |   115 +
 .../apache/hadoop/hive/ql/plan/FilterDesc.java  |    14 +-
 .../hadoop/hive/ql/plan/HiveOperation.java      |    32 +-
 .../hadoop/hive/ql/plan/JoinCondDesc.java       |    14 +
 .../apache/hadoop/hive/ql/plan/JoinDesc.java    |     4 +
 .../apache/hadoop/hive/ql/plan/MapJoinDesc.java |    11 +
 .../org/apache/hadoop/hive/ql/plan/MapWork.java |    20 +-
 .../hadoop/hive/ql/plan/PartitionDesc.java      |    39 +-
 .../apache/hadoop/hive/ql/plan/PlanUtils.java   |     9 +-
 .../hadoop/hive/ql/plan/ReduceSinkDesc.java     |     1 +
 .../apache/hadoop/hive/ql/plan/ReduceWork.java  |     2 +-
 .../hive/ql/plan/SparkHashTableSinkDesc.java    |    11 +
 .../hadoop/hive/ql/plan/TableScanDesc.java      |     6 +-
 .../hadoop/hive/ql/ppd/ExprWalkerInfo.java      |   136 +-
 .../hive/ql/ppd/ExprWalkerProcFactory.java      |   107 +-
 .../hadoop/hive/ql/ppd/OpProcFactory.java       |    11 +-
 .../ql/ppd/PredicateTransitivePropagate.java    |     4 +-
 .../hive/ql/ppd/SyntheticJoinPredicate.java     |    18 +-
 .../ql/processors/CommandProcessorResponse.java |    21 +-
 .../hadoop/hive/ql/processors/HiveCommand.java  |     3 +
 .../hadoop/hive/ql/processors/SetProcessor.java |     4 +
 .../authorization/plugin/HiveOperationType.java |     5 +
 .../plugin/sqlstd/Operation2Privilege.java      |    11 +
 .../hadoop/hive/ql/session/SessionState.java    |    44 +-
 .../apache/hadoop/hive/ql/stats/StatsUtils.java |    54 +-
 .../hive/ql/txn/AcidHouseKeeperService.java     |   104 +
 .../hive/ql/txn/compactor/CompactorMR.java      |    19 +-
 .../hadoop/hive/ql/txn/compactor/Initiator.java |    10 +-
 .../hadoop/hive/ql/txn/compactor/Worker.java    |     2 +-
 .../hive/ql/udf/generic/GenericUDAFStd.java     |     2 +
 .../ql/udf/generic/GenericUDAFVariance.java     |     2 +
 .../hadoop/hive/ql/udf/generic/GenericUDF.java  |    14 +-
 .../hive/ql/udf/generic/GenericUDFAesBase.java  |   205 +
 .../ql/udf/generic/GenericUDFAesDecrypt.java    |    50 +
 .../ql/udf/generic/GenericUDFAesEncrypt.java    |    50 +
 .../hive/ql/udf/generic/GenericUDFBRound.java   |    68 +
 .../ql/udf/generic/GenericUDFBaseNumeric.java   |     4 +-
 .../hive/ql/udf/generic/GenericUDFBasePad.java  |     8 +-
 .../hive/ql/udf/generic/GenericUDFBridge.java   |     8 +-
 .../hive/ql/udf/generic/GenericUDFIn.java       |    14 +-
 .../hive/ql/udf/generic/GenericUDFMapKeys.java  |     6 +-
 .../hive/ql/udf/generic/GenericUDFNvl.java      |     2 +-
 .../hive/ql/udf/generic/GenericUDFOPAnd.java    |    63 +-
 .../hive/ql/udf/generic/GenericUDFOPEqual.java  |     4 +
 .../generic/GenericUDFOPEqualOrGreaterThan.java |     4 +
 .../generic/GenericUDFOPEqualOrLessThan.java    |     4 +
 .../ql/udf/generic/GenericUDFOPGreaterThan.java |     4 +
 .../ql/udf/generic/GenericUDFOPLessThan.java    |     4 +
 .../ql/udf/generic/GenericUDFOPNotEqual.java    |     5 +
 .../ql/udf/generic/GenericUDFOPNotNull.java     |     4 +
 .../hive/ql/udf/generic/GenericUDFOPNull.java   |     4 +
 .../hive/ql/udf/generic/GenericUDFOPOr.java     |    63 +-
 .../ql/udf/generic/GenericUDFParamUtils.java    |     8 +-
 .../hive/ql/udf/generic/GenericUDFRound.java    |    41 +-
 .../hive/ql/udf/generic/GenericUDFStruct.java   |    25 +-
 .../hadoop/hive/ql/udf/generic/RoundUtils.java  |    14 +
 .../main/resources/hive-exec-log4j.properties   |    77 -
 ql/src/main/resources/hive-exec-log4j2.xml      |   110 +
 ql/src/main/resources/tez-container-log4j2.xml  |    49 +
 .../apache/hadoop/hive/ql/TestTxnCommands.java  |   494 +
 .../apache/hadoop/hive/ql/TestTxnCommands2.java |   126 +-
 .../hadoop/hive/ql/exec/TestOperators.java      |    16 +
 .../exec/vector/TestVectorizationContext.java   |    93 +
 .../exec/vector/TestVectorizedRowBatchCtx.java  |     6 +-
 .../TestVectorLogicalExpressions.java           |   282 +
 .../apache/hadoop/hive/ql/io/TestAcidUtils.java |    27 +-
 .../hive/ql/io/orc/TestInputOutputFormat.java   |   137 +-
 .../hadoop/hive/ql/io/orc/TestOrcFile.java      |    11 +-
 .../hive/ql/io/orc/TestRecordReaderImpl.java    |    69 +-
 .../hive/ql/io/orc/TestVectorizedORCReader.java |    75 +-
 .../parquet/TestParquetRecordReaderWrapper.java |   173 +
 .../read/TestParquetFilterPredicate.java        |    72 +
 .../ql/io/sarg/TestConvertAstToSearchArg.java   |  2884 ++
 .../hive/ql/io/sarg/TestSearchArgumentImpl.java |  2891 +-
 .../hadoop/hive/ql/lib/TestRuleRegExp.java      |   118 +
 .../hive/ql/lockmgr/TestDbTxnManager.java       |    74 +-
 .../hive/ql/lockmgr/TestDbTxnManager2.java      |     2 +-
 .../hadoop/hive/ql/log/TestLog4j2Appenders.java |    95 +
 .../hadoop/hive/ql/metadata/StringAppender.java |   128 +
 .../hadoop/hive/ql/metadata/TestHive.java       |    50 +-
 .../TestSQL11ReservedKeyWordsNegative.java      |    32 +-
 .../TestSQL11ReservedKeyWordsPositive.java      |    23 +-
 .../parse/TestUpdateDeleteSemanticAnalyzer.java |    34 +-
 .../positive/TestTransactionStatement.java      |   102 +
 .../hive/ql/session/TestSessionState.java       |     2 +-
 .../udf/generic/TestGenericUDFAesDecrypt.java   |   233 +
 .../udf/generic/TestGenericUDFAesEncrypt.java   |   228 +
 .../ql/udf/generic/TestGenericUDFBRound.java    |   202 +
 .../clientnegative/alter_table_wrong_location.q |     4 +
 .../queries/clientnegative/ctas_noemptyfolder.q |    10 +
 .../queries/clientnegative/load_orc_negative3.q |     6 +
 .../clientnegative/mismatch_columns_insertion.q |     4 +
 .../queries/clientnegative/nvl_mismatch_type.q  |    20 +
 .../annotate_stats_deep_filters.q               |    67 +
 .../clientpositive/authorization_1_sql_std.q    |     4 +
 .../clientpositive/cast_tinyint_to_double.q     |     7 +
 ql/src/test/queries/clientpositive/cbo_rp_gby.q |    24 +
 .../queries/clientpositive/cbo_rp_gby_empty.q   |    30 +
 .../test/queries/clientpositive/cbo_rp_insert.q |    17 +
 .../test/queries/clientpositive/cbo_rp_join.q   |    65 +
 .../test/queries/clientpositive/cbo_rp_limit.q  |    16 +
 .../clientpositive/cbo_rp_outer_join_ppr.q      |    40 +
 .../queries/clientpositive/cbo_rp_semijoin.q    |    17 +
 .../clientpositive/cbo_rp_simple_select.q       |    56 +
 .../test/queries/clientpositive/cbo_rp_stats.q  |    10 +
 .../queries/clientpositive/cbo_rp_subq_exists.q |    67 +
 .../queries/clientpositive/cbo_rp_subq_in.q     |    56 +
 .../queries/clientpositive/cbo_rp_subq_not_in.q |    81 +
 .../queries/clientpositive/cbo_rp_udf_udaf.q    |    20 +
 .../test/queries/clientpositive/cbo_rp_union.q  |    14 +
 .../test/queries/clientpositive/cbo_rp_views.q  |    46 +
 .../queries/clientpositive/cbo_rp_windowing.q   |    21 +
 .../queries/clientpositive/cbo_rp_windowing_2.q |   439 +
 ql/src/test/queries/clientpositive/char_udf1.q  |     9 +-
 .../clientpositive/columnstats_quoting.q        |     8 +
 .../queries/clientpositive/compustat_avro.q     |     8 +-
 .../test/queries/clientpositive/create_like.q   |    12 +
 .../queries/clientpositive/dynamic_rdd_cache.q  |   111 +
 .../queries/clientpositive/flatten_and_or.q     |    19 +
 .../queries/clientpositive/folder_predicate.q   |    32 +
 ql/src/test/queries/clientpositive/groupby13.q  |    16 +
 .../queries/clientpositive/groupby1_map_nomap.q |     2 +
 ql/src/test/queries/clientpositive/groupby5.q   |     2 +
 ql/src/test/queries/clientpositive/groupby6.q   |     2 +
 .../clientpositive/groupby_grouping_id2.q       |     2 +
 .../clientpositive/groupby_ppr_multi_distinct.q |     2 +
 ql/src/test/queries/clientpositive/having2.q    |    27 +
 .../clientpositive/insertoverwrite_bucket.q     |    28 +
 ql/src/test/queries/clientpositive/keyword_2.q  |    14 +
 ql/src/test/queries/clientpositive/lineage3.q   |    22 +-
 .../clientpositive/load_dyn_part14_win.q        |    18 +-
 .../test/queries/clientpositive/load_orc_part.q |     4 +
 .../queries/clientpositive/macro_duplicate.q    |    10 +
 .../queries/clientpositive/multi_column_in.q    |    71 +
 .../test/queries/clientpositive/orc_file_dump.q |    57 +
 .../test/queries/clientpositive/orc_ppd_basic.q |   177 +
 .../clientpositive/parquet_ppd_boolean.q        |    35 +
 .../queries/clientpositive/parquet_ppd_char.q   |    76 +
 .../queries/clientpositive/parquet_ppd_date.q   |   101 +
 .../clientpositive/parquet_ppd_decimal.q        |   163 +
 .../clientpositive/parquet_ppd_partition.q      |     9 +
 .../clientpositive/parquet_ppd_timestamp.q      |    98 +
 .../clientpositive/parquet_ppd_varchar.q        |    76 +
 .../clientpositive/parquet_predicate_pushdown.q |   292 +
 .../test/queries/clientpositive/pointlookup.q   |    59 +
 .../test/queries/clientpositive/pointlookup2.q  |    51 +
 .../test/queries/clientpositive/pointlookup3.q  |    41 +
 .../queries/clientpositive/ptfgroupbyjoin.q     |    61 +
 .../queries/clientpositive/selectDistinctStar.q |     2 +
 .../clientpositive/smb_join_partition_key.q     |    35 +
 .../spark_dynamic_partition_pruning.q           |   180 +
 .../spark_dynamic_partition_pruning_2.q         |   118 +
 ...spark_vectorized_dynamic_partition_pruning.q |   192 +
 .../test/queries/clientpositive/stats_ppr_all.q |    24 +
 ql/src/test/queries/clientpositive/structin.q   |    23 +
 .../clientpositive/tez_dynpart_hashjoin_1.q     |   101 +
 .../clientpositive/tez_dynpart_hashjoin_2.q     |    83 +
 .../tez_vector_dynpart_hashjoin_1.q             |   102 +
 .../tez_vector_dynpart_hashjoin_2.q             |    84 +
 .../queries/clientpositive/udf_aes_decrypt.q    |    21 +
 .../queries/clientpositive/udf_aes_encrypt.q    |    21 +
 ql/src/test/queries/clientpositive/udf_bround.q |    44 +
 .../clientpositive/udf_from_utc_timestamp.q     |    30 +-
 .../queries/clientpositive/udf_percentile.q     |     2 +
 .../clientpositive/udf_to_utc_timestamp.q       |    30 +-
 .../clientpositive/unionall_unbalancedppd.q     |   195 +
 .../test/queries/clientpositive/varchar_udf1.q  |     6 +-
 .../test/queries/clientpositive/vector_acid3.q  |    17 +
 .../vector_aggregate_without_gby.q              |    14 +
 .../test/queries/clientpositive/vector_bround.q |    14 +
 .../clientpositive/vector_cast_constant.q       |     4 +-
 .../queries/clientpositive/vectorized_casts.q   |     6 +
 .../alter_table_wrong_location.q.out            |     9 +
 .../clientnegative/char_pad_convert_fail0.q.out |     2 +-
 .../clientnegative/char_pad_convert_fail1.q.out |     2 +-
 .../clientnegative/char_pad_convert_fail3.q.out |     2 +-
 .../clientnegative/ctas_noemptyfolder.q.out     |    19 +
 .../clientnegative/exchange_partition.q.out     |     2 +-
 .../clientnegative/load_orc_negative3.q.out     |    25 +
 .../mismatch_columns_insertion.q.out            |     9 +
 .../clientnegative/nvl_mismatch_type.q.out      |    43 +
 .../groupby2_map_skew_multi_distinct.q.out      |     9 +
 .../spark/groupby2_multi_distinct.q.out         |     9 +
 .../groupby3_map_skew_multi_distinct.q.out      |     9 +
 .../spark/groupby3_multi_distinct.q.out         |     9 +
 .../spark/groupby_grouping_sets7.q.out          |     9 +
 .../annotate_stats_deep_filters.q.out           |   244 +
 .../clientpositive/annotate_stats_filter.q.out  |    10 +-
 .../authorization_1_sql_std.q.out               |    11 +
 .../clientpositive/cast_tinyint_to_double.q.out |    38 +
 .../results/clientpositive/cbo_rp_gby.q.out     |   124 +
 .../clientpositive/cbo_rp_gby_empty.q.out       |    77 +
 .../results/clientpositive/cbo_rp_insert.q.out  |    89 +
 .../results/clientpositive/cbo_rp_join.q.out    | 15028 ++++++++
 .../results/clientpositive/cbo_rp_join0.q.out   |     4 +-
 .../results/clientpositive/cbo_rp_limit.q.out   |    90 +
 .../cbo_rp_outer_join_ppr.q.java1.7.out         |   855 +
 .../clientpositive/cbo_rp_semijoin.q.out        |   440 +
 .../clientpositive/cbo_rp_simple_select.q.out   |   755 +
 .../results/clientpositive/cbo_rp_stats.q.out   |    14 +
 .../clientpositive/cbo_rp_subq_exists.q.out     |   297 +
 .../results/clientpositive/cbo_rp_subq_in.q.out |   151 +
 .../clientpositive/cbo_rp_subq_not_in.q.out     |   365 +
 .../clientpositive/cbo_rp_udf_udaf.q.out        |   125 +
 .../results/clientpositive/cbo_rp_union.q.out   |   920 +
 .../results/clientpositive/cbo_rp_views.q.out   |   237 +
 .../clientpositive/cbo_rp_windowing.q.out       |   293 +
 .../clientpositive/cbo_rp_windowing_2.q.out     |  2338 ++
 .../clientpositive/char_udf1.q.java1.7.out      |    22 +-
 .../clientpositive/columnstats_quoting.q.out    |   114 +
 .../results/clientpositive/compustat_avro.q.out |     8 +-
 .../clientpositive/constprog_partitioner.q.out  |    30 +-
 .../clientpositive/convert_enum_to_string.q.out |     9 +-
 .../clientpositive/correlationoptimizer10.q.out |    48 +-
 .../clientpositive/correlationoptimizer5.q.out  |     6 +-
 .../results/clientpositive/create_like.q.out    |    66 +
 .../results/clientpositive/decimal_udf.q.out    |    18 +-
 .../clientpositive/dynamic_rdd_cache.q.out      |  1428 +
 .../clientpositive/exchange_partition.q.out     |     4 +-
 .../clientpositive/exchange_partition2.q.out    |     4 +-
 .../clientpositive/exchange_partition3.q.out    |     4 +-
 .../clientpositive/filter_cond_pushdown.q.out   |    32 +-
 .../clientpositive/filter_join_breaktask.q.out  |    12 +-
 .../results/clientpositive/flatten_and_or.q.out |    66 +
 .../test/results/clientpositive/fold_when.q.out |    16 +-
 .../clientpositive/folder_predicate.q.out       |   368 +
 .../test/results/clientpositive/groupby13.q.out |    86 +
 .../clientpositive/groupby1_map_nomap.q.out     |     8 +-
 .../test/results/clientpositive/groupby5.q.out  |     8 +-
 .../test/results/clientpositive/groupby6.q.out  |     8 +-
 .../clientpositive/groupby_duplicate_key.q.out  |    16 +-
 .../clientpositive/groupby_grouping_id2.q.out   |    28 +-
 .../groupby_multi_single_reducer2.q.out         |     2 +-
 .../groupby_multi_single_reducer3.q.out         |    12 +-
 .../groupby_ppr_multi_distinct.q.out            |     8 +-
 .../clientpositive/groupby_sort_1_23.q.out      |    56 +-
 .../clientpositive/groupby_sort_skew_1_23.q.out |    56 +-
 .../test/results/clientpositive/having2.q.out   |   353 +
 .../clientpositive/infer_const_type.q.out       |     7 +-
 .../clientpositive/input_testxpath2.q.out       |     2 +-
 .../clientpositive/input_testxpath4.q.out       |     2 +-
 .../clientpositive/insertoverwrite_bucket.q.out |   104 +
 ql/src/test/results/clientpositive/join32.q.out |     2 +-
 .../clientpositive/join32_lessSize.q.out        |     6 +-
 ql/src/test/results/clientpositive/join33.q.out |     2 +-
 .../join_cond_pushdown_unqual4.q.out            |     2 +-
 .../test/results/clientpositive/keyword_2.q.out |    51 +
 .../test/results/clientpositive/lineage2.q.out  |  2296 +-
 .../test/results/clientpositive/lineage3.q.out  |  2290 +-
 .../list_bucket_query_oneskew_3.q.out           |     6 +-
 .../clientpositive/load_dyn_part14_win.q.out    |   167 +-
 .../results/clientpositive/load_orc_part.q.out  |    18 +
 .../clientpositive/macro_duplicate.q.out        |    56 +
 .../clientpositive/multi_column_in.q.out        |   410 +
 .../results/clientpositive/multi_insert.q.out   |     8 +-
 .../clientpositive/multi_insert_gby.q.out       |     2 +-
 .../multi_insert_lateral_view.q.out             |     4 +-
 ...i_insert_move_tasks_share_dependencies.q.out |   360 +-
 .../test/results/clientpositive/null_cast.q.out |     6 +-
 .../results/clientpositive/orc_file_dump.q.out  |   447 +
 .../clientpositive/orc_predicate_pushdown.q.out |    36 +-
 .../clientpositive/parquet_ppd_boolean.q.out    |   200 +
 .../clientpositive/parquet_ppd_char.q.out       |   220 +
 .../clientpositive/parquet_ppd_date.q.out       |   301 +
 .../clientpositive/parquet_ppd_decimal.q.out    |   490 +
 .../clientpositive/parquet_ppd_partition.q.out  |    47 +
 .../clientpositive/parquet_ppd_timestamp.q.out  |   292 +
 .../clientpositive/parquet_ppd_varchar.q.out    |   220 +
 .../parquet_predicate_pushdown.q.out            |  1270 +
 .../results/clientpositive/pointlookup.q.out    |   198 +
 .../results/clientpositive/pointlookup2.q.out   |  1647 +
 .../results/clientpositive/pointlookup3.q.out   |  1394 +
 .../results/clientpositive/ppd_gby_join.q.out   |     4 +-
 .../test/results/clientpositive/ppd_join.q.out  |     4 +-
 .../test/results/clientpositive/ppd_join2.q.out |    22 +-
 .../test/results/clientpositive/ppd_join3.q.out |    52 +-
 .../clientpositive/ppd_outer_join4.q.out        |     2 +-
 ql/src/test/results/clientpositive/ptf.q.out    |    27 +-
 .../results/clientpositive/ptfgroupbyjoin.q.out |   519 +
 .../clientpositive/rand_partitionpruner3.q.out  |    12 +-
 .../clientpositive/selectDistinctStar.q.out     |     8 +-
 .../clientpositive/select_unquote_not.q.out     |     8 +-
 .../results/clientpositive/show_functions.q.out |     3 +
 .../clientpositive/smb_join_partition_key.q.out |   128 +
 .../clientpositive/spark/auto_join18.q.out      |    24 +-
 .../clientpositive/spark/auto_join30.q.out      |    51 +-
 .../clientpositive/spark/auto_join32.q.out      |    24 +-
 .../spark/auto_smb_mapjoin_14.q.out             |    30 +-
 .../spark/auto_sortmerge_join_10.q.out          |    23 +-
 .../results/clientpositive/spark/bucket2.q.out  |     3 -
 .../results/clientpositive/spark/bucket3.q.out  |     3 -
 .../results/clientpositive/spark/bucket4.q.out  |     3 -
 .../spark/column_access_stats.q.out             |     4 -
 .../spark/constprog_partitioner.q.out           |    30 +-
 .../spark/dynamic_rdd_cache.q.out               |  1073 +
 .../spark/filter_join_breaktask.q.out           |    12 +-
 .../clientpositive/spark/groupby10.q.out        |    32 +-
 .../clientpositive/spark/groupby1_map.q.out     |   412 +
 .../spark/groupby1_map_nomap.q.out              |   412 +
 .../spark/groupby1_map_skew.q.out               |   427 +
 .../clientpositive/spark/groupby1_noskew.q.out  |   406 +
 .../clientpositive/spark/groupby2_map.q.out     |   118 +
 .../spark/groupby2_map_multi_distinct.q.out     |   232 +
 .../spark/groupby2_map_skew.q.out               |   129 +
 .../clientpositive/spark/groupby2_noskew.q.out  |   111 +
 .../spark/groupby2_noskew_multi_distinct.q.out  |   114 +
 .../clientpositive/spark/groupby4_map.q.out     |    93 +
 .../spark/groupby4_map_skew.q.out               |    93 +
 .../clientpositive/spark/groupby4_noskew.q.out  |   104 +
 .../results/clientpositive/spark/groupby5.q.out |   433 +
 .../clientpositive/spark/groupby5_map.q.out     |    95 +
 .../spark/groupby5_map_skew.q.out               |    95 +
 .../clientpositive/spark/groupby5_noskew.q.out  |   418 +
 .../results/clientpositive/spark/groupby6.q.out |   117 +
 .../clientpositive/spark/groupby6_map.q.out     |   109 +
 .../spark/groupby6_map_skew.q.out               |   122 +
 .../clientpositive/spark/groupby6_noskew.q.out  |   104 +
 .../clientpositive/spark/groupby7_map.q.out     |    23 +-
 .../spark/groupby7_map_skew.q.out               |    38 +-
 .../clientpositive/spark/groupby7_noskew.q.out  |    17 +-
 .../groupby7_noskew_multi_single_reducer.q.out  |    18 +-
 .../results/clientpositive/spark/groupby8.q.out |    62 +-
 .../spark/groupby8_map_skew.q.out               |    37 +-
 .../spark/groupby_grouping_id2.q.out            |   234 +
 .../spark/groupby_multi_single_reducer2.q.out   |     2 +-
 .../spark/groupby_multi_single_reducer3.q.out   |    12 +-
 .../clientpositive/spark/groupby_position.q.out |    37 +-
 .../spark/groupby_ppr_multi_distinct.q.out      |   350 +
 .../spark/groupby_resolution.q.out              |   796 +
 .../spark/groupby_sort_1_23.q.out               |    90 +-
 .../spark/groupby_sort_skew_1_23.q.out          |    90 +-
 .../clientpositive/spark/insert_into3.q.out     |    33 +-
 .../results/clientpositive/spark/join18.q.out   |    24 +-
 .../results/clientpositive/spark/join22.q.out   |    19 +-
 .../results/clientpositive/spark/join32.q.out   |     2 +-
 .../clientpositive/spark/join32_lessSize.q.out  |     6 +-
 .../results/clientpositive/spark/join33.q.out   |     2 +-
 .../spark/join_cond_pushdown_unqual4.q.out      |     2 +-
 .../spark/limit_partition_metadataonly.q.out    |     2 -
 .../clientpositive/spark/limit_pushdown.q.out   |    31 +-
 .../spark/list_bucket_dml_2.q.java1.7.out       |     3 -
 .../clientpositive/spark/load_dyn_part14.q.out  |    30 +-
 .../clientpositive/spark/multi_insert.q.out     |     8 +-
 .../clientpositive/spark/multi_insert_gby.q.out |     2 +-
 .../spark/multi_insert_lateral_view.q.out       |     4 +-
 ...i_insert_move_tasks_share_dependencies.q.out |   536 +-
 .../clientpositive/spark/nullgroup.q.out        |   265 +
 .../clientpositive/spark/nullgroup2.q.out       |   300 +
 .../clientpositive/spark/nullgroup4.q.out       |   292 +
 .../spark/nullgroup4_multi_distinct.q.out       |   133 +
 .../spark/optimize_nullscan.q.out               |     3 -
 .../test/results/clientpositive/spark/pcr.q.out |     6 -
 .../clientpositive/spark/ppd_gby_join.q.out     |     4 +-
 .../results/clientpositive/spark/ppd_join.q.out |     4 +-
 .../clientpositive/spark/ppd_join2.q.out        |    22 +-
 .../clientpositive/spark/ppd_join3.q.out        |    52 +-
 .../clientpositive/spark/ppd_outer_join4.q.out  |     2 +-
 .../test/results/clientpositive/spark/ptf.q.out |    17 +-
 .../results/clientpositive/spark/sample3.q.out  |     3 -
 .../results/clientpositive/spark/sample9.q.out  |     3 -
 .../clientpositive/spark/skewjoinopt11.q.out    |    60 +-
 .../clientpositive/spark/skewjoinopt9.q.out     |    20 +-
 .../clientpositive/spark/smb_mapjoin_11.q.out   |     6 -
 .../spark/spark_dynamic_partition_pruning.q.out |  5573 +++
 .../spark_dynamic_partition_pruning_2.q.out     |  1015 +
 ...k_vectorized_dynamic_partition_pruning.q.out |  5822 +++
 .../clientpositive/spark/subquery_exists.q.out  |    12 +-
 .../clientpositive/spark/subquery_in.q.out      |    36 +-
 .../spark/subquery_multiinsert.q.java1.7.out    |    16 +-
 .../clientpositive/spark/temp_table_gb1.q.out   |    67 +
 .../clientpositive/spark/udaf_collect_set.q.out |   212 +
 .../clientpositive/spark/udf_example_add.q.out  |     3 -
 .../clientpositive/spark/udf_in_file.q.out      |     3 -
 .../results/clientpositive/spark/udf_max.q.out  |    62 +
 .../results/clientpositive/spark/udf_min.q.out  |    62 +
 .../clientpositive/spark/udf_percentile.q.out   |   450 +
 .../results/clientpositive/spark/union10.q.out  |    36 +-
 .../results/clientpositive/spark/union11.q.out  |    38 +-
 .../results/clientpositive/spark/union15.q.out  |    23 +-
 .../results/clientpositive/spark/union16.q.out  |   450 +-
 .../results/clientpositive/spark/union2.q.out   |    36 +-
 .../results/clientpositive/spark/union20.q.out  |    18 +-
 .../results/clientpositive/spark/union25.q.out  |    21 +-
 .../results/clientpositive/spark/union28.q.out  |    21 +-
 .../results/clientpositive/spark/union3.q.out   |    45 +-
 .../results/clientpositive/spark/union30.q.out  |    21 +-
 .../results/clientpositive/spark/union4.q.out   |    18 +-
 .../results/clientpositive/spark/union5.q.out   |    20 +-
 .../results/clientpositive/spark/union9.q.out   |    54 +-
 .../clientpositive/spark/union_remove_1.q.out   |    23 +-
 .../clientpositive/spark/union_remove_15.q.out  |    23 +-
 .../clientpositive/spark/union_remove_16.q.out  |    23 +-
 .../clientpositive/spark/union_remove_18.q.out  |    23 +-
 .../clientpositive/spark/union_remove_19.q.out  |    75 +-
 .../clientpositive/spark/union_remove_20.q.out  |    23 +-
 .../clientpositive/spark/union_remove_21.q.out  |    21 +-
 .../clientpositive/spark/union_remove_22.q.out  |    46 +-
 .../clientpositive/spark/union_remove_24.q.out  |    23 +-
 .../clientpositive/spark/union_remove_25.q.out  |    59 +-
 .../clientpositive/spark/union_remove_4.q.out   |    23 +-
 .../clientpositive/spark/union_remove_6.q.out   |    23 +-
 .../spark/union_remove_6_subq.q.out             |    84 +-
 .../clientpositive/spark/union_remove_7.q.out   |    23 +-
 .../clientpositive/spark/union_top_level.q.out  |    59 +-
 .../clientpositive/spark/union_view.q.out       |    33 +-
 .../spark/vector_cast_constant.q.java1.7.out    |    41 +-
 .../spark/vector_count_distinct.q.out           |    31 +-
 .../spark/vector_decimal_mapjoin.q.out          |     1 +
 .../clientpositive/spark/vector_elt.q.out       |     7 -
 .../spark/vector_left_outer_join.q.out          |     2 +
 .../spark/vector_mapjoin_reduce.q.out           |    23 +-
 .../spark/vector_string_concat.q.out            |     3 -
 .../clientpositive/spark/vectorization_0.q.out  |     2 +-
 .../clientpositive/spark/vectorization_13.q.out |     4 +-
 .../clientpositive/spark/vectorization_15.q.out |     2 +-
 .../clientpositive/spark/vectorization_17.q.out |    12 +-
 .../spark/vectorization_decimal_date.q.out      |     4 -
 .../spark/vectorization_div0.q.out              |     3 -
 .../spark/vectorization_short_regress.q.out     |    40 +-
 .../clientpositive/spark/vectorized_case.q.out  |     5 +-
 .../spark/vectorized_mapjoin.q.out              |     1 +
 .../spark/vectorized_math_funcs.q.out           |     3 -
 .../spark/vectorized_nested_mapjoin.q.out       |     2 +
 .../clientpositive/spark/vectorized_ptf.q.out   |    21 +-
 .../spark/vectorized_string_funcs.q.out         |     3 -
 .../results/clientpositive/stats_ppr_all.q.out  |   284 +
 .../test/results/clientpositive/structin.q.out  |   110 +
 .../clientpositive/subquery_exists.q.out        |    12 +-
 .../results/clientpositive/subquery_in.q.out    |    36 +-
 .../clientpositive/subquery_in_having.q.out     |    50 +-
 .../subquery_multiinsert.q.java1.7.out          |    16 +-
 .../subquery_unqualcolumnrefs.q.out             |    26 +-
 .../results/clientpositive/subquery_views.q.out |    40 +-
 .../clientpositive/tez/explainuser_1.q.out      |   346 +-
 .../clientpositive/tez/explainuser_2.q.out      |    12 +-
 .../tez/filter_join_breaktask.q.out             |    12 +-
 .../clientpositive/tez/orc_ppd_basic.q.out      |   701 +
 .../test/results/clientpositive/tez/ptf.q.out   |    15 +-
 .../clientpositive/tez/selectDistinctStar.q.out |     8 +-
 .../clientpositive/tez/subquery_exists.q.out    |    12 +-
 .../clientpositive/tez/subquery_in.q.out        |    36 +-
 .../tez/tez_dynpart_hashjoin_1.q.out            |   791 +
 .../tez/tez_dynpart_hashjoin_2.q.out            |   564 +
 .../tez/tez_vector_dynpart_hashjoin_1.q.out     |   804 +
 .../tez/tez_vector_dynpart_hashjoin_2.q.out     |   570 +
 .../results/clientpositive/tez/union2.q.out     |    28 +-
 .../results/clientpositive/tez/union9.q.out     |    40 +-
 .../clientpositive/tez/vector_acid3.q.out       |    31 +
 .../tez/vector_cast_constant.q.java1.7.out      |    21 +-
 .../tez/vector_char_mapjoin1.q.out              |     1 +
 .../clientpositive/tez/vector_decimal_udf.q.out |    24 +-
 .../clientpositive/tez/vector_inner_join.q.out  |    14 +-
 .../tez/vector_mapjoin_reduce.q.out             |    24 +-
 .../tez/vector_mr_diff_schema_alias.q.out       |     2 +-
 .../tez/vector_null_projection.q.out            |     4 +
 .../tez/vector_varchar_mapjoin1.q.out           |     1 +
 .../clientpositive/tez/vectorization_0.q.out    |     2 +-
 .../clientpositive/tez/vectorization_13.q.out   |     4 +-
 .../clientpositive/tez/vectorization_15.q.out   |     2 +-
 .../clientpositive/tez/vectorization_17.q.out   |    12 +-
 .../clientpositive/tez/vectorization_7.q.out    |     4 +-
 .../clientpositive/tez/vectorization_8.q.out    |     4 +-
 .../tez/vectorization_short_regress.q.out       |    40 +-
 .../clientpositive/tez/vectorized_case.q.out    |     2 +-
 .../clientpositive/tez/vectorized_casts.q.out   |    99 +-
 .../clientpositive/tez/vectorized_ptf.q.out     |    19 +-
 .../clientpositive/udf_aes_decrypt.q.out        |    79 +
 .../clientpositive/udf_aes_encrypt.q.out        |    79 +
 .../results/clientpositive/udf_bround.q.out     |   119 +
 .../clientpositive/udf_from_utc_timestamp.q.out |    66 +-
 .../results/clientpositive/udf_inline.q.out     |     8 +-
 .../clientpositive/udf_isnull_isnotnull.q.out   |     2 +-
 ql/src/test/results/clientpositive/udf_or.q.out |     4 +-
 .../results/clientpositive/udf_percentile.q.out |   104 +-
 .../test/results/clientpositive/udf_size.q.out  |     2 +-
 .../results/clientpositive/udf_struct.q.out     |     2 +-
 .../clientpositive/udf_to_utc_timestamp.q.out   |    66 +-
 .../test/results/clientpositive/udf_union.q.out |     2 +-
 .../test/results/clientpositive/union16.q.out   |   354 +-
 ql/src/test/results/clientpositive/union2.q.out |    32 +-
 ql/src/test/results/clientpositive/union9.q.out |    46 +-
 .../clientpositive/union_remove_6_subq.q.out    |    34 +-
 .../results/clientpositive/union_view.q.out     |    24 +
 .../clientpositive/unionall_unbalancedppd.q.out |   657 +
 .../clientpositive/varchar_udf1.q.java1.7.out   |    12 +-
 .../results/clientpositive/vector_acid3.q.out   |    31 +
 .../vector_aggregate_without_gby.q.out          |    96 +
 .../results/clientpositive/vector_bround.q.out  |    86 +
 .../vector_cast_constant.q.java1.7.out          |    32 +-
 .../clientpositive/vector_char_mapjoin1.q.out   |     1 +
 .../clientpositive/vector_decimal_udf.q.out     |    24 +-
 .../clientpositive/vector_inner_join.q.out      |    12 +-
 .../clientpositive/vector_mapjoin_reduce.q.out  |    26 +-
 .../vector_mr_diff_schema_alias.q.out           |     2 +-
 .../clientpositive/vector_null_projection.q.out |     4 +
 .../vector_varchar_mapjoin1.q.out               |     1 +
 .../clientpositive/vectorization_0.q.out        |     2 +-
 .../clientpositive/vectorization_13.q.out       |     4 +-
 .../clientpositive/vectorization_15.q.out       |     2 +-
 .../clientpositive/vectorization_17.q.out       |    12 +-
 .../clientpositive/vectorization_7.q.out        |     4 +-
 .../clientpositive/vectorization_8.q.out        |     4 +-
 .../vectorization_short_regress.q.out           |    40 +-
 .../clientpositive/vectorized_case.q.out        |     2 +-
 .../clientpositive/vectorized_casts.q.out       |    66 +-
 .../results/clientpositive/vectorized_ptf.q.out |    67 +-
 .../gen/thrift/gen-cpp/complex_constants.cpp    |     2 +-
 .../src/gen/thrift/gen-cpp/complex_constants.h  |     2 +-
 serde/src/gen/thrift/gen-cpp/complex_types.cpp  |   442 +-
 serde/src/gen/thrift/gen-cpp/complex_types.h    |   174 +-
 .../gen/thrift/gen-cpp/megastruct_constants.cpp |     2 +-
 .../gen/thrift/gen-cpp/megastruct_constants.h   |     2 +-
 .../src/gen/thrift/gen-cpp/megastruct_types.cpp |   585 +-
 serde/src/gen/thrift/gen-cpp/megastruct_types.h |   175 +-
 .../src/gen/thrift/gen-cpp/serde_constants.cpp  |     2 +-
 serde/src/gen/thrift/gen-cpp/serde_constants.h  |     2 +-
 serde/src/gen/thrift/gen-cpp/serde_types.cpp    |     5 +-
 serde/src/gen/thrift/gen-cpp/serde_types.h      |     5 +-
 .../gen/thrift/gen-cpp/testthrift_constants.cpp |     2 +-
 .../gen/thrift/gen-cpp/testthrift_constants.h   |     2 +-
 .../src/gen/thrift/gen-cpp/testthrift_types.cpp |    95 +-
 serde/src/gen/thrift/gen-cpp/testthrift_types.h |    45 +-
 .../hadoop/hive/serde/serdeConstants.java       |     7 +-
 .../hadoop/hive/serde/test/InnerStruct.java     |    24 +-
 .../hadoop/hive/serde/test/ThriftTestObj.java   |    62 +-
 .../hadoop/hive/serde2/thrift/test/Complex.java |   279 +-
 .../hive/serde2/thrift/test/IntString.java      |    40 +-
 .../hive/serde2/thrift/test/MegaStruct.java     |   521 +-
 .../hive/serde2/thrift/test/MiniStruct.java     |    38 +-
 .../hadoop/hive/serde2/thrift/test/MyEnum.java  |     2 +-
 .../hive/serde2/thrift/test/PropValueUnion.java |    60 +-
 .../hive/serde2/thrift/test/SetIntString.java   |    54 +-
 serde/src/gen/thrift/gen-php/Types.php          |    15 +-
 .../org/apache/hadoop/hive/serde/Types.php      |   373 +-
 .../src/gen/thrift/gen-py/complex/constants.py  |     2 +-
 serde/src/gen/thrift/gen-py/complex/ttypes.py   |    50 +-
 .../gen/thrift/gen-py/megastruct/constants.py   |     2 +-
 .../src/gen/thrift/gen-py/megastruct/ttypes.py  |    44 +-
 .../org_apache_hadoop_hive_serde/constants.py   |     2 +-
 .../org_apache_hadoop_hive_serde/ttypes.py      |     2 +-
 .../gen/thrift/gen-py/testthrift/constants.py   |     2 +-
 .../src/gen/thrift/gen-py/testthrift/ttypes.py  |    14 +-
 .../src/gen/thrift/gen-rb/complex_constants.rb  |     2 +-
 serde/src/gen/thrift/gen-rb/complex_types.rb    |     2 +-
 .../gen/thrift/gen-rb/megastruct_constants.rb   |     2 +-
 serde/src/gen/thrift/gen-rb/megastruct_types.rb |     2 +-
 serde/src/gen/thrift/gen-rb/serde_constants.rb  |     2 +-
 serde/src/gen/thrift/gen-rb/serde_types.rb      |     2 +-
 .../gen/thrift/gen-rb/testthrift_constants.rb   |     2 +-
 serde/src/gen/thrift/gen-rb/testthrift_types.rb |     2 +-
 .../hadoop/hive/ql/io/sarg/ExpressionTree.java  |   157 -
 .../hadoop/hive/ql/io/sarg/PredicateLeaf.java   |    87 -
 .../hadoop/hive/ql/io/sarg/SearchArgument.java  |   278 -
 .../apache/hadoop/hive/serde2/SerDeUtils.java   |    14 +-
 .../apache/hadoop/hive/serde2/WriteBuffers.java |     2 +-
 .../serde2/avro/AvroLazyObjectInspector.java    |    19 +-
 .../hadoop/hive/serde2/avro/InstanceCache.java  |    26 +-
 .../hadoop/hive/serde2/io/DateWritable.java     |    14 -
 .../hive/serde2/io/HiveDecimalWritable.java     |   185 -
 .../hive/serde2/lazy/LazyHiveDecimal.java       |     3 +-
 .../lazy/fast/LazySimpleDeserializeRead.java    |    16 +-
 .../hive/serde2/lazybinary/LazyBinaryDate.java  |     4 +-
 .../lazybinary/LazyBinaryHiveDecimal.java       |     2 +-
 .../hive/serde2/lazybinary/LazyBinarySerDe.java |    37 +-
 .../fast/LazyBinaryDeserializeRead.java         |     6 +-
 .../fast/LazyBinarySerializeWrite.java          |     5 +-
 .../ObjectInspectorConverters.java              |    31 +-
 .../objectinspector/ObjectInspectorFactory.java |    68 +-
 .../objectinspector/ObjectInspectorUtils.java   |    19 +
 .../ReflectionStructObjectInspector.java        |    60 +-
 .../SettableUnionObjectInspector.java           |     4 +-
 .../StandardConstantStructObjectInspector.java  |    51 +
 .../StandardStructObjectInspector.java          |     7 +-
 .../StandardUnionObjectInspector.java           |     4 +-
 .../ThriftUnionObjectInspector.java             |    44 +-
 .../hive/serde2/typeinfo/HiveDecimalUtils.java  |    35 +-
 .../hive/serde2/typeinfo/TypeInfoUtils.java     |     2 +-
 .../avro/TestAvroLazyObjectInspector.java       |    59 +
 .../hive/serde2/avro/TestInstanceCache.java     |    40 +-
 .../TestObjectInspectorConverters.java          |    89 +-
 .../TestReflectionObjectInspectors.java         |    71 +-
 .../TestThriftObjectInspectors.java             |    10 +-
 service/src/gen/thrift/gen-cpp/TCLIService.cpp  |   458 +-
 service/src/gen/thrift/gen-cpp/TCLIService.h    |   821 +-
 .../thrift/gen-cpp/TCLIService_constants.cpp    |     2 +-
 .../gen/thrift/gen-cpp/TCLIService_constants.h  |     2 +-
 .../gen/thrift/gen-cpp/TCLIService_types.cpp    |  3250 +-
 .../src/gen/thrift/gen-cpp/TCLIService_types.h  |  1482 +-
 service/src/gen/thrift/gen-cpp/ThriftHive.cpp   |   286 +-
 service/src/gen/thrift/gen-cpp/ThriftHive.h     |   389 +-
 .../thrift/gen-cpp/hive_service_constants.cpp   |     2 +-
 .../gen/thrift/gen-cpp/hive_service_constants.h |     2 +-
 .../gen/thrift/gen-cpp/hive_service_types.cpp   |   110 +-
 .../src/gen/thrift/gen-cpp/hive_service_types.h |    75 +-
 .../hadoop/hive/service/HiveClusterStatus.java  |    68 +-
 .../hive/service/HiveServerException.java       |    40 +-
 .../hadoop/hive/service/JobTrackerState.java    |     2 +-
 .../apache/hadoop/hive/service/ThriftHive.java  |   914 +-
 .../service/cli/thrift/TArrayTypeEntry.java     |    24 +-
 .../hive/service/cli/thrift/TBinaryColumn.java  |    64 +-
 .../hive/service/cli/thrift/TBoolColumn.java    |    62 +-
 .../hive/service/cli/thrift/TBoolValue.java     |    26 +-
 .../hive/service/cli/thrift/TByteColumn.java    |    62 +-
 .../hive/service/cli/thrift/TByteValue.java     |    26 +-
 .../hive/service/cli/thrift/TCLIService.java    |  1734 +-
 .../cli/thrift/TCLIServiceConstants.java        |     7 +-
 .../cli/thrift/TCancelDelegationTokenReq.java   |    32 +-
 .../cli/thrift/TCancelDelegationTokenResp.java  |    24 +-
 .../service/cli/thrift/TCancelOperationReq.java |    24 +-
 .../cli/thrift/TCancelOperationResp.java        |    24 +-
 .../service/cli/thrift/TCloseOperationReq.java  |    24 +-
 .../service/cli/thrift/TCloseOperationResp.java |    24 +-
 .../service/cli/thrift/TCloseSessionReq.java    |    24 +-
 .../service/cli/thrift/TCloseSessionResp.java   |    24 +-
 .../apache/hive/service/cli/thrift/TColumn.java |    20 +-
 .../hive/service/cli/thrift/TColumnDesc.java    |    50 +-
 .../hive/service/cli/thrift/TColumnValue.java   |    20 +-
 .../hive/service/cli/thrift/TDoubleColumn.java  |    62 +-
 .../hive/service/cli/thrift/TDoubleValue.java   |    26 +-
 .../cli/thrift/TExecuteStatementReq.java        |    87 +-
 .../cli/thrift/TExecuteStatementResp.java       |    34 +-
 .../service/cli/thrift/TFetchOrientation.java   |     2 +-
 .../service/cli/thrift/TFetchResultsReq.java    |    54 +-
 .../service/cli/thrift/TFetchResultsResp.java   |    42 +-
 .../service/cli/thrift/TGetCatalogsReq.java     |    24 +-
 .../service/cli/thrift/TGetCatalogsResp.java    |    34 +-
 .../hive/service/cli/thrift/TGetColumnsReq.java |    58 +-
 .../service/cli/thrift/TGetColumnsResp.java     |    34 +-
 .../cli/thrift/TGetDelegationTokenReq.java      |    40 +-
 .../cli/thrift/TGetDelegationTokenResp.java     |    34 +-
 .../service/cli/thrift/TGetFunctionsReq.java    |    50 +-
 .../service/cli/thrift/TGetFunctionsResp.java   |    34 +-
 .../hive/service/cli/thrift/TGetInfoReq.java    |    36 +-
 .../hive/service/cli/thrift/TGetInfoResp.java   |    32 +-
 .../hive/service/cli/thrift/TGetInfoType.java   |     2 +-
 .../hive/service/cli/thrift/TGetInfoValue.java  |    20 +-
 .../cli/thrift/TGetOperationStatusReq.java      |    24 +-
 .../cli/thrift/TGetOperationStatusResp.java     |    62 +-
 .../cli/thrift/TGetResultSetMetadataReq.java    |    24 +-
 .../cli/thrift/TGetResultSetMetadataResp.java   |    34 +-
 .../hive/service/cli/thrift/TGetSchemasReq.java |    42 +-
 .../service/cli/thrift/TGetSchemasResp.java     |    34 +-
 .../service/cli/thrift/TGetTableTypesReq.java   |    24 +-
 .../service/cli/thrift/TGetTableTypesResp.java  |    34 +-
 .../hive/service/cli/thrift/TGetTablesReq.java  |    79 +-
 .../hive/service/cli/thrift/TGetTablesResp.java |    34 +-
 .../service/cli/thrift/TGetTypeInfoReq.java     |    24 +-
 .../service/cli/thrift/TGetTypeInfoResp.java    |    34 +-
 .../service/cli/thrift/THandleIdentifier.java   |    50 +-
 .../hive/service/cli/thrift/TI16Column.java     |    62 +-
 .../hive/service/cli/thrift/TI16Value.java      |    26 +-
 .../hive/service/cli/thrift/TI32Column.java     |    62 +-
 .../hive/service/cli/thrift/TI32Value.java      |    26 +-
 .../hive/service/cli/thrift/TI64Column.java     |    62 +-
 .../hive/service/cli/thrift/TI64Value.java      |    26 +-
 .../hive/service/cli/thrift/TMapTypeEntry.java  |    32 +-
 .../service/cli/thrift/TOpenSessionReq.java     |    91 +-
 .../service/cli/thrift/TOpenSessionResp.java    |    91 +-
 .../service/cli/thrift/TOperationHandle.java    |    54 +-
 .../service/cli/thrift/TOperationState.java     |     2 +-
 .../hive/service/cli/thrift/TOperationType.java |     2 +-
 .../service/cli/thrift/TPrimitiveTypeEntry.java |    38 +-
 .../service/cli/thrift/TProtocolVersion.java    |     2 +-
 .../cli/thrift/TRenewDelegationTokenReq.java    |    32 +-
 .../cli/thrift/TRenewDelegationTokenResp.java   |    24 +-
 .../apache/hive/service/cli/thrift/TRow.java    |    46 +-
 .../apache/hive/service/cli/thrift/TRowSet.java |    86 +-
 .../hive/service/cli/thrift/TSessionHandle.java |    24 +-
 .../apache/hive/service/cli/thrift/TStatus.java |    83 +-
 .../hive/service/cli/thrift/TStatusCode.java    |     2 +-
 .../hive/service/cli/thrift/TStringColumn.java  |    62 +-
 .../hive/service/cli/thrift/TStringValue.java   |    26 +-
 .../service/cli/thrift/TStructTypeEntry.java    |    50 +-
 .../hive/service/cli/thrift/TTableSchema.java   |    46 +-
 .../hive/service/cli/thrift/TTypeDesc.java      |    46 +-
 .../hive/service/cli/thrift/TTypeEntry.java     |    20 +-
 .../apache/hive/service/cli/thrift/TTypeId.java |     2 +-
 .../service/cli/thrift/TTypeQualifierValue.java |    20 +-
 .../service/cli/thrift/TTypeQualifiers.java     |    54 +-
 .../service/cli/thrift/TUnionTypeEntry.java     |    50 +-
 .../cli/thrift/TUserDefinedTypeEntry.java       |    24 +-
 service/src/gen/thrift/gen-php/TCLIService.php  |   269 +-
 service/src/gen/thrift/gen-php/ThriftHive.php   |   125 +-
 service/src/gen/thrift/gen-php/Types.php        |    30 +-
 .../gen-py/TCLIService/TCLIService-remote       |   102 +-
 .../thrift/gen-py/TCLIService/TCLIService.py    |   439 +-
 .../gen/thrift/gen-py/TCLIService/constants.py  |     2 +-
 .../src/gen/thrift/gen-py/TCLIService/ttypes.py |   478 +-
 .../gen-py/hive_service/ThriftHive-remote       |  1021 +-
 .../thrift/gen-py/hive_service/ThriftHive.py    |   266 +-
 .../gen/thrift/gen-py/hive_service/constants.py |     2 +-
 .../gen/thrift/gen-py/hive_service/ttypes.py    |    19 +-
 .../gen/thrift/gen-rb/hive_service_constants.rb |     2 +-
 .../src/gen/thrift/gen-rb/hive_service_types.rb |     2 +-
 .../src/gen/thrift/gen-rb/t_c_l_i_service.rb    |     2 +-
 .../thrift/gen-rb/t_c_l_i_service_constants.rb  |     2 +-
 .../gen/thrift/gen-rb/t_c_l_i_service_types.rb  |     2 +-
 service/src/gen/thrift/gen-rb/thrift_hive.rb    |     2 +-
 .../hive/service/cli/CLIServiceUtils.java       |     7 -
 .../cli/operation/GetColumnsOperation.java      |    10 +-
 .../cli/operation/GetTablesOperation.java       |     7 +-
 .../cli/operation/LogDivertAppender.java        |   223 +-
 .../service/cli/operation/OperationManager.java |    17 +-
 .../service/cli/session/SessionManager.java     |    42 +-
 .../apache/hive/service/server/HiveServer2.java |    74 +-
 .../session/TestPluggableHiveSessionImpl.java   |    55 +
 shims/0.20S/pom.xml                             |     8 +-
 .../hadoop/hive/shims/Hadoop20SShims.java       |    46 +-
 shims/0.23/pom.xml                              |    21 +-
 .../apache/hadoop/hive/shims/Hadoop23Shims.java |   145 +-
 shims/common/pom.xml                            |    17 +-
 .../apache/hadoop/hive/shims/HadoopShims.java   |    37 +
 .../hadoop/hive/shims/HadoopShimsSecure.java    |    32 +
 .../hadoop/hive/shims/HiveEventCounter.java     |   102 -
 spark-client/pom.xml                            |     5 +
 .../hive/spark/client/SparkClientImpl.java      |    24 +-
 .../hive/spark/client/SparkClientUtilities.java |    13 +-
 .../hive/spark/client/TestSparkClient.java      |     4 +-
 .../src/test/resources/log4j.properties         |    23 -
 spark-client/src/test/resources/log4j2.xml      |    39 +
 storage-api/pom.xml                             |    78 +
 .../hadoop/hive/common/type/HiveDecimal.java    |   313 +
 .../hive/ql/exec/vector/BytesColumnVector.java  |   322 +
 .../hive/ql/exec/vector/ColumnVector.java       |   173 +
 .../ql/exec/vector/DecimalColumnVector.java     |   106 +
 .../hive/ql/exec/vector/DoubleColumnVector.java |   143 +
 .../hive/ql/exec/vector/LongColumnVector.java   |   189 +
 .../hive/ql/exec/vector/VectorizedRowBatch.java |   186 +
 .../hadoop/hive/ql/io/sarg/ExpressionTree.java  |   156 +
 .../hadoop/hive/ql/io/sarg/PredicateLeaf.java   |   103 +
 .../hadoop/hive/ql/io/sarg/SearchArgument.java  |   287 +
 .../hive/ql/io/sarg/SearchArgumentFactory.java  |    28 +
 .../hive/ql/io/sarg/SearchArgumentImpl.java     |   687 +
 .../hive/serde2/io/HiveDecimalWritable.java     |   174 +
 testutils/ptest2/pom.xml                        |    32 +-
 .../ptest2/src/main/resources/log4j.properties  |    37 -
 testutils/ptest2/src/main/resources/log4j2.xml  |    80 +
 1291 files changed, 156107 insertions(+), 50140 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/fc53e5d7/beeline/src/java/org/apache/hive/beeline/BeeLine.java
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hive/blob/fc53e5d7/beeline/src/java/org/apache/hive/beeline/BeeLineOpts.java
----------------------------------------------------------------------
diff --cc beeline/src/java/org/apache/hive/beeline/BeeLineOpts.java
index b6f244b,3388391..8e1d11b
--- a/beeline/src/java/org/apache/hive/beeline/BeeLineOpts.java
+++ b/beeline/src/java/org/apache/hive/beeline/BeeLineOpts.java
@@@ -79,10 -78,9 +79,11 @@@ class BeeLineOpts implements Completer 
    int timeout = -1;
    private String isolation = DEFAULT_ISOLATION_LEVEL;
    private String outputFormat = "table";
 +  // This configuration is used only for client side configuration.
 +  private HiveConf conf;
    private boolean trimScripts = true;
    private boolean allowMultiLineCommand = true;
+   private boolean showConnectedUrl = false;
  
    //This can be set for old behavior of nulls printed as empty strings
    private boolean nullEmptyString = false;

http://git-wip-us.apache.org/repos/asf/hive/blob/fc53e5d7/jdbc/src/java/org/apache/hive/jdbc/Utils.java
----------------------------------------------------------------------
diff --cc jdbc/src/java/org/apache/hive/jdbc/Utils.java
index 0e4693b,d8368a4..e4df7f4
--- a/jdbc/src/java/org/apache/hive/jdbc/Utils.java
+++ b/jdbc/src/java/org/apache/hive/jdbc/Utils.java
@@@ -37,8 -36,8 +36,8 @@@ import org.apache.hive.service.cli.thri
  import org.apache.http.client.CookieStore;
  import org.apache.http.cookie.Cookie;
  
 -class Utils {
 +public class Utils {
-   public static final Log LOG = LogFactory.getLog(Utils.class.getName());
+   static final Log LOG = LogFactory.getLog(Utils.class.getName());
    /**
      * The required prefix for the connection URL.
      */

http://git-wip-us.apache.org/repos/asf/hive/blob/fc53e5d7/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
----------------------------------------------------------------------
diff --cc ql/src/java/org/apache/hadoop/hive/ql/Driver.java
index a381f73,4030075..9ee6023
--- a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
@@@ -388,12 -387,10 +388,15 @@@ public class Driver implements CommandP
      SessionState.get().setupQueryCurrentTimestamp();
  
      try {
+       // Initialize the transaction manager.  This must be done before analyze is called.
+       SessionState.get().initTxnMgr(conf);
 -
 -      command = new VariableSubstitution().substitute(conf, command);
++      
 +      command = new VariableSubstitution(new HiveVariableSource() {
 +        @Override
 +        public Map<String, String> getHiveVariable() {
 +          return SessionState.get().getHiveVariables();
 +        }
 +      }).substitute(conf, command);
        ctx = new Context(conf);
        ctx.setTryCount(getTryCount());
        ctx.setCmd(command);

http://git-wip-us.apache.org/repos/asf/hive/blob/fc53e5d7/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hive/blob/fc53e5d7/ql/src/java/org/apache/hadoop/hive/ql/processors/SetProcessor.java
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hive/blob/fc53e5d7/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
----------------------------------------------------------------------
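
A note on the Driver.java hunk above: VariableSubstitution no longer reads the session variables up front; it is handed a HiveVariableSource callback and pulls them on each substitute() call. A minimal self-contained sketch of that pattern (only HiveVariableSource/getHiveVariable come from the diff; everything else, including the simplified substitute() that drops the HiveConf argument and handles only the hivevar prefix, is illustrative):

  import java.util.HashMap;
  import java.util.Map;

  interface HiveVariableSource {
    Map<String, String> getHiveVariable();
  }

  class VariableSubstitution {
    private final HiveVariableSource source;

    VariableSubstitution(HiveVariableSource source) {
      this.source = source;
    }

    // Expand ${hivevar:name} references; the real method also takes a
    // HiveConf and resolves system/env prefixes, omitted here.
    String substitute(String command) {
      for (Map.Entry<String, String> e : source.getHiveVariable().entrySet()) {
        command = command.replace("${hivevar:" + e.getKey() + "}", e.getValue());
      }
      return command;
    }
  }

  class Demo {
    public static void main(String[] args) {
      Map<String, String> sessionVars = new HashMap<>();
      sessionVars.put("db", "default");
      VariableSubstitution vs = new VariableSubstitution(() -> sessionVars);
      System.out.println(vs.substitute("use ${hivevar:db}")); // use default
      sessionVars.put("db", "sales");
      System.out.println(vs.substitute("use ${hivevar:db}")); // use sales
    }
  }

Because the variables are fetched per call rather than captured at construction, later changes to the session state (the second println) are picked up automatically, which is the point of the callback.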


[32/50] [abbrv] hive git commit: HIVE-11689 : minor flow changes to ORC split generation (Sergey Shelukhin, reviewed by Prasanth Jayachandran and Swarnim Kulkarni) ADDENDUM

Posted by xu...@apache.org.
HIVE-11689 : minor flow changes to ORC split generation (Sergey Shelukhin, reviewed by Prasanth Jayachandran and Swarnim Kulkarni) ADDENDUM


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3ff3c6f1
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3ff3c6f1
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3ff3c6f1

Branch: refs/heads/beeline-cli
Commit: 3ff3c6f19ab82390f44c88cde5afea32a0299986
Parents: dbdd611
Author: Sergey Shelukhin <se...@apache.org>
Authored: Wed Sep 2 11:01:35 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Wed Sep 2 11:01:35 2015 -0700

----------------------------------------------------------------------
 .../hadoop/hive/ql/io/orc/OrcInputFormat.java     | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/3ff3c6f1/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 05efc5f..cf8694e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -28,6 +28,7 @@ import java.util.Map;
 import java.util.NavigableMap;
 import java.util.TreeMap;
 import java.util.concurrent.Callable;
+import java.util.concurrent.CompletionService;
 import java.util.concurrent.ExecutorCompletionService;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
@@ -373,7 +374,6 @@ public class OrcInputFormat  implements InputFormat<NullWritable, OrcStruct>,
     private final Configuration conf;
     private static Cache<Path, FileInfo> footerCache;
     private static ExecutorService threadPool = null;
-    private static ExecutorCompletionService<AcidDirInfo> ecs = null;
     private final int numBuckets;
     private final long maxSize;
     private final long minSize;
@@ -419,7 +419,6 @@ public class OrcInputFormat  implements InputFormat<NullWritable, OrcStruct>,
           threadPool = Executors.newFixedThreadPool(numThreads,
               new ThreadFactoryBuilder().setDaemon(true)
                   .setNameFormat("ORC_GET_SPLITS #%d").build());
-          ecs = new ExecutorCompletionService<AcidDirInfo>(threadPool);
         }
 
         if (footerCache == null && cacheStripeDetails) {
@@ -440,7 +439,7 @@ public class OrcInputFormat  implements InputFormat<NullWritable, OrcStruct>,
   /**
    * The full ACID directory information needed for splits; no more calls to HDFS needed.
    * We could just live with AcidUtils.Directory but...
-   * 1) That doesn't contain have base files.
+   * 1) That doesn't have base files for the base-directory case.
    * 2) We save fs for convenience to avoid getting it twice.
    */
   @VisibleForTesting
@@ -1031,17 +1030,18 @@ public class OrcInputFormat  implements InputFormat<NullWritable, OrcStruct>,
     // multi-threaded file statuses and split strategy
     boolean useFileIds = HiveConf.getBoolVar(conf, ConfVars.HIVE_ORC_INCLUDE_FILE_ID_IN_SPLITS);
     Path[] paths = getInputPaths(conf);
+    CompletionService<AcidDirInfo> ecs = new ExecutorCompletionService<>(Context.threadPool);
     for (Path dir : paths) {
       FileSystem fs = dir.getFileSystem(conf);
       FileGenerator fileGenerator = new FileGenerator(context, fs, dir, useFileIds);
-      pathFutures.add(Context.ecs.submit(fileGenerator));
+      pathFutures.add(ecs.submit(fileGenerator));
     }
 
     // complete path futures and schedule split generation
     try {
       for (int notIndex = 0; notIndex < paths.length; ++notIndex) {
-        AcidDirInfo adi = Context.ecs.take().get();
-        SplitStrategy splitStrategy = determineSplitStrategy(
+        AcidDirInfo adi = ecs.take().get();
+        SplitStrategy<?> splitStrategy = determineSplitStrategy(
             context, adi.fs, adi.splitPath, adi.acidInfo, adi.baseOrOriginalFiles);
 
         if (isDebugEnabled) {
@@ -1049,12 +1049,14 @@ public class OrcInputFormat  implements InputFormat<NullWritable, OrcStruct>,
         }
 
         if (splitStrategy instanceof ETLSplitStrategy) {
-          List<SplitInfo> splitInfos = splitStrategy.getSplits();
+          List<SplitInfo> splitInfos = ((ETLSplitStrategy)splitStrategy).getSplits();
           for (SplitInfo splitInfo : splitInfos) {
             splitFutures.add(Context.threadPool.submit(new SplitGenerator(splitInfo)));
           }
         } else {
-          splits.addAll(splitStrategy.getSplits());
+          @SuppressWarnings("unchecked")
+          List<OrcSplit> readySplits = (List<OrcSplit>)splitStrategy.getSplits();
+          splits.addAll(readySplits);
         }
       }
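
Beyond the generics cleanup, the substantive change in this hunk is lifecycle: the ExecutorCompletionService used to be a static field initialized alongside the thread pool, so two concurrent getSplits() calls could, in principle, consume each other's completed FileGenerator results; now a fresh CompletionService is created per invocation over the same shared pool. The pattern in isolation (a sketch; the task and result types are placeholders):

  import java.util.ArrayList;
  import java.util.List;
  import java.util.concurrent.Callable;
  import java.util.concurrent.CompletionService;
  import java.util.concurrent.ExecutorCompletionService;
  import java.util.concurrent.ExecutorService;
  import java.util.concurrent.Executors;

  public class PerCallCompletion {
    // One long-lived pool for the process, like Context.threadPool.
    private static final ExecutorService POOL = Executors.newFixedThreadPool(4);

    static List<String> runAll(List<Callable<String>> tasks) throws Exception {
      // Fresh per call: take() can only ever see this call's results.
      CompletionService<String> ecs = new ExecutorCompletionService<>(POOL);
      for (Callable<String> t : tasks) {
        ecs.submit(t);
      }
      List<String> results = new ArrayList<>(tasks.size());
      for (int i = 0; i < tasks.size(); i++) {
        // Results arrive in completion order, not submission order --
        // exactly what the split-generation loop wants, since it can
        // schedule follow-up work as soon as any directory finishes.
        results.add(ecs.take().get());
      }
      return results;
    }
  }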
 


[44/50] [abbrv] hive git commit: HIVE-11737: IndexOutOfBounds compiling query with duplicated groupby keys (Jimmy, reviewed by Szehon)

Posted by xu...@apache.org.
HIVE-11737: IndexOutOfBounds compiling query with duplicated groupby keys (Jimmy, reviewed by Szehon)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7aec2726
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7aec2726
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7aec2726

Branch: refs/heads/beeline-cli
Commit: 7aec2726dc10c036941bc204020e199885550427
Parents: 76fc383
Author: Jimmy Xiang <jx...@cloudera.com>
Authored: Fri Sep 4 10:34:58 2015 -0700
Committer: Jimmy Xiang <jx...@cloudera.com>
Committed: Mon Sep 7 09:41:15 2015 -0700

----------------------------------------------------------------------
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |  1 +
 ql/src/test/queries/clientpositive/groupby13.q  | 16 ++++
 .../test/results/clientpositive/groupby13.q.out | 86 ++++++++++++++++++++
 3 files changed, 103 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/7aec2726/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 778c7b2..f6052e3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -4622,6 +4622,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
 
       if (ExprNodeDescUtils.indexOf(grpByExprNode, groupByKeys) >= 0) {
         // Skip duplicated grouping keys
+        grpByExprs.remove(i--);
         continue;
       }
       groupByKeys.add(grpByExprNode);
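
The one-line fix keeps grpByExprs positionally aligned with the groupByKeys it emits; leaving a duplicate in grpByExprs while skipping its key let the two drift apart, which is evidently what produced the IndexOutOfBounds in the title. remove(i--) deletes the duplicate and then revisits slot i, which now holds the element that shifted down. The idiom in isolation (a sketch with strings standing in for ExprNodeDesc):

  import java.util.ArrayList;
  import java.util.Arrays;
  import java.util.List;

  public class DedupInPlace {
    public static void main(String[] args) {
      List<String> grpByExprs =
          new ArrayList<>(Arrays.asList("int_col_7", "int_col_7", "least(c5,c7)"));
      List<String> groupByKeys = new ArrayList<>();
      for (int i = 0; i < grpByExprs.size(); i++) {
        String expr = grpByExprs.get(i);
        if (groupByKeys.contains(expr)) {
          // Drop the duplicate, then step back so the element that
          // slid into slot i is not skipped by the loop increment.
          grpByExprs.remove(i--);
          continue;
        }
        groupByKeys.add(expr);
      }
      // Both lists now hold [int_col_7, least(c5,c7)] at matching indexes.
      System.out.println(grpByExprs);
      System.out.println(groupByKeys);
    }
  }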

http://git-wip-us.apache.org/repos/asf/hive/blob/7aec2726/ql/src/test/queries/clientpositive/groupby13.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby13.q b/ql/src/test/queries/clientpositive/groupby13.q
new file mode 100644
index 0000000..53feaed
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/groupby13.q
@@ -0,0 +1,16 @@
+CREATE TABLE grpby_test (int_col_5 INT,
+  int_col_7 INT);
+
+SET hive.mapred.mode=strict;
+
+EXPLAIN
+SELECT
+int_col_7,
+MAX(LEAST(COALESCE(int_col_5, -279),
+  COALESCE(int_col_7, 476))) AS int_col
+FROM grpby_test
+GROUP BY
+int_col_7,
+int_col_7,
+LEAST(COALESCE(int_col_5, -279),
+  COALESCE(int_col_7, 476));

http://git-wip-us.apache.org/repos/asf/hive/blob/7aec2726/ql/src/test/results/clientpositive/groupby13.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby13.q.out b/ql/src/test/results/clientpositive/groupby13.q.out
new file mode 100644
index 0000000..e698d1a
--- /dev/null
+++ b/ql/src/test/results/clientpositive/groupby13.q.out
@@ -0,0 +1,86 @@
+PREHOOK: query: CREATE TABLE grpby_test (int_col_5 INT,
+  int_col_7 INT)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@grpby_test
+POSTHOOK: query: CREATE TABLE grpby_test (int_col_5 INT,
+  int_col_7 INT)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@grpby_test
+PREHOOK: query: EXPLAIN
+SELECT
+int_col_7,
+MAX(LEAST(COALESCE(int_col_5, -279),
+  COALESCE(int_col_7, 476))) AS int_col
+FROM grpby_test
+GROUP BY
+int_col_7,
+int_col_7,
+LEAST(COALESCE(int_col_5, -279),
+  COALESCE(int_col_7, 476))
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT
+int_col_7,
+MAX(LEAST(COALESCE(int_col_5, -279),
+  COALESCE(int_col_7, 476))) AS int_col
+FROM grpby_test
+GROUP BY
+int_col_7,
+int_col_7,
+LEAST(COALESCE(int_col_5, -279),
+  COALESCE(int_col_7, 476))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: grpby_test
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Select Operator
+              expressions: int_col_7 (type: int), int_col_5 (type: int)
+              outputColumnNames: int_col_7, int_col_5
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Group By Operator
+                aggregations: max(least(COALESCE(int_col_5,(- 279)),COALESCE(int_col_7,476)))
+                keys: int_col_7 (type: int), least(COALESCE(int_col_5,(- 279)),COALESCE(int_col_7,476)) (type: int)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int), _col1 (type: int)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  value expressions: _col2 (type: int)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: max(VALUE._col0)
+          keys: KEY._col0 (type: int), KEY._col0 (type: int), KEY._col1 (type: int)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: int), _col3 (type: int)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+


[15/50] [abbrv] hive git commit: HIVE-11357 ACID enable predicate pushdown for insert-only delta file 2 (Eugene Koifman, reviewed by Alan Gates)

Posted by xu...@apache.org.
HIVE-11357 ACID enable predicate pushdown for insert-only delta file 2 (Eugene Koifman, reviewed by Alan Gates)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ed4517cf
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ed4517cf
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ed4517cf

Branch: refs/heads/beeline-cli
Commit: ed4517cfb14b90d03f3cf33d653827bec90bcb98
Parents: 8e712da
Author: Eugene Koifman <ek...@hortonworks.com>
Authored: Fri Aug 28 12:19:32 2015 -0700
Committer: Eugene Koifman <ek...@hortonworks.com>
Committed: Fri Aug 28 12:19:32 2015 -0700

----------------------------------------------------------------------
 .../hadoop/hive/ql/io/orc/OrcInputFormat.java   | 18 ++++-
 .../apache/hadoop/hive/ql/TestTxnCommands2.java | 85 ++++++++++++++++----
 2 files changed, 88 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/ed4517cf/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index fd6d2ad..8c138b9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -127,7 +127,7 @@ public class OrcInputFormat  implements InputFormat<NullWritable, OrcStruct>,
 
   /**
    * When picking the hosts for a split that crosses block boundaries,
-   * any drop any host that has fewer than MIN_INCLUDED_LOCATION of the
+   * drop any host that has fewer than MIN_INCLUDED_LOCATION of the
    * number of bytes available on the host with the most.
    * If host1 has 10MB of the split, host2 has 20MB, and host3 has 18MB the
    * split will contain host2 (100% of host2) and host3 (90% of host2). Host1
@@ -1283,6 +1283,22 @@ public class OrcInputFormat  implements InputFormat<NullWritable, OrcStruct>,
     } else {
       bucket = (int) split.getStart();
       reader = null;
+      if(deltas != null && deltas.length > 0) {
+        Path bucketPath = AcidUtils.createBucketFile(deltas[0], bucket);
+        OrcFile.ReaderOptions readerOptions = OrcFile.readerOptions(conf);
+        FileSystem fs = readerOptions.getFilesystem();
+        if(fs == null) {
+          fs = path.getFileSystem(options.getConfiguration());
+        }
+        if(fs.exists(bucketPath)) {
+        /* w/o schema evolution (which ACID doesn't support yet) all delta
+        files have the same schema, so choosing the 1st one*/
+          final List<OrcProto.Type> types =
+            OrcFile.createReader(bucketPath, readerOptions).getTypes();
+          readOptions.include(genIncludedColumns(types, conf, split.isOriginal()));
+          setSearchArgument(readOptions, types, conf, split.isOriginal());
+        }
+      }
     }
     String txnString = conf.get(ValidTxnList.VALID_TXNS_KEY,
                                 Long.MAX_VALUE + ":");

http://git-wip-us.apache.org/repos/asf/hive/blob/ed4517cf/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
index 58c2fca..5aa2500 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
@@ -22,6 +22,7 @@ import org.apache.commons.io.FileUtils;
 import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.txn.TxnDbUtil;
+import org.apache.hadoop.hive.ql.io.HiveInputFormat;
 import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.ql.txn.compactor.Worker;
@@ -51,7 +52,7 @@ public class TestTxnCommands2 {
   ).getPath().replaceAll("\\\\", "/");
   private static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse";
   //bucket count for test tables; set it to 1 for easier debugging
-  private static int BUCKET_COUNT = 1;
+  private static int BUCKET_COUNT = 2;
   @Rule
   public TestName testName = new TestName();
   private HiveConf hiveConf;
@@ -107,7 +108,6 @@ public class TestTxnCommands2 {
   public void tearDown() throws Exception {
     try {
       if (d != null) {
-     //   runStatementOnDriver("set autocommit true");
         dropTables();
         d.destroy();
         d.close();
@@ -126,13 +126,51 @@ public class TestTxnCommands2 {
   public void testOrcNoPPD() throws Exception {
     testOrcPPD(false);
   }
-  private void testOrcPPD(boolean enablePPD) throws Exception  {
+
+  /**
+   * this is run 2 times: 1 with PPD on, 1 with off
+   * Also, the queries are such that if we were to push predicate down to an update/delete delta,
+   * the test would produce wrong results
+   * @param enablePPD
+   * @throws Exception
+   */
+  private void testOrcPPD(boolean enablePPD) throws Exception {
     boolean originalPpd = hiveConf.getBoolVar(HiveConf.ConfVars.HIVEOPTINDEXFILTER);
     hiveConf.setBoolVar(HiveConf.ConfVars.HIVEOPTINDEXFILTER, enablePPD);//enables ORC PPD
-    int[][] tableData = {{1,2},{3,4}};
-    runStatementOnDriver("insert into " + Table.ACIDTBL + "(a,b) " + makeValuesClause(tableData));
-    List<String> rs2 = runStatementOnDriver("select a,b from " + Table.ACIDTBL + " where a > 1 order by a,b");
-    runStatementOnDriver("alter table "+ Table.ACIDTBL + " compact 'MAJOR'");
+    //create delta_0001_0001_0000 (should push predicate here)
+    runStatementOnDriver("insert into " + Table.ACIDTBL + "(a,b) " + makeValuesClause(new int[][]{{1, 2}, {3, 4}}));
+    List<String> explain;
+    String query = "update " + Table.ACIDTBL + " set b = 5 where a = 3";
+    if (enablePPD) {
+      explain = runStatementOnDriver("explain " + query);
+      /*
+      here is a portion of the above "explain".  The "filterExpr:" in the TableScan is the pushed predicate
+      w/o PPD, the line is simply not there, otherwise the plan is the same
+       Map Operator Tree:,
+         TableScan,
+          alias: acidtbl,
+          filterExpr: (a = 3) (type: boolean),
+            Filter Operator,
+             predicate: (a = 3) (type: boolean),
+             Select Operator,
+             ...
+       */
+      assertPredicateIsPushed("filterExpr: (a = 3)", explain);
+    }
+    //create delta_0002_0002_0000 (can't push predicate)
+    runStatementOnDriver(query);
+    query = "select a,b from " + Table.ACIDTBL + " where b = 4 order by a,b";
+    if (enablePPD) {
+      /*at this point we have 2 delta files, 1 for insert 1 for update
+      * we should push predicate into 1st one but not 2nd.  If the following 'select' were to
+      * push into the 'update' delta, we'd filter out {3,5} before doing merge and thus
+     * produce {3,4} as the value for 2nd row.  The right result is 0-rows.*/
+      explain = runStatementOnDriver("explain " + query);
+      assertPredicateIsPushed("filterExpr: (b = 4)", explain);
+    }
+    List<String> rs0 = runStatementOnDriver(query);
+    Assert.assertEquals("Read failed", 0, rs0.size());
+    runStatementOnDriver("alter table " + Table.ACIDTBL + " compact 'MAJOR'");
     Worker t = new Worker();
     t.setThreadId((int) t.getId());
     t.setHiveConf(hiveConf);
@@ -142,18 +180,37 @@ public class TestTxnCommands2 {
     t.init(stop, looped);
     t.run();
     //now we have base_0001 file
-    int[][] tableData2 = {{1,7},{5,6},{7,8},{9,10}};
+    int[][] tableData2 = {{1, 7}, {5, 6}, {7, 8}, {9, 10}};
     runStatementOnDriver("insert into " + Table.ACIDTBL + "(a,b) " + makeValuesClause(tableData2));
-    //now we have delta_0002_0002_0000 with inserts only (ok to push predicate)
+    //now we have delta_0003_0003_0000 with inserts only (ok to push predicate)
+    if (enablePPD) {
+      explain = runStatementOnDriver("explain delete from " + Table.ACIDTBL + " where a=7 and b=8");
+      assertPredicateIsPushed("filterExpr: ((a = 7) and (b = 8))", explain);
+    }
     runStatementOnDriver("delete from " + Table.ACIDTBL + " where a=7 and b=8");
-    //now we have delta_0003_0003_0000 with delete events (can't push predicate)
-    runStatementOnDriver("update " + Table.ACIDTBL + " set b = 11 where a = 9");
-    //and another delta with update op
-    List<String> rs1 = runStatementOnDriver("select a,b from " + Table.ACIDTBL + " where a > 1 order by a,b");
-    int [][] resultData = {{3,4},{5,6},{9,11}};
+    //now we have delta_0004_0004_0000 with delete events
+
+    /*(can't push predicate to 'delete' delta)
+    * if we were to push to 'delete' delta, we'd filter out all rows since the 'row' is always NULL for
+    * delete events and we'd produce data as if the delete never happened*/
+    query = "select a,b from " + Table.ACIDTBL + " where a > 1 order by a,b";
+    if(enablePPD) {
+      explain = runStatementOnDriver("explain " + query);
+      assertPredicateIsPushed("filterExpr: (a > 1)", explain);
+    }
+    List<String> rs1 = runStatementOnDriver(query);
+    int [][] resultData = new int[][] {{3, 5}, {5, 6}, {9, 10}};
     Assert.assertEquals("Update failed", stringifyValues(resultData), rs1);
     hiveConf.setBoolVar(HiveConf.ConfVars.HIVEOPTINDEXFILTER, originalPpd);
   }
+  private static void assertPredicateIsPushed(String ppd, List<String> queryPlan) {
+    for(String line : queryPlan) {
+      if(line != null && line.contains(ppd)) {
+        return;
+      }
+    }
+    Assert.assertFalse("PPD '" + ppd + "' wasn't pushed", true);
+  }
   @Ignore("alter table")
   @Test
   public void testAlterTable() throws Exception {
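
A side note on the helper at the end of this hunk: assertFalse(msg, true) is a roundabout unconditional failure; the stock JUnit spelling of the same scan-and-fail check would be (an equivalent sketch, not the committed code):

  import java.util.List;
  import org.junit.Assert;

  final class PlanAssertions {
    // Pass if any printed plan line contains the pushed-predicate
    // marker; otherwise fail with the marker for context.
    static void assertPredicateIsPushed(String ppd, List<String> queryPlan) {
      for (String line : queryPlan) {
        if (line != null && line.contains(ppd)) {
          return;
        }
      }
      Assert.fail("PPD '" + ppd + "' wasn't pushed");
    }
  }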


[35/50] [abbrv] hive git commit: HIVE-11700: exception in logs in Tez test with new logger (Prasanth Jayachandran reviewed by Sergey Shelukhin)

Posted by xu...@apache.org.
HIVE-11700: exception in logs in Tez test with new logger (Prasanth Jayachandran reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1fc9320f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1fc9320f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1fc9320f

Branch: refs/heads/beeline-cli
Commit: 1fc9320f07b066e4850a04958a2c73643b5ad5b1
Parents: 5a1957f
Author: Prasanth Jayachandran <j....@gmail.com>
Authored: Wed Sep 2 15:56:15 2015 -0500
Committer: Prasanth Jayachandran <j....@gmail.com>
Committed: Wed Sep 2 15:56:15 2015 -0500

----------------------------------------------------------------------
 data/conf/hive-log4j2.xml | 3 ---
 1 file changed, 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/1fc9320f/data/conf/hive-log4j2.xml
----------------------------------------------------------------------
diff --git a/data/conf/hive-log4j2.xml b/data/conf/hive-log4j2.xml
index 11c8e79..452f01f 100644
--- a/data/conf/hive-log4j2.xml
+++ b/data/conf/hive-log4j2.xml
@@ -94,9 +94,6 @@
     <Logger name="org.apache.zookeeper.ClientCnxnSocketNIO" level="WARN">
       <AppenderRef ref="${sys:hive.root.logger}"/>
     </Logger>
-    <Logger name="org.apache.hadoop.hive.ql.log.PerfLogger" level="${sys:hive.ql.log.PerfLogger.level}">
-      <AppenderRef ref="${sys:hive.ql.log.PerfLogger.logger}"/>
-    </Logger>
     <Logger name="org.apache.hadoop.hive.ql.exec.Operator" level="INFO">
       <AppenderRef ref="${sys:hive.root.logger}"/>
     </Logger>
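
The removed block referenced two hive.ql.log.PerfLogger.* system properties that the Tez tests do not define, which is the likely source of the logged exceptions. For reference, Log4j2 lookups also accept inline defaults via the ${sys:name:-default} form, so a variant that tolerates the unset properties would look roughly like this (a sketch of the alternative, not the committed fix; the "console" fallback appender name is an assumption, not taken from the file):

  <Logger name="org.apache.hadoop.hive.ql.log.PerfLogger"
          level="${sys:hive.ql.log.PerfLogger.level:-INFO}">
    <AppenderRef ref="${sys:hive.ql.log.PerfLogger.logger:-console}"/>
  </Logger>

Deleting the logger instead lets PerfLogger output fall through to the root logger, which is the simpler fix for a test configuration.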


[17/50] [abbrv] hive git commit: HIVE-11595 : refactor ORC footer reading to make it usable from outside (Sergey Shelukhin, reviewed by Prasanth Jayachandran)

Posted by xu...@apache.org.
HIVE-11595 : refactor ORC footer reading to make it usable from outside (Sergey Shelukhin, reviewed by Prasanth Jayachandran)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/22fa9216
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/22fa9216
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/22fa9216

Branch: refs/heads/beeline-cli
Commit: 22fa9216d4e32d7681d3c1be8cbedc8c7999e56d
Parents: 97bf32a
Author: Sergey Shelukhin <se...@apache.org>
Authored: Fri Aug 28 18:23:05 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Fri Aug 28 18:23:05 2015 -0700

----------------------------------------------------------------------
 .../apache/hadoop/hive/ql/io/orc/Reader.java    |   6 +
 .../hadoop/hive/ql/io/orc/ReaderImpl.java       | 281 +++++++++++++------
 2 files changed, 204 insertions(+), 83 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/22fa9216/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java
index 7bddefc..187924d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java
@@ -22,6 +22,7 @@ import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.util.List;
 
+import org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 
@@ -358,4 +359,9 @@ public interface Reader {
                     String[] neededColumns) throws IOException;
 
   MetadataReader metadata() throws IOException;
+
+  /** Gets serialized file metadata read from disk for the purposes of caching, etc. */
+  ByteBuffer getSerializedFileFooter();
+
+  Footer getFooter();
 }
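
These two accessors are what makes the footer usable "from outside": a caller can parse a file once, keep the raw tail bytes, and later rebuild reader state without another trip to the filesystem. A usage sketch (the cache map is a stand-in; per the ReaderImpl change below, getSerializedFileFooter() only returns a buffer when the metadata was actually read from disk rather than supplied via FileMetaInfo):

  import java.io.IOException;
  import java.nio.ByteBuffer;
  import java.util.Map;
  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.hive.ql.io.orc.OrcFile;
  import org.apache.hadoop.hive.ql.io.orc.Reader;

  public class FooterCaching {
    static void cacheTail(Map<Path, ByteBuffer> cache, Path path,
        Configuration conf) throws IOException {
      Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
      ByteBuffer rawTail = reader.getSerializedFileFooter();
      if (rawTail != null) { // null when built from caller-supplied metadata
        cache.put(path, rawTail.duplicate()); // duplicate: don't share position
      }
    }
  }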

http://git-wip-us.apache.org/repos/asf/hive/blob/22fa9216/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
index c990d85..ab539c4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
@@ -35,6 +35,8 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.DiskRange;
 import org.apache.hadoop.hive.ql.io.FileFormatException;
+import org.apache.hadoop.hive.ql.io.orc.OrcFile.WriterVersion;
+import org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer;
 import org.apache.hadoop.hive.ql.io.orc.OrcProto.Type;
 import org.apache.hadoop.hive.ql.io.orc.OrcProto.UserMetadataItem;
 import org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl.BufferChunk;
@@ -74,6 +76,9 @@ public class ReaderImpl implements Reader {
   // will help avoid cpu cycles spend in deserializing at cost of increased
   // memory footprint.
   private final ByteBuffer footerByteBuffer;
+  // Same for metastore cache - maintains the same background buffer, but includes postscript.
+  // This will only be set if the file footer/metadata was read from disk.
+  private final ByteBuffer footerMetaAndPsBuffer;
 
   static class StripeInformationImpl
       implements StripeInformation {
@@ -166,11 +171,7 @@ public class ReaderImpl implements Reader {
 
   @Override
   public List<StripeInformation> getStripes() {
-    List<StripeInformation> result = new ArrayList<StripeInformation>();
-    for(OrcProto.StripeInformation info: footer.getStripesList()) {
-      result.add(new StripeInformationImpl(info));
-    }
-    return result;
+    return convertProtoStripesToStripes(footer.getStripesList());
   }
 
   @Override
@@ -274,7 +275,7 @@ public class ReaderImpl implements Reader {
    * Check to see if this ORC file is from a future version and if so,
    * warn the user that we may not be able to read all of the column encodings.
    * @param log the logger to write any error message to
-   * @param path the filename for error messages
+   * @param path the data source path for error messages
    * @param version the version of hive that wrote the file.
    */
   static void checkOrcVersion(Log log, Path path, List<Integer> version) {
@@ -287,8 +288,7 @@ public class ReaderImpl implements Reader {
       if (major > OrcFile.Version.CURRENT.getMajor() ||
           (major == OrcFile.Version.CURRENT.getMajor() &&
            minor > OrcFile.Version.CURRENT.getMinor())) {
-        log.warn("ORC file " + path +
-                 " was written by a future Hive version " +
+        log.warn(path + " was written by a future Hive version " +
                  versionString(version) +
                  ". This file may not be readable by this version of Hive.");
       }
@@ -313,9 +313,11 @@ public class ReaderImpl implements Reader {
     FileMetaInfo footerMetaData;
     if (options.getFileMetaInfo() != null) {
       footerMetaData = options.getFileMetaInfo();
+      this.footerMetaAndPsBuffer = null;
     } else {
       footerMetaData = extractMetaInfoFromFooter(fs, path,
           options.getMaxLength());
+      this.footerMetaAndPsBuffer = footerMetaData.footerMetaAndPsBuffer;
     }
     MetaInfoObjExtractor rInfo =
         new MetaInfoObjExtractor(footerMetaData.compressionType,
@@ -349,6 +351,111 @@ public class ReaderImpl implements Reader {
     return OrcFile.WriterVersion.ORIGINAL;
   }
 
+  /** Extracts the necessary metadata from an externally stored buffer (fullFooterBuffer). */
+  public static FooterInfo extractMetaInfoFromFooter(
+      ByteBuffer bb, Path srcPath) throws IOException {
+    // Read the PostScript. Be very careful as some parts of this historically use bb position
+    // and some use absolute offsets that have to take position into account.
+    int baseOffset = bb.position();
+    int lastByteAbsPos = baseOffset + bb.remaining() - 1;
+    int psLen = bb.get(lastByteAbsPos) & 0xff;
+    int psAbsPos = lastByteAbsPos - psLen;
+    OrcProto.PostScript ps = extractPostScript(bb, srcPath, psLen, psAbsPos);
+    assert baseOffset == bb.position();
+
+    // Extract PS information.
+    int footerSize = (int)ps.getFooterLength(), metadataSize = (int)ps.getMetadataLength(),
+        footerAbsPos = psAbsPos - footerSize, metadataAbsPos = footerAbsPos - metadataSize;
+    String compressionType = ps.getCompression().toString();
+    CompressionCodec codec = WriterImpl.createCodec(CompressionKind.valueOf(compressionType));
+    int bufferSize = (int)ps.getCompressionBlockSize();
+    bb.position(metadataAbsPos);
+    bb.mark();
+
+    // Extract metadata and footer.
+    Metadata metadata = new Metadata(extractMetadata(
+        bb, metadataAbsPos, metadataSize, codec, bufferSize));
+    OrcProto.Footer footer = extractFooter(bb, footerAbsPos, footerSize, codec, bufferSize);
+    bb.position(metadataAbsPos);
+    bb.limit(psAbsPos);
+    // TODO: do we need footer buffer here? FileInfo/FileMetaInfo is a mess...
+    FileMetaInfo fmi = new FileMetaInfo(
+        compressionType, bufferSize, metadataSize, bb, extractWriterVersion(ps));
+    return new FooterInfo(metadata, footer, fmi);
+  }
+
+  private static OrcProto.Footer extractFooter(ByteBuffer bb, int footerAbsPos,
+      int footerSize, CompressionCodec codec, int bufferSize) throws IOException {
+    bb.position(footerAbsPos);
+    bb.limit(footerAbsPos + footerSize);
+    InputStream instream = InStream.create("footer", Lists.<DiskRange>newArrayList(
+          new BufferChunk(bb, 0)), footerSize, codec, bufferSize);
+    return OrcProto.Footer.parseFrom(instream);
+  }
+
+  private static OrcProto.Metadata extractMetadata(ByteBuffer bb, int metadataAbsPos,
+      int metadataSize, CompressionCodec codec, int bufferSize) throws IOException {
+    bb.position(metadataAbsPos);
+    bb.limit(metadataAbsPos + metadataSize);
+    InputStream instream = InStream.create("metadata", Lists.<DiskRange>newArrayList(
+        new BufferChunk(bb, 0)), metadataSize, codec, bufferSize);
+    CodedInputStream in = CodedInputStream.newInstance(instream);
+    int msgLimit = DEFAULT_PROTOBUF_MESSAGE_LIMIT;
+    OrcProto.Metadata meta = null;
+    do {
+      try {
+        in.setSizeLimit(msgLimit);
+        meta = OrcProto.Metadata.parseFrom(in);
+      } catch (InvalidProtocolBufferException e) {
+        if (e.getMessage().contains("Protocol message was too large")) {
+          LOG.warn("Metadata section is larger than " + msgLimit + " bytes. Increasing the max" +
+              " size of the coded input stream." );
+
+          msgLimit = msgLimit << 1;
+          if (msgLimit > PROTOBUF_MESSAGE_MAX_LIMIT) {
+            LOG.error("Metadata section exceeds max protobuf message size of " +
+                PROTOBUF_MESSAGE_MAX_LIMIT + " bytes.");
+            throw e;
+          }
+
+          // we must have failed in the middle of reading instream and instream doesn't support
+          // resetting the stream
+          instream = InStream.create("metadata", Lists.<DiskRange>newArrayList(
+              new BufferChunk(bb, 0)), metadataSize, codec, bufferSize);
+          in = CodedInputStream.newInstance(instream);
+        } else {
+          throw e;
+        }
+      }
+    } while (meta == null);
+    return meta;
+  }
+
+  private static OrcProto.PostScript extractPostScript(ByteBuffer bb, Path path,
+      int psLen, int psAbsOffset) throws IOException {
+    // TODO: when PB is upgraded to 2.6, newInstance(ByteBuffer) method should be used here.
+    assert bb.hasArray();
+    CodedInputStream in = CodedInputStream.newInstance(
+        bb.array(), bb.arrayOffset() + psAbsOffset, psLen);
+    OrcProto.PostScript ps = OrcProto.PostScript.parseFrom(in);
+    checkOrcVersion(LOG, path, ps.getVersionList());
+
+    // Check compression codec.
+    switch (ps.getCompression()) {
+      case NONE:
+        break;
+      case ZLIB:
+        break;
+      case SNAPPY:
+        break;
+      case LZO:
+        break;
+      default:
+        throw new IllegalArgumentException("Unknown compression");
+    }
+    return ps;
+  }
+
   private static FileMetaInfo extractMetaInfoFromFooter(FileSystem fs,
                                                         Path path,
                                                         long maxFileLength
@@ -367,44 +474,24 @@ public class ReaderImpl implements Reader {
     int readSize = (int) Math.min(size, DIRECTORY_SIZE_GUESS);
     file.seek(size - readSize);
     ByteBuffer buffer = ByteBuffer.allocate(readSize);
-    file.readFully(buffer.array(), buffer.arrayOffset() + buffer.position(),
-      buffer.remaining());
+    assert buffer.position() == 0;
+    file.readFully(buffer.array(), buffer.arrayOffset(), readSize);
+    buffer.position(0);
 
     //read the PostScript
     //get length of PostScript
     int psLen = buffer.get(readSize - 1) & 0xff;
     ensureOrcFooter(file, path, psLen, buffer);
     int psOffset = readSize - 1 - psLen;
-    CodedInputStream in = CodedInputStream.newInstance(buffer.array(),
-      buffer.arrayOffset() + psOffset, psLen);
-    OrcProto.PostScript ps = OrcProto.PostScript.parseFrom(in);
-
-    checkOrcVersion(LOG, path, ps.getVersionList());
+    OrcProto.PostScript ps = extractPostScript(buffer, path, psLen, psOffset);
 
     int footerSize = (int) ps.getFooterLength();
     int metadataSize = (int) ps.getMetadataLength();
-    OrcFile.WriterVersion writerVersion;
-    if (ps.hasWriterVersion()) {
-      writerVersion =  getWriterVersion(ps.getWriterVersion());
-    } else {
-      writerVersion = OrcFile.WriterVersion.ORIGINAL;
-    }
+    OrcFile.WriterVersion writerVersion = extractWriterVersion(ps);
 
-    //check compression codec
-    switch (ps.getCompression()) {
-      case NONE:
-        break;
-      case ZLIB:
-        break;
-      case SNAPPY:
-        break;
-      case LZO:
-        break;
-      default:
-        throw new IllegalArgumentException("Unknown compression");
-    }
 
     //check if extra bytes need to be read
+    ByteBuffer fullFooterBuffer = null;
     int extra = Math.max(0, psLen + 1 + footerSize + metadataSize - readSize);
     if (extra > 0) {
       //more bytes need to be read, seek back to the right place and read extra bytes
@@ -417,10 +504,12 @@ public class ReaderImpl implements Reader {
       extraBuf.put(buffer);
       buffer = extraBuf;
       buffer.position(0);
+      fullFooterBuffer = buffer.slice();
       buffer.limit(footerSize + metadataSize);
     } else {
       //footer is already in the bytes in buffer, just adjust position, length
       buffer.position(psOffset - footerSize - metadataSize);
+      fullFooterBuffer = buffer.slice();
       buffer.limit(psOffset);
     }
 
@@ -435,11 +524,24 @@ public class ReaderImpl implements Reader {
         (int) ps.getMetadataLength(),
         buffer,
         ps.getVersionList(),
-        writerVersion
+        writerVersion,
+        fullFooterBuffer
         );
   }
 
+  private static OrcFile.WriterVersion extractWriterVersion(OrcProto.PostScript ps) {
+    return (ps.hasWriterVersion()
+        ? getWriterVersion(ps.getWriterVersion()) : OrcFile.WriterVersion.ORIGINAL);
+  }
 
+  private static List<StripeInformation> convertProtoStripesToStripes(
+      List<OrcProto.StripeInformation> stripes) {
+    List<StripeInformation> result = new ArrayList<StripeInformation>(stripes.size());
+    for (OrcProto.StripeInformation info : stripes) {
+      result.add(new StripeInformationImpl(info));
+    }
+    return result;
+  }
 
   /**
    * MetaInfoObjExtractor - has logic to create the values for the fields in ReaderImpl
@@ -467,46 +569,10 @@ public class ReaderImpl implements Reader {
 
       int position = footerBuffer.position();
       int footerBufferSize = footerBuffer.limit() - footerBuffer.position() - metadataSize;
-      footerBuffer.limit(position + metadataSize);
-
-      InputStream instream = InStream.create("metadata", Lists.<DiskRange>newArrayList(
-          new BufferChunk(footerBuffer, 0)), metadataSize, codec, bufferSize);
-      CodedInputStream in = CodedInputStream.newInstance(instream);
-      int msgLimit = DEFAULT_PROTOBUF_MESSAGE_LIMIT;
-      OrcProto.Metadata meta = null;
-      do {
-        try {
-          in.setSizeLimit(msgLimit);
-          meta = OrcProto.Metadata.parseFrom(in);
-        } catch (InvalidProtocolBufferException e) {
-          if (e.getMessage().contains("Protocol message was too large")) {
-            LOG.warn("Metadata section is larger than " + msgLimit + " bytes. Increasing the max" +
-                " size of the coded input stream." );
-
-            msgLimit = msgLimit << 1;
-            if (msgLimit > PROTOBUF_MESSAGE_MAX_LIMIT) {
-              LOG.error("Metadata section exceeds max protobuf message size of " +
-                  PROTOBUF_MESSAGE_MAX_LIMIT + " bytes.");
-              throw e;
-            }
-
-            // we must have failed in the middle of reading instream and instream doesn't support
-            // resetting the stream
-            instream = InStream.create("metadata", Lists.<DiskRange>newArrayList(
-                new BufferChunk(footerBuffer, 0)), metadataSize, codec, bufferSize);
-            in = CodedInputStream.newInstance(instream);
-          } else {
-            throw e;
-          }
-        }
-      } while (meta == null);
-      this.metadata = meta;
 
-      footerBuffer.position(position + metadataSize);
-      footerBuffer.limit(position + metadataSize + footerBufferSize);
-      instream = InStream.create("footer", Lists.<DiskRange>newArrayList(
-          new BufferChunk(footerBuffer, 0)), footerBufferSize, codec, bufferSize);
-      this.footer = OrcProto.Footer.parseFrom(instream);
+      this.metadata = extractMetadata(footerBuffer, position, metadataSize, codec, bufferSize);
+      this.footer = extractFooter(
+          footerBuffer, position + metadataSize, footerBufferSize, codec, bufferSize);
 
       footerBuffer.position(position);
       this.inspector = OrcStruct.createObjectInspector(0, footer.getTypesList());
@@ -518,7 +584,8 @@ public class ReaderImpl implements Reader {
    * that is useful for Reader implementation
    *
    */
-  static class FileMetaInfo{
+  static class FileMetaInfo {
+    private ByteBuffer footerMetaAndPsBuffer;
     final String compressionType;
     final int bufferSize;
     final int metadataSize;
@@ -526,30 +593,68 @@ public class ReaderImpl implements Reader {
     final List<Integer> versionList;
     final OrcFile.WriterVersion writerVersion;
 
+    /** Ctor used when reading splits - no version list or full footer buffer. */
     FileMetaInfo(String compressionType, int bufferSize, int metadataSize,
         ByteBuffer footerBuffer, OrcFile.WriterVersion writerVersion) {
       this(compressionType, bufferSize, metadataSize, footerBuffer, null,
-          writerVersion);
+          writerVersion, null);
     }
 
-    FileMetaInfo(String compressionType, int bufferSize, int metadataSize,
-                 ByteBuffer footerBuffer, List<Integer> versionList,
-                 OrcFile.WriterVersion writerVersion){
+    /** Ctor used when creating file info during init and when getting a new one. */
+    public FileMetaInfo(String compressionType, int bufferSize, int metadataSize,
+        ByteBuffer footerBuffer, List<Integer> versionList, WriterVersion writerVersion,
+        ByteBuffer fullFooterBuffer) {
       this.compressionType = compressionType;
       this.bufferSize = bufferSize;
       this.metadataSize = metadataSize;
       this.footerBuffer = footerBuffer;
       this.versionList = versionList;
       this.writerVersion = writerVersion;
+      this.footerMetaAndPsBuffer = fullFooterBuffer;
     }
   }
 
-  public FileMetaInfo getFileMetaInfo(){
+  public FileMetaInfo getFileMetaInfo() {
     return new FileMetaInfo(compressionKind.toString(), bufferSize,
-        metadataSize, footerByteBuffer, versionList, writerVersion);
+        metadataSize, footerByteBuffer, versionList, writerVersion, footerMetaAndPsBuffer);
   }
 
+  /** Same as FileMetaInfo, but with extra fields. FileMetaInfo is serialized for splits
+   * and so we don't just add fields to it; it's already messy and confusing. */
+  public static final class FooterInfo {
+    private final OrcProto.Footer footer;
+    private final Metadata metadata;
+    private final List<StripeInformation> stripes;
+    private final FileMetaInfo fileMetaInfo;
 
+    private FooterInfo(Metadata metadata, OrcProto.Footer footer, FileMetaInfo fileMetaInfo) {
+      this.metadata = metadata;
+      this.footer = footer;
+      this.fileMetaInfo = fileMetaInfo;
+      this.stripes = convertProtoStripesToStripes(footer.getStripesList());
+    }
+
+    public OrcProto.Footer getFooter() {
+      return footer;
+    }
+
+    public Metadata getMetadata() {
+      return metadata;
+    }
+
+    public FileMetaInfo getFileMetaInfo() {
+      return fileMetaInfo;
+    }
+
+    public List<StripeInformation> getStripes() {
+      return stripes;
+    }
+  }
+
+  @Override
+  public ByteBuffer getSerializedFileFooter() {
+    return footerMetaAndPsBuffer;
+  }
 
   @Override
   public RecordReader rows() throws IOException {
@@ -609,14 +714,19 @@ public class ReaderImpl implements Reader {
 
   @Override
   public long getRawDataSizeFromColIndices(List<Integer> colIndices) {
+    return getRawDataSizeFromColIndices(colIndices, footer);
+  }
+
+  public static long getRawDataSizeFromColIndices(
+      List<Integer> colIndices, OrcProto.Footer footer) {
     long result = 0;
     for (int colIdx : colIndices) {
-      result += getRawDataSizeOfColumn(colIdx);
+      result += getRawDataSizeOfColumn(colIdx, footer);
     }
     return result;
   }
 
-  private long getRawDataSizeOfColumn(int colIdx) {
+  private static long getRawDataSizeOfColumn(int colIdx, OrcProto.Footer footer) {
     OrcProto.ColumnStatistics colStat = footer.getStatistics(colIdx);
     long numVals = colStat.getNumberOfValues();
     Type type = footer.getTypes(colIdx);
@@ -738,4 +848,9 @@ public class ReaderImpl implements Reader {
   public MetadataReader metadata() throws IOException {
     return new MetadataReader(fileSystem, path, codec, bufferSize, footer.getTypesCount());
   }
+
+  @Override
+  public Footer getFooter() {
+    return footer;
+  }
 }
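
Context for the footer-extraction changes above: the new extractMetaInfoFromFooter(ByteBuffer, Path) overload relies on the ORC tail layout, in which a file ends with the metadata section, then the footer, then the PostScript, then one unsigned byte holding the PostScript length (footerSize and metadataSize are normally taken from the parsed PostScript). Below is a minimal, self-contained sketch of that offset arithmetic; the class and method names are hypothetical, and only the layout mirrors the patch.

import java.nio.ByteBuffer;

// Hypothetical names; only the tail layout mirrors the patch:
//   ... | metadata | footer | postscript | psLen (1 unsigned byte) | EOF
final class OrcTailLayoutSketch {
  static void locate(ByteBuffer bb, int footerSize, int metadataSize) {
    int baseOffset = bb.position();
    int lastByteAbsPos = baseOffset + bb.remaining() - 1;
    int psLen = bb.get(lastByteAbsPos) & 0xff;         // absolute get(): position unchanged
    int psAbsPos = lastByteAbsPos - psLen;             // PostScript starts psLen bytes back
    int footerAbsPos = psAbsPos - footerSize;          // footer immediately precedes it
    int metadataAbsPos = footerAbsPos - metadataSize;  // metadata precedes the footer
    System.out.printf("metadata@%d footer@%d postscript@%d psLen=%d%n",
        metadataAbsPos, footerAbsPos, psAbsPos, psLen);
  }

  public static void main(String[] args) {
    // 4-byte metadata + 6-byte footer + 3-byte postscript + psLen byte = 14 bytes.
    ByteBuffer bb = ByteBuffer.allocate(14);
    bb.put(13, (byte) 3);  // last byte: PostScript length
    locate(bb, 6, 4);      // prints: metadata@0 footer@4 postscript@10 psLen=3
  }
}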


[11/50] [abbrv] hive git commit: HIVE-10175: DynamicPartitionPruning lacks a fast-path exit for large IN() queries (Gopal V, reviewed by Jesus Camacho Rodriguez)

Posted by xu...@apache.org.
HIVE-10175: DynamicPartitionPruning lacks a fast-path exit for large IN() queries (Gopal V, reviewed by Jesus Camacho Rodriguez)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b6d1143a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b6d1143a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b6d1143a

Branch: refs/heads/beeline-cli
Commit: b6d1143aa7aaa20de035898f34df2d6b581895b6
Parents: d147a79
Author: Gopal V <go...@apache.org>
Authored: Fri Aug 28 01:22:45 2015 -0700
Committer: Gopal V <go...@apache.org>
Committed: Fri Aug 28 01:22:45 2015 -0700

----------------------------------------------------------------------
 .../optimizer/DynamicPartitionPruningOptimization.java  | 12 ++++++++++++
 1 file changed, 12 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/b6d1143a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
index f475926..5ebd28a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
@@ -189,6 +189,18 @@ public class DynamicPartitionPruningOptimization implements NodeProcessor {
       LOG.debug("TableScan: " + ts);
     }
 
+    if (ts == null) {
+      // could be a reduce sink
+      LOG.warn("Could not find the table scan for " + filter); 
+      return null;
+    } else {
+      Table table = ts.getConf().getTableMetadata();
+      if (table != null && !table.isPartitioned()) {
+        // table is not partitioned, skip optimizer
+        return null;
+      }
+    }
+
     // collect the dynamic pruning conditions
     removerContext.dynLists.clear();
     walkExprTree(desc.getPredicate(), removerContext);
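
The null/unpartitioned guard above is a fast-path exit: it returns before the expensive walk over a potentially huge IN() predicate tree whenever dynamic partition pruning cannot possibly apply. A minimal, self-contained sketch of the same pattern follows; all names are hypothetical and this is not the Hive API.

// Hypothetical sketch of the fast-path exit added above: return early,
// before the costly predicate walk, when pruning cannot apply.
final class FastPathExitSketch {
  interface TableScanStub { boolean isPartitioned(); }

  static String process(TableScanStub ts, String predicate) {
    if (ts == null) {
      return null;             // could be a reduce sink: no scan to prune
    }
    if (!ts.isPartitioned()) {
      return null;             // unpartitioned table: pruning is a no-op
    }
    // Only partitioned tables pay for the full expression-tree walk.
    return "walked: " + predicate;
  }

  public static void main(String[] args) {
    System.out.println(process(null, "key IN (1,2,3)"));        // null
    System.out.println(process(() -> false, "key IN (1,2,3)")); // null
    System.out.println(process(() -> true, "key IN (1,2,3)"));  // walked: key IN (1,2,3)
  }
}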


[48/50] [abbrv] hive git commit: HIVE-11617: Explain plan for multiple lateral views is very slow (Aihua Xu, reviewed by Jesus Camacho Rodriguez)

Posted by xu...@apache.org.
HIVE-11617: Explain plan for multiple lateral views is very slow (Aihua Xu, reviewed by Jesus Camacho Rodriguez)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7281a460
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7281a460
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7281a460

Branch: refs/heads/beeline-cli
Commit: 7281a46062bd4a6dea0c4ef80930246fad16bdea
Parents: 2d3316b
Author: Aihua Xu <ai...@gmail.com>
Authored: Tue Sep 8 11:37:01 2015 +0100
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Tue Sep 8 11:37:01 2015 +0100

----------------------------------------------------------------------
 .../hadoop/hive/ql/lib/DefaultGraphWalker.java  |   4 +-
 .../hadoop/hive/ql/lib/ForwardWalker.java       |   2 +-
 .../hadoop/hive/ql/lib/LevelOrderWalker.java    | 153 +++++++++++++++++++
 .../hadoop/hive/ql/lib/PreOrderWalker.java      |   2 +-
 .../hadoop/hive/ql/optimizer/ColumnPruner.java  |   2 +-
 .../hive/ql/optimizer/ConstantPropagate.java    |   2 +-
 .../hive/ql/optimizer/lineage/Generator.java    |   4 +-
 .../annotation/AnnotateWithOpTraits.java        |   6 +-
 .../annotation/AnnotateWithStatistics.java      |   6 +-
 .../ql/optimizer/unionproc/UnionProcessor.java  |  10 +-
 .../hadoop/hive/ql/parse/GenMapRedWalker.java   |   2 +-
 .../hadoop/hive/ql/parse/GenTezWorkWalker.java  |   2 +-
 .../apache/hadoop/hive/ql/parse/TezWalker.java  |   2 +-
 .../hive/ql/parse/spark/GenSparkWorkWalker.java |   2 +-
 .../ql/ppd/PredicateTransitivePropagate.java    |   4 +-
 .../clientpositive/correlationoptimizer5.q.out  |   6 +-
 ql/src/test/results/clientpositive/join32.q.out |   2 +-
 .../clientpositive/join32_lessSize.q.out        |   6 +-
 ql/src/test/results/clientpositive/join33.q.out |   2 +-
 .../test/results/clientpositive/lineage2.q.out  |  10 +-
 .../test/results/clientpositive/lineage3.q.out  |  18 +--
 .../results/clientpositive/spark/join32.q.out   |   2 +-
 .../clientpositive/spark/join32_lessSize.q.out  |   6 +-
 .../results/clientpositive/spark/join33.q.out   |   2 +-
 .../spark/subquery_multiinsert.q.java1.7.out    |  16 +-
 .../subquery_multiinsert.q.java1.7.out          |  16 +-
 .../clientpositive/tez/explainuser_2.q.out      |  12 +-
 27 files changed, 228 insertions(+), 73 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/7281a460/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java b/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java
index 07d2734..d452f50 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java
@@ -72,7 +72,7 @@ public class DefaultGraphWalker implements GraphWalker {
   /**
    * @return the doneList
    */
-  public Set<Node> getDispatchedList() {
+  protected Set<Node> getDispatchedList() {
     return retMap.keySet();
   }
 
@@ -143,7 +143,7 @@ public class DefaultGraphWalker implements GraphWalker {
    *          current operator in the graph
    * @throws SemanticException
    */
-  public void walk(Node nd) throws SemanticException {    
+  protected void walk(Node nd) throws SemanticException {
     // Push the node in the stack
     opStack.push(nd);
 

http://git-wip-us.apache.org/repos/asf/hive/blob/7281a460/ql/src/java/org/apache/hadoop/hive/ql/lib/ForwardWalker.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lib/ForwardWalker.java b/ql/src/java/org/apache/hadoop/hive/ql/lib/ForwardWalker.java
index 67b4700..a10dc52 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/lib/ForwardWalker.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/lib/ForwardWalker.java
@@ -63,7 +63,7 @@ public class ForwardWalker extends DefaultGraphWalker {
    * @throws SemanticException
    */
   @Override
-  public void walk(Node nd) throws SemanticException {
+  protected void walk(Node nd) throws SemanticException {
     if (opStack.empty() || nd != opStack.peek()) {
       opStack.push(nd);
     }

http://git-wip-us.apache.org/repos/asf/hive/blob/7281a460/ql/src/java/org/apache/hadoop/hive/ql/lib/LevelOrderWalker.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lib/LevelOrderWalker.java b/ql/src/java/org/apache/hadoop/hive/ql/lib/LevelOrderWalker.java
new file mode 100644
index 0000000..cf05d5f
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/lib/LevelOrderWalker.java
@@ -0,0 +1,153 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.lib;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Stack;
+
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+
+/**
+ * This is a level-wise walker implementation which dispatches a node only
+ * after all of its parents have been dispatched.
+ *
+ * Each node will be accessed once, while it could be dispatched multiple times.
+ * e.g., for a lineage generator with an operator tree, 2 levels of the current
+ * node's ancestors need to be kept in the operator stack.
+ *                  FIL(2) FIL(4)
+ *                      |    |
+ *                    RS(3) RS(5)
+ *                       \  /
+ *                      JOIN(7)
+ * The join lineage needs to be called twice for JOIN(7) node with different operator
+ * ancestors.
+ */
+public class LevelOrderWalker extends DefaultGraphWalker {
+  // Only specified nodes of these types will be walked.
+  // Empty set means all the nodes will be walked.
+  private HashSet<Class<? extends Node>> nodeTypes = new HashSet<Class<? extends Node>>();
+
+  // How many levels of ancestors to keep in the stack during dispatching
+  private final int numLevels;
+
+  /**
+   * Constructor that keeps all the ancestors in the operator stack during dispatching.
+   * @param disp Dispatcher to call for each op encountered
+   */
+  public LevelOrderWalker(Dispatcher disp) {
+    super(disp);
+    this.numLevels = Integer.MAX_VALUE;
+  }
+
+  /**
+   * Constructor with specified number of ancestor levels to keep in the operator
+   * stack during dispatching.
+   * @param disp      Dispatcher to call for each op encountered
+   * @param numLevels Number of ancestor levels
+   */
+  public LevelOrderWalker(Dispatcher disp, int numLevels) {
+    super(disp);
+    this.numLevels = numLevels;
+  }
+
+  @SuppressWarnings("unchecked")
+  public void setNodeTypes(Class<? extends Node> ...nodeTypes) {
+    this.nodeTypes.addAll(Arrays.asList(nodeTypes));
+  }
+
+  /**
+   * starting point for walking.
+   *
+   * @throws SemanticException
+   */
+  @SuppressWarnings("unchecked")
+  @Override
+  public void startWalking(Collection<Node> startNodes,
+      HashMap<Node, Object> nodeOutput) throws SemanticException {
+    toWalk.addAll(startNodes);
+
+    // Starting from the startNodes, add the children whose parents have been
+    // included in the list.
+    HashSet<Node> addedNodes = new HashSet<Node>();
+    for (Node node : startNodes) {
+      addedNodes.add(node);
+    }
+    int index = 0;
+    while(index < toWalk.size()) {
+      if (toWalk.get(index).getChildren() != null) {
+        for(Node child : toWalk.get(index).getChildren()) {
+          Operator<? extends OperatorDesc> childOP =
+              (Operator<? extends OperatorDesc>) child;
+
+          if (!addedNodes.contains(child) &&
+              (childOP.getParentOperators() == null ||
+              addedNodes.containsAll(childOP.getParentOperators()))) {
+            toWalk.add(child);
+            addedNodes.add(child);
+          }
+        }
+      }
+      ++index;
+    }
+
+    for(Node nd : toWalk) {
+      if (!nodeTypes.isEmpty() && !nodeTypes.contains(nd.getClass())) {
+        continue;
+      }
+
+      opStack.clear();
+      opStack.push(nd);
+      walk(nd, 0, opStack);
+      if (nodeOutput != null && getDispatchedList().contains(nd)) {
+        nodeOutput.put(nd, retMap.get(nd));
+      }
+    }
+  }
+
+  /**
+   * Enumerate numLevels of ancestors by putting them in the stack and dispatch
+   * the current node.
+   * @param nd current operator in the ancestor tree
+   * @param level how many levels of ancestors are included in the stack
+   * @param stack operator stack
+   * @throws SemanticException
+   */
+  @SuppressWarnings("unchecked")
+  private void walk(Node nd, int level, Stack<Node> stack) throws SemanticException {
+    List<Operator<? extends OperatorDesc>> parents =
+        ((Operator<? extends OperatorDesc>)nd).getParentOperators();
+
+    if (level >= numLevels || parents == null || parents.isEmpty()) {
+      dispatch(stack.peek(), stack);
+      return;
+    }
+
+    for(Node parent : parents) {
+      stack.add(0, parent);
+      walk(parent, level+1, stack);
+      stack.remove(0);
+    }
+  }
+}
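
For intuition about the new walker: "level order" here means a node is dispatched only after every one of its parents has been dispatched, which is what gives lineage processing a stable ancestor stack. The following is a minimal sketch of that scheduling rule over a plain string DAG; the names are hypothetical and this is not the Hive API.

import java.util.*;

// Hypothetical sketch of the scheduling rule LevelOrderWalker enforces:
// a node enters the walk list only once every one of its parents is in it.
final class LevelOrderSketch {
  static List<String> order(Map<String, List<String>> parentsOf, List<String> roots) {
    List<String> toWalk = new ArrayList<>(roots);  // the list doubles as a FIFO queue
    Set<String> added = new HashSet<>(roots);
    // Invert the parent map so each step can expand the frontier to children.
    Map<String, List<String>> childrenOf = new HashMap<>();
    parentsOf.forEach((child, ps) -> ps.forEach(
        p -> childrenOf.computeIfAbsent(p, k -> new ArrayList<>()).add(child)));
    for (int i = 0; i < toWalk.size(); i++) {
      for (String child : childrenOf.getOrDefault(toWalk.get(i), Collections.emptyList())) {
        if (!added.contains(child)
            && added.containsAll(parentsOf.getOrDefault(child, Collections.emptyList()))) {
          toWalk.add(child);   // all of this child's parents are already queued
          added.add(child);
        }
      }
    }
    return toWalk;
  }

  public static void main(String[] args) {
    // The FIL/RS/JOIN example from the javadoc above.
    Map<String, List<String>> parents = new HashMap<>();
    parents.put("RS(3)", Arrays.asList("FIL(2)"));
    parents.put("RS(5)", Arrays.asList("FIL(4)"));
    parents.put("JOIN(7)", Arrays.asList("RS(3)", "RS(5)"));
    System.out.println(order(parents, Arrays.asList("FIL(2)", "FIL(4)")));
    // -> [FIL(2), FIL(4), RS(3), RS(5), JOIN(7)]
  }
}

With this rule, JOIN(7) from the javadoc example cannot be scheduled until both RS(3) and RS(5) have been, regardless of which filter is visited first.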

http://git-wip-us.apache.org/repos/asf/hive/blob/7281a460/ql/src/java/org/apache/hadoop/hive/ql/lib/PreOrderWalker.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lib/PreOrderWalker.java b/ql/src/java/org/apache/hadoop/hive/ql/lib/PreOrderWalker.java
index f22694b..8d8dab8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/lib/PreOrderWalker.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/lib/PreOrderWalker.java
@@ -51,7 +51,7 @@ public class PreOrderWalker extends DefaultGraphWalker {
    * @throws SemanticException
    */
   @Override
-  public void walk(Node nd) throws SemanticException {
+  protected void walk(Node nd) throws SemanticException {
     opStack.push(nd);
     dispatch(nd, opStack);
 

http://git-wip-us.apache.org/repos/asf/hive/blob/7281a460/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java
index 735b448..561b8fc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java
@@ -150,7 +150,7 @@ public class ColumnPruner implements Transform {
      * Walk the given operator.
      */
     @Override
-    public void walk(Node nd) throws SemanticException {
+    protected void walk(Node nd) throws SemanticException {
       boolean walkChildren = true;
       opStack.push(nd);
 

http://git-wip-us.apache.org/repos/asf/hive/blob/7281a460/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java
index b6f1f27..aacded6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java
@@ -140,7 +140,7 @@ public class ConstantPropagate implements Transform {
     }
 
     @Override
-    public void walk(Node nd) throws SemanticException {
+    protected void walk(Node nd) throws SemanticException {
 
       List<Node> parents = ((Operator) nd).getParentOperators();
       if ((parents == null)

http://git-wip-us.apache.org/repos/asf/hive/blob/7281a460/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/Generator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/Generator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/Generator.java
index 9a5cf55..82e26d9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/Generator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/Generator.java
@@ -37,9 +37,9 @@ import org.apache.hadoop.hive.ql.exec.UnionOperator;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
 import org.apache.hadoop.hive.ql.lib.Dispatcher;
 import org.apache.hadoop.hive.ql.lib.GraphWalker;
+import org.apache.hadoop.hive.ql.lib.LevelOrderWalker;
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
-import org.apache.hadoop.hive.ql.lib.PreOrderWalker;
 import org.apache.hadoop.hive.ql.lib.Rule;
 import org.apache.hadoop.hive.ql.lib.RuleRegExp;
 import org.apache.hadoop.hive.ql.optimizer.Transform;
@@ -94,7 +94,7 @@ public class Generator implements Transform {
 
     // The dispatcher fires the processor corresponding to the closest matching rule and passes the context along
     Dispatcher disp = new DefaultRuleDispatcher(OpProcFactory.getDefaultProc(), opRules, lCtx);
-    GraphWalker ogw = new PreOrderWalker(disp);
+    GraphWalker ogw = new LevelOrderWalker(disp, 2);
 
     // Create a list of topop nodes
     ArrayList<Node> topNodes = new ArrayList<Node>();

http://git-wip-us.apache.org/repos/asf/hive/blob/7281a460/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/AnnotateWithOpTraits.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/AnnotateWithOpTraits.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/AnnotateWithOpTraits.java
index c304e97..0398115 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/AnnotateWithOpTraits.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/AnnotateWithOpTraits.java
@@ -35,9 +35,9 @@ import org.apache.hadoop.hive.ql.exec.UnionOperator;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
 import org.apache.hadoop.hive.ql.lib.Dispatcher;
 import org.apache.hadoop.hive.ql.lib.GraphWalker;
+import org.apache.hadoop.hive.ql.lib.LevelOrderWalker;
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
-import org.apache.hadoop.hive.ql.lib.PreOrderWalker;
 import org.apache.hadoop.hive.ql.lib.Rule;
 import org.apache.hadoop.hive.ql.lib.RuleRegExp;
 import org.apache.hadoop.hive.ql.optimizer.Transform;
@@ -55,7 +55,7 @@ public class AnnotateWithOpTraits implements Transform {
   public ParseContext transform(ParseContext pctx) throws SemanticException {
     AnnotateOpTraitsProcCtx annotateCtx = new AnnotateOpTraitsProcCtx(pctx);
 
-    // create a walker which walks the tree in a DFS manner while maintaining the
+    // create a walker which walks the tree in a BFS manner while maintaining the
     // operator stack. The dispatcher generates the plan from the operator tree
     Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
     opRules.put(new RuleRegExp("TS", TableScanOperator.getOperatorName() + "%"),
@@ -83,7 +83,7 @@ public class AnnotateWithOpTraits implements Transform {
     // rule and passes the context along
     Dispatcher disp = new DefaultRuleDispatcher(OpTraitsRulesProcFactory.getDefaultRule(), opRules,
         annotateCtx);
-    GraphWalker ogw = new PreOrderWalker(disp);
+    GraphWalker ogw = new LevelOrderWalker(disp, 0);
 
     // Create a list of topop nodes
     ArrayList<Node> topNodes = new ArrayList<Node>();

http://git-wip-us.apache.org/repos/asf/hive/blob/7281a460/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateWithStatistics.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateWithStatistics.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateWithStatistics.java
index 4aeeff2..c8b3545 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateWithStatistics.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateWithStatistics.java
@@ -33,9 +33,9 @@ import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
 import org.apache.hadoop.hive.ql.lib.Dispatcher;
 import org.apache.hadoop.hive.ql.lib.GraphWalker;
+import org.apache.hadoop.hive.ql.lib.LevelOrderWalker;
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
-import org.apache.hadoop.hive.ql.lib.PreOrderWalker;
 import org.apache.hadoop.hive.ql.lib.Rule;
 import org.apache.hadoop.hive.ql.lib.RuleRegExp;
 import org.apache.hadoop.hive.ql.optimizer.Transform;
@@ -48,7 +48,7 @@ public class AnnotateWithStatistics implements Transform {
   public ParseContext transform(ParseContext pctx) throws SemanticException {
     AnnotateStatsProcCtx aspCtx = new AnnotateStatsProcCtx(pctx);
 
-    // create a walker which walks the tree in a DFS manner while maintaining the
+    // create a walker which walks the tree in a BFS manner while maintaining the
     // operator stack. The dispatcher generates the plan from the operator tree
     Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
     opRules.put(new RuleRegExp("TS", TableScanOperator.getOperatorName() + "%"),
@@ -70,7 +70,7 @@ public class AnnotateWithStatistics implements Transform {
     // rule and passes the context along
     Dispatcher disp = new DefaultRuleDispatcher(StatsRulesProcFactory.getDefaultRule(), opRules,
         aspCtx);
-    GraphWalker ogw = new PreOrderWalker(disp);
+    GraphWalker ogw = new LevelOrderWalker(disp, 0);
 
     // Create a list of topop nodes
     ArrayList<Node> topNodes = new ArrayList<Node>();

http://git-wip-us.apache.org/repos/asf/hive/blob/7281a460/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java
index 9937343..b1286e0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java
@@ -31,9 +31,9 @@ import org.apache.hadoop.hive.ql.exec.UnionOperator;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
 import org.apache.hadoop.hive.ql.lib.Dispatcher;
 import org.apache.hadoop.hive.ql.lib.GraphWalker;
+import org.apache.hadoop.hive.ql.lib.LevelOrderWalker;
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
-import org.apache.hadoop.hive.ql.lib.PreOrderWalker;
 import org.apache.hadoop.hive.ql.lib.Rule;
 import org.apache.hadoop.hive.ql.lib.RuleRegExp;
 import org.apache.hadoop.hive.ql.optimizer.Transform;
@@ -66,7 +66,7 @@ public class UnionProcessor implements Transform {
    *          the current parse context
    */
   public ParseContext transform(ParseContext pCtx) throws SemanticException {
-    // create a walker which walks the tree in a DFS manner while maintaining
+    // create a walker which walks the tree in a BFS manner while maintaining
     // the operator stack.
     Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
     opRules.put(new RuleRegExp("R1",
@@ -85,7 +85,8 @@ public class UnionProcessor implements Transform {
     uCtx.setParseContext(pCtx);
     Dispatcher disp = new DefaultRuleDispatcher(UnionProcFactory.getNoUnion(),
         opRules, uCtx);
-    GraphWalker ogw = new PreOrderWalker(disp);
+    LevelOrderWalker ogw = new LevelOrderWalker(disp);
+    ogw.setNodeTypes(UnionOperator.class);
 
     // Create a list of topop nodes
     ArrayList<Node> topNodes = new ArrayList<Node>();
@@ -109,7 +110,8 @@ public class UnionProcessor implements Transform {
         UnionProcFactory.getUnionNoProcessFile());
 
       disp = new DefaultRuleDispatcher(UnionProcFactory.getNoUnion(), opRules, uCtx);
-      ogw = new PreOrderWalker(disp);
+      ogw = new LevelOrderWalker(disp);
+      ogw.setNodeTypes(FileSinkOperator.class);
 
       // Create a list of topop nodes
       topNodes.clear();

http://git-wip-us.apache.org/repos/asf/hive/blob/7281a460/ql/src/java/org/apache/hadoop/hive/ql/parse/GenMapRedWalker.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenMapRedWalker.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenMapRedWalker.java
index 9583a1b..c1056ae 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenMapRedWalker.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenMapRedWalker.java
@@ -46,7 +46,7 @@ public class GenMapRedWalker extends DefaultGraphWalker {
    *          operator being walked
    */
   @Override
-  public void walk(Node nd) throws SemanticException {
+  protected void walk(Node nd) throws SemanticException {
     List<? extends Node> children = nd.getChildren();
 
     // maintain the stack of operators encountered

http://git-wip-us.apache.org/repos/asf/hive/blob/7281a460/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWorkWalker.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWorkWalker.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWorkWalker.java
index 2d8c8b2..8927579 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWorkWalker.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWorkWalker.java
@@ -82,7 +82,7 @@ public class GenTezWorkWalker extends DefaultGraphWalker {
    * @param nd operator being walked
    */
   @Override
-  public void walk(Node nd) throws SemanticException {
+  protected void walk(Node nd) throws SemanticException {
     List<? extends Node> children = nd.getChildren();
 
     // maintain the stack of operators encountered

http://git-wip-us.apache.org/repos/asf/hive/blob/7281a460/ql/src/java/org/apache/hadoop/hive/ql/parse/TezWalker.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezWalker.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezWalker.java
index 2f63c1a..3187497 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezWalker.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezWalker.java
@@ -46,7 +46,7 @@ public class TezWalker extends DefaultGraphWalker {
    *          operator being walked
    */
   @Override
-  public void walk(Node nd) throws SemanticException {
+  protected void walk(Node nd) throws SemanticException {
     List<? extends Node> children = nd.getChildren();
 
     // maintain the stack of operators encountered

http://git-wip-us.apache.org/repos/asf/hive/blob/7281a460/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkWorkWalker.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkWorkWalker.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkWorkWalker.java
index e31c025..4450079 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkWorkWalker.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkWorkWalker.java
@@ -82,7 +82,7 @@ public class GenSparkWorkWalker extends DefaultGraphWalker {
    * @param nd operator being walked
    */
   @Override
-  public void walk(Node nd) throws SemanticException {
+  protected void walk(Node nd) throws SemanticException {
     List<? extends Node> children = nd.getChildren();
 
     // maintain the stack of operators encountered

http://git-wip-us.apache.org/repos/asf/hive/blob/7281a460/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicateTransitivePropagate.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicateTransitivePropagate.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicateTransitivePropagate.java
index ea1f713..fb76d5d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicateTransitivePropagate.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicateTransitivePropagate.java
@@ -37,10 +37,10 @@ import org.apache.hadoop.hive.ql.exec.RowSchema;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
 import org.apache.hadoop.hive.ql.lib.Dispatcher;
 import org.apache.hadoop.hive.ql.lib.GraphWalker;
+import org.apache.hadoop.hive.ql.lib.LevelOrderWalker;
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
 import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
-import org.apache.hadoop.hive.ql.lib.PreOrderWalker;
 import org.apache.hadoop.hive.ql.lib.Rule;
 import org.apache.hadoop.hive.ql.lib.RuleRegExp;
 import org.apache.hadoop.hive.ql.optimizer.Transform;
@@ -74,7 +74,7 @@ public class PredicateTransitivePropagate implements Transform {
     // rule and passes the context along
     TransitiveContext context = new TransitiveContext();
     Dispatcher disp = new DefaultRuleDispatcher(null, opRules, context);
-    GraphWalker ogw = new PreOrderWalker(disp);
+    GraphWalker ogw = new LevelOrderWalker(disp, 2);
 
     // Create a list of topop nodes
     List<Node> topNodes = new ArrayList<Node>();

http://git-wip-us.apache.org/repos/asf/hive/blob/7281a460/ql/src/test/results/clientpositive/correlationoptimizer5.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/correlationoptimizer5.q.out b/ql/src/test/results/clientpositive/correlationoptimizer5.q.out
index 63741fc..7f2e19f 100644
--- a/ql/src/test/results/clientpositive/correlationoptimizer5.q.out
+++ b/ql/src/test/results/clientpositive/correlationoptimizer5.q.out
@@ -280,7 +280,7 @@ POSTHOOK: Input: default@t2
 POSTHOOK: Input: default@t3
 POSTHOOK: Input: default@t4
 POSTHOOK: Output: default@dest_co1
-POSTHOOK: Lineage: dest_co1.key EXPRESSION [(t1)x.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dest_co1.key SIMPLE [(t1)x.FieldSchema(name:key, type:int, comment:null), ]
 POSTHOOK: Lineage: dest_co1.val SIMPLE [(t4)n.FieldSchema(name:val, type:string, comment:null), ]
 PREHOOK: query: EXPLAIN
 INSERT OVERWRITE TABLE dest_co2
@@ -461,7 +461,7 @@ POSTHOOK: Input: default@t2
 POSTHOOK: Input: default@t3
 POSTHOOK: Input: default@t4
 POSTHOOK: Output: default@dest_co2
-POSTHOOK: Lineage: dest_co2.key EXPRESSION [(t1)x.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dest_co2.key SIMPLE [(t1)x.FieldSchema(name:key, type:int, comment:null), ]
 POSTHOOK: Lineage: dest_co2.val SIMPLE [(t4)n.FieldSchema(name:val, type:string, comment:null), ]
 PREHOOK: query: -- Enable hive.auto.convert.join.
 EXPLAIN
@@ -754,7 +754,7 @@ POSTHOOK: Input: default@t2
 POSTHOOK: Input: default@t3
 POSTHOOK: Input: default@t4
 POSTHOOK: Output: default@dest_co3
-POSTHOOK: Lineage: dest_co3.key EXPRESSION [(t1)x.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dest_co3.key SIMPLE [(t1)x.FieldSchema(name:key, type:int, comment:null), ]
 POSTHOOK: Lineage: dest_co3.val SIMPLE [(t4)n.FieldSchema(name:val, type:string, comment:null), ]
 PREHOOK: query: -- dest_co1, dest_co2 and dest_co3 should be same
 -- SELECT * FROM dest_co1 x ORDER BY x.key, x.val;

http://git-wip-us.apache.org/repos/asf/hive/blob/7281a460/ql/src/test/results/clientpositive/join32.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/join32.q.out b/ql/src/test/results/clientpositive/join32.q.out
index 5795669..afb373d 100644
--- a/ql/src/test/results/clientpositive/join32.q.out
+++ b/ql/src/test/results/clientpositive/join32.q.out
@@ -406,7 +406,7 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 POSTHOOK: Output: default@dest_j1
 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: dest_j1.value EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ]
 PREHOOK: query: select * from dest_j1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dest_j1

http://git-wip-us.apache.org/repos/asf/hive/blob/7281a460/ql/src/test/results/clientpositive/join32_lessSize.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/join32_lessSize.q.out b/ql/src/test/results/clientpositive/join32_lessSize.q.out
index c027dba..3c5f9e2 100644
--- a/ql/src/test/results/clientpositive/join32_lessSize.q.out
+++ b/ql/src/test/results/clientpositive/join32_lessSize.q.out
@@ -471,7 +471,7 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 POSTHOOK: Output: default@dest_j1
 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: dest_j1.value EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ]
 PREHOOK: query: select * from dest_j1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dest_j1
@@ -1107,9 +1107,9 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 POSTHOOK: Input: default@src1
 POSTHOOK: Output: default@dest_j1
-POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src1)x.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)w.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: dest_j1.value EXPRESSION [(src1)x.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_j1.value SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ]
 PREHOOK: query: select * from dest_j1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dest_j1

http://git-wip-us.apache.org/repos/asf/hive/blob/7281a460/ql/src/test/results/clientpositive/join33.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/join33.q.out b/ql/src/test/results/clientpositive/join33.q.out
index 5795669..afb373d 100644
--- a/ql/src/test/results/clientpositive/join33.q.out
+++ b/ql/src/test/results/clientpositive/join33.q.out
@@ -406,7 +406,7 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 POSTHOOK: Output: default@dest_j1
 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: dest_j1.value EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ]
 PREHOOK: query: select * from dest_j1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dest_j1

http://git-wip-us.apache.org/repos/asf/hive/blob/7281a460/ql/src/test/results/clientpositive/lineage2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/lineage2.q.out b/ql/src/test/results/clientpositive/lineage2.q.out
index 9b227c6..549b5f6 100644
--- a/ql/src/test/results/clientpositive/lineage2.q.out
+++ b/ql/src/test/results/clientpositive/lineage2.q.out
@@ -523,14 +523,14 @@ PREHOOK: Input: default@src1
 PREHOOK: Input: default@src2
 PREHOOK: Output: database:default
 PREHOOK: Output: default@dest3
-{"version":"1.0","engine":"mr","hash":"a2c4e9a3ec678039814f5d84b1e38ce4","queryText":"create table dest3 as\n  select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 1","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"(length(src1.key) > 1)","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest3.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest3.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest3.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest3.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"
 },{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]}
+{"version":"1.0","engine":"mr","hash":"a2c4e9a3ec678039814f5d84b1e38ce4","queryText":"create table dest3 as\n  select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 1","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0,1,2,3],"expression":"(length(src1.key) > 1)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest3.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest3.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest3.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest3.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"
 },{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]}
 PREHOOK: query: insert overwrite table dest2
   select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 3
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src1
 PREHOOK: Input: default@src2
 PREHOOK: Output: default@dest2
-{"version":"1.0","engine":"mr","hash":"76d84512204ddc576ad4d93f252e4358","queryText":"insert overwrite table dest2\n  select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 3","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"(length(src1.key) > 3)","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1
 .value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]}
+{"version":"1.0","engine":"mr","hash":"76d84512204ddc576ad4d93f252e4358","queryText":"insert overwrite table dest2\n  select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 3","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0,1,2,3],"expression":"(length(src1.key) > 3)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1
 .value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]}
 PREHOOK: query: drop table if exists dest_l1
 PREHOOK: type: DROPTABLE
 PREHOOK: query: CREATE TABLE dest_l1(key INT, value STRING) STORED AS TEXTFILE
@@ -593,7 +593,7 @@ PREHOOK: Input: default@dept
 PREHOOK: Input: default@emp
 PREHOOK: Input: default@project
 PREHOOK: Output: default@tgt
-{"version":"1.0","engine":"mr","hash":"f59797e0422d2e51515063374dfac361","queryText":"INSERT INTO TABLE tgt\nSELECT emd.dept_name, emd.name, emd.emp_id, emd.mgr_id, p.project_id, p.project_name\nFROM (\n  SELECT d.dept_name, em.name, em.emp_id, em.mgr_id, em.dept_id\n  FROM (\n    SELECT e.name, e.dept_id, e.emp_id emp_id, m.emp_id mgr_id\n    FROM emp e JOIN emp m ON e.emp_id = m.emp_id\n    ) em\n  JOIN dept d ON d.dept_id = em.dept_id\n  ) emd JOIN project p ON emd.dept_id = p.project_id","edges":[{"sources":[6],"targets":[0],"edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"expression":"emd.name","edgeType":"PROJECTION"},{"sources":[8],"targets":[2],"expression":"emd.emp_id","edgeType":"PROJECTION"},{"sources":[8],"targets":[3],"expression":"emd.mgr_id","edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"edgeType":"PROJECTION"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id = m.emp_id)","edgeType":"PR
 EDICATE"},{"sources":[11,12],"targets":[0,1,2,3,4,5],"expression":"(em._col1 = d.dept_id)","edgeType":"PREDICATE"},{"sources":[11,9],"targets":[0,1,2,3,4,5],"expression":"(emd._col4 = p.project_id)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.tgt.dept_name"},{"id":1,"vertexType":"COLUMN","vertexId":"default.tgt.name"},{"id":2,"vertexType":"COLUMN","vertexId":"default.tgt.emp_id"},{"id":3,"vertexType":"COLUMN","vertexId":"default.tgt.mgr_id"},{"id":4,"vertexType":"COLUMN","vertexId":"default.tgt.proj_id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.tgt.proj_name"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dept.dept_name"},{"id":7,"vertexType":"COLUMN","vertexId":"default.emp.name"},{"id":8,"vertexType":"COLUMN","vertexId":"default.emp.emp_id"},{"id":9,"vertexType":"COLUMN","vertexId":"default.project.project_id"},{"id":10,"vertexType":"COLUMN","vertexId":"default.project.project_name"},{"id":11,"vertexType":"COLUMN","vertexId":
 "default.emp.dept_id"},{"id":12,"vertexType":"COLUMN","vertexId":"default.dept.dept_id"}]}
+{"version":"1.0","engine":"mr","hash":"f59797e0422d2e51515063374dfac361","queryText":"INSERT INTO TABLE tgt\nSELECT emd.dept_name, emd.name, emd.emp_id, emd.mgr_id, p.project_id, p.project_name\nFROM (\n  SELECT d.dept_name, em.name, em.emp_id, em.mgr_id, em.dept_id\n  FROM (\n    SELECT e.name, e.dept_id, e.emp_id emp_id, m.emp_id mgr_id\n    FROM emp e JOIN emp m ON e.emp_id = m.emp_id\n    ) em\n  JOIN dept d ON d.dept_id = em.dept_id\n  ) emd JOIN project p ON emd.dept_id = p.project_id","edges":[{"sources":[6],"targets":[0],"edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"edgeType":"PROJECTION"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id = m.emp_id)","edgeType":"PREDICATE"},{"sources":[11,12],"targets":[0,1,2,3,4,5],"expression":"(em._col1 = d.dept_id)","edgeType":"PREDICATE"},{"sources":[1
 1,9],"targets":[0,1,2,3,4,5],"expression":"(emd._col4 = p.project_id)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.tgt.dept_name"},{"id":1,"vertexType":"COLUMN","vertexId":"default.tgt.name"},{"id":2,"vertexType":"COLUMN","vertexId":"default.tgt.emp_id"},{"id":3,"vertexType":"COLUMN","vertexId":"default.tgt.mgr_id"},{"id":4,"vertexType":"COLUMN","vertexId":"default.tgt.proj_id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.tgt.proj_name"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dept.dept_name"},{"id":7,"vertexType":"COLUMN","vertexId":"default.emp.name"},{"id":8,"vertexType":"COLUMN","vertexId":"default.emp.emp_id"},{"id":9,"vertexType":"COLUMN","vertexId":"default.project.project_id"},{"id":10,"vertexType":"COLUMN","vertexId":"default.project.project_name"},{"id":11,"vertexType":"COLUMN","vertexId":"default.emp.dept_id"},{"id":12,"vertexType":"COLUMN","vertexId":"default.dept.dept_id"}]}
 PREHOOK: query: drop table if exists dest_l2
 PREHOOK: type: DROPTABLE
 PREHOOK: query: create table dest_l2 (id int, c1 tinyint, c2 int, c3 bigint) stored as textfile
@@ -646,7 +646,7 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@dest_l2
 PREHOOK: Input: default@dest_l3
 #### A masked pattern was here ####
-{"version":"1.0","engine":"mr","hash":"01879c619517509d9f5b6ead998bb4bb","queryText":"select sum(a.c1), count(b.c1), b.c2, b.c3\nfrom dest_l2 a join dest_l3 b on (a.id = b.id)\nwhere a.c2 != 10 and b.c3 > 0\ngroup by a.c1, a.c2, a.id, b.c1, b.c2, b.c3\nhaving count(a.c2) > 0\norder by b.c3 limit 5","edges":[{"sources":[4],"targets":[0],"expression":"sum(default.dest_l2.c1)","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"count(default.dest_l3.c1)","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[8,7],"targets":[0,1,2,3],"expression":"((a.c2 <> 10) and (b.c3 > 0))","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3],"expression":"(count(default.dest_l2.c2) > 0)","edgeType":"PREDICATE"},{"sources":[9,10],"targets":[0,1,2,3],"expression":"(a.id = b.id)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"_c0"},{"id":1,"vertexType":"COLUMN
 ","vertexId":"_c1"},{"id":2,"vertexType":"COLUMN","vertexId":"b.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"b.c3"},{"id":4,"vertexType":"COLUMN","vertexId":"default.dest_l2.c1"},{"id":5,"vertexType":"COLUMN","vertexId":"default.dest_l3.c1"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dest_l3.c2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.dest_l3.c3"},{"id":8,"vertexType":"COLUMN","vertexId":"default.dest_l2.c2"},{"id":9,"vertexType":"COLUMN","vertexId":"default.dest_l2.id"},{"id":10,"vertexType":"COLUMN","vertexId":"default.dest_l3.id"}]}
+{"version":"1.0","engine":"mr","hash":"01879c619517509d9f5b6ead998bb4bb","queryText":"select sum(a.c1), count(b.c1), b.c2, b.c3\nfrom dest_l2 a join dest_l3 b on (a.id = b.id)\nwhere a.c2 != 10 and b.c3 > 0\ngroup by a.c1, a.c2, a.id, b.c1, b.c2, b.c3\nhaving count(a.c2) > 0\norder by b.c3 limit 5","edges":[{"sources":[4],"targets":[0],"expression":"sum(default.dest_l2.c1)","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"count(default.dest_l3.c1)","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[8,9],"targets":[0,1,2,3],"expression":"(a.id = b.id)","edgeType":"PREDICATE"},{"sources":[10,7],"targets":[0,1,2,3],"expression":"((a.c2 <> 10) and (b.c3 > 0))","edgeType":"PREDICATE"},{"sources":[10],"targets":[0,1,2,3],"expression":"(count(default.dest_l2.c2) > 0)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"_c0"},{"id":1,"vertexType":"COLUM
 N","vertexId":"_c1"},{"id":2,"vertexType":"COLUMN","vertexId":"b.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"b.c3"},{"id":4,"vertexType":"COLUMN","vertexId":"default.dest_l2.c1"},{"id":5,"vertexType":"COLUMN","vertexId":"default.dest_l3.c1"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dest_l3.c2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.dest_l3.c3"},{"id":8,"vertexType":"COLUMN","vertexId":"default.dest_l2.id"},{"id":9,"vertexType":"COLUMN","vertexId":"default.dest_l3.id"},{"id":10,"vertexType":"COLUMN","vertexId":"default.dest_l2.c2"}]}
 1	1	s2	15
 PREHOOK: query: drop table if exists t
 PREHOOK: type: DROPTABLE
@@ -659,7 +659,7 @@ PREHOOK: Input: default@dest_l2
 PREHOOK: Input: default@dest_l3
 PREHOOK: Output: database:default
 PREHOOK: Output: default@t
-{"version":"1.0","engine":"mr","hash":"0d2f15b494111ffe236d5be42a76fa28","queryText":"create table t as\nselect distinct a.c2, a.c3 from dest_l2 a\ninner join dest_l3 b on (a.id = b.id)\nwhere a.id > 0 and b.c3 = 15","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[4,5],"targets":[0,1],"expression":"((a.id > 0) and (b.c3 = 15))","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1],"expression":"(a.id = b.id)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.t.c2"},{"id":1,"vertexType":"COLUMN","vertexId":"default.t.c3"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l2.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l2.c3"},{"id":4,"vertexType":"COLUMN","vertexId":"default.dest_l2.id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.dest_l3.c3"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dest_l3.id"}]}
+{"version":"1.0","engine":"mr","hash":"0d2f15b494111ffe236d5be42a76fa28","queryText":"create table t as\nselect distinct a.c2, a.c3 from dest_l2 a\ninner join dest_l3 b on (a.id = b.id)\nwhere a.id > 0 and b.c3 = 15","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[4,5],"targets":[0,1],"expression":"(a.id = b.id)","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1],"expression":"((a.id > 0) and (b.c3 = 15))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.t.c2"},{"id":1,"vertexType":"COLUMN","vertexId":"default.t.c3"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l2.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l2.c3"},{"id":4,"vertexType":"COLUMN","vertexId":"default.dest_l2.id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.dest_l3.id"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dest_l3.c3"}]}
 PREHOOK: query: SELECT substr(src1.key,1,1), count(DISTINCT substr(src1.value,5)),
 concat(substr(src1.key,1,1),sum(substr(src1.value,5)))
 from src1

http://git-wip-us.apache.org/repos/asf/hive/blob/7281a460/ql/src/test/results/clientpositive/lineage3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/lineage3.q.out b/ql/src/test/results/clientpositive/lineage3.q.out
index b6b4e0b..6fd2aa4 100644
--- a/ql/src/test/results/clientpositive/lineage3.q.out
+++ b/ql/src/test/results/clientpositive/lineage3.q.out
@@ -25,7 +25,7 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@alltypesorc
 PREHOOK: Output: default@d1
 PREHOOK: Output: default@d2
-{"version":"1.0","engine":"mr","hash":"8703e4091ebd4c96afd3cac83e3a2957","queryText":"from (select a.ctinyint x, b.cstring1 y\nfrom alltypesorc a join alltypesorc b on a.cint = b.cbigint) t\ninsert into table d1 select x where y is null\ninsert into table d2 select y where x > 0","edges":[{"sources":[2],"targets":[0],"expression":"UDFToInteger(x)","edgeType":"PROJECTION"},{"sources":[3],"targets":[0,1],"expression":"t.y is null","edgeType":"PREDICATE"},{"sources":[4,5],"targets":[0,1],"expression":"(UDFToLong(a.cint) = b.cbigint)","edgeType":"PREDICATE"},{"sources":[3],"targets":[1],"expression":"CAST( y AS varchar(128))","edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(t.x > 0)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.d1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.d2.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.
 cstring1"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"}]}
+{"version":"1.0","engine":"mr","hash":"8703e4091ebd4c96afd3cac83e3a2957","queryText":"from (select a.ctinyint x, b.cstring1 y\nfrom alltypesorc a join alltypesorc b on a.cint = b.cbigint) t\ninsert into table d1 select x where y is null\ninsert into table d2 select y where x > 0","edges":[{"sources":[2],"targets":[0],"expression":"UDFToInteger(x)","edgeType":"PROJECTION"},{"sources":[3,4],"targets":[0,1],"expression":"(UDFToLong(a.cint) = b.cbigint)","edgeType":"PREDICATE"},{"sources":[5],"targets":[0,1],"expression":"t.y is null","edgeType":"PREDICATE"},{"sources":[5],"targets":[1],"expression":"CAST( y AS varchar(128))","edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(t.x > 0)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.d1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.d2.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.
 cint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"}]}
 PREHOOK: query: drop table if exists t
 PREHOOK: type: DROPTABLE
 PREHOOK: query: create table t as
@@ -116,7 +116,7 @@ order by a.cbigint, a.ctinyint, b.cint, b.ctinyint limit 5
 PREHOOK: type: QUERY
 PREHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
-{"version":"1.0","engine":"mr","hash":"afd760470fc5aa6d3e8348dee03af97f","queryText":"select a.cbigint, a.ctinyint, b.cint, b.ctinyint\nfrom\n  (select ctinyint, cbigint from alltypesorc\n   union all\n   select ctinyint, cbigint from alltypesorc) a\n  inner join\n  alltypesorc b\n  on (a.ctinyint = b.ctinyint)\nwhere b.ctinyint < 100 and a.cbigint is not null and b.cint is not null\norder by a.cbigint, a.ctinyint, b.cint, b.ctinyint limit 5","edges":[{"sources":[4],"targets":[0],"expression":"a.ctinyint","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"a.cbigint","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[5],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"alltypesorc.cbigint is not null","edgeType":"PREDICATE"},{"sources":[5],"targets":[0,1,2,3],"expression":"(alltypesorc.ctinyint < 100)","edgeType":"PREDICATE"},{"sources":[5],"targets":[0,1,2,3],"expression":"(ctinyint < 100)
 ","edgeType":"PREDICATE"},{"sources":[5,6],"targets":[0,1,2,3],"expression":"((alltypesorc.ctinyint < 100) and alltypesorc.cint is not null)","edgeType":"PREDICATE"},{"sources":[5],"targets":[0,1,2,3],"expression":"(a.cbigint = alltypesorc.ctinyint)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"a.cbigint"},{"id":1,"vertexType":"COLUMN","vertexId":"a.ctinyint"},{"id":2,"vertexType":"COLUMN","vertexId":"b.cint"},{"id":3,"vertexType":"COLUMN","vertexId":"b.ctinyint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":6,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"}]}
+{"version":"1.0","engine":"mr","hash":"afd760470fc5aa6d3e8348dee03af97f","queryText":"select a.cbigint, a.ctinyint, b.cint, b.ctinyint\nfrom\n  (select ctinyint, cbigint from alltypesorc\n   union all\n   select ctinyint, cbigint from alltypesorc) a\n  inner join\n  alltypesorc b\n  on (a.ctinyint = b.ctinyint)\nwhere b.ctinyint < 100 and a.cbigint is not null and b.cint is not null\norder by a.cbigint, a.ctinyint, b.cint, b.ctinyint limit 5","edges":[{"sources":[4],"targets":[0],"expression":"cbigint","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"ctinyint","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[5],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"alltypesorc.cbigint is not null","edgeType":"PREDICATE"},{"sources":[5],"targets":[0,1,2,3],"expression":"(ctinyint < 100)","edgeType":"PREDICATE"},{"sources":[5],"targets":[0,1,2,3],"expression":"(ctinyint = alltypesorc.ctinyint
 )","edgeType":"PREDICATE"},{"sources":[5,6],"targets":[0,1,2,3],"expression":"((alltypesorc.ctinyint < 100) and alltypesorc.cint is not null)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"a.cbigint"},{"id":1,"vertexType":"COLUMN","vertexId":"a.ctinyint"},{"id":2,"vertexType":"COLUMN","vertexId":"b.cint"},{"id":3,"vertexType":"COLUMN","vertexId":"b.ctinyint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":6,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"}]}
 -2147311592	-51	-1071480828	-51
 -2147311592	-51	-1071480828	-51
 -2147311592	-51	-1067683781	-51
@@ -135,7 +135,7 @@ and x.ctinyint + length(c.cstring2) < 1000
 PREHOOK: type: QUERY
 PREHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
-{"version":"1.0","engine":"mr","hash":"3a12ad24b2622a8958df12d0bdc60f8a","queryText":"select x.ctinyint, x.cint, c.cbigint-100, c.cstring1\nfrom alltypesorc c\njoin (\n   select a.ctinyint ctinyint, b.cint cint\n   from (select * from alltypesorc a where cboolean1=false) a\n   join alltypesorc b on (a.cint = b.cbigint - 224870380)\n ) x on (x.cint = c.cint)\nwhere x.ctinyint > 10\nand x.cint < 4.5\nand x.ctinyint + length(c.cstring2) < 1000","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"cint","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"(c.cbigint - UDFToLong(100))","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[5],"targets":[0,1,2,3],"expression":"(UDFToDouble(c.cint) < 4.5)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3],"expression":"((UDFToInteger(ctinyint) + length(c.cstring2)) < 1000)","edgeType":"PREDICATE"},{"sources":[5],"targets":[0,1,2
 ,3],"expression":"(c.cint = c.cint)","edgeType":"PREDICATE"},{"sources":[9],"targets":[0,1,2,3],"expression":"(c.cboolean1 = false)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0,1,2,3],"expression":"(c.ctinyint > 10)","edgeType":"PREDICATE"},{"sources":[6,5],"targets":[0,1,2,3],"expression":"((c.cbigint - UDFToLong(224870380)) = UDFToLong(c.cint))","edgeType":"PREDICATE"},{"sources":[4,8],"targets":[0,1,2,3],"expression":"((UDFToInteger(c.ctinyint) + length(c.cstring2)) < 1000)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"x.ctinyint"},{"id":1,"vertexType":"COLUMN","vertexId":"x.cint"},{"id":2,"vertexType":"COLUMN","vertexId":"c2"},{"id":3,"vertexType":"COLUMN","vertexId":"c.cstring1"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":6,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"},{"id":7,"vertexType":"COLUMN","vertexId":"defau
 lt.alltypesorc.cstring1"},{"id":8,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring2"},{"id":9,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean1"}]}
+{"version":"1.0","engine":"mr","hash":"3a12ad24b2622a8958df12d0bdc60f8a","queryText":"select x.ctinyint, x.cint, c.cbigint-100, c.cstring1\nfrom alltypesorc c\njoin (\n   select a.ctinyint ctinyint, b.cint cint\n   from (select * from alltypesorc a where cboolean1=false) a\n   join alltypesorc b on (a.cint = b.cbigint - 224870380)\n ) x on (x.cint = c.cint)\nwhere x.ctinyint > 10\nand x.cint < 4.5\nand x.ctinyint + length(c.cstring2) < 1000","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"(c.cbigint - UDFToLong(100))","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[5],"targets":[0,1,2,3],"expression":"(UDFToDouble(c.cint) < 4.5)","edgeType":"PREDICATE"},{"sources":[5],"targets":[0,1,2,3],"expression":"(c.cint = c.cint)","edgeType":"PREDICATE"},{"sources":[6,5],"targets":[0,1,2,3],"expression":"((c.cbigint - UDFToLong(224870380)) =
  UDFToLong(c.cint))","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3],"expression":"(c.cboolean1 = false)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0,1,2,3],"expression":"(c.ctinyint > 10)","edgeType":"PREDICATE"},{"sources":[4,9],"targets":[0,1,2,3],"expression":"((UDFToInteger(c.ctinyint) + length(c.cstring2)) < 1000)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"x.ctinyint"},{"id":1,"vertexType":"COLUMN","vertexId":"x.cint"},{"id":2,"vertexType":"COLUMN","vertexId":"c2"},{"id":3,"vertexType":"COLUMN","vertexId":"c.cstring1"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":6,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"},{"id":7,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":8,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean1"},{"id":9,"vertexType":"COLUMN","vertexId":"default
 .alltypesorc.cstring2"}]}
 11	-654374827	857266369	OEfPnHnIYueoup
 PREHOOK: query: select c1, x2, x3
 from (
@@ -178,7 +178,7 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@alltypesorc
 PREHOOK: Input: default@src1
 #### A masked pattern was here ####
-{"version":"1.0","engine":"mr","hash":"8bf193b0658183be94e2428a79d91d10","queryText":"select * from src1 a\nwhere exists\n  (select cint from alltypesorc b\n   where a.key = b.ctinyint + 300)\nand key > 300","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(a.key) > UDFToDouble(300))","edgeType":"PREDICATE"},{"sources":[],"targets":[0,1],"expression":"(1 = 1)","edgeType":"PREDICATE"},{"sources":[2,4],"targets":[0,1],"expression":"(UDFToDouble(a.key) = UDFToDouble((UDFToInteger(b.ctinyint) + 300)))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"a.key"},{"id":1,"vertexType":"COLUMN","vertexId":"a.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"}]}
+{"version":"1.0","engine":"mr","hash":"8bf193b0658183be94e2428a79d91d10","queryText":"select * from src1 a\nwhere exists\n  (select cint from alltypesorc b\n   where a.key = b.ctinyint + 300)\nand key > 300","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(a.key) > UDFToDouble(300))","edgeType":"PREDICATE"},{"sources":[2,4],"targets":[0,1],"expression":"(UDFToDouble(a.key) = UDFToDouble((UDFToInteger(b.ctinyint) + 300)))","edgeType":"PREDICATE"},{"sources":[],"targets":[0,1],"expression":"(1 = 1)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"a.key"},{"id":1,"vertexType":"COLUMN","vertexId":"a.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"}]}
 311	val_311
 Warning: Shuffle Join JOIN[18][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: select key, value from src1
@@ -186,7 +186,7 @@ where key not in (select key+18 from src1) order by key
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src1
 #### A masked pattern was here ####
-{"version":"1.0","engine":"mr","hash":"9b488fe1d7cf018aad3825173808cd36","queryText":"select key, value from src1\nwhere key not in (select key+18 from src1) order by key","edges":[{"sources":[2],"targets":[0],"expression":"key","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"value","edgeType":"PROJECTION"},{"sources":[],"targets":[0,1],"expression":"(1 = 1)","edgeType":"PREDICATE"},{"sources":[],"targets":[0,1],"expression":"_o__c0 is null","edgeType":"PREDICATE"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(src1.key) + UDFToDouble(18)) is null","edgeType":"PREDICATE"},{"sources":[4],"targets":[0,1],"expression":"(count(*) = 0)","edgeType":"PREDICATE"},{"sources":[],"targets":[0,1],"expression":"true","edgeType":"PREDICATE"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(key) = (UDFToDouble(src1.key) + UDFToDouble(18)))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"key"},{"id":1,"vertexType":"COLUMN","ver
 texId":"value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":4,"vertexType":"TABLE","vertexId":"default.src1"}]}
+{"version":"1.0","engine":"mr","hash":"9b488fe1d7cf018aad3825173808cd36","queryText":"select key, value from src1\nwhere key not in (select key+18 from src1) order by key","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[],"targets":[0,1],"expression":"(1 = 1)","edgeType":"PREDICATE"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(src1.key) + UDFToDouble(18)) is null","edgeType":"PREDICATE"},{"sources":[4],"targets":[0,1],"expression":"(count(*) = 0)","edgeType":"PREDICATE"},{"sources":[],"targets":[0,1],"expression":"true","edgeType":"PREDICATE"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(src1.key) = (UDFToDouble(src1.key) + UDFToDouble(18)))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"key"},{"id":1,"vertexType":"COLUMN","vertexId":"value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"
 default.src1.value"},{"id":4,"vertexType":"TABLE","vertexId":"default.src1"}]}
 PREHOOK: query: select * from src1 a
 where not exists
   (select cint from alltypesorc b
@@ -196,7 +196,7 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@alltypesorc
 PREHOOK: Input: default@src1
 #### A masked pattern was here ####
-{"version":"1.0","engine":"mr","hash":"53191056e05af9080a30de853e8cea9c","queryText":"select * from src1 a\nwhere not exists\n  (select cint from alltypesorc b\n   where a.key = b.ctinyint + 300)\nand key > 300","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(a.key) > UDFToDouble(300))","edgeType":"PREDICATE"},{"sources":[],"targets":[0,1],"expression":"sq_corr_0 is null","edgeType":"PREDICATE"},{"sources":[],"targets":[0,1],"expression":"(1 = 1)","edgeType":"PREDICATE"},{"sources":[2,4],"targets":[0,1],"expression":"(UDFToDouble(a.key) = UDFToDouble((UDFToInteger(b.ctinyint) + 300)))","edgeType":"PREDICATE"},{"sources":[4],"targets":[0,1],"expression":"(UDFToInteger(b.ctinyint) + 300) is null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"a.key"},{"id":1,"vertexType":"COLUMN","vertexId":"a.value"},{"id":2,"vertexType":"COLUM
 N","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"}]}
+{"version":"1.0","engine":"mr","hash":"53191056e05af9080a30de853e8cea9c","queryText":"select * from src1 a\nwhere not exists\n  (select cint from alltypesorc b\n   where a.key = b.ctinyint + 300)\nand key > 300","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(a.key) > UDFToDouble(300))","edgeType":"PREDICATE"},{"sources":[2,4],"targets":[0,1],"expression":"(UDFToDouble(a.key) = UDFToDouble((UDFToInteger(b.ctinyint) + 300)))","edgeType":"PREDICATE"},{"sources":[],"targets":[0,1],"expression":"(1 = 1)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0,1],"expression":"(UDFToInteger(b.ctinyint) + 300) is null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"a.key"},{"id":1,"vertexType":"COLUMN","vertexId":"a.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src
 1.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"}]}
 369	
 401	val_401
 406	val_406
@@ -297,7 +297,7 @@ PREHOOK: type: CREATEVIEW
 PREHOOK: Input: default@alltypesorc
 PREHOOK: Output: database:default
 PREHOOK: Output: default@dest_v3
-{"version":"1.0","engine":"mr","hash":"a0c2481ce1c24895a43a950f93a10da7","queryText":"create view dest_v3 (a1, a2, a3, a4, a5, a6, a7) as\n  select x.csmallint, x.cbigint bint1, x.ctinyint, c.cbigint bint2, x.cint, x.cfloat, c.cstring1\n  from alltypesorc c\n  join (\n     select a.csmallint csmallint, a.ctinyint ctinyint, a.cstring2 cstring2,\n           a.cint cint, a.cstring1 ctring1, b.cfloat cfloat, b.cbigint cbigint\n     from ( select * from alltypesorc a where cboolean1=true ) a\n     join alltypesorc b on (a.csmallint = b.cint)\n   ) x on (x.ctinyint = c.cbigint)\n  where x.csmallint=11\n  and x.cint > 899\n  and x.cfloat > 4.5\n  and c.cstring1 < '7'\n  and x.cint + x.cfloat + length(c.cstring1) < 1000","edges":[{"sources":[7],"targets":[0],"expression":"x._col15","edgeType":"PROJECTION"},{"sources":[8],"targets":[1,2],"edgeType":"PROJECTION"},{"sources":[9],"targets":[3],"expression":"x._col16","edgeType":"PROJECTION"},{"sources":[10],"targets":[4],"expression":"x._col18"
 ,"edgeType":"PROJECTION"},{"sources":[11],"targets":[5],"edgeType":"PROJECTION"},{"sources":[12],"targets":[6],"edgeType":"PROJECTION"},{"sources":[13],"targets":[0,1,3,2,4,5,6],"expression":"(a.cboolean1 = true)","edgeType":"PREDICATE"},{"sources":[7,10,12,11],"targets":[0,1,3,2,4,5,6],"expression":"((x.csmallint = 11) and (x.cint > 899) and (x.cfloat > 4.5) and (c.cstring1 < '7') and (((x.cint + x.cfloat) + length(c.cstring1)) < 1000))","edgeType":"PREDICATE"},{"sources":[7,10],"targets":[0,1,3,2,4,5,6],"expression":"(UDFToInteger(a._col1) = b.cint)","edgeType":"PREDICATE"},{"sources":[8,9],"targets":[0,1,3,2,4,5,6],"expression":"(c.cbigint = UDFToLong(x._col1))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_v3.csmallint"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_v3.bint1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_v3.bint2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_v3.ctinyint"},{"id":4,"vertex
 Type":"COLUMN","vertexId":"default.dest_v3.cint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.dest_v3.cfloat"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dest_v3.cstring1"},{"id":7,"vertexType":"COLUMN","vertexId":"default.alltypesorc.csmallint"},{"id":8,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"},{"id":9,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":10,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":11,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cfloat"},{"id":12,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":13,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean1"}]}
+{"version":"1.0","engine":"mr","hash":"a0c2481ce1c24895a43a950f93a10da7","queryText":"create view dest_v3 (a1, a2, a3, a4, a5, a6, a7) as\n  select x.csmallint, x.cbigint bint1, x.ctinyint, c.cbigint bint2, x.cint, x.cfloat, c.cstring1\n  from alltypesorc c\n  join (\n     select a.csmallint csmallint, a.ctinyint ctinyint, a.cstring2 cstring2,\n           a.cint cint, a.cstring1 ctring1, b.cfloat cfloat, b.cbigint cbigint\n     from ( select * from alltypesorc a where cboolean1=true ) a\n     join alltypesorc b on (a.csmallint = b.cint)\n   ) x on (x.ctinyint = c.cbigint)\n  where x.csmallint=11\n  and x.cint > 899\n  and x.cfloat > 4.5\n  and c.cstring1 < '7'\n  and x.cint + x.cfloat + length(c.cstring1) < 1000","edges":[{"sources":[7],"targets":[0],"edgeType":"PROJECTION"},{"sources":[8],"targets":[1,2],"edgeType":"PROJECTION"},{"sources":[9],"targets":[3],"edgeType":"PROJECTION"},{"sources":[10],"targets":[4],"edgeType":"PROJECTION"},{"sources":[11],"targets":[5],"edgeType":"PROJ
 ECTION"},{"sources":[12],"targets":[6],"edgeType":"PROJECTION"},{"sources":[8,9],"targets":[0,1,3,2,4,5,6],"expression":"(c.cbigint = UDFToLong(x._col1))","edgeType":"PREDICATE"},{"sources":[13],"targets":[0,1,3,2,4,5,6],"expression":"(a.cboolean1 = true)","edgeType":"PREDICATE"},{"sources":[7,10],"targets":[0,1,3,2,4,5,6],"expression":"(UDFToInteger(a._col1) = b.cint)","edgeType":"PREDICATE"},{"sources":[7,10,11,12],"targets":[0,1,3,2,4,5,6],"expression":"((x.csmallint = 11) and (x.cint > 899) and (x.cfloat > 4.5) and (c.cstring1 < '7') and (((x.cint + x.cfloat) + length(c.cstring1)) < 1000))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_v3.csmallint"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_v3.bint1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_v3.bint2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_v3.ctinyint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.dest_v3.cint"},{"id":5,"vertexType":"
 COLUMN","vertexId":"default.dest_v3.cfloat"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dest_v3.cstring1"},{"id":7,"vertexType":"COLUMN","vertexId":"default.alltypesorc.csmallint"},{"id":8,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"},{"id":9,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":10,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":11,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cfloat"},{"id":12,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":13,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean1"}]}
 PREHOOK: query: alter view dest_v3 as
   select * from (
     select sum(a.ctinyint) over (partition by a.csmallint order by a.csmallint) a,
@@ -311,12 +311,12 @@ PREHOOK: type: CREATEVIEW
 PREHOOK: Input: default@alltypesorc
 PREHOOK: Output: database:default
 PREHOOK: Output: default@dest_v3
-{"version":"1.0","engine":"mr","hash":"949093880975cc807ad1a8003e8a8c7c","queryText":"alter view dest_v3 as\n  select * from (\n    select sum(a.ctinyint) over (partition by a.csmallint order by a.csmallint) a,\n      count(b.cstring1) x, b.cboolean1\n    from alltypesorc a join alltypesorc b on (a.cint = b.cint)\n    where a.cboolean2 = true and b.cfloat > 0\n    group by a.ctinyint, a.csmallint, b.cboolean1\n    having count(a.cint) > 10\n    order by a, x, b.cboolean1 limit 10) t","edges":[{"sources":[3,4,5,6,7],"targets":[0],"expression":"(tok_function sum (. (tok_table_or_col a) ctinyint) (tok_windowspec (tok_partitioningspec (tok_distributeby (. (tok_table_or_col a) csmallint)) (tok_orderby (tok_tabsortcolnameasc (. (tok_table_or_col a) csmallint))))))","edgeType":"PROJECTION"},{"sources":[6],"targets":[1],"expression":"count(default.alltypesorc.cstring1)","edgeType":"PROJECTION"},{"sources":[5],"targets":[2],"edgeType":"PROJECTION"},{"sources":[8,9],"targets":[0,1,2],"express
 ion":"((a.cboolean2 = true) and (b.cfloat > 0.0))","edgeType":"PREDICATE"},{"sources":[7],"targets":[0,1,2],"expression":"(count(default.alltypesorc.cint) > 10)","edgeType":"PREDICATE"},{"sources":[7],"targets":[0,1,2],"expression":"(a.cint = b.cint)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_v3.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_v3.x"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_v3.cboolean1"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.csmallint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean1"},{"id":6,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":7,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":8,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean2"},{"id":9,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cfloat"}]}
+{"version":"1.0","engine":"mr","hash":"949093880975cc807ad1a8003e8a8c7c","queryText":"alter view dest_v3 as\n  select * from (\n    select sum(a.ctinyint) over (partition by a.csmallint order by a.csmallint) a,\n      count(b.cstring1) x, b.cboolean1\n    from alltypesorc a join alltypesorc b on (a.cint = b.cint)\n    where a.cboolean2 = true and b.cfloat > 0\n    group by a.ctinyint, a.csmallint, b.cboolean1\n    having count(a.cint) > 10\n    order by a, x, b.cboolean1 limit 10) t","edges":[{"sources":[3,4,5,6,7],"targets":[0],"expression":"(tok_function sum (. (tok_table_or_col a) ctinyint) (tok_windowspec (tok_partitioningspec (tok_distributeby (. (tok_table_or_col a) csmallint)) (tok_orderby (tok_tabsortcolnameasc (. (tok_table_or_col a) csmallint))))))","edgeType":"PROJECTION"},{"sources":[6],"targets":[1],"expression":"count(default.alltypesorc.cstring1)","edgeType":"PROJECTION"},{"sources":[5],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[0,1,2],"expressio
 n":"(a.cint = b.cint)","edgeType":"PREDICATE"},{"sources":[8,9],"targets":[0,1,2],"expression":"((a.cboolean2 = true) and (b.cfloat > 0.0))","edgeType":"PREDICATE"},{"sources":[7],"targets":[0,1,2],"expression":"(count(default.alltypesorc.cint) > 10)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_v3.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_v3.x"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_v3.cboolean1"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.csmallint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean1"},{"id":6,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":7,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":8,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean2"},{"id":9,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cfloat"}]}
 PREHOOK: query: select * from dest_v3 limit 2
 PREHOOK: type: QUERY
 PREHOOK: Input: default@alltypesorc
 PREHOOK: Input: default@dest_v3
 #### A masked pattern was here ####
-{"version":"1.0","engine":"mr","hash":"40bccc0722002f798d0548b59e369e83","queryText":"select * from dest_v3 limit 2","edges":[{"sources":[3,4,5,6,7],"targets":[0],"expression":"(tok_function sum (. (tok_table_or_col $hdt$_0) $f0) (tok_windowspec (tok_partitioningspec (tok_distributeby (. (tok_table_or_col $hdt$_0) $f1)) (tok_orderby (tok_tabsortcolnameasc (. (tok_table_or_col $hdt$_0) $f1)))) (tok_windowvalues (preceding 2147483647) current)))","edgeType":"PROJECTION"},{"sources":[6],"targets":[1],"expression":"count(default.alltypesorc.cstring1)","edgeType":"PROJECTION"},{"sources":[5],"targets":[2],"edgeType":"PROJECTION"},{"sources":[8],"targets":[0,1,2],"expression":"(a.cboolean2 = true)","edgeType":"PREDICATE"},{"sources":[7],"targets":[0,1,2],"expression":"(count(default.alltypesorc.cint) > 10)","edgeType":"PREDICATE"},{"sources":[9],"targets":[0,1,2],"expression":"(a.cfloat > 0.0)","edgeType":"PREDICATE"},{"sources":[7],"targets":[0,1,2],"expression":"(a.cint = a.cint)","edge
 Type":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"dest_v3.a"},{"id":1,"vertexType":"COLUMN","vertexId":"dest_v3.x"},{"id":2,"vertexType":"COLUMN","vertexId":"dest_v3.cboolean1"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.csmallint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean1"},{"id":6,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":7,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":8,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean2"},{"id":9,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cfloat"}]}
+{"version":"1.0","engine":"mr","hash":"40bccc0722002f798d0548b59e369e83","queryText":"select * from dest_v3 limit 2","edges":[{"sources":[3,4,5,6,7],"targets":[0],"expression":"(tok_function sum (. (tok_table_or_col $hdt$_0) $f0) (tok_windowspec (tok_partitioningspec (tok_distributeby (. (tok_table_or_col $hdt$_0) $f1)) (tok_orderby (tok_tabsortcolnameasc (. (tok_table_or_col $hdt$_0) $f1)))) (tok_windowvalues (preceding 2147483647) current)))","edgeType":"PROJECTION"},{"sources":[6],"targets":[1],"expression":"count(default.alltypesorc.cstring1)","edgeType":"PROJECTION"},{"sources":[5],"targets":[2],"edgeType":"PROJECTION"},{"sources":[8],"targets":[0,1,2],"expression":"(a.cboolean2 = true)","edgeType":"PREDICATE"},{"sources":[7],"targets":[0,1,2],"expression":"(a.cint = a.cint)","edgeType":"PREDICATE"},{"sources":[9],"targets":[0,1,2],"expression":"(a.cfloat > 0.0)","edgeType":"PREDICATE"},{"sources":[7],"targets":[0,1,2],"expression":"(count(default.alltypesorc.cint) > 10)","edge
 Type":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"dest_v3.a"},{"id":1,"vertexType":"COLUMN","vertexId":"dest_v3.x"},{"id":2,"vertexType":"COLUMN","vertexId":"dest_v3.cboolean1"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.csmallint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean1"},{"id":6,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":7,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":8,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean2"},{"id":9,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cfloat"}]}
 38	216	false
 38	229	true
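
The lineage2.q.out and lineage3.q.out hunks above are mostly benign churn: PREDICATE edges change position and vertex ids get renumbered, while a few hunks genuinely merge per-column PROJECTION edges or drop redundant expression strings. When reviewing this kind of golden-file update it helps to normalize both JSON lines before comparing, so pure reordering drops out and only real graph changes remain. A minimal sketch using Jackson; the class name and the normalization rules are illustrative assumptions, not part of this patch:

    import com.fasterxml.jackson.databind.JsonNode;
    import com.fasterxml.jackson.databind.ObjectMapper;

    import java.util.HashSet;
    import java.util.Set;

    // Hypothetical reviewer aid: compares two lineage JSON lines while
    // ignoring edge order and vertex id renumbering, by resolving numeric
    // ids to their stable vertexId strings.
    public class LineageDiffCheck {

      private static Set<String> normalizedEdges(String json) throws Exception {
        JsonNode root = new ObjectMapper().readTree(json);

        // Map numeric vertex ids to vertexId strings.
        JsonNode vertices = root.get("vertices");
        String[] names = new String[vertices.size()];
        for (JsonNode v : vertices) {
          names[v.get("id").asInt()] = v.get("vertexId").asText();
        }

        // Re-express each edge in terms of vertexId strings so that both
        // edge order and id numbering become irrelevant to the comparison.
        Set<String> edges = new HashSet<>();
        for (JsonNode e : root.get("edges")) {
          StringBuilder sb = new StringBuilder(e.get("edgeType").asText());
          for (JsonNode s : e.get("sources")) {
            sb.append(" src=").append(names[s.asInt()]);
          }
          for (JsonNode t : e.get("targets")) {
            sb.append(" tgt=").append(names[t.asInt()]);
          }
          if (e.has("expression")) {
            sb.append(" expr=").append(e.get("expression").asText());
          }
          edges.add(sb.toString());
        }
        return edges;
      }

      public static void main(String[] args) throws Exception {
        // args[0] and args[1] are the old (-) and new (+) JSON lines of a hunk.
        System.out.println(normalizedEdges(args[0]).equals(normalizedEdges(args[1]))
            ? "equivalent lineage" : "lineage differs");
      }
    }

Run against the first lineage2.q.out hunk above this reports "equivalent lineage" (the two PREDICATE edges merely swapped places); against the emp/dept hunk it reports a difference, since the separate emp_id and mgr_id PROJECTION edges were folded into one.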

http://git-wip-us.apache.org/repos/asf/hive/blob/7281a460/ql/src/test/results/clientpositive/spark/join32.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/join32.q.out b/ql/src/test/results/clientpositive/spark/join32.q.out
index 4ae9dc6..1c1c103 100644
--- a/ql/src/test/results/clientpositive/spark/join32.q.out
+++ b/ql/src/test/results/clientpositive/spark/join32.q.out
@@ -423,7 +423,7 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 POSTHOOK: Output: default@dest_j1
 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: dest_j1.value EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ]
 PREHOOK: query: select * from dest_j1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dest_j1

http://git-wip-us.apache.org/repos/asf/hive/blob/7281a460/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out b/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out
index 78bb655..937e8fc 100644
--- a/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out
+++ b/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out
@@ -431,7 +431,7 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 POSTHOOK: Output: default@dest_j1
 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: dest_j1.value EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ]
 PREHOOK: query: select * from dest_j1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dest_j1
@@ -1027,9 +1027,9 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 POSTHOOK: Input: default@src1
 POSTHOOK: Output: default@dest_j1
-POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src1)x.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)w.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: dest_j1.value EXPRESSION [(src1)x.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_j1.value SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ]
 PREHOOK: query: select * from dest_j1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dest_j1

http://git-wip-us.apache.org/repos/asf/hive/blob/7281a460/ql/src/test/results/clientpositive/spark/join33.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/join33.q.out b/ql/src/test/results/clientpositive/spark/join33.q.out
index 4ae9dc6..1c1c103 100644
--- a/ql/src/test/results/clientpositive/spark/join33.q.out
+++ b/ql/src/test/results/clientpositive/spark/join33.q.out
@@ -423,7 +423,7 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 POSTHOOK: Output: default@dest_j1
 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: dest_j1.value EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ]
 PREHOOK: query: select * from dest_j1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dest_j1

http://git-wip-us.apache.org/repos/asf/hive/blob/7281a460/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.java1.7.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.java1.7.out b/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.java1.7.out
index 1bfdba2..3aac389 100644
--- a/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.java1.7.out
+++ b/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.java1.7.out
@@ -310,10 +310,10 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 POSTHOOK: Output: default@src_4
 POSTHOOK: Output: default@src_5
-POSTHOOK: Lineage: src_4.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: src_4.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: src_5.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: src_5.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: src_4.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: src_4.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: src_5.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: src_5.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
 RUN: Stage-2:MAPRED
 RUN: Stage-1:MOVE
 RUN: Stage-0:MOVE
@@ -732,10 +732,10 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 POSTHOOK: Output: default@src_4
 POSTHOOK: Output: default@src_5
-POSTHOOK: Lineage: src_4.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: src_4.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: src_5.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: src_5.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: src_4.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: src_4.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: src_5.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: src_5.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
 RUN: Stage-5:MAPRED
 RUN: Stage-2:MAPRED
 RUN: Stage-1:MOVE

http://git-wip-us.apache.org/repos/asf/hive/blob/7281a460/ql/src/test/results/clientpositive/subquery_multiinsert.q.java1.7.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/subquery_multiinsert.q.java1.7.out b/ql/src/test/results/clientpositive/subquery_multiinsert.q.java1.7.out
index 3a2473f..b668694 100644
--- a/ql/src/test/results/clientpositive/subquery_multiinsert.q.java1.7.out
+++ b/ql/src/test/results/clientpositive/subquery_multiinsert.q.java1.7.out
@@ -333,10 +333,10 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 POSTHOOK: Output: default@src_4
 POSTHOOK: Output: default@src_5
-POSTHOOK: Lineage: src_4.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: src_4.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: src_5.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: src_5.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: src_4.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: src_4.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: src_5.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: src_5.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
 RUN: Stage-10:MAPRED
 RUN: Stage-2:MAPRED
 RUN: Stage-3:MAPRED
@@ -839,10 +839,10 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 POSTHOOK: Output: default@src_4
 POSTHOOK: Output: default@src_5
-POSTHOOK: Lineage: src_4.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: src_4.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: src_5.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: src_5.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: src_4.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: src_4.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: src_5.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: src_5.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
 RUN: Stage-10:MAPRED
 RUN: Stage-14:CONDITIONAL
 RUN: Stage-17:MAPREDLOCAL


[06/50] [abbrv] hive git commit: HIVE-11658: Load data file format validation does not work with directories (Prasanth Jayachandran reviewed by Gunther Hagleitner)

Posted by xu...@apache.org.
HIVE-11658: Load data file format validation does not work with directories (Prasanth Jayachandran reviewed by Gunther Hagleitner)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9670a2b3
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9670a2b3
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9670a2b3

Branch: refs/heads/beeline-cli
Commit: 9670a2b3c35dfc3b9f61481b7ea8fcefbb01571c
Parents: b247cac
Author: Prasanth Jayachandran <j....@gmail.com>
Authored: Thu Aug 27 11:43:25 2015 -0500
Committer: Prasanth Jayachandran <j....@gmail.com>
Committed: Thu Aug 27 11:43:25 2015 -0500

----------------------------------------------------------------------
 .../hive/ql/parse/LoadSemanticAnalyzer.java     | 38 +++++++++++---------
 .../queries/clientnegative/load_orc_negative3.q |  6 ++++
 .../test/queries/clientpositive/load_orc_part.q |  4 +++
 .../clientnegative/load_orc_negative3.q.out     | 25 +++++++++++++
 .../results/clientpositive/load_orc_part.q.out  | 18 ++++++++++
 5 files changed, 75 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/9670a2b3/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
index 85fa9c9..9d2702f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
@@ -128,9 +128,11 @@ public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer {
     return new URI(fromScheme, fromAuthority, path, null, null);
   }
 
-  private void applyConstraints(URI fromURI, URI toURI, Tree ast,
+  private FileStatus[] applyConstraintsAndGetFiles(URI fromURI, URI toURI, Tree ast,
       boolean isLocal) throws SemanticException {
 
+    FileStatus[] srcs = null;
+
     // local mode implies that scheme should be "file"
     // we can change this going forward
     if (isLocal && !fromURI.getScheme().equals("file")) {
@@ -139,7 +141,7 @@ public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer {
     }
 
     try {
-      FileStatus[] srcs = matchFilesOrDir(FileSystem.get(fromURI, conf), new Path(fromURI));
+      srcs = matchFilesOrDir(FileSystem.get(fromURI, conf), new Path(fromURI));
       if (srcs == null || srcs.length == 0) {
         throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast,
             "No files matching path " + fromURI));
@@ -168,6 +170,8 @@ public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer {
           + "\"hive.metastore.warehouse.dir\" do not conflict.";
       throw new SemanticException(ErrorMsg.ILLEGAL_PATH.getMsg(ast, reason));
     }
+
+    return srcs;
   }
 
   @Override
@@ -227,11 +231,11 @@ public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer {
     }
 
     // make sure the arguments make sense
-    applyConstraints(fromURI, toURI, fromTree, isLocal);
+    FileStatus[] files = applyConstraintsAndGetFiles(fromURI, toURI, fromTree, isLocal);
 
     // for managed tables, make sure the file formats match
     if (TableType.MANAGED_TABLE.equals(ts.tableHandle.getTableType())) {
-      ensureFileFormatsMatch(ts, fromURI);
+      ensureFileFormatsMatch(ts, files);
     }
     inputs.add(toReadEntity(new Path(fromURI)));
     Task<? extends Serializable> rTask = null;
@@ -325,7 +329,7 @@ public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer {
     }
   }
 
-  private void ensureFileFormatsMatch(TableSpec ts, URI fromURI) throws SemanticException {
+  private void ensureFileFormatsMatch(TableSpec ts, FileStatus[] fileStatuses) throws SemanticException {
     final Class<? extends InputFormat> destInputFormat;
     try {
       if (ts.getPartSpec() == null || ts.getPartSpec().isEmpty()) {
@@ -340,17 +344,19 @@ public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer {
     // Other file formats should do similar check to make sure file formats match
     // when doing LOAD DATA .. INTO TABLE
     if (OrcInputFormat.class.equals(destInputFormat)) {
-      Path inputFilePath = new Path(fromURI);
-      try {
-        FileSystem fs = FileSystem.get(fromURI, conf);
-        // just creating orc reader is going to do sanity checks to make sure its valid ORC file
-        OrcFile.createReader(fs, inputFilePath);
-      } catch (FileFormatException e) {
-        throw new SemanticException(ErrorMsg.INVALID_FILE_FORMAT_IN_LOAD.getMsg("Destination" +
-            " table is stored as ORC but the file being loaded is not a valid ORC file."));
-      } catch (IOException e) {
-        throw new SemanticException("Unable to load data to destination table." +
-            " Error: " + e.getMessage());
+      for (FileStatus fileStatus : fileStatuses) {
+        try {
+          Path filePath = fileStatus.getPath();
+          FileSystem fs = FileSystem.get(filePath.toUri(), conf);
+          // just creating orc reader is going to do sanity checks to make sure its valid ORC file
+          OrcFile.createReader(fs, filePath);
+        } catch (FileFormatException e) {
+          throw new SemanticException(ErrorMsg.INVALID_FILE_FORMAT_IN_LOAD.getMsg("Destination" +
+              " table is stored as ORC but the file being loaded is not a valid ORC file."));
+        } catch (IOException e) {
+          throw new SemanticException("Unable to load data to destination table." +
+              " Error: " + e.getMessage());
+        }
       }
     }
   }
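
The shape of the fix is visible above: applyConstraintsAndGetFiles now returns the FileStatus[] that matchFilesOrDir resolved, and ensureFileFormatsMatch probes every one of those files instead of only the single fromURI path, so a LOAD DATA that points at a directory can no longer slip non-ORC files past the check. A standalone sketch of the same per-file probe, outside the semantic analyzer; the class and method names here are illustrative, not from the patch:

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.ql.io.FileFormatException;
    import org.apache.hadoop.hive.ql.io.orc.OrcFile;

    // Hypothetical helper mirroring the patched ensureFileFormatsMatch loop:
    // probe each candidate file and fail fast on the first non-ORC one.
    public class OrcDirectoryProbe {

      public static void checkAllOrc(Configuration conf, Path dir) throws IOException {
        FileSystem fs = dir.getFileSystem(conf);
        for (FileStatus status : fs.listStatus(dir)) {
          if (status.isDirectory()) {
            continue; // matchFilesOrDir already expands one directory level
          }
          try {
            // Constructing a reader runs ORC's sanity checks (magic bytes,
            // postscript), which is exactly what the patch relies on.
            OrcFile.createReader(fs, status.getPath());
          } catch (FileFormatException e) {
            throw new IOException("Not a valid ORC file: " + status.getPath(), e);
          }
        }
      }
    }

With this behavior, loading a staging directory of text files into an ORC table fails at semantic analysis with Error 30019 (see the new load_orc_negative3.q.out below) rather than landing unreadable files in the table.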

http://git-wip-us.apache.org/repos/asf/hive/blob/9670a2b3/ql/src/test/queries/clientnegative/load_orc_negative3.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientnegative/load_orc_negative3.q b/ql/src/test/queries/clientnegative/load_orc_negative3.q
new file mode 100644
index 0000000..9a4116e
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/load_orc_negative3.q
@@ -0,0 +1,6 @@
+create table text_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp);
+load data local inpath '../../data/files/kv1.txt' into table text_test;
+
+set hive.default.fileformat=ORC;
+create table orc_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp);
+load data inpath '${hiveconf:hive.metastore.warehouse.dir}/text_test/' into table orc_test;

http://git-wip-us.apache.org/repos/asf/hive/blob/9670a2b3/ql/src/test/queries/clientpositive/load_orc_part.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/load_orc_part.q b/ql/src/test/queries/clientpositive/load_orc_part.q
index 0927ea4..2ff884d 100644
--- a/ql/src/test/queries/clientpositive/load_orc_part.q
+++ b/ql/src/test/queries/clientpositive/load_orc_part.q
@@ -9,6 +9,10 @@ load data inpath '${hiveconf:hive.metastore.warehouse.dir}/orc_staging/orc_split
 load data local inpath '../../data/files/orc_split_elim.orc' into table orc_test partition (ds='10');
 dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orc_test/ds=10/;
 
+load data local inpath '../../data/files/orc_split_elim.orc' overwrite into table orc_staging;
+load data inpath '${hiveconf:hive.metastore.warehouse.dir}/orc_staging/' overwrite into table orc_test partition (ds='10');
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orc_test/ds=10/;
+
 alter table orc_test add partition(ds='11');
 alter table orc_test partition(ds='11') set fileformat textfile;
 load data local inpath '../../data/files/kv1.txt' into table orc_test partition(ds='11');

http://git-wip-us.apache.org/repos/asf/hive/blob/9670a2b3/ql/src/test/results/clientnegative/load_orc_negative3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/load_orc_negative3.q.out b/ql/src/test/results/clientnegative/load_orc_negative3.q.out
new file mode 100644
index 0000000..77fb50e
--- /dev/null
+++ b/ql/src/test/results/clientnegative/load_orc_negative3.q.out
@@ -0,0 +1,25 @@
+PREHOOK: query: create table text_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@text_test
+POSTHOOK: query: create table text_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@text_test
+PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table text_test
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@text_test
+POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table text_test
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@text_test
+PREHOOK: query: create table orc_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_test
+POSTHOOK: query: create table orc_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_test
+FAILED: SemanticException [Error 30019]: The file that you are trying to load does not match the file format of the destination table. Destination table is stored as ORC but the file being loaded is not a valid ORC file.

http://git-wip-us.apache.org/repos/asf/hive/blob/9670a2b3/ql/src/test/results/clientpositive/load_orc_part.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/load_orc_part.q.out b/ql/src/test/results/clientpositive/load_orc_part.q.out
index 34ca493..2e02c2e 100644
--- a/ql/src/test/results/clientpositive/load_orc_part.q.out
+++ b/ql/src/test/results/clientpositive/load_orc_part.q.out
@@ -42,6 +42,24 @@ POSTHOOK: type: LOAD
 POSTHOOK: Output: default@orc_test@ds=10
 Found 2 items
 #### A masked pattern was here ####
+PREHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' overwrite into table orc_staging
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@orc_staging
+POSTHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' overwrite into table orc_staging
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@orc_staging
+#### A masked pattern was here ####
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@orc_test@ds=10
+#### A masked pattern was here ####
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@orc_test@ds=10
+Found 1 items
+#### A masked pattern was here ####
 PREHOOK: query: alter table orc_test add partition(ds='11')
 PREHOOK: type: ALTERTABLE_ADDPARTS
 PREHOOK: Output: default@orc_test
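
For reference, a minimal standalone sketch of the per-file check that the new ensureFileFormatsMatch loop performs. The OrcFile and FileFormatException classes are the ones this patch uses; the wrapper class name and error message below are illustrative only, not part of the commit:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.FileFormatException;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;

public class OrcLoadCheckSketch {
  // Fails if any candidate file is not a readable ORC file, so a LOAD DATA
  // from a staging directory is rejected as a whole, as in the new loop.
  static void validateOrcFiles(FileStatus[] files, Configuration conf) throws IOException {
    for (FileStatus status : files) {
      Path path = status.getPath();
      FileSystem fs = FileSystem.get(path.toUri(), conf);
      try {
        // Creating the reader runs ORC's own sanity checks on the file.
        OrcFile.createReader(fs, path);
      } catch (FileFormatException e) {
        throw new IOException("Not a valid ORC file: " + path, e);
      }
    }
  }
}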


[34/50] [abbrv] hive git commit: HIVE-11668: make sure directsql calls pre-query init when needed (Sergey Shelukhin, reviewed by Sushanth Sowmyan)

Posted by xu...@apache.org.
HIVE-11668: make sure directsql calls pre-query init when needed (Sergey Shelukhin, reviewed by Sushanth Sowmyan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5a1957fc
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5a1957fc
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5a1957fc

Branch: refs/heads/beeline-cli
Commit: 5a1957fc61da4d5e32c46e8e38bdf596eaeef8a3
Parents: 308ae90
Author: Sergey Shelukhin <se...@apache.org>
Authored: Wed Sep 2 11:05:44 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Wed Sep 2 12:27:11 2015 -0700

----------------------------------------------------------------------
 .../hive/metastore/MetaStoreDirectSql.java      | 29 ++++++++++++++++----
 1 file changed, 23 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/5a1957fc/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
index 522fcc2..1f89b7c 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
@@ -178,7 +178,13 @@ class MetaStoreDirectSql {
 
   private boolean ensureDbInit() {
     Transaction tx = pm.currentTransaction();
+    boolean doCommit = false;
+    if (!tx.isActive()) {
+      tx.begin();
+      doCommit = true;
+    }
     Query dbQuery = null, tblColumnQuery = null, partColumnQuery = null;
+
     try {
       // Force the underlying db to initialize.
       dbQuery = pm.newQuery(MDatabase.class, "name == ''");
@@ -192,10 +198,14 @@ class MetaStoreDirectSql {
 
       return true;
     } catch (Exception ex) {
+      doCommit = false;
       LOG.warn("Database initialization failed; direct SQL is disabled", ex);
       tx.rollback();
       return false;
     } finally {
+      if (doCommit) {
+        tx.commit();
+      }
       if (dbQuery != null) {
         dbQuery.closeAll();
       }
@@ -210,23 +220,28 @@ class MetaStoreDirectSql {
 
   private boolean runTestQuery() {
     Transaction tx = pm.currentTransaction();
+    boolean doCommit = false;
     if (!tx.isActive()) {
       tx.begin();
+      doCommit = true;
     }
     Query query = null;
     // Run a self-test query. If it doesn't work, we will self-disable. What a PITA...
     String selfTestQuery = "select \"DB_ID\" from \"DBS\"";
     try {
+      doDbSpecificInitializationsBeforeQuery();
       query = pm.newQuery("javax.jdo.query.SQL", selfTestQuery);
       query.execute();
-      tx.commit();
       return true;
-    } catch (Exception ex) {
-      LOG.warn("Self-test query [" + selfTestQuery + "] failed; direct SQL is disabled", ex);
+    } catch (Throwable t) {
+      doCommit = false;
+      LOG.warn("Self-test query [" + selfTestQuery + "] failed; direct SQL is disabled", t);
       tx.rollback();
       return false;
-    }
-    finally {
+    } finally {
+      if (doCommit) {
+        tx.commit();
+      }
       if (query != null) {
         query.closeAll();
       }
@@ -524,7 +539,6 @@ class MetaStoreDirectSql {
     + "where \"PART_ID\" in (" + partIds + ") order by \"PART_NAME\" asc";
     long start = doTrace ? System.nanoTime() : 0;
     Query query = pm.newQuery("javax.jdo.query.SQL", queryText);
-    @SuppressWarnings("unchecked")
     List<Object[]> sqlResult = executeWithArray(query, null, queryText);
     long queryTime = doTrace ? System.nanoTime() : 0;
     Deadline.checkTimeout();
@@ -1095,6 +1109,7 @@ class MetaStoreDirectSql {
     if (colNames.isEmpty()) {
       return null;
     }
+    doDbSpecificInitializationsBeforeQuery();
     boolean doTrace = LOG.isDebugEnabled();
     long start = doTrace ? System.nanoTime() : 0;
     String queryText = "select " + STATS_COLLIST + " from \"TAB_COL_STATS\" "
@@ -1214,6 +1229,7 @@ class MetaStoreDirectSql {
   private List<ColumnStatisticsObj> columnStatisticsObjForPartitions(String dbName,
       String tableName, List<String> partNames, List<String> colNames, long partsFound,
       boolean useDensityFunctionForNDVEstimation) throws MetaException {
+    doDbSpecificInitializationsBeforeQuery();
     // TODO: all the extrapolation logic should be moved out of this class,
     // only mechanical data retrieval should remain here.
     String commonPrefix = "select \"COLUMN_NAME\", \"COLUMN_TYPE\", "
@@ -1530,6 +1546,7 @@ class MetaStoreDirectSql {
       return Lists.newArrayList();
     }
     boolean doTrace = LOG.isDebugEnabled();
+    doDbSpecificInitializationsBeforeQuery();
     long start = doTrace ? System.nanoTime() : 0;
     String queryText = "select \"PARTITION_NAME\", " + STATS_COLLIST + " from \"PART_COL_STATS\""
       + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? and \"COLUMN_NAME\" in ("
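
A condensed sketch of the transaction guard this commit applies in ensureDbInit and runTestQuery, written against plain JDO; the class and method names are illustrative. The point of the pattern is that the method commits only a transaction it began itself, and any failure both rolls back and suppresses the commit in the finally block:

import javax.jdo.PersistenceManager;
import javax.jdo.Query;
import javax.jdo.Transaction;

final class TxGuardSketch {
  static boolean runProbe(PersistenceManager pm, String sql) {
    Transaction tx = pm.currentTransaction();
    boolean doCommit = false;
    if (!tx.isActive()) {       // only own the transaction if nobody else does
      tx.begin();
      doCommit = true;
    }
    Query query = null;
    try {
      query = pm.newQuery("javax.jdo.query.SQL", sql);
      query.execute();
      return true;
    } catch (Throwable t) {
      doCommit = false;         // never commit after a failure
      tx.rollback();
      return false;
    } finally {
      if (doCommit) {
        tx.commit();            // commit only the transaction we began
      }
      if (query != null) {
        query.closeAll();
      }
    }
  }
}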


[36/50] [abbrv] hive git commit: HIVE-11671: Optimize RuleRegExp in DPP codepath (Rajesh Balamohan, reviewed by Hari Subramaniyan)

Posted by xu...@apache.org.
HIVE-11671: Optimize RuleRegExp in DPP codepath (Rajesh Balamohan, reviewed by Hari Subramaniyan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/492c8b1d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/492c8b1d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/492c8b1d

Branch: refs/heads/beeline-cli
Commit: 492c8b1d88ffcb68ba4f77a3a49ae8fc768cdd7c
Parents: 1fc9320
Author: Hari Subramaniyan <ha...@apache.org>
Authored: Wed Sep 2 15:54:23 2015 -0700
Committer: Hari Subramaniyan <ha...@apache.org>
Committed: Wed Sep 2 15:54:23 2015 -0700

----------------------------------------------------------------------
 .../apache/hadoop/hive/ql/lib/RuleRegExp.java   | 22 +++++++++++---------
 1 file changed, 12 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/492c8b1d/ql/src/java/org/apache/hadoop/hive/ql/lib/RuleRegExp.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lib/RuleRegExp.java b/ql/src/java/org/apache/hadoop/hive/ql/lib/RuleRegExp.java
index c88ed68..fd5f133 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/lib/RuleRegExp.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/lib/RuleRegExp.java
@@ -125,13 +125,13 @@ public class RuleRegExp implements Rule {
    */
   private int costPatternWithoutWildCardChar(Stack<Node> stack) throws SemanticException {
     int numElems = (stack != null ? stack.size() : 0);
-    String name = new String("");
     int patLen = patternWithoutWildCardChar.length();
-
+    StringBuilder name = new StringBuilder(patLen + numElems);
     for (int pos = numElems - 1; pos >= 0; pos--) {
-        name = stack.get(pos).getName() + "%" + name;
+      String nodeName = stack.get(pos).getName() + "%";
+      name.insert(0, nodeName);
       if (name.length() >= patLen) {
-        if (patternWithoutWildCardChar.equals(name)) {
+        if (patternWithoutWildCardChar.contentEquals(name)) {
           return patLen;
         } else {
           return -1;
@@ -153,13 +153,14 @@ public class RuleRegExp implements Rule {
   private int costPatternWithORWildCardChar(Stack<Node> stack) throws SemanticException {
     int numElems = (stack != null ? stack.size() : 0);
     for (String pattern : patternORWildChar) {
-      String name = new String("");
       int patLen = pattern.length();
 
+      StringBuilder name = new StringBuilder(patLen + numElems);
       for (int pos = numElems - 1; pos >= 0; pos--) {
-        name = stack.get(pos).getName() + "%" + name;
+        String nodeName = stack.get(pos).getName() + "%";
+        name.insert(0, nodeName);
         if (name.length() >= patLen) {
-          if (pattern.equals(name)) {
+          if (pattern.contentEquals(name)) {
             return patLen;
           } else {
             break;
@@ -181,11 +182,12 @@ public class RuleRegExp implements Rule {
    * @throws SemanticException
    */
   private int costPatternWithWildCardChar(Stack<Node> stack) throws SemanticException {
-	int numElems = (stack != null ? stack.size() : 0);
-    String name = "";
+    int numElems = (stack != null ? stack.size() : 0);
+    StringBuilder name = new StringBuilder();
     Matcher m = patternWithWildCardChar.matcher("");
     for (int pos = numElems - 1; pos >= 0; pos--) {
-      name = stack.get(pos).getName() + "%" + name;
+      String nodeName = stack.get(pos).getName() + "%";
+      name.insert(0, nodeName);
       m.reset(name);
       if (m.matches()) {
         return name.length();
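
A minimal sketch of the prepend pattern adopted above, with the stack of operator names stood in by a plain list (the helper class and method are hypothetical). StringBuilder.insert(0, ...) avoids allocating a fresh String on every iteration, and contentEquals compares against the pattern without materializing name.toString():

import java.util.List;

final class PrependCostSketch {
  // Returns the pattern length on an exact match, -1 otherwise; nodeNames is
  // walked from the top of the stack down, as in costPatternWithoutWildCardChar.
  static int costWithoutWildcard(List<String> nodeNames, String pattern) {
    int patLen = pattern.length();
    StringBuilder name = new StringBuilder(patLen + nodeNames.size());
    for (int pos = nodeNames.size() - 1; pos >= 0; pos--) {
      name.insert(0, nodeNames.get(pos) + "%");
      if (name.length() >= patLen) {
        // An overlong name can no longer equal the pattern, so decide here.
        return pattern.contentEquals(name) ? patLen : -1;
      }
    }
    return -1;
  }
}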


[43/50] [abbrv] hive git commit: HIVE-11747: Unnecessary error log is shown when executing an "INSERT OVERWRITE LOCAL DIRECTORY" cmd in the embedded mode (Ferdinand Xu, reviewed by Dong Chen)

Posted by xu...@apache.org.
HIVE-11747: Unnecessary error log is shown when executing an "INSERT OVERWRITE LOCAL DIRECTORY" cmd in the embedded mode (Ferdinand Xu, reviewed by Dong Chen)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/76fc383e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/76fc383e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/76fc383e

Branch: refs/heads/beeline-cli
Commit: 76fc383e5af919f2c43b7607bcfe732b6681a45b
Parents: f4acb44
Author: Ferdinand Xu <ch...@intel.com>
Authored: Mon Sep 7 03:06:41 2015 -0400
Committer: Ferdinand Xu <ch...@intel.com>
Committed: Mon Sep 7 03:06:58 2015 -0400

----------------------------------------------------------------------
 ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java | 1 -
 1 file changed, 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/76fc383e/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java
index 82345ee..d9225a9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java
@@ -398,7 +398,6 @@ public class ExecDriver extends Task<MapredWork> implements Serializable, Hadoop
       if (pwd != null) {
         HiveConf.setVar(job, HiveConf.ConfVars.METASTOREPWD, "HIVE");
       }
-      LOG.error(job.get("mapreduce.framework.name"));
       JobClient jc = new JobClient(job);
       // make this client wait if job tracker is not behaving well.
       Throttle.checkJobTracker(job, LOG);


[16/50] [abbrv] hive git commit: HIVE-11618: Correct the SARG API to reunify the PredicateLeaf.Type INTEGER and LONG (Owen O'Malley, reviewed by Sergio Pena)

Posted by xu...@apache.org.
HIVE-11618: Correct the SARG API to reunify the PredicateLeaf.Type INTEGER and LONG (Owen O'Malley, reviewed by Sergio Pena)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/97bf32a1
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/97bf32a1
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/97bf32a1

Branch: refs/heads/beeline-cli
Commit: 97bf32a12f754d83a362aaa4048a6612d299a386
Parents: ed4517c
Author: Sergio Pena <se...@cloudera.com>
Authored: Fri Aug 28 17:59:15 2015 -0500
Committer: Sergio Pena <se...@cloudera.com>
Committed: Fri Aug 28 17:59:15 2015 -0500

----------------------------------------------------------------------
 .../hadoop/hive/ql/io/orc/RecordReaderImpl.java |   2 -
 .../hive/ql/io/parquet/LeafFilterFactory.java   |  14 +-
 .../read/ParquetFilterPredicateConverter.java   |  35 +++--
 .../hive/ql/io/sarg/ConvertAstToSearchArg.java  |   3 -
 .../hive/ql/io/orc/TestInputOutputFormat.java   |   4 +-
 .../hadoop/hive/ql/io/orc/TestOrcFile.java      |  10 +-
 .../hive/ql/io/orc/TestRecordReaderImpl.java    |  42 +++---
 .../parquet/TestParquetRecordReaderWrapper.java |  50 +++++---
 .../read/TestParquetFilterPredicate.java        |   6 +-
 .../ql/io/sarg/TestConvertAstToSearchArg.java   | 128 +++++++++++--------
 .../hive/ql/io/sarg/TestSearchArgumentImpl.java |  22 ++--
 .../hadoop/hive/ql/io/sarg/PredicateLeaf.java   |   3 +-
 12 files changed, 181 insertions(+), 138 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/97bf32a1/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
index 0d765b1..fcb3746 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
@@ -637,8 +637,6 @@ class RecordReaderImpl implements RecordReader {
           return ((BigDecimal) obj).doubleValue();
         }
         break;
-      case INTEGER:
-        // fall through
       case LONG:
         if (obj instanceof Number) {
           // widening conversion

http://git-wip-us.apache.org/repos/asf/hive/blob/97bf32a1/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java
index a1dbc1a..1ceea6e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java
@@ -22,6 +22,8 @@ import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf.Operator;
 import org.apache.parquet.filter2.predicate.FilterApi;
 import org.apache.parquet.filter2.predicate.FilterPredicate;
 import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.schema.PrimitiveType;
+import org.apache.parquet.schema.Type;
 
 import static org.apache.parquet.filter2.predicate.FilterApi.eq;
 import static org.apache.parquet.filter2.predicate.FilterApi.lt;
@@ -146,12 +148,16 @@ public class LeafFilterFactory {
    * @param type FilterPredicateType
    * @return
    */
-  public FilterPredicateLeafBuilder getLeafFilterBuilderByType(PredicateLeaf.Type type){
+  public FilterPredicateLeafBuilder getLeafFilterBuilderByType(PredicateLeaf.Type type,
+                                                               Type parquetType){
     switch (type){
-      case INTEGER:
-        return new IntFilterPredicateLeafBuilder();
       case LONG:
-        return new LongFilterPredicateLeafBuilder();
+        if (parquetType.asPrimitiveType().getPrimitiveTypeName() ==
+            PrimitiveType.PrimitiveTypeName.INT32) {
+          return new IntFilterPredicateLeafBuilder();
+        } else {
+          return new LongFilterPredicateLeafBuilder();
+        }
       case FLOAT:   // float and double
         return new DoubleFilterPredicateLeafBuilder();
       case STRING:  // string, char, varchar

http://git-wip-us.apache.org/repos/asf/hive/blob/97bf32a1/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetFilterPredicateConverter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetFilterPredicateConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetFilterPredicateConverter.java
index f170026..d1864ae 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetFilterPredicateConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetFilterPredicateConverter.java
@@ -37,14 +37,6 @@ public class ParquetFilterPredicateConverter {
   private static final Log LOG = LogFactory.getLog(ParquetFilterPredicateConverter.class);
 
   /**
-   * Translate the search argument to the filter predicate parquet uses
-   * @return translate the sarg into a filter predicate
-   */
-  public static FilterPredicate toFilterPredicate(SearchArgument sarg) {
-    return toFilterPredicate(sarg, null);
-  }
-
-  /**
    * Translate the search argument to the filter predicate parquet uses. It includes
    * only the columns from the passed schema.
    * @return translate the sarg into a filter predicate
@@ -58,18 +50,21 @@ public class ParquetFilterPredicateConverter {
       }
     }
 
-    return translate(sarg.getExpression(), sarg.getLeaves(), columns);
+    return translate(sarg.getExpression(), sarg.getLeaves(), columns, schema);
   }
 
-  private static FilterPredicate translate(ExpressionTree root, List<PredicateLeaf> leaves, Set<String> columns) {
+  private static FilterPredicate translate(ExpressionTree root,
+                                           List<PredicateLeaf> leaves,
+                                           Set<String> columns,
+                                           MessageType schema) {
     FilterPredicate p = null;
     switch (root.getOperator()) {
       case OR:
         for(ExpressionTree child: root.getChildren()) {
           if (p == null) {
-            p = translate(child, leaves, columns);
+            p = translate(child, leaves, columns, schema);
           } else {
-            FilterPredicate right = translate(child, leaves, columns);
+            FilterPredicate right = translate(child, leaves, columns, schema);
             // constant means no filter, ignore it when it is null
             if(right != null){
               p = FilterApi.or(p, right);
@@ -80,9 +75,9 @@ public class ParquetFilterPredicateConverter {
       case AND:
         for(ExpressionTree child: root.getChildren()) {
           if (p == null) {
-            p = translate(child, leaves, columns);
+            p = translate(child, leaves, columns, schema);
           } else {
-            FilterPredicate right = translate(child, leaves, columns);
+            FilterPredicate right = translate(child, leaves, columns, schema);
             // constant means no filter, ignore it when it is null
             if(right != null){
               p = FilterApi.and(p, right);
@@ -91,7 +86,8 @@ public class ParquetFilterPredicateConverter {
         }
         return p;
       case NOT:
-        FilterPredicate op = translate(root.getChildren().get(0), leaves, columns);
+        FilterPredicate op = translate(root.getChildren().get(0), leaves,
+            columns, schema);
         if (op != null) {
           return FilterApi.not(op);
         } else {
@@ -101,8 +97,9 @@ public class ParquetFilterPredicateConverter {
         PredicateLeaf leaf = leaves.get(root.getLeaf());
 
         // If columns is null, then we need to create the leaf
-        if (columns == null || columns.contains(leaf.getColumnName())) {
-          return buildFilterPredicateFromPredicateLeaf(leaf);
+        if (columns.contains(leaf.getColumnName())) {
+          Type parquetType = schema.getType(leaf.getColumnName());
+          return buildFilterPredicateFromPredicateLeaf(leaf, parquetType);
         } else {
           // Do not create predicate if the leaf is not on the passed schema.
           return null;
@@ -116,12 +113,12 @@ public class ParquetFilterPredicateConverter {
   }
 
   private static FilterPredicate buildFilterPredicateFromPredicateLeaf
-      (PredicateLeaf leaf) {
+      (PredicateLeaf leaf, Type parquetType) {
     LeafFilterFactory leafFilterFactory = new LeafFilterFactory();
     FilterPredicateLeafBuilder builder;
     try {
       builder = leafFilterFactory
-          .getLeafFilterBuilderByType(leaf.getType());
+          .getLeafFilterBuilderByType(leaf.getType(), parquetType);
       if (builder == null) {
         return null;
       }
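
Putting the two changes together, a small usage sketch mirroring the updated tests later in this patch: the caller now always supplies the Parquet MessageType, so an int32 column carrying a LONG predicate leaf is translated into an int-typed Parquet predicate. The schema string and column name here are illustrative:

import org.apache.hadoop.hive.ql.io.parquet.read.ParquetFilterPredicateConverter;
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
import org.apache.parquet.filter2.predicate.FilterPredicate;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

final class SchemaAwareSargSketch {
  public static void main(String[] args) {
    SearchArgument sarg = SearchArgumentFactory.newBuilder()
        .startAnd()
        .lessThan("id", PredicateLeaf.Type.LONG, 10L)  // integral literal is always a Long now
        .end()
        .build();
    // The column is int32 in the file schema, so the leaf becomes an int predicate.
    MessageType schema = MessageTypeParser.parseMessageType(
        "message test { required int32 id; }");
    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
    System.out.println(p);  // lt(id, 10)
  }
}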

http://git-wip-us.apache.org/repos/asf/hive/blob/97bf32a1/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java
index 5c4b7ea..e034650 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java
@@ -88,7 +88,6 @@ public class ConvertAstToSearchArg {
         case BYTE:
         case SHORT:
         case INT:
-          return PredicateLeaf.Type.INTEGER;
         case LONG:
           return PredicateLeaf.Type.LONG;
         case CHAR:
@@ -139,8 +138,6 @@ public class ConvertAstToSearchArg {
       return null;
     }
     switch (type) {
-      case INTEGER:
-        return ((Number) lit).intValue();
       case LONG:
         return ((Number) lit).longValue();
       case STRING:
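
A minimal sketch (with a hypothetical helper name) of the widening rule the hunk above keeps: with INTEGER folded into LONG, every integral literal is boxed as a java.lang.Long, which is why the test updates that follow change literals such as 10 and 12 to 10L and 12L:

final class SargLiteralSketch {
  // Standalone version of the integral branch above: any Number literal
  // under the unified LONG type widens to a boxed Long.
  static Object normalizeIntegralLiteral(Object lit) {
    if (lit instanceof Number) {
      return ((Number) lit).longValue();  // byte, short, int and long all land here
    }
    throw new IllegalArgumentException("not an integral literal: " + lit);
  }
}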

http://git-wip-us.apache.org/repos/asf/hive/blob/97bf32a1/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
index 547e799..ce86cd8 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
@@ -1844,7 +1844,7 @@ public class TestInputOutputFormat {
     types.add(builder.build());
     types.add(builder.build());
     SearchArgument isNull = SearchArgumentFactory.newBuilder()
-        .startAnd().isNull("cost", PredicateLeaf.Type.INTEGER).end().build();
+        .startAnd().isNull("cost", PredicateLeaf.Type.LONG).end().build();
     conf.set(ConvertAstToSearchArg.SARG_PUSHDOWN, toKryo(isNull));
     conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR,
         "url,cost");
@@ -1889,7 +1889,7 @@ public class TestInputOutputFormat {
     SearchArgument sarg =
         SearchArgumentFactory.newBuilder()
             .startAnd()
-            .lessThan("z", PredicateLeaf.Type.INTEGER, new Integer(0))
+            .lessThan("z", PredicateLeaf.Type.LONG, new Long(0))
             .end()
             .build();
     conf.set("sarg.pushdown", toKryo(sarg));

http://git-wip-us.apache.org/repos/asf/hive/blob/97bf32a1/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
index 4480d22..0bb8401 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
@@ -1923,9 +1923,9 @@ public class TestOrcFile {
     SearchArgument sarg = SearchArgumentFactory.newBuilder()
         .startAnd()
           .startNot()
-             .lessThan("int1", PredicateLeaf.Type.INTEGER, 300000)
+             .lessThan("int1", PredicateLeaf.Type.LONG, 300000L)
           .end()
-          .lessThan("int1", PredicateLeaf.Type.INTEGER, 600000)
+          .lessThan("int1", PredicateLeaf.Type.LONG, 600000L)
         .end()
         .build();
     RecordReader rows = reader.rowsOptions(new Reader.Options()
@@ -1946,7 +1946,7 @@ public class TestOrcFile {
     // look through the file with no rows selected
     sarg = SearchArgumentFactory.newBuilder()
         .startAnd()
-          .lessThan("int1", PredicateLeaf.Type.INTEGER, 0)
+          .lessThan("int1", PredicateLeaf.Type.LONG, 0L)
         .end()
         .build();
     rows = reader.rowsOptions(new Reader.Options()
@@ -1959,9 +1959,9 @@ public class TestOrcFile {
     // select first 100 and last 100 rows
     sarg = SearchArgumentFactory.newBuilder()
         .startOr()
-          .lessThan("int1", PredicateLeaf.Type.INTEGER, 300 * 100)
+          .lessThan("int1", PredicateLeaf.Type.LONG, 300L * 100)
           .startNot()
-            .lessThan("int1", PredicateLeaf.Type.INTEGER, 300 * 3400)
+            .lessThan("int1", PredicateLeaf.Type.LONG, 300L * 3400)
           .end()
         .end()
         .build();

http://git-wip-us.apache.org/repos/asf/hive/blob/97bf32a1/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java
index 7957cb4..839bbc6 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java
@@ -445,7 +445,7 @@ public class TestRecordReaderImpl {
   @Test
   public void testPredEvalWithStringStats() throws Exception {
     PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf(
-        PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER, "x", 100, null);
+        PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 100L, null);
     assertEquals(TruthValue.YES_NO,
         RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null));
 
@@ -550,7 +550,7 @@ public class TestRecordReaderImpl {
   @Test
   public void testPredEvalWithDecimalStats() throws Exception {
     PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf(
-        PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER, "x", 15, null);
+        PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null);
     assertEquals(TruthValue.YES_NO,
         RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null));
 
@@ -590,7 +590,7 @@ public class TestRecordReaderImpl {
   @Test
   public void testPredEvalWithTimestampStats() throws Exception {
     PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf(
-        PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER, "x", 15, null);
+        PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null);
     assertEquals(TruthValue.YES_NO,
         RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null));
 
@@ -637,8 +637,8 @@ public class TestRecordReaderImpl {
   @Test
   public void testEquals() throws Exception {
     PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
-        (PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.INTEGER,
-            "x", 15, null);
+        (PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.LONG,
+            "x", 15L, null);
     assertEquals(TruthValue.NO_NULL,
         RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null));
     assertEquals(TruthValue.YES_NO_NULL,
@@ -656,8 +656,8 @@ public class TestRecordReaderImpl {
   @Test
   public void testNullSafeEquals() throws Exception {
     PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
-        (PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER,
-            "x", 15, null);
+        (PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG,
+            "x", 15L, null);
     assertEquals(TruthValue.NO,
         RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null));
     assertEquals(TruthValue.YES_NO,
@@ -675,8 +675,8 @@ public class TestRecordReaderImpl {
   @Test
   public void testLessThan() throws Exception {
     PredicateLeaf lessThan = TestSearchArgumentImpl.createPredicateLeaf
-        (PredicateLeaf.Operator.LESS_THAN, PredicateLeaf.Type.INTEGER,
-            "x", 15, null);
+        (PredicateLeaf.Operator.LESS_THAN, PredicateLeaf.Type.LONG,
+            "x", 15L, null);
     assertEquals(TruthValue.NO_NULL,
         RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), lessThan, null));
     assertEquals(TruthValue.NO_NULL,
@@ -692,8 +692,8 @@ public class TestRecordReaderImpl {
   @Test
   public void testLessThanEquals() throws Exception {
     PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
-        (PredicateLeaf.Operator.LESS_THAN_EQUALS, PredicateLeaf.Type.INTEGER,
-            "x", 15, null);
+        (PredicateLeaf.Operator.LESS_THAN_EQUALS, PredicateLeaf.Type.LONG,
+            "x", 15L, null);
     assertEquals(TruthValue.NO_NULL,
         RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null));
     assertEquals(TruthValue.YES_NO_NULL,
@@ -709,10 +709,10 @@ public class TestRecordReaderImpl {
   @Test
   public void testIn() throws Exception {
     List<Object> args = new ArrayList<Object>();
-    args.add(10);
-    args.add(20);
+    args.add(10L);
+    args.add(20L);
     PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
-        (PredicateLeaf.Operator.IN, PredicateLeaf.Type.INTEGER,
+        (PredicateLeaf.Operator.IN, PredicateLeaf.Type.LONG,
             "x", null, args);
     assertEquals(TruthValue.YES_NULL,
         RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 20L), pred, null));
@@ -727,10 +727,10 @@ public class TestRecordReaderImpl {
   @Test
   public void testBetween() throws Exception {
     List<Object> args = new ArrayList<Object>();
-    args.add(10);
-    args.add(20);
+    args.add(10L);
+    args.add(20L);
     PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
-        (PredicateLeaf.Operator.BETWEEN, PredicateLeaf.Type.INTEGER,
+        (PredicateLeaf.Operator.BETWEEN, PredicateLeaf.Type.LONG,
             "x", null, args);
     assertEquals(TruthValue.NO_NULL,
         RecordReaderImpl.evaluatePredicateProto(createIntStats(0L, 5L), pred, null));
@@ -751,7 +751,7 @@ public class TestRecordReaderImpl {
   @Test
   public void testIsNull() throws Exception {
     PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
-        (PredicateLeaf.Operator.IS_NULL, PredicateLeaf.Type.INTEGER,
+        (PredicateLeaf.Operator.IS_NULL, PredicateLeaf.Type.LONG,
             "x", null, null);
     assertEquals(TruthValue.YES_NO,
         RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null));
@@ -1306,10 +1306,10 @@ public class TestRecordReaderImpl {
   @Test
   public void testIntInBloomFilter() throws Exception {
     List<Object> args = new ArrayList<Object>();
-    args.add(15);
-    args.add(19);
+    args.add(15L);
+    args.add(19L);
     PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
-        (PredicateLeaf.Operator.IN, PredicateLeaf.Type.INTEGER,
+        (PredicateLeaf.Operator.IN, PredicateLeaf.Type.LONG,
             "x", null, args);
     BloomFilterIO bf = new BloomFilterIO(10000);
     for (int i = 20; i < 1000; i++) {

http://git-wip-us.apache.org/repos/asf/hive/blob/97bf32a1/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetRecordReaderWrapper.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetRecordReaderWrapper.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetRecordReaderWrapper.java
index f9ca528..e92b696 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetRecordReaderWrapper.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetRecordReaderWrapper.java
@@ -28,6 +28,8 @@ import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.MessageTypeParser;
 import org.junit.Test;
 
 import java.sql.Date;
@@ -48,15 +50,19 @@ public class TestParquetRecordReaderWrapper {
      SearchArgument sarg = SearchArgumentFactory.newBuilder()
         .startNot()
         .startOr()
-        .isNull("x", PredicateLeaf.Type.INTEGER)
-        .between("y", PredicateLeaf.Type.INTEGER, 10, 20)
-        .in("z", PredicateLeaf.Type.INTEGER, 1, 2, 3)
+        .isNull("x", PredicateLeaf.Type.LONG)
+        .between("y", PredicateLeaf.Type.LONG, 10L, 20L)
+        .in("z", PredicateLeaf.Type.LONG, 1L, 2L, 3L)
         .nullSafeEquals("a", PredicateLeaf.Type.STRING, "stinger")
         .end()
         .end()
         .build();
 
-    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
+    MessageType schema = MessageTypeParser.parseMessageType("message test {" +
+        " optional int32 x; required int32 y; required int32 z;" +
+        " optional binary a;}");
+    FilterPredicate p =
+        ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
     String expected =
       "and(and(and(not(eq(x, null)), not(and(lt(y, 20), not(lteq(y, 10))))), not(or(or(eq(z, 1), " +
         "eq(z, 2)), eq(z, 3)))), not(eq(a, Binary{\"stinger\"})))";
@@ -75,23 +81,27 @@ public class TestParquetRecordReaderWrapper {
             .equals("z", PredicateLeaf.Type.DECIMAL, new HiveDecimalWritable("1.0"))
             .end()
             .build();
+    MessageType schema = MessageTypeParser.parseMessageType("message test {" +
+        " required int32 x; required binary y; required binary z;}");
     assertEquals("lteq(y, Binary{\"hi        \"})",
-        ParquetFilterPredicateConverter.toFilterPredicate(sarg).toString());
+        ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema).toString());
 
     sarg = SearchArgumentFactory.newBuilder()
         .startNot()
         .startOr()
-        .isNull("x", PredicateLeaf.Type.INTEGER)
+        .isNull("x", PredicateLeaf.Type.LONG)
         .between("y", PredicateLeaf.Type.DECIMAL,
             new HiveDecimalWritable("10"), new HiveDecimalWritable("20.0"))
-        .in("z", PredicateLeaf.Type.INTEGER, 1, 2, 3)
+        .in("z", PredicateLeaf.Type.LONG, 1L, 2L, 3L)
         .nullSafeEquals("a", PredicateLeaf.Type.STRING,
             new HiveVarchar("stinger", 100).toString())
         .end()
         .end()
         .build();
-
-    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
+    schema = MessageTypeParser.parseMessageType("message test {" +
+        " optional int32 x; required binary y; required int32 z;" +
+        " optional binary a;}");
+    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
     String expected =
         "and(and(not(eq(x, null)), not(or(or(eq(z, 1), eq(z, 2)), eq(z, 3)))), " +
         "not(eq(a, Binary{\"stinger\"})))";
@@ -110,23 +120,28 @@ public class TestParquetRecordReaderWrapper {
                 new HiveDecimalWritable("1.0"))
             .end()
             .build();
+    MessageType schema = MessageTypeParser.parseMessageType("message test {" +
+        " required int32 x; required binary y; required binary z;}");
     assertEquals("lteq(y, Binary{\"hi        \"})",
-        ParquetFilterPredicateConverter.toFilterPredicate(sarg).toString());
+        ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema).toString());
 
     sarg = SearchArgumentFactory.newBuilder()
         .startNot()
         .startOr()
-        .isNull("x", PredicateLeaf.Type.INTEGER)
+        .isNull("x", PredicateLeaf.Type.LONG)
         .between("y", PredicateLeaf.Type.DECIMAL, new HiveDecimalWritable("10"),
             new HiveDecimalWritable("20.0"))
-        .in("z", PredicateLeaf.Type.INTEGER, 1, 2, 3)
+        .in("z", PredicateLeaf.Type.LONG, 1L, 2L, 3L)
         .nullSafeEquals("a", PredicateLeaf.Type.STRING,
             new HiveVarchar("stinger", 100).toString())
         .end()
         .end()
         .build();
+    schema = MessageTypeParser.parseMessageType("message test {" +
+        " optional int32 x; required binary y; required int32 z;" +
+        " optional binary a;}");
 
-    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
+    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
     String expected = "and(and(not(eq(x, null)), not(or(or(eq(z, 1), eq(z, 2)), eq(z, 3)))), " +
         "not(eq(a, Binary{\"stinger\"})))";
     assertEquals(expected, p.toString());
@@ -137,16 +152,19 @@ public class TestParquetRecordReaderWrapper {
     SearchArgument sarg =
         SearchArgumentFactory.newBuilder()
             .startAnd()
-            .lessThan("x", PredicateLeaf.Type.INTEGER, new Integer((short) 22))
-            .lessThan("x1", PredicateLeaf.Type.INTEGER, new Integer(22))
+            .lessThan("x", PredicateLeaf.Type.LONG, 22L)
+            .lessThan("x1", PredicateLeaf.Type.LONG, 22L)
             .lessThanEquals("y", PredicateLeaf.Type.STRING,
                 new HiveChar("hi", 10).toString())
             .equals("z", PredicateLeaf.Type.FLOAT, new Double(0.22))
             .equals("z1", PredicateLeaf.Type.FLOAT, new Double(0.22))
             .end()
             .build();
+    MessageType schema = MessageTypeParser.parseMessageType("message test {" +
+        " required int32 x; required int32 x1;" +
+        " required binary y; required float z; required float z1;}");
 
-    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
+    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
     String expected = "and(and(and(and(lt(x, 22), lt(x1, 22))," +
         " lteq(y, Binary{\"hi        \"})), eq(z, " +
         "0.22)), eq(z1, 0.22))";

http://git-wip-us.apache.org/repos/asf/hive/blob/97bf32a1/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java
index 847a02b..ac5c1a0 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java
@@ -35,9 +35,9 @@ public class TestParquetFilterPredicate {
     SearchArgument sarg = SearchArgumentFactory.newBuilder()
         .startNot()
         .startOr()
-        .isNull("a", PredicateLeaf.Type.INTEGER)
-        .between("y", PredicateLeaf.Type.INTEGER, 10, 20) // Column will be removed from filter
-        .in("z", PredicateLeaf.Type.INTEGER, 1, 2, 3) // Column will be removed from filter
+        .isNull("a", PredicateLeaf.Type.LONG)
+        .between("y", PredicateLeaf.Type.LONG, 10L, 20L) // Column will be removed from filter
+        .in("z", PredicateLeaf.Type.LONG, 1L, 2L, 3L) // Column will be removed from filter
         .nullSafeEquals("a", PredicateLeaf.Type.STRING, "stinger")
         .end()
         .end()

http://git-wip-us.apache.org/repos/asf/hive/blob/97bf32a1/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestConvertAstToSearchArg.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestConvertAstToSearchArg.java b/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestConvertAstToSearchArg.java
index 9e8425a..e72789d 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestConvertAstToSearchArg.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestConvertAstToSearchArg.java
@@ -28,6 +28,8 @@ import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.io.parquet.read.ParquetFilterPredicateConverter;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.MessageTypeParser;
 import org.junit.Test;
 
 import java.beans.XMLDecoder;
@@ -550,7 +552,11 @@ public class TestConvertAstToSearchArg {
     List<PredicateLeaf> leaves = sarg.getLeaves();
     assertEquals(9, leaves.size());
 
-    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
+    MessageType schema =
+        MessageTypeParser.parseMessageType("message test { required int32 id;" +
+            " required binary first_name; }");
+
+      FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
     String[] conditions = new String[]{
       "eq(first_name, Binary{\"john\"})",    /* first_name = 'john' */
       "not(lteq(first_name, Binary{\"greg\"}))", /* 'greg' < first_name */
@@ -586,34 +592,34 @@ public class TestConvertAstToSearchArg {
     assertEquals("alan", leaf.getLiteral());
 
     leaf = leaves.get(3);
-    assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+    assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
     assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator());
     assertEquals("id", leaf.getColumnName());
-    assertEquals(12, leaf.getLiteral());
+    assertEquals(12L, leaf.getLiteral());
 
     leaf = leaves.get(4);
-    assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+    assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
     assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator());
     assertEquals("id", leaf.getColumnName());
-    assertEquals(13, leaf.getLiteral());
+    assertEquals(13L, leaf.getLiteral());
 
     leaf = leaves.get(5);
-    assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+    assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
     assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
     assertEquals("id", leaf.getColumnName());
-    assertEquals(15, leaf.getLiteral());
+    assertEquals(15L, leaf.getLiteral());
 
     leaf = leaves.get(6);
-    assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+    assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
     assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
     assertEquals("id", leaf.getColumnName());
-    assertEquals(16, leaf.getLiteral());
+    assertEquals(16L, leaf.getLiteral());
 
     leaf = leaves.get(7);
-    assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+    assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
     assertEquals(PredicateLeaf.Operator.NULL_SAFE_EQUALS, leaf.getOperator());
     assertEquals("id", leaf.getColumnName());
-    assertEquals(30, leaf.getLiteral());
+    assertEquals(30L, leaf.getLiteral());
 
     leaf = leaves.get(8);
     assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
@@ -842,7 +848,10 @@ public class TestConvertAstToSearchArg {
       "lteq(id, 4)"                         /* id <= 4             */
     };
 
-    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
+    MessageType schema =
+        MessageTypeParser.parseMessageType("message test { required int32 id;" +
+            " required binary first_name; }");
+    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
     String expected = String.format("or(or(or(%1$s, %2$s), %3$s), %4$s)", conditions);
     assertEquals(expected, p.toString());
 
@@ -860,16 +869,16 @@ public class TestConvertAstToSearchArg {
     assertEquals("sue", leaf.getLiteral());
 
     leaf = leaves.get(2);
-    assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+    assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
     assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
     assertEquals("id", leaf.getColumnName());
-    assertEquals(12, leaf.getLiteral());
+    assertEquals(12L, leaf.getLiteral());
 
     leaf = leaves.get(3);
-    assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+    assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
     assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator());
     assertEquals("id", leaf.getColumnName());
-    assertEquals(4, leaf.getLiteral());
+    assertEquals(4L, leaf.getLiteral());
 
     assertEquals("(or leaf-0 (not leaf-1) (not leaf-2) leaf-3)",
         sarg.getExpression().toString());
@@ -1271,18 +1280,21 @@ public class TestConvertAstToSearchArg {
       "eq(first_name, Binary{\"alan\"})",   /* first_name = 'alan'  */
       "eq(last_name, Binary{\"smith\"})"    /* 'smith' = last_name  */
     };
+    MessageType schema =
+        MessageTypeParser.parseMessageType("message test { required int32 id;" +
+            " required binary first_name; required binary last_name;}");
 
-    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
+    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
     String expected = String.format("and(and(and(%1$s, %2$s), %3$s), %4$s)", conditions);
     assertEquals(expected, p.toString());
 
     PredicateLeaf leaf = leaves.get(0);
-    assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+    assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
     assertEquals(PredicateLeaf.Operator.BETWEEN, leaf.getOperator());
     assertEquals("id", leaf.getColumnName());
     assertEquals(null, leaf.getLiteral());
-    assertEquals(23, leaf.getLiteralList().get(0));
-    assertEquals(45, leaf.getLiteralList().get(1));
+    assertEquals(23L, leaf.getLiteralList().get(0));
+    assertEquals(45L, leaf.getLiteralList().get(1));
 
     leaf = leaves.get(1);
     assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
@@ -1493,15 +1505,19 @@ public class TestConvertAstToSearchArg {
       "or(eq(id, 34), eq(id, 50))" /* id in (34,50) */
     };
 
-    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
+    MessageType schema =
+        MessageTypeParser.parseMessageType("message test { required int32 id;" +
+            " required binary first_name; }");
+    FilterPredicate p =
+        ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
     String expected = String.format("and(and(%1$s, %2$s), %3$s)", conditions);
     assertEquals(expected, p.toString());
 
     PredicateLeaf leaf = leaves.get(0);
-    assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+    assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
     assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator());
     assertEquals("id", leaf.getColumnName());
-    assertEquals(12, leaf.getLiteral());
+    assertEquals(12L, leaf.getLiteral());
 
     leaf = leaves.get(1);
     assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
@@ -1511,11 +1527,11 @@ public class TestConvertAstToSearchArg {
     assertEquals("sue", leaf.getLiteralList().get(1));
 
     leaf = leaves.get(2);
-    assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+    assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
     assertEquals(PredicateLeaf.Operator.IN, leaf.getOperator());
     assertEquals("id", leaf.getColumnName());
-    assertEquals(34, leaf.getLiteralList().get(0));
-    assertEquals(50, leaf.getLiteralList().get(1));
+    assertEquals(34L, leaf.getLiteralList().get(0));
+    assertEquals(50L, leaf.getLiteralList().get(1));
 
     assertEquals("(and (not leaf-0) leaf-1 leaf-2)",
         sarg.getExpression().toString());
@@ -1752,7 +1768,10 @@ public class TestConvertAstToSearchArg {
     List<PredicateLeaf> leaves = sarg.getLeaves();
     assertEquals(1, leaves.size());
 
-    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
+    MessageType schema =
+        MessageTypeParser.parseMessageType("message test { required int32 id;" +
+            " required binary first_name; }");
+    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
     String expected =
       "and(lt(first_name, Binary{\"greg\"}), not(lteq(first_name, Binary{\"david\"})))";
     assertEquals(p.toString(), expected);
@@ -2232,7 +2251,10 @@ public class TestConvertAstToSearchArg {
     List<PredicateLeaf> leaves = sarg.getLeaves();
     assertEquals(9, leaves.size());
 
-    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
+    MessageType schema =
+        MessageTypeParser.parseMessageType("message test { required int32 id;" +
+            " required binary first_name; }");
+    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
     String expected = "and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(" +
       "or(or(or(lt(id, 18), lt(id, 10)), lt(id, 13)), lt(id, 16)), " +
       "or(or(or(lt(id, 18), lt(id, 11)), lt(id, 13)), lt(id, 16))), " +
@@ -2255,58 +2277,58 @@ public class TestConvertAstToSearchArg {
     assertEquals(p.toString(), expected);
 
     PredicateLeaf leaf = leaves.get(0);
-    assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+    assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
     assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
     assertEquals("id", leaf.getColumnName());
-    assertEquals(18, leaf.getLiteral());
+    assertEquals(18L, leaf.getLiteral());
 
     leaf = leaves.get(1);
-    assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+    assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
     assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
     assertEquals("id", leaf.getColumnName());
-    assertEquals(10, leaf.getLiteral());
+    assertEquals(10L, leaf.getLiteral());
 
     leaf = leaves.get(2);
-    assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+    assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
     assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
     assertEquals("id", leaf.getColumnName());
-    assertEquals(13, leaf.getLiteral());
+    assertEquals(13L, leaf.getLiteral());
 
     leaf = leaves.get(3);
-    assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+    assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
     assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
     assertEquals("id", leaf.getColumnName());
-    assertEquals(16, leaf.getLiteral());
+    assertEquals(16L, leaf.getLiteral());
 
     leaf = leaves.get(4);
-    assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+    assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
     assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
     assertEquals("id", leaf.getColumnName());
-    assertEquals(11, leaf.getLiteral());
+    assertEquals(11L, leaf.getLiteral());
 
     leaf = leaves.get(5);
-    assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+    assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
     assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
     assertEquals("id", leaf.getColumnName());
-    assertEquals(12, leaf.getLiteral());
+    assertEquals(12L, leaf.getLiteral());
 
     leaf = leaves.get(6);
-    assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+    assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
     assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
     assertEquals("id", leaf.getColumnName());
-    assertEquals(14, leaf.getLiteral());
+    assertEquals(14L, leaf.getLiteral());
 
     leaf = leaves.get(7);
-    assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+    assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
     assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
     assertEquals("id", leaf.getColumnName());
-    assertEquals(15, leaf.getLiteral());
+    assertEquals(15L, leaf.getLiteral());
 
     leaf = leaves.get(8);
-    assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+    assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
     assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
     assertEquals("id", leaf.getColumnName());
-    assertEquals(17, leaf.getLiteral());
+    assertEquals(17L, leaf.getLiteral());
 
     assertEquals("(and" +
         " (or leaf-0 leaf-1 leaf-2 leaf-3)" +
@@ -2388,7 +2410,10 @@ public class TestConvertAstToSearchArg {
     List<PredicateLeaf> leaves = sarg.getLeaves();
     assertEquals(0, leaves.size());
 
-    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
+    MessageType schema =
+        MessageTypeParser.parseMessageType("message test { required int32 id;" +
+            " required binary first_name; }");
+    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
     assertNull(p);
 
     assertEquals("YES_NO_NULL",
@@ -2643,15 +2668,18 @@ public class TestConvertAstToSearchArg {
     List<PredicateLeaf> leaves = sarg.getLeaves();
     assertEquals(1, leaves.size());
 
-    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
+    MessageType schema =
+        MessageTypeParser.parseMessageType("message test { required int32 id;" +
+            " required binary first_name; }");
+    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
     String expected = "and(not(lt(id, 10)), not(lt(id, 10)))";
     assertEquals(expected, p.toString());
 
-    assertEquals(PredicateLeaf.Type.INTEGER, leaves.get(0).getType());
+    assertEquals(PredicateLeaf.Type.LONG, leaves.get(0).getType());
     assertEquals(PredicateLeaf.Operator.LESS_THAN,
         leaves.get(0).getOperator());
     assertEquals("id", leaves.get(0).getColumnName());
-    assertEquals(10, leaves.get(0).getLiteral());
+    assertEquals(10L, leaves.get(0).getLiteral());
 
     assertEquals("(and (not leaf-0) (not leaf-0))",
         sarg.getExpression().toString());
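
The two hunks above capture the signature change under test: ParquetFilterPredicateConverter.toFilterPredicate() now takes the Parquet MessageType alongside the SearchArgument, presumably so that predicates on columns absent from the file schema can be dropped instead of pushed down. Below is a minimal sketch of the new call site, reusing the test schema from the hunks; the wrapper class name and the package imports are assumptions for illustration, not part of the patch:

import org.apache.hadoop.hive.ql.io.parquet.read.ParquetFilterPredicateConverter;
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
import org.apache.parquet.filter2.predicate.FilterPredicate;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

public class SargSchemaSketch {
  public static void main(String[] args) {
    // Build a one-leaf sarg: id < 10 (LONG now covers all integer widths).
    SearchArgument sarg = SearchArgumentFactory.newBuilder()
        .startAnd()
        .lessThan("id", PredicateLeaf.Type.LONG, 10L)
        .end()
        .build();

    // The converter now needs the file schema; columns absent from it
    // cannot contribute a pushed-down predicate.
    MessageType schema = MessageTypeParser.parseMessageType(
        "message test { required int32 id; required binary first_name; }");

    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
    System.out.println(p);  // should print something like: lt(id, 10)
  }
}

For a single-leaf sarg like this one, the rendering should match the expected strings asserted in the hunks above.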

http://git-wip-us.apache.org/repos/asf/hive/blob/97bf32a1/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java b/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java
index 20de846..573d5c6 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java
@@ -330,7 +330,7 @@ public class TestSearchArgumentImpl {
     SearchArgument sarg =
         SearchArgumentFactory.newBuilder()
             .startAnd()
-            .lessThan("x", PredicateLeaf.Type.INTEGER, 10)
+            .lessThan("x", PredicateLeaf.Type.LONG, 10L)
             .lessThanEquals("y", PredicateLeaf.Type.STRING, "hi")
             .equals("z", PredicateLeaf.Type.FLOAT, 1.0)
             .end()
@@ -342,9 +342,9 @@ public class TestSearchArgumentImpl {
     sarg = SearchArgumentFactory.newBuilder()
         .startNot()
         .startOr()
-        .isNull("x", PredicateLeaf.Type.INTEGER)
-        .between("y", PredicateLeaf.Type.INTEGER, 10, 20)
-        .in("z", PredicateLeaf.Type.INTEGER, 1, 2, 3)
+        .isNull("x", PredicateLeaf.Type.LONG)
+        .between("y", PredicateLeaf.Type.LONG, 10L, 20L)
+        .in("z", PredicateLeaf.Type.LONG, 1L, 2L, 3L)
         .nullSafeEquals("a", PredicateLeaf.Type.STRING, "stinger")
         .end()
         .end()
@@ -376,10 +376,10 @@ public class TestSearchArgumentImpl {
     sarg = SearchArgumentFactory.newBuilder()
         .startNot()
         .startOr()
-        .isNull("x", PredicateLeaf.Type.INTEGER)
+        .isNull("x", PredicateLeaf.Type.LONG)
         .between("y", PredicateLeaf.Type.DECIMAL,
             new HiveDecimalWritable("10"), new HiveDecimalWritable("20.0"))
-        .in("z", PredicateLeaf.Type.INTEGER, 1, 2, 3)
+        .in("z", PredicateLeaf.Type.LONG, 1L, 2L, 3L)
         .nullSafeEquals("a", PredicateLeaf.Type.STRING,
             new HiveVarchar("stinger", 100).toString())
         .end()
@@ -413,10 +413,10 @@ public class TestSearchArgumentImpl {
     sarg = SearchArgumentFactory.newBuilder()
         .startNot()
         .startOr()
-        .isNull("x", PredicateLeaf.Type.INTEGER)
+        .isNull("x", PredicateLeaf.Type.LONG)
         .between("y", PredicateLeaf.Type.DECIMAL, new HiveDecimalWritable("10"),
             new HiveDecimalWritable("20.0"))
-        .in("z", PredicateLeaf.Type.INTEGER, 1, 2, 3)
+        .in("z", PredicateLeaf.Type.LONG, 1L, 2L, 3L)
         .nullSafeEquals("a", PredicateLeaf.Type.STRING,
             new HiveVarchar("stinger", 100).toString())
         .end()
@@ -435,8 +435,8 @@ public class TestSearchArgumentImpl {
     SearchArgument sarg =
         SearchArgumentFactory.newBuilder()
             .startAnd()
-            .lessThan("x", PredicateLeaf.Type.INTEGER, new Integer((short) 22))
-            .lessThan("x1", PredicateLeaf.Type.INTEGER, new Integer(22))
+            .lessThan("x", PredicateLeaf.Type.LONG, 22L)
+            .lessThan("x1", PredicateLeaf.Type.LONG, 22L)
             .lessThanEquals("y", PredicateLeaf.Type.STRING,
                 new HiveChar("hi", 10).toString())
             .equals("z", PredicateLeaf.Type.FLOAT, new Double(0.22))
@@ -480,7 +480,7 @@ public class TestSearchArgumentImpl {
     SearchArgument sarg =
         SearchArgumentFactory.newBuilder()
         .startAnd()
-        .lessThan("x", PredicateLeaf.Type.INTEGER, "hi")
+        .lessThan("x", PredicateLeaf.Type.LONG, "hi")
         .end()
         .build();
   }
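
The last hunk flips the deliberately mismatched literal in this negative test from an INTEGER leaf to a LONG one while keeping the String value "hi". A sketch of the behaviour the test presumably asserts — that the builder rejects a literal whose class does not match the leaf type — follows; the exception class is an assumption here, since the archived hunk does not show the test's expected-exception annotation:

import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;

public class BadLiteralSketch {
  public static void main(String[] args) {
    try {
      SearchArgumentFactory.newBuilder()
          .startAnd()
          .lessThan("x", PredicateLeaf.Type.LONG, "hi")  // String literal on a LONG leaf
          .end()
          .build();
      System.out.println("unexpected: mismatched literal was accepted");
    } catch (IllegalArgumentException e) {  // assumed failure mode
      System.out.println("rejected as expected: " + e.getMessage());
    }
  }
}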

http://git-wip-us.apache.org/repos/asf/hive/blob/97bf32a1/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java
index 3a92565..dc71db4 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java
@@ -47,8 +47,7 @@ public interface PredicateLeaf {
    * The possible types for sargs.
    */
   public static enum Type {
-    INTEGER(Integer.class), // all of the integer types except long
-    LONG(Long.class),
+    LONG(Long.class),      // all of the integer types
     FLOAT(Double.class),   // float and double
     STRING(String.class),  // string, char, varchar
     DATE(Date.class),
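
With INTEGER removed, a single LONG constant now stands in for every integral width, which is why the hunks above rewrite bare int literals with the L suffix and retype the affected leaves. A minimal sketch of the resulting contract, using only the accessors exercised in the tests above (the wrapper class name is illustrative):

import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;

public class LongOnlyLeafSketch {
  public static void main(String[] args) {
    // tinyint/smallint/int/bigint predicates all surface as Type.LONG now.
    SearchArgument sarg = SearchArgumentFactory.newBuilder()
        .startAnd()
        .lessThan("id", PredicateLeaf.Type.LONG, 18L)  // formerly Type.INTEGER, 18
        .end()
        .build();

    PredicateLeaf leaf = sarg.getLeaves().get(0);
    System.out.println(leaf.getType());                     // LONG
    System.out.println(leaf.getLiteral());                  // 18
    System.out.println(leaf.getLiteral() instanceof Long);  // true
  }
}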


[47/50] [abbrv] hive git commit: HIVE-11617: Explain plan for multiple lateral views is very slow (Aihua Xu, reviewed by Jesus Camacho Rodriguez)

Posted by xu...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/7281a460/ql/src/test/results/clientpositive/tez/explainuser_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/explainuser_2.q.out b/ql/src/test/results/clientpositive/tez/explainuser_2.q.out
index 71d8f41..566b451 100644
--- a/ql/src/test/results/clientpositive/tez/explainuser_2.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainuser_2.q.out
@@ -58,10 +58,10 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 POSTHOOK: Output: default@ss
 POSTHOOK: Lineage: ss.k1 SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: ss.k2 SIMPLE [(src)y.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: ss.k3 EXPRESSION [(srcpart)z.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss.k3 SIMPLE [(srcpart)z.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: ss.v1 SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ]
 POSTHOOK: Lineage: ss.v2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: ss.v3 EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss.v3 SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ]
 PREHOOK: query: INSERT OVERWRITE TABLE sr
 SELECT x.key,x.value,y.key,y.value,z.key,z.value
 FROM src1 x 
@@ -86,10 +86,10 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
 POSTHOOK: Output: default@sr
 POSTHOOK: Lineage: sr.k1 SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: sr.k2 SIMPLE [(src)y.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: sr.k3 EXPRESSION [(srcpart)z.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: sr.k3 SIMPLE [(srcpart)z.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: sr.v1 SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ]
 POSTHOOK: Lineage: sr.v2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: sr.v3 EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: sr.v3 SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ]
 PREHOOK: query: INSERT OVERWRITE TABLE cs
 SELECT x.key,x.value,y.key,y.value,z.key,z.value
 FROM src1 x 
@@ -115,10 +115,10 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
 POSTHOOK: Output: default@cs
 POSTHOOK: Lineage: cs.k1 SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: cs.k2 EXPRESSION [(src)y.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: cs.k2 SIMPLE [(src)y.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: cs.k3 SIMPLE [(srcpart)z.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: cs.v1 SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: cs.v2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: cs.v2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ]
 POSTHOOK: Lineage: cs.v3 SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ]
 PREHOOK: query: ANALYZE TABLE ss COMPUTE STATISTICS
 PREHOOK: type: QUERY


[29/50] [abbrv] hive git commit: HIVE-11440: Create Parquet predicate push down (PPD) unit tests and q-tests (Ferdinand Xu, reviewed by Sergio Pena)

Posted by xu...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/f2056a13/ql/src/test/results/clientpositive/parquet_ppd_timestamp.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/parquet_ppd_timestamp.q.out b/ql/src/test/results/clientpositive/parquet_ppd_timestamp.q.out
new file mode 100644
index 0000000..745237d
--- /dev/null
+++ b/ql/src/test/results/clientpositive/parquet_ppd_timestamp.q.out
@@ -0,0 +1,292 @@
+PREHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), ts timestamp) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@newtypestbl
+POSTHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), ts timestamp) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@newtypestbl
+PREHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("2011-01-01 01:01:01" as timestamp) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("2011-01-20 01:01:01" as timestamp) from src src2) uniontbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@newtypestbl
+POSTHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("2011-01-01 01:01:01" as timestamp) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("2011-01-20 01:01:01" as timestamp) from src src2) uniontbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@newtypestbl
+POSTHOOK: Lineage: newtypestbl.c EXPRESSION []
+POSTHOOK: Lineage: newtypestbl.d EXPRESSION []
+POSTHOOK: Lineage: newtypestbl.ts EXPRESSION []
+POSTHOOK: Lineage: newtypestbl.v EXPRESSION []
+PREHOOK: query: -- timestamp data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests)
+select sum(hash(*)) from newtypestbl where cast(ts as string)='2011-01-01 01:01:01'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: -- timestamp data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests)
+select sum(hash(*)) from newtypestbl where cast(ts as string)='2011-01-01 01:01:01'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+445653015500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where cast(ts as string)='2011-01-01 01:01:01'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where cast(ts as string)='2011-01-01 01:01:01'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+445653015500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts=cast('2011-01-01 01:01:01' as timestamp)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts=cast('2011-01-01 01:01:01' as timestamp)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+445653015500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts=cast('2011-01-01 01:01:01' as timestamp)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts=cast('2011-01-01 01:01:01' as timestamp)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+445653015500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts=cast('2011-01-01 01:01:01' as varchar(20))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts=cast('2011-01-01 01:01:01' as varchar(20))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+445653015500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts=cast('2011-01-01 01:01:01' as varchar(20))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts=cast('2011-01-01 01:01:01' as varchar(20))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+445653015500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts!=cast('2011-01-01 01:01:01' as timestamp)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts!=cast('2011-01-01 01:01:01' as timestamp)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+1033237945500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts!=cast('2011-01-01 01:01:01' as timestamp)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts!=cast('2011-01-01 01:01:01' as timestamp)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+1033237945500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts<cast('2011-01-20 01:01:01' as timestamp)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts<cast('2011-01-20 01:01:01' as timestamp)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+445653015500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts<cast('2011-01-20 01:01:01' as timestamp)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts<cast('2011-01-20 01:01:01' as timestamp)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+445653015500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts<cast('2011-01-22 01:01:01' as timestamp)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts<cast('2011-01-22 01:01:01' as timestamp)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+1478890961000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts<cast('2011-01-22 01:01:01' as timestamp)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts<cast('2011-01-22 01:01:01' as timestamp)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+1478890961000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts<cast('2010-10-01 01:01:01' as timestamp)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts<cast('2010-10-01 01:01:01' as timestamp)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts<cast('2010-10-01 01:01:01' as timestamp)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts<cast('2010-10-01 01:01:01' as timestamp)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts<=cast('2011-01-01 01:01:01' as timestamp)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts<=cast('2011-01-01 01:01:01' as timestamp)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+445653015500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts<=cast('2011-01-01 01:01:01' as timestamp)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts<=cast('2011-01-01 01:01:01' as timestamp)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+445653015500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts<=cast('2011-01-20 01:01:01' as timestamp)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts<=cast('2011-01-20 01:01:01' as timestamp)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+1478890961000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts<=cast('2011-01-20 01:01:01' as timestamp)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts<=cast('2011-01-20 01:01:01' as timestamp)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+1478890961000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-02 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-02 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+1033237945500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-02 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-02 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+1033237945500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-01 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-01 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+1478890961000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-01 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-01 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+1478890961000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-02 01:01:01' as timestamp), cast('2011-01-08 01:01:01' as timestamp))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-02 01:01:01' as timestamp), cast('2011-01-08 01:01:01' as timestamp))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-02 01:01:01' as timestamp), cast('2011-01-08 01:01:01' as timestamp))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-02 01:01:01' as timestamp), cast('2011-01-08 01:01:01' as timestamp))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-08 01:01:01' as timestamp)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-08 01:01:01' as timestamp)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+445653015500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-08 01:01:01' as timestamp)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-08 01:01:01' as timestamp)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+445653015500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-25 01:01:01' as timestamp)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-25 01:01:01' as timestamp)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+1478890961000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-25 01:01:01' as timestamp)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-25 01:01:01' as timestamp)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+1478890961000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2010-11-01 01:01:01' as timestamp)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2010-11-01 01:01:01' as timestamp)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2010-11-01 01:01:01' as timestamp)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2010-11-01 01:01:01' as timestamp)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL

http://git-wip-us.apache.org/repos/asf/hive/blob/f2056a13/ql/src/test/results/clientpositive/parquet_ppd_varchar.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/parquet_ppd_varchar.q.out b/ql/src/test/results/clientpositive/parquet_ppd_varchar.q.out
new file mode 100644
index 0000000..23e3cd0
--- /dev/null
+++ b/ql/src/test/results/clientpositive/parquet_ppd_varchar.q.out
@@ -0,0 +1,220 @@
+PREHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), da date) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@newtypestbl
+POSTHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), da date) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@newtypestbl
+PREHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2) uniontbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@newtypestbl
+POSTHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2) uniontbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@newtypestbl
+POSTHOOK: Lineage: newtypestbl.c EXPRESSION []
+POSTHOOK: Lineage: newtypestbl.d EXPRESSION []
+POSTHOOK: Lineage: newtypestbl.da EXPRESSION []
+POSTHOOK: Lineage: newtypestbl.v EXPRESSION []
+PREHOOK: query: -- varchar data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests)
+select sum(hash(*)) from newtypestbl where v="bee"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: -- varchar data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests)
+select sum(hash(*)) from newtypestbl where v="bee"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where v="bee"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where v="bee"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where v!="bee"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where v!="bee"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427804500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where v!="bee"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where v!="bee"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427804500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where v<"world"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where v<"world"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where v<"world"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where v<"world"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where v<="world"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where v<="world"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where v<="world"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where v<="world"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where v="bee   "
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where v="bee   "
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where v="bee   "
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where v="bee   "
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where v in ("bee", "orange")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where v in ("bee", "orange")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where v in ("bee", "orange")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where v in ("bee", "orange")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where v in ("bee", "world")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where v in ("bee", "world")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where v in ("bee", "world")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where v in ("bee", "world")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where v in ("orange")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where v in ("orange")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where v in ("orange")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where v in ("orange")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where v between "bee" and "orange"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where v between "bee" and "orange"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where v between "bee" and "orange"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where v between "bee" and "orange"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where v between "bee" and "zombie"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where v between "bee" and "zombie"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where v between "bee" and "zombie"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where v between "bee" and "zombie"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where v between "orange" and "pine"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where v between "orange" and "pine"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where v between "orange" and "pine"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where v between "orange" and "pine"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL


[30/50] [abbrv] hive git commit: HIVE-11440: Create Parquet predicate push down (PPD) unit tests and q-tests (Ferdinand Xu, reviewed by Sergio Pena)

Posted by xu...@apache.org.
HIVE-11440: Create Parquet predicate push down (PPD) unit tests and q-tests (Ferdinand Xu, reviewed by Sergio Pena)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f2056a13
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f2056a13
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f2056a13

Branch: refs/heads/beeline-cli
Commit: f2056a13e734ed2a00e185c069a85e514bb175b1
Parents: f530f44
Author: Ferdinand Xu <ch...@intel.com>
Authored: Wed Sep 2 00:34:45 2015 -0400
Committer: Ferdinand Xu <ch...@intel.com>
Committed: Wed Sep 2 00:39:21 2015 -0400

----------------------------------------------------------------------
 .../clientpositive/parquet_ppd_boolean.q        |  35 ++
 .../queries/clientpositive/parquet_ppd_char.q   |  76 +++
 .../queries/clientpositive/parquet_ppd_date.q   | 101 ++++
 .../clientpositive/parquet_ppd_decimal.q        | 163 ++++++
 .../clientpositive/parquet_ppd_timestamp.q      |  98 ++++
 .../clientpositive/parquet_ppd_varchar.q        |  76 +++
 .../clientpositive/parquet_ppd_boolean.q.out    | 200 ++++++++
 .../clientpositive/parquet_ppd_char.q.out       | 220 +++++++++
 .../clientpositive/parquet_ppd_date.q.out       | 301 ++++++++++++
 .../clientpositive/parquet_ppd_decimal.q.out    | 490 +++++++++++++++++++
 .../clientpositive/parquet_ppd_timestamp.q.out  | 292 +++++++++++
 .../clientpositive/parquet_ppd_varchar.q.out    | 220 +++++++++
 12 files changed, 2272 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/f2056a13/ql/src/test/queries/clientpositive/parquet_ppd_boolean.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/parquet_ppd_boolean.q b/ql/src/test/queries/clientpositive/parquet_ppd_boolean.q
new file mode 100644
index 0000000..05c6c50
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/parquet_ppd_boolean.q
@@ -0,0 +1,35 @@
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SET mapred.min.split.size=1000;
+SET mapred.max.split.size=5000;
+
+create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), b boolean) stored as parquet;
+
+insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, true from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, false from src src2) uniontbl;
+
+SET hive.optimize.ppd=true;
+SET hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where b=true;
+select sum(hash(*)) from newtypestbl where b!=true;
+select sum(hash(*)) from newtypestbl where b<true;
+select sum(hash(*)) from newtypestbl where b>true;
+select sum(hash(*)) from newtypestbl where b<=true;
+
+select sum(hash(*)) from newtypestbl where b=false;
+select sum(hash(*)) from newtypestbl where b!=false;
+select sum(hash(*)) from newtypestbl where b<false;
+select sum(hash(*)) from newtypestbl where b>false;
+select sum(hash(*)) from newtypestbl where b<=false;
+
+
+SET hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where b=true;
+select sum(hash(*)) from newtypestbl where b!=true;
+select sum(hash(*)) from newtypestbl where b<true;
+select sum(hash(*)) from newtypestbl where b>true;
+select sum(hash(*)) from newtypestbl where b<=true;
+
+select sum(hash(*)) from newtypestbl where b=false;
+select sum(hash(*)) from newtypestbl where b!=false;
+select sum(hash(*)) from newtypestbl where b<false;
+select sum(hash(*)) from newtypestbl where b>false;
+select sum(hash(*)) from newtypestbl where b<=false;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/f2056a13/ql/src/test/queries/clientpositive/parquet_ppd_char.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/parquet_ppd_char.q b/ql/src/test/queries/clientpositive/parquet_ppd_char.q
new file mode 100644
index 0000000..b01612c
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/parquet_ppd_char.q
@@ -0,0 +1,76 @@
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SET hive.optimize.ppd=true;
+SET mapred.min.split.size=1000;
+SET mapred.max.split.size=5000;
+
+create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), da date) stored as parquet;
+
+insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2) uniontbl;
+
+set hive.optimize.index.filter=false;
+
+-- char data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests)
+select sum(hash(*)) from newtypestbl where c="apple";
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where c="apple";
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where c!="apple";
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where c!="apple";
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where c<"hello";
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where c<"hello";
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where c<="hello";
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where c<="hello";
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where c="apple ";
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where c="apple ";
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where c in ("apple", "carrot");
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where c in ("apple", "carrot");
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where c in ("apple", "hello");
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where c in ("apple", "hello");
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where c in ("carrot");
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where c in ("carrot");
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where c between "apple" and "carrot";
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where c between "apple" and "carrot";
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where c between "apple" and "zombie";
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where c between "apple" and "zombie";
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where c between "carrot" and "carrot1";
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where c between "carrot" and "carrot1";
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/f2056a13/ql/src/test/queries/clientpositive/parquet_ppd_date.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/parquet_ppd_date.q b/ql/src/test/queries/clientpositive/parquet_ppd_date.q
new file mode 100644
index 0000000..a18a9cf
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/parquet_ppd_date.q
@@ -0,0 +1,101 @@
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SET hive.optimize.ppd=true;
+SET mapred.min.split.size=1000;
+SET mapred.max.split.size=5000;
+
+create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), da date) stored as parquet;
+
+insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2) uniontbl;
+
+-- date data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests)
+select sum(hash(*)) from newtypestbl where da='1970-02-20';
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where da='1970-02-20';
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where da= date '1970-02-20';
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where da=cast('1970-02-20' as date);
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where da=cast('1970-02-20' as date);
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where da=cast('1970-02-20' as varchar(20));
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where da=cast('1970-02-20' as varchar(20));
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where da!='1970-02-20';
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where da!='1970-02-20';
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where da<'1970-02-27';
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where da<'1970-02-27';
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where da<'1970-02-29';
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where da<'1970-02-29';
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where da<'1970-02-15';
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where da<'1970-02-15';
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where da<='1970-02-20';
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where da<='1970-02-20';
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where da<='1970-02-27';
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where da<='1970-02-27';
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where da in (cast('1970-02-21' as date), cast('1970-02-27' as date));
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where da in (cast('1970-02-21' as date), cast('1970-02-27' as date));
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where da in (cast('1970-02-20' as date), cast('1970-02-27' as date));
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where da in (cast('1970-02-20' as date), cast('1970-02-27' as date));
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where da in (cast('1970-02-21' as date), cast('1970-02-22' as date));
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where da in (cast('1970-02-21' as date), cast('1970-02-22' as date));
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where da between '1970-02-19' and '1970-02-22';
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where da between '1970-02-19' and '1970-02-22';
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where da between '1970-02-19' and '1970-02-28';
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where da between '1970-02-19' and '1970-02-28';
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where da between '1970-02-18' and '1970-02-19';
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where da between '1970-02-18' and '1970-02-19';

http://git-wip-us.apache.org/repos/asf/hive/blob/f2056a13/ql/src/test/queries/clientpositive/parquet_ppd_decimal.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/parquet_ppd_decimal.q b/ql/src/test/queries/clientpositive/parquet_ppd_decimal.q
new file mode 100644
index 0000000..679164b
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/parquet_ppd_decimal.q
@@ -0,0 +1,163 @@
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SET mapred.min.split.size=1000;
+SET mapred.max.split.size=5000;
+
+create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), da date) stored as parquet;
+
+insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2) uniontbl;
+
+-- decimal data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests)
+select sum(hash(*)) from newtypestbl where d=0.22;
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d=0.22;
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d='0.22';
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d='0.22';
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d=cast('0.22' as float);
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d=cast('0.22' as float);
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d!=0.22;
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d!=0.22;
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d!='0.22';
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d!='0.22';
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d!=cast('0.22' as float);
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d!=cast('0.22' as float);
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d<11.22;
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d<11.22;
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d<'11.22';
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d<'11.22';
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d<cast('11.22' as float);
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d<cast('11.22' as float);
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d<1;
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d<1;
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d<=11.22;
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d<=11.22;
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d<='11.22';
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d<='11.22';
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d<=cast('11.22' as float);
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d<=cast('11.22' as float);
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d<=cast('11.22' as decimal);
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d<=cast('11.22' as decimal);
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d<=11.22BD;
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d<=11.22BD;
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d<=12;
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d<=12;
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d in ('0.22', '1.0');
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d in ('0.22', '1.0');
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d in ('0.22', '11.22');
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d in ('0.22', '11.22');
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d in ('0.9', '1.0');
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d in ('0.9', '1.0');
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d in ('0.9', 0.22);
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d in ('0.9', 0.22);
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d in ('0.9', 0.22, cast('11.22' as float));
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d in ('0.9', 0.22, cast('11.22' as float));
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d between 0 and 1;
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d between 0 and 1;
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d between 0 and 1000;
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d between 0 and 1000;
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d between 0 and '2.0';
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d between 0 and '2.0';
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d between 0 and cast(3 as float);
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d between 0 and cast(3 as float);
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d between 1 and cast(30 as char(10));
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d between 1 and cast(30 as char(10));

http://git-wip-us.apache.org/repos/asf/hive/blob/f2056a13/ql/src/test/queries/clientpositive/parquet_ppd_timestamp.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/parquet_ppd_timestamp.q b/ql/src/test/queries/clientpositive/parquet_ppd_timestamp.q
new file mode 100644
index 0000000..e0802a0
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/parquet_ppd_timestamp.q
@@ -0,0 +1,98 @@
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SET hive.optimize.ppd=true;
+SET mapred.min.split.size=1000;
+SET mapred.max.split.size=5000;
+
+create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), ts timestamp) stored as parquet;
+
+insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("2011-01-01 01:01:01" as timestamp) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("2011-01-20 01:01:01" as timestamp) from src src2) uniontbl;
+
+-- timestamp data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests)
+select sum(hash(*)) from newtypestbl where cast(ts as string)='2011-01-01 01:01:01';
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where cast(ts as string)='2011-01-01 01:01:01';
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where ts=cast('2011-01-01 01:01:01' as timestamp);
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where ts=cast('2011-01-01 01:01:01' as timestamp);
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where ts=cast('2011-01-01 01:01:01' as varchar(20));
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where ts=cast('2011-01-01 01:01:01' as varchar(20));
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where ts!=cast('2011-01-01 01:01:01' as timestamp);
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where ts!=cast('2011-01-01 01:01:01' as timestamp);
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where ts<cast('2011-01-20 01:01:01' as timestamp);
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where ts<cast('2011-01-20 01:01:01' as timestamp);
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where ts<cast('2011-01-22 01:01:01' as timestamp);
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where ts<cast('2011-01-22 01:01:01' as timestamp);
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where ts<cast('2010-10-01 01:01:01' as timestamp);
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where ts<cast('2010-10-01 01:01:01' as timestamp);
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where ts<=cast('2011-01-01 01:01:01' as timestamp);
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where ts<=cast('2011-01-01 01:01:01' as timestamp);
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where ts<=cast('2011-01-20 01:01:01' as timestamp);
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where ts<=cast('2011-01-20 01:01:01' as timestamp);
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-02 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp));
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-02 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp));
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-01 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp));
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-01 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp));
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-02 01:01:01' as timestamp), cast('2011-01-08 01:01:01' as timestamp));
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-02 01:01:01' as timestamp), cast('2011-01-08 01:01:01' as timestamp));
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-08 01:01:01' as timestamp);
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-08 01:01:01' as timestamp);
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-25 01:01:01' as timestamp);
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-25 01:01:01' as timestamp);
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2010-11-01 01:01:01' as timestamp);
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2010-11-01 01:01:01' as timestamp);
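
The timestamp queries above deliberately spell the same instant in
several literal forms: cast(ts as string)='...', ts=cast('...' as
timestamp), and ts=cast('...' as varchar(20)). This exercises the
predicate-pushdown path for each literal type, not just for native
timestamp constants, and every spelling must select the same rows. For
example, these three predicates from the file are equivalent:

  select sum(hash(*)) from newtypestbl where cast(ts as string)='2011-01-01 01:01:01';
  select sum(hash(*)) from newtypestbl where ts=cast('2011-01-01 01:01:01' as timestamp);
  select sum(hash(*)) from newtypestbl where ts=cast('2011-01-01 01:01:01' as varchar(20));
  -- each one, run with the index filter off and then on, must produce
  -- the identical sum(hash(*)) checksum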

http://git-wip-us.apache.org/repos/asf/hive/blob/f2056a13/ql/src/test/queries/clientpositive/parquet_ppd_varchar.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/parquet_ppd_varchar.q b/ql/src/test/queries/clientpositive/parquet_ppd_varchar.q
new file mode 100644
index 0000000..be50ca2
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/parquet_ppd_varchar.q
@@ -0,0 +1,76 @@
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SET hive.optimize.ppd=true;
+SET mapred.min.split.size=1000;
+SET mapred.max.split.size=5000;
+
+create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), da date) stored as parquet;
+
+insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2) uniontbl;
+
+set hive.optimize.index.filter=false;
+
+-- varchar data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests)
+select sum(hash(*)) from newtypestbl where v="bee";
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where v="bee";
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where v!="bee";
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where v!="bee";
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where v<"world";
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where v<"world";
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where v<="world";
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where v<="world";
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where v="bee   ";
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where v="bee   ";
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where v in ("bee", "orange");
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where v in ("bee", "orange");
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where v in ("bee", "world");
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where v in ("bee", "world");
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where v in ("orange");
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where v in ("orange");
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where v between "bee" and "orange";
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where v between "bee" and "orange";
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where v between "bee" and "zombie";
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where v between "bee" and "zombie";
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where v between "orange" and "pine";
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where v between "orange" and "pine";
\ No newline at end of file
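
The varchar file also probes trailing-space handling: string literals
keep their trailing blanks, so v="bee   " is expected to match no rows
while v="bee" matches the bee rows (the char golden file below shows the
analogous c="apple " query returning NULL). A hedged sketch of the
distinction:

  select sum(hash(*)) from newtypestbl where v = "bee";     -- matches the bee rows
  select sum(hash(*)) from newtypestbl where v = "bee   ";  -- trailing spaces count, no rows match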

http://git-wip-us.apache.org/repos/asf/hive/blob/f2056a13/ql/src/test/results/clientpositive/parquet_ppd_boolean.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/parquet_ppd_boolean.q.out b/ql/src/test/results/clientpositive/parquet_ppd_boolean.q.out
new file mode 100644
index 0000000..78b7aa6
--- /dev/null
+++ b/ql/src/test/results/clientpositive/parquet_ppd_boolean.q.out
@@ -0,0 +1,200 @@
+PREHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), b boolean) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@newtypestbl
+POSTHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), b boolean) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@newtypestbl
+PREHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, true from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, false from src src2) uniontbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@newtypestbl
+POSTHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, true from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, false from src src2) uniontbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@newtypestbl
+POSTHOOK: Lineage: newtypestbl.b EXPRESSION []
+POSTHOOK: Lineage: newtypestbl.c EXPRESSION []
+POSTHOOK: Lineage: newtypestbl.d EXPRESSION []
+POSTHOOK: Lineage: newtypestbl.v EXPRESSION []
+PREHOOK: query: select sum(hash(*)) from newtypestbl where b=true
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where b=true
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951953500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where b!=true
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where b!=true
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427776000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where b<true
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where b<true
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427776000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where b>true
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where b>true
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where b<=true
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where b<=true
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475822500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where b=false
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where b=false
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427776000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where b!=false
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where b!=false
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951953500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where b<false
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where b<false
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where b>false
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where b>false
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951953500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where b<=false
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where b<=false
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427776000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where b=true
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where b=true
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951953500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where b!=true
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where b!=true
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427776000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where b<true
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where b<true
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427776000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where b>true
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where b>true
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where b<=true
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where b<=true
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475822500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where b=false
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where b=false
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427776000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where b!=false
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where b!=false
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951953500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where b<false
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where b<false
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where b>false
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where b>false
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951953500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where b<=false
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where b<=false
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427776000
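
A reading guide for these golden files: each statement appears as a
PREHOOK/POSTHOOK pair recording the query text, the statement type, and
the tables read and written, and "#### A masked pattern was here ####"
stands in for nondeterministic output such as local file paths so the
expected files stay stable between runs. Each query is also printed twice
because the .q file runs it with hive.optimize.index.filter off and then
on; both printings must show the same checksum. A NULL result is not a
failure: sum() over an empty input is NULL in SQL, so a predicate that
eliminates every row legitimately prints NULL, as in:

  select sum(hash(*)) from newtypestbl where b > true;  -- no boolean exceeds true: zero rows, NULL sum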

http://git-wip-us.apache.org/repos/asf/hive/blob/f2056a13/ql/src/test/results/clientpositive/parquet_ppd_char.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/parquet_ppd_char.q.out b/ql/src/test/results/clientpositive/parquet_ppd_char.q.out
new file mode 100644
index 0000000..e62462c
--- /dev/null
+++ b/ql/src/test/results/clientpositive/parquet_ppd_char.q.out
@@ -0,0 +1,220 @@
+PREHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), da date) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@newtypestbl
+POSTHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), da date) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@newtypestbl
+PREHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2) uniontbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@newtypestbl
+POSTHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2) uniontbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@newtypestbl
+POSTHOOK: Lineage: newtypestbl.c EXPRESSION []
+POSTHOOK: Lineage: newtypestbl.d EXPRESSION []
+POSTHOOK: Lineage: newtypestbl.da EXPRESSION []
+POSTHOOK: Lineage: newtypestbl.v EXPRESSION []
+PREHOOK: query: -- char data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests)
+select sum(hash(*)) from newtypestbl where c="apple"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: -- char data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests)
+select sum(hash(*)) from newtypestbl where c="apple"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where c="apple"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where c="apple"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where c!="apple"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where c!="apple"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427804500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where c!="apple"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where c!="apple"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427804500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where c<"hello"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where c<"hello"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where c<"hello"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where c<"hello"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where c<="hello"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where c<="hello"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where c<="hello"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where c<="hello"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where c="apple "
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where c="apple "
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where c="apple "
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where c="apple "
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where c in ("apple", "carrot")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where c in ("apple", "carrot")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where c in ("apple", "carrot")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where c in ("apple", "carrot")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where c in ("apple", "hello")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where c in ("apple", "hello")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where c in ("apple", "hello")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where c in ("apple", "hello")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where c in ("carrot")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where c in ("carrot")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where c in ("carrot")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where c in ("carrot")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where c between "apple" and "carrot"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where c between "apple" and "carrot"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where c between "apple" and "carrot"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where c between "apple" and "carrot"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where c between "apple" and "zombie"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where c between "apple" and "zombie"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where c between "apple" and "zombie"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where c between "apple" and "zombie"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where c between "carrot" and "carrot1"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where c between "carrot" and "carrot1"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where c between "carrot" and "carrot1"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where c between "carrot" and "carrot1"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL

http://git-wip-us.apache.org/repos/asf/hive/blob/f2056a13/ql/src/test/results/clientpositive/parquet_ppd_date.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/parquet_ppd_date.q.out b/ql/src/test/results/clientpositive/parquet_ppd_date.q.out
new file mode 100644
index 0000000..aba302e
--- /dev/null
+++ b/ql/src/test/results/clientpositive/parquet_ppd_date.q.out
@@ -0,0 +1,301 @@
+PREHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), da date) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@newtypestbl
+POSTHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), da date) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@newtypestbl
+PREHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2) uniontbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@newtypestbl
+POSTHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2) uniontbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@newtypestbl
+POSTHOOK: Lineage: newtypestbl.c EXPRESSION []
+POSTHOOK: Lineage: newtypestbl.d EXPRESSION []
+POSTHOOK: Lineage: newtypestbl.da EXPRESSION []
+POSTHOOK: Lineage: newtypestbl.v EXPRESSION []
+PREHOOK: query: -- date data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests)
+select sum(hash(*)) from newtypestbl where da='1970-02-20'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: -- date data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests)
+select sum(hash(*)) from newtypestbl where da='1970-02-20'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da='1970-02-20'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da='1970-02-20'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da= date '1970-02-20'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da= date '1970-02-20'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da=cast('1970-02-20' as date)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da=cast('1970-02-20' as date)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da=cast('1970-02-20' as date)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da=cast('1970-02-20' as date)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da=cast('1970-02-20' as varchar(20))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da=cast('1970-02-20' as varchar(20))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da=cast('1970-02-20' as varchar(20))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da=cast('1970-02-20' as varchar(20))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da!='1970-02-20'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da!='1970-02-20'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427804500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da!='1970-02-20'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da!='1970-02-20'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427804500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da<'1970-02-27'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da<'1970-02-27'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da<'1970-02-27'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da<'1970-02-27'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da<'1970-02-29'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da<'1970-02-29'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da<'1970-02-29'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da<'1970-02-29'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da<'1970-02-15'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da<'1970-02-15'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da<'1970-02-15'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da<'1970-02-15'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da<='1970-02-20'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da<='1970-02-20'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da<='1970-02-20'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da<='1970-02-20'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da<='1970-02-27'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da<='1970-02-27'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da<='1970-02-27'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da<='1970-02-27'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da in (cast('1970-02-21' as date), cast('1970-02-27' as date))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da in (cast('1970-02-21' as date), cast('1970-02-27' as date))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427804500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da in (cast('1970-02-21' as date), cast('1970-02-27' as date))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da in (cast('1970-02-21' as date), cast('1970-02-27' as date))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427804500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da in (cast('1970-02-20' as date), cast('1970-02-27' as date))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da in (cast('1970-02-20' as date), cast('1970-02-27' as date))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da in (cast('1970-02-20' as date), cast('1970-02-27' as date))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da in (cast('1970-02-20' as date), cast('1970-02-27' as date))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da in (cast('1970-02-21' as date), cast('1970-02-22' as date))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da in (cast('1970-02-21' as date), cast('1970-02-22' as date))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da in (cast('1970-02-21' as date), cast('1970-02-22' as date))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da in (cast('1970-02-21' as date), cast('1970-02-22' as date))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da between '1970-02-19' and '1970-02-22'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da between '1970-02-19' and '1970-02-22'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da between '1970-02-19' and '1970-02-22'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da between '1970-02-19' and '1970-02-22'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da between '1970-02-19' and '1970-02-28'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da between '1970-02-19' and '1970-02-28'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da between '1970-02-19' and '1970-02-28'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da between '1970-02-19' and '1970-02-28'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da between '1970-02-18' and '1970-02-19'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da between '1970-02-18' and '1970-02-19'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da between '1970-02-18' and '1970-02-19'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da between '1970-02-18' and '1970-02-19'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
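
The date results above confirm that the same day can be written as a
bare string, a date literal, or an explicit cast, and that all three
reach the same rows; each form returns the checksum -252951929000 for
the apple rows. The equivalent spellings, taken from the queries above:

  select sum(hash(*)) from newtypestbl where da='1970-02-20';
  select sum(hash(*)) from newtypestbl where da= date '1970-02-20';
  select sum(hash(*)) from newtypestbl where da=cast('1970-02-20' as date);
  -- all three return -252951929000 on this data set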

http://git-wip-us.apache.org/repos/asf/hive/blob/f2056a13/ql/src/test/results/clientpositive/parquet_ppd_decimal.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/parquet_ppd_decimal.q.out b/ql/src/test/results/clientpositive/parquet_ppd_decimal.q.out
new file mode 100644
index 0000000..9e48df8
--- /dev/null
+++ b/ql/src/test/results/clientpositive/parquet_ppd_decimal.q.out
@@ -0,0 +1,490 @@
+PREHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), da date) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@newtypestbl
+POSTHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), da date) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@newtypestbl
+PREHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2) uniontbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@newtypestbl
+POSTHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2) uniontbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@newtypestbl
+POSTHOOK: Lineage: newtypestbl.c EXPRESSION []
+POSTHOOK: Lineage: newtypestbl.d EXPRESSION []
+POSTHOOK: Lineage: newtypestbl.da EXPRESSION []
+POSTHOOK: Lineage: newtypestbl.v EXPRESSION []
+PREHOOK: query: -- decimal data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests)
+select sum(hash(*)) from newtypestbl where d=0.22
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: -- decimal data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests)
+select sum(hash(*)) from newtypestbl where d=0.22
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d=0.22
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d=0.22
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d='0.22'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d='0.22'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d='0.22'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d='0.22'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d=cast('0.22' as float)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d=cast('0.22' as float)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d=cast('0.22' as float)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d=cast('0.22' as float)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d!=0.22
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d!=0.22
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427804500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d!=0.22
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d!=0.22
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427804500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d!='0.22'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d!='0.22'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427804500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d!='0.22'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d!='0.22'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427804500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d!=cast('0.22' as float)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d!=cast('0.22' as float)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427804500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d!=cast('0.22' as float)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d!=cast('0.22' as float)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427804500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d<11.22
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d<11.22
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d<11.22
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d<11.22
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d<'11.22'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d<'11.22'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d<'11.22'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d<'11.22'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d<cast('11.22' as float)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d<cast('11.22' as float)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d<cast('11.22' as float)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d<cast('11.22' as float)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d<1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d<1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d<1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d<1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d<=11.22
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d<=11.22
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d<=11.22
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d<=11.22
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d<='11.22'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d<='11.22'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d<='11.22'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d<='11.22'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d<=cast('11.22' as float)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d<=cast('11.22' as float)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d<=cast('11.22' as float)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d<=cast('11.22' as float)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d<=cast('11.22' as decimal)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d<=cast('11.22' as decimal)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d<=cast('11.22' as decimal)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d<=cast('11.22' as decimal)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d<=11.22BD
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d<=11.22BD
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d<=11.22BD
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d<=11.22BD
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d<=12
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d<=12
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d<=12
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d<=12
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d in ('0.22', '1.0')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d in ('0.22', '1.0')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d in ('0.22', '1.0')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d in ('0.22', '1.0')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d in ('0.22', '11.22')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d in ('0.22', '11.22')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d in ('0.22', '11.22')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d in ('0.22', '11.22')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d in ('0.9', '1.0')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d in ('0.9', '1.0')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d in ('0.9', '1.0')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d in ('0.9', '1.0')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d in ('0.9', 0.22)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d in ('0.9', 0.22)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d in ('0.9', 0.22)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d in ('0.9', 0.22)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d in ('0.9', 0.22, cast('11.22' as float))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d in ('0.9', 0.22, cast('11.22' as float))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d in ('0.9', 0.22, cast('11.22' as float))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d in ('0.9', 0.22, cast('11.22' as float))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d between 0 and 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d between 0 and 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d between 0 and 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d between 0 and 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d between 0 and 1000
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d between 0 and 1000
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d between 0 and 1000
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d between 0 and 1000
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d between 0 and '2.0'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d between 0 and '2.0'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d between 0 and '2.0'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d between 0 and '2.0'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d between 0 and cast(3 as float)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d between 0 and cast(3 as float)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d between 0 and cast(3 as float)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d between 0 and cast(3 as float)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d between 1 and cast(30 as char(10))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d between 1 and cast(30 as char(10))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427804500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d between 1 and cast(30 as char(10))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d between 1 and cast(30 as char(10))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427804500
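
A note on the NULL rows above: sum() over an empty input is NULL in SQL, so the two NULL results mean the predicate d in ('0.9', '1.0') selected no rows at all. The two non-NULL sums repeat across every predicate that admits 0.22 and 11.22, which suggests those are the distinct d values in the test data; the column values in the sketch below are that inference, not something taken from the golden file. A minimal plain-Java sketch of the literal normalization at work when mixed string and numeric literals are compared against a DECIMAL column (the DecimalInPredicate class is illustrative only, not Hive's evaluator):

    import java.math.BigDecimal;
    import java.util.Arrays;
    import java.util.List;
    import java.util.stream.Collectors;

    public class DecimalInPredicate {
        // Normalize mixed literals to BigDecimal, as comparison against a
        // DECIMAL column requires.
        static List<BigDecimal> normalize(Object... literals) {
            return Arrays.stream(literals)
                    .map(Object::toString)   // "0.9" and 0.22 both become strings first
                    .map(BigDecimal::new)    // then exact decimal values
                    .collect(Collectors.toList());
        }

        // IN semantics over DECIMAL: match on compareTo == 0, so that
        // 1.0 and "1.00" compare equal despite different scales.
        static boolean in(BigDecimal value, List<BigDecimal> literals) {
            return literals.stream().anyMatch(l -> l.compareTo(value) == 0);
        }

        public static void main(String[] args) {
            List<BigDecimal> column = normalize("0.22", "11.22"); // assumed column values
            List<BigDecimal> lits = normalize("0.9", "1.0");
            boolean any = column.stream().anyMatch(v -> in(v, lits));
            System.out.println(any); // false -> empty input to sum() -> NULL, as above
        }
    }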


[39/50] [abbrv] hive git commit: HIVE-11712: Duplicate groupby keys cause ClassCastException (Jimmy, reviewed by Xuefu)

Posted by xu...@apache.org.
HIVE-11712: Duplicate groupby keys cause ClassCastException (Jimmy, reviewed by Xuefu)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/bb4f5e70
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/bb4f5e70
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/bb4f5e70

Branch: refs/heads/beeline-cli
Commit: bb4f5e702b11720ca54e43ba4a6c3aff099b0f4c
Parents: c40382d
Author: Jimmy Xiang <jx...@cloudera.com>
Authored: Tue Sep 1 11:48:36 2015 -0700
Committer: Jimmy Xiang <jx...@cloudera.com>
Committed: Thu Sep 3 09:57:23 2015 -0700

----------------------------------------------------------------------
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |   4 +
 .../queries/clientpositive/groupby1_map_nomap.q |   2 +
 ql/src/test/queries/clientpositive/groupby6.q   |   2 +
 .../clientpositive/groupby_grouping_id2.q       |   2 +
 .../clientpositive/groupby_ppr_multi_distinct.q |   2 +
 ql/src/test/queries/clientpositive/having2.q    |  27 +
 .../clientpositive/groupby1_map_nomap.q.out     |   8 +-
 .../test/results/clientpositive/groupby6.q.out  |   8 +-
 .../clientpositive/groupby_duplicate_key.q.out  |  16 +-
 .../clientpositive/groupby_grouping_id2.q.out   |  28 +-
 .../groupby_ppr_multi_distinct.q.out            |   8 +-
 .../test/results/clientpositive/having2.q.out   | 353 ++++++++++++
 .../spark/groupby1_map_nomap.q.out              | 564 ++++++++++---------
 .../results/clientpositive/spark/groupby6.q.out |  20 +-
 .../spark/groupby_grouping_id2.q.out            |  38 +-
 .../spark/groupby_ppr_multi_distinct.q.out      |  16 +-
 16 files changed, 761 insertions(+), 337 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/bb4f5e70/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index b809a23..778c7b2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -4620,6 +4620,10 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
       ExprNodeDesc grpByExprNode = genExprNodeDesc(grpbyExpr,
           groupByInputRowResolver);
 
+      if (ExprNodeDescUtils.indexOf(grpByExprNode, groupByKeys) >= 0) {
+        // Skip duplicated grouping keys
+        continue;
+      }
       groupByKeys.add(grpByExprNode);
       String field = getColumnInternalName(i);
       outputColumnNames.add(field);
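
The four added lines above are the whole fix: before appending a grouping key, the analyzer probes the list built so far and skips structural duplicates. Because the skip happens before the output column name is allocated, a duplicated key contributes no extra output column — which is exactly what the groupby_duplicate_key.q.out plan changes further down show (three key columns reduced to two). A minimal self-contained sketch of the same pattern, using a plain List in place of Hive's ExprNodeDesc machinery (the DuplicateSkippingKeys class and its Key record are hypothetical, for illustration only):

    import java.util.ArrayList;
    import java.util.List;

    public class DuplicateSkippingKeys {
        // Stand-in for an expression node; equality is structural,
        // like ExprNodeDesc.isSame, not reference identity.
        record Key(String column, String type) {}

        static List<Key> dedupKeys(List<Key> requested) {
            List<Key> groupByKeys = new ArrayList<>();
            for (Key k : requested) {
                if (groupByKeys.indexOf(k) >= 0) {
                    continue; // skip duplicated grouping keys, as in the patch above
                }
                groupByKeys.add(k);
            }
            return groupByKeys;
        }

        public static void main(String[] args) {
            List<Key> requested = List.of(
                new Key("customer_name", "string"),
                new Key("customer_name", "string")); // GROUP BY c, c
            System.out.println(dedupKeys(requested).size()); // 1
        }
    }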

http://git-wip-us.apache.org/repos/asf/hive/blob/bb4f5e70/ql/src/test/queries/clientpositive/groupby1_map_nomap.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby1_map_nomap.q b/ql/src/test/queries/clientpositive/groupby1_map_nomap.q
index eb09a9c..b22a61e 100644
--- a/ql/src/test/queries/clientpositive/groupby1_map_nomap.q
+++ b/ql/src/test/queries/clientpositive/groupby1_map_nomap.q
@@ -2,6 +2,8 @@ set hive.map.aggr=true;
 set hive.groupby.skewindata=false;
 set hive.groupby.mapaggr.checkinterval=20;
 
+-- SORT_QUERY_RESULTS
+
 CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE;
 
 EXPLAIN

http://git-wip-us.apache.org/repos/asf/hive/blob/bb4f5e70/ql/src/test/queries/clientpositive/groupby6.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby6.q b/ql/src/test/queries/clientpositive/groupby6.q
index 3a3cc58..17597cb 100755
--- a/ql/src/test/queries/clientpositive/groupby6.q
+++ b/ql/src/test/queries/clientpositive/groupby6.q
@@ -1,6 +1,8 @@
 set hive.map.aggr=false;
 set hive.groupby.skewindata=true;
 
+-- SORT_QUERY_RESULTS
+
 CREATE TABLE dest1(c1 STRING) STORED AS TEXTFILE;
 
 EXPLAIN

http://git-wip-us.apache.org/repos/asf/hive/blob/bb4f5e70/ql/src/test/queries/clientpositive/groupby_grouping_id2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_id2.q b/ql/src/test/queries/clientpositive/groupby_grouping_id2.q
index f451f17..5c05aad 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_id2.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_id2.q
@@ -4,6 +4,8 @@ LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1;
 
 set hive.groupby.skewindata = true;
 
+-- SORT_QUERY_RESULTS
+
 SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP;
 
 SELECT GROUPING__ID, count(*)

http://git-wip-us.apache.org/repos/asf/hive/blob/bb4f5e70/ql/src/test/queries/clientpositive/groupby_ppr_multi_distinct.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_ppr_multi_distinct.q b/ql/src/test/queries/clientpositive/groupby_ppr_multi_distinct.q
index 20c73bd..1249853 100644
--- a/ql/src/test/queries/clientpositive/groupby_ppr_multi_distinct.q
+++ b/ql/src/test/queries/clientpositive/groupby_ppr_multi_distinct.q
@@ -1,6 +1,8 @@
 set hive.map.aggr=false;
 set hive.groupby.skewindata=false;
 
+-- SORT_QUERY_RESULTS
+
 CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE;
 
 EXPLAIN EXTENDED

http://git-wip-us.apache.org/repos/asf/hive/blob/bb4f5e70/ql/src/test/queries/clientpositive/having2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/having2.q b/ql/src/test/queries/clientpositive/having2.q
index 282b2c0..83ae1e1 100644
--- a/ql/src/test/queries/clientpositive/having2.q
+++ b/ql/src/test/queries/clientpositive/having2.q
@@ -63,3 +63,30 @@ SELECT customer_name, SUM(customer_balance), SUM(order_quantity) FROM default.te
 (SUM(customer_balance) <= 4074689.000000041)
 AND (COUNT(s1.discount) <= 822)
 );
+
+explain
+SELECT s1.customer_name FROM default.testv1_staples s1 join default.src s2 on s1.customer_name = s2.key
+GROUP BY s1.customer_name
+HAVING (
+(SUM(s1.customer_balance) <= 4074689.000000041)
+AND (AVG(s1.discount) <= 822)
+AND (COUNT(s2.value) > 4)
+);
+
+explain
+SELECT s1.customer_name FROM default.testv1_staples s1 join default.src s2 on s1.customer_name = s2.key
+GROUP BY s1.customer_name, s1.customer_name
+HAVING (
+(SUM(s1.customer_balance) <= 4074689.000000041)
+AND (AVG(s1.discount) <= 822)
+AND (COUNT(s2.value) > 4)
+);
+
+explain
+SELECT distinct s1.customer_name as x, s1.customer_name as y
+FROM default.testv1_staples s1 join default.src s2 on s1.customer_name = s2.key
+HAVING (
+(SUM(s1.customer_balance) <= 4074689.000000041)
+AND (AVG(s1.discount) <= 822)
+AND (COUNT(s2.value) > 4)
+);

http://git-wip-us.apache.org/repos/asf/hive/blob/bb4f5e70/ql/src/test/results/clientpositive/groupby1_map_nomap.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby1_map_nomap.q.out b/ql/src/test/results/clientpositive/groupby1_map_nomap.q.out
index cc985a5..7cdf240 100644
--- a/ql/src/test/results/clientpositive/groupby1_map_nomap.q.out
+++ b/ql/src/test/results/clientpositive/groupby1_map_nomap.q.out
@@ -1,8 +1,12 @@
-PREHOOK: query: CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
 PREHOOK: Output: default@dest1
-POSTHOOK: query: CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@dest1

http://git-wip-us.apache.org/repos/asf/hive/blob/bb4f5e70/ql/src/test/results/clientpositive/groupby6.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby6.q.out b/ql/src/test/results/clientpositive/groupby6.q.out
index b790224..d8cb2ac 100644
--- a/ql/src/test/results/clientpositive/groupby6.q.out
+++ b/ql/src/test/results/clientpositive/groupby6.q.out
@@ -1,8 +1,12 @@
-PREHOOK: query: CREATE TABLE dest1(c1 STRING) STORED AS TEXTFILE
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(c1 STRING) STORED AS TEXTFILE
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
 PREHOOK: Output: default@dest1
-POSTHOOK: query: CREATE TABLE dest1(c1 STRING) STORED AS TEXTFILE
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(c1 STRING) STORED AS TEXTFILE
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@dest1

http://git-wip-us.apache.org/repos/asf/hive/blob/bb4f5e70/ql/src/test/results/clientpositive/groupby_duplicate_key.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby_duplicate_key.q.out b/ql/src/test/results/clientpositive/groupby_duplicate_key.q.out
index 2f2a6e6..fc95f41 100644
--- a/ql/src/test/results/clientpositive/groupby_duplicate_key.q.out
+++ b/ql/src/test/results/clientpositive/groupby_duplicate_key.q.out
@@ -21,14 +21,14 @@ STAGE PLANS:
               outputColumnNames: key
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                keys: key (type: string), '' (type: string), '' (type: string)
+                keys: key (type: string), '' (type: string)
                 mode: hash
-                outputColumnNames: _col0, _col1, _col2
+                outputColumnNames: _col0, _col1
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col0 (type: string), _col2 (type: string)
+                  key expressions: _col0 (type: string), _col1 (type: string)
                   sort order: ++
-                  Map-reduce partition columns: _col0 (type: string), _col2 (type: string)
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Group By Operator
@@ -99,14 +99,14 @@ STAGE PLANS:
               outputColumnNames: key
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                keys: key (type: string), 'X' (type: string), 'X' (type: string)
+                keys: key (type: string), 'X' (type: string)
                 mode: hash
-                outputColumnNames: _col0, _col1, _col2
+                outputColumnNames: _col0, _col1
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col0 (type: string), _col2 (type: string)
+                  key expressions: _col0 (type: string), _col1 (type: string)
                   sort order: ++
-                  Map-reduce partition columns: _col0 (type: string), _col2 (type: string)
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Group By Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/bb4f5e70/ql/src/test/results/clientpositive/groupby_grouping_id2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby_grouping_id2.q.out b/ql/src/test/results/clientpositive/groupby_grouping_id2.q.out
index 4a0a9d2..544a7ae 100644
--- a/ql/src/test/results/clientpositive/groupby_grouping_id2.q.out
+++ b/ql/src/test/results/clientpositive/groupby_grouping_id2.q.out
@@ -14,25 +14,29 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt
 POSTHOOK: type: LOAD
 #### A masked pattern was here ####
 POSTHOOK: Output: default@t1
-PREHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t1
 #### A masked pattern was here ####
-POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 #### A masked pattern was here ####
-NULL	NULL	0	6
+1	1	3	1
 1	NULL	1	2
 1	NULL	3	1
-1	1	3	1
-2	NULL	1	1
 2	2	3	1
+2	NULL	1	1
+3	3	3	1
 3	NULL	1	2
 3	NULL	3	1
-3	3	3	1
-4	NULL	1	1
 4	5	3	1
+4	NULL	1	1
+NULL	NULL	0	6
 PREHOOK: query: SELECT GROUPING__ID, count(*)
 FROM
 (
@@ -129,17 +133,17 @@ POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key,
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 #### A masked pattern was here ####
-NULL	NULL	0	6
+1	1	3	1
 1	NULL	1	2
 1	NULL	3	1
-1	1	3	1
-2	NULL	1	1
 2	2	3	1
+2	NULL	1	1
+3	3	3	1
 3	NULL	1	2
 3	NULL	3	1
-3	3	3	1
-4	NULL	1	1
 4	5	3	1
+4	NULL	1	1
+NULL	NULL	0	6
 PREHOOK: query: SELECT GROUPING__ID, count(*)
 FROM
 (

http://git-wip-us.apache.org/repos/asf/hive/blob/bb4f5e70/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out b/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out
index c50abde..6eb3f66 100644
--- a/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out
+++ b/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out
@@ -1,8 +1,12 @@
-PREHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
 PREHOOK: Output: default@dest1
-POSTHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@dest1

http://git-wip-us.apache.org/repos/asf/hive/blob/bb4f5e70/ql/src/test/results/clientpositive/having2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/having2.q.out b/ql/src/test/results/clientpositive/having2.q.out
index aafd3b6..ba601f9 100644
--- a/ql/src/test/results/clientpositive/having2.q.out
+++ b/ql/src/test/results/clientpositive/having2.q.out
@@ -242,3 +242,356 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+PREHOOK: query: explain
+SELECT s1.customer_name FROM default.testv1_staples s1 join default.src s2 on s1.customer_name = s2.key
+GROUP BY s1.customer_name
+HAVING (
+(SUM(s1.customer_balance) <= 4074689.000000041)
+AND (AVG(s1.discount) <= 822)
+AND (COUNT(s2.value) > 4)
+)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+SELECT s1.customer_name FROM default.testv1_staples s1 join default.src s2 on s1.customer_name = s2.key
+GROUP BY s1.customer_name
+HAVING (
+(SUM(s1.customer_balance) <= 4074689.000000041)
+AND (AVG(s1.discount) <= 822)
+AND (COUNT(s2.value) > 4)
+)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: s1
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: customer_name is not null (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Reduce Output Operator
+                key expressions: customer_name (type: string)
+                sort order: +
+                Map-reduce partition columns: customer_name (type: string)
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                value expressions: discount (type: double), customer_balance (type: double)
+          TableScan
+            alias: s2
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: key (type: string)
+                sort order: +
+                Map-reduce partition columns: key (type: string)
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                value expressions: value (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 customer_name (type: string)
+            1 key (type: string)
+          outputColumnNames: _col6, _col18, _col21, _col54
+          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col18 (type: string), _col21 (type: double), _col6 (type: double), _col54 (type: string)
+            outputColumnNames: _col18, _col21, _col6, _col54
+            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: sum(_col21), avg(_col6), count(_col54)
+              keys: _col18 (type: string)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: double), _col2 (type: struct<count:bigint,sum:double,input:double>), _col3 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0), avg(VALUE._col1), count(VALUE._col2)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+          Filter Operator
+            predicate: (((_col1 <= 4074689.000000041) and (_col2 <= 822.0)) and (_col3 > 4)) (type: boolean)
+            Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: _col0 (type: string)
+              outputColumnNames: _col0
+              Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain
+SELECT s1.customer_name FROM default.testv1_staples s1 join default.src s2 on s1.customer_name = s2.key
+GROUP BY s1.customer_name, s1.customer_name
+HAVING (
+(SUM(s1.customer_balance) <= 4074689.000000041)
+AND (AVG(s1.discount) <= 822)
+AND (COUNT(s2.value) > 4)
+)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+SELECT s1.customer_name FROM default.testv1_staples s1 join default.src s2 on s1.customer_name = s2.key
+GROUP BY s1.customer_name, s1.customer_name
+HAVING (
+(SUM(s1.customer_balance) <= 4074689.000000041)
+AND (AVG(s1.discount) <= 822)
+AND (COUNT(s2.value) > 4)
+)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: s1
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: customer_name is not null (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Reduce Output Operator
+                key expressions: customer_name (type: string)
+                sort order: +
+                Map-reduce partition columns: customer_name (type: string)
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                value expressions: discount (type: double), customer_balance (type: double)
+          TableScan
+            alias: s2
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: key (type: string)
+                sort order: +
+                Map-reduce partition columns: key (type: string)
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                value expressions: value (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 customer_name (type: string)
+            1 key (type: string)
+          outputColumnNames: _col6, _col18, _col21, _col54
+          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col18 (type: string), _col21 (type: double), _col6 (type: double), _col54 (type: string)
+            outputColumnNames: _col18, _col21, _col6, _col54
+            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: sum(_col21), avg(_col6), count(_col54)
+              keys: _col18 (type: string)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: double), _col2 (type: struct<count:bigint,sum:double,input:double>), _col3 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0), avg(VALUE._col1), count(VALUE._col2)
+          keys: KEY._col0 (type: string), KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4
+          Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: string), _col2 (type: double), _col3 (type: double), _col4 (type: bigint)
+            outputColumnNames: _col1, _col2, _col3, _col4
+            Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (((_col2 <= 4074689.000000041) and (_col3 <= 822.0)) and (_col4 > 4)) (type: boolean)
+              Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: _col1 (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain
+SELECT distinct s1.customer_name as x, s1.customer_name as y
+FROM default.testv1_staples s1 join default.src s2 on s1.customer_name = s2.key
+HAVING (
+(SUM(s1.customer_balance) <= 4074689.000000041)
+AND (AVG(s1.discount) <= 822)
+AND (COUNT(s2.value) > 4)
+)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+SELECT distinct s1.customer_name as x, s1.customer_name as y
+FROM default.testv1_staples s1 join default.src s2 on s1.customer_name = s2.key
+HAVING (
+(SUM(s1.customer_balance) <= 4074689.000000041)
+AND (AVG(s1.discount) <= 822)
+AND (COUNT(s2.value) > 4)
+)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: s1
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: customer_name is not null (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Reduce Output Operator
+                key expressions: customer_name (type: string)
+                sort order: +
+                Map-reduce partition columns: customer_name (type: string)
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                value expressions: discount (type: double), customer_balance (type: double)
+          TableScan
+            alias: s2
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: key (type: string)
+                sort order: +
+                Map-reduce partition columns: key (type: string)
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                value expressions: value (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 customer_name (type: string)
+            1 key (type: string)
+          outputColumnNames: _col6, _col18, _col21, _col54
+          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col18 (type: string), _col21 (type: double), _col6 (type: double), _col54 (type: string)
+            outputColumnNames: _col18, _col21, _col6, _col54
+            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: sum(_col21), avg(_col6), count(_col54)
+              keys: _col18 (type: string)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: double), _col2 (type: struct<count:bigint,sum:double,input:double>), _col3 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0), avg(VALUE._col1), count(VALUE._col2)
+          keys: KEY._col0 (type: string), KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4
+          Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: string), _col2 (type: double), _col3 (type: double), _col4 (type: bigint)
+            outputColumnNames: _col1, _col2, _col3, _col4
+            Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (((_col2 <= 4074689.000000041) and (_col3 <= 822.0)) and (_col4 > 4)) (type: boolean)
+              Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: _col1 (type: string), _col1 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+

http://git-wip-us.apache.org/repos/asf/hive/blob/bb4f5e70/ql/src/test/results/clientpositive/spark/groupby1_map_nomap.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/groupby1_map_nomap.q.out b/ql/src/test/results/clientpositive/spark/groupby1_map_nomap.q.out
index 8fd9661..0799ff5 100644
--- a/ql/src/test/results/clientpositive/spark/groupby1_map_nomap.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby1_map_nomap.q.out
@@ -1,8 +1,12 @@
-PREHOOK: query: CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
 PREHOOK: Output: default@dest1
-POSTHOOK: query: CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@dest1
@@ -97,312 +101,312 @@ POSTHOOK: query: SELECT dest1.* FROM dest1
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@dest1
 #### A masked pattern was here ####
-273	819.0
-275	275.0
-419	419.0
-118	236.0
-202	202.0
-282	564.0
-82	82.0
+0	0.0
+10	10.0
+100	200.0
+103	206.0
+104	208.0
+105	105.0
+11	11.0
+111	111.0
+113	226.0
+114	114.0
 116	116.0
-345	345.0
-332	332.0
-19	19.0
-42	84.0
-459	918.0
-190	190.0
-257	257.0
+118	236.0
+119	357.0
+12	24.0
+120	240.0
+125	250.0
+126	126.0
+128	384.0
+129	258.0
+131	131.0
+133	133.0
 134	268.0
-165	330.0
+136	136.0
+137	274.0
 138	552.0
-222	222.0
+143	143.0
+145	145.0
+146	292.0
+149	298.0
+15	30.0
+150	150.0
+152	304.0
+153	153.0
+155	155.0
+156	156.0
+157	157.0
+158	158.0
+160	160.0
+162	162.0
 163	163.0
-219	438.0
-411	411.0
-305	305.0
-479	479.0
-28	28.0
-318	954.0
-244	244.0
+164	328.0
+165	330.0
+166	166.0
+167	501.0
+168	168.0
+169	676.0
+17	17.0
+170	170.0
+172	344.0
+174	348.0
+175	350.0
+176	352.0
+177	177.0
+178	178.0
+179	358.0
+18	36.0
+180	180.0
+181	181.0
+183	183.0
+186	186.0
+187	561.0
+189	189.0
+19	19.0
+190	190.0
+191	382.0
+192	192.0
+193	579.0
+194	194.0
+195	390.0
+196	196.0
+197	394.0
+199	597.0
+2	2.0
+20	20.0
+200	400.0
+201	201.0
+202	202.0
+203	406.0
+205	410.0
+207	414.0
 208	624.0
-136	136.0
-24	48.0
-239	478.0
-84	168.0
-11	11.0
-367	734.0
-288	576.0
-150	150.0
-402	402.0
-466	1398.0
+209	418.0
+213	426.0
+214	214.0
+216	432.0
+217	434.0
+218	218.0
+219	438.0
+221	442.0
+222	222.0
+223	446.0
 224	448.0
+226	226.0
+228	228.0
+229	458.0
+230	1150.0
+233	466.0
+235	235.0
 237	474.0
-105	105.0
-484	484.0
-20	20.0
-400	400.0
-97	194.0
-280	560.0
-255	510.0
-103	206.0
+238	476.0
+239	478.0
+24	48.0
+241	241.0
 242	484.0
-323	323.0
-309	618.0
-365	365.0
-178	178.0
+244	244.0
+247	247.0
+248	248.0
+249	249.0
+252	252.0
+255	510.0
+256	512.0
+257	257.0
+258	258.0
 26	52.0
-404	808.0
-196	196.0
-448	448.0
-462	924.0
-389	389.0
-338	338.0
-167	501.0
-493	493.0
-33	33.0
-152	304.0
-477	477.0
-431	1293.0
-316	948.0
-125	250.0
-444	444.0
-457	457.0
-446	446.0
-310	310.0
-129	258.0
-183	183.0
-392	392.0
+260	260.0
+262	262.0
+263	263.0
+265	530.0
+266	266.0
+27	27.0
+272	544.0
+273	819.0
+274	274.0
+275	275.0
 277	1108.0
-4	4.0
-80	80.0
-228	228.0
-145	145.0
-356	356.0
+278	556.0
+28	28.0
+280	560.0
+281	562.0
+282	564.0
+283	283.0
 284	284.0
-455	455.0
-53	53.0
-149	298.0
-424	848.0
-37	74.0
+285	285.0
 286	286.0
-327	981.0
-170	170.0
-187	561.0
-86	86.0
+287	287.0
+288	576.0
+289	289.0
 291	291.0
-233	466.0
-439	878.0
-266	266.0
-2	2.0
-396	1188.0
+292	292.0
+296	296.0
+298	894.0
+30	30.0
+302	302.0
+305	305.0
+306	306.0
+307	614.0
+308	308.0
+309	618.0
+310	310.0
+311	933.0
+315	315.0
+316	948.0
+317	634.0
+318	954.0
+321	642.0
+322	644.0
+323	323.0
+325	650.0
+327	981.0
+33	33.0
+331	662.0
+332	332.0
+333	666.0
+335	335.0
 336	336.0
-226	226.0
-176	352.0
-66	66.0
-497	497.0
-172	344.0
-491	491.0
-44	44.0
-200	400.0
-235	235.0
-77	77.0
-260	260.0
-406	1624.0
-460	460.0
-495	495.0
-143	143.0
-189	189.0
-453	453.0
-64	64.0
-158	158.0
+338	338.0
+339	339.0
+34	34.0
 341	341.0
-475	475.0
-8	8.0
-394	394.0
-57	57.0
-169	676.0
-15	30.0
+342	684.0
+344	688.0
+345	345.0
+348	1740.0
 35	105.0
-174	348.0
-325	650.0
-0	0.0
-248	248.0
-468	1872.0
-435	435.0
-51	102.0
-321	642.0
-413	826.0
+351	351.0
+353	706.0
+356	356.0
+360	360.0
+362	362.0
+364	364.0
+365	365.0
+366	366.0
+367	734.0
+368	368.0
 369	1107.0
-480	1440.0
-156	156.0
-192	192.0
-213	426.0
+37	74.0
+373	373.0
 374	374.0
-437	437.0
-17	17.0
-181	181.0
-482	482.0
-307	614.0
-194	194.0
-217	434.0
-95	190.0
-114	114.0
-262	262.0
+375	375.0
+377	377.0
 378	378.0
-417	1251.0
-281	562.0
-180	180.0
-467	467.0
-201	201.0
-432	432.0
-238	476.0
-96	96.0
+379	379.0
+382	764.0
+384	1152.0
 386	386.0
-283	283.0
-168	168.0
-209	418.0
-463	926.0
-377	377.0
-317	634.0
-252	252.0
-104	208.0
-373	373.0
-131	131.0
-494	494.0
-230	1150.0
-83	166.0
-191	382.0
-41	41.0
-193	579.0
-436	436.0
-496	496.0
-166	166.0
-229	458.0
-298	894.0
-133	133.0
-333	666.0
-65	65.0
-292	292.0
-364	364.0
-472	472.0
-274	274.0
-47	47.0
-401	2005.0
-67	134.0
-5	15.0
-18	36.0
-27	27.0
-344	688.0
-409	1227.0
-256	512.0
-85	85.0
-72	144.0
-54	54.0
+389	389.0
+392	392.0
 393	393.0
-160	160.0
-438	1314.0
-263	263.0
-351	351.0
-207	414.0
-449	449.0
-111	111.0
-128	384.0
-289	289.0
-399	798.0
-489	1956.0
-205	410.0
-177	177.0
-119	357.0
-331	662.0
-348	1740.0
-478	956.0
-76	152.0
-458	916.0
-382	764.0
-157	157.0
-315	315.0
-469	2345.0
-302	302.0
+394	394.0
 395	790.0
-384	1152.0
-162	162.0
-113	226.0
-98	196.0
-221	442.0
-203	406.0
-199	597.0
-454	1362.0
-218	218.0
-241	241.0
-272	544.0
-120	240.0
+396	1188.0
+397	794.0
+399	798.0
+4	4.0
+400	400.0
+401	2005.0
+402	402.0
 403	1209.0
-366	366.0
-249	249.0
+404	808.0
+406	1624.0
+407	407.0
+409	1227.0
+41	41.0
+411	411.0
+413	826.0
+414	828.0
+417	1251.0
+418	418.0
+419	419.0
+42	84.0
 421	421.0
-214	214.0
-92	92.0
-487	487.0
-258	258.0
+424	848.0
+427	427.0
 429	858.0
-265	530.0
-175	350.0
-34	34.0
-368	368.0
-69	69.0
-414	828.0
-30	30.0
-492	984.0
-9	9.0
-296	296.0
-311	933.0
-247	247.0
-164	328.0
-306	306.0
-153	153.0
-339	339.0
-322	644.0
-10	10.0
+43	43.0
 430	1290.0
-155	155.0
+431	1293.0
+432	432.0
+435	435.0
+436	436.0
+437	437.0
+438	1314.0
+439	878.0
+44	44.0
+443	443.0
+444	444.0
+446	446.0
+448	448.0
+449	449.0
 452	452.0
-179	358.0
+453	453.0
+454	1362.0
+455	455.0
+457	457.0
+458	916.0
+459	918.0
+460	460.0
+462	924.0
+463	926.0
+466	1398.0
+467	467.0
+468	1872.0
+469	2345.0
+47	47.0
+470	470.0
+472	472.0
+475	475.0
+477	477.0
+478	956.0
+479	479.0
+480	1440.0
+481	481.0
+482	482.0
+483	483.0
+484	484.0
 485	485.0
+487	487.0
+489	1956.0
 490	490.0
-443	443.0
-379	379.0
-186	186.0
-100	200.0
-137	274.0
-483	483.0
-90	270.0
-481	481.0
-287	287.0
-146	292.0
-216	432.0
-342	684.0
-470	470.0
-362	362.0
-375	375.0
-407	407.0
-397	794.0
-58	116.0
+491	491.0
+492	984.0
+493	493.0
+494	494.0
+495	495.0
+496	496.0
+497	497.0
 498	1494.0
-87	87.0
-195	390.0
-197	394.0
-78	78.0
-278	556.0
-12	24.0
-335	335.0
-360	360.0
-308	308.0
-223	446.0
-418	418.0
-43	43.0
-353	706.0
-74	74.0
-427	427.0
+5	15.0
+51	102.0
+53	53.0
+54	54.0
+57	57.0
+58	116.0
+64	64.0
+65	65.0
+66	66.0
+67	134.0
+69	69.0
 70	210.0
-285	285.0
-126	126.0
+72	144.0
+74	74.0
+76	152.0
+77	77.0
+78	78.0
+8	8.0
+80	80.0
+82	82.0
+83	166.0
+84	168.0
+85	85.0
+86	86.0
+87	87.0
+9	9.0
+90	270.0
+92	92.0
+95	190.0
+96	96.0
+97	194.0
+98	196.0

http://git-wip-us.apache.org/repos/asf/hive/blob/bb4f5e70/ql/src/test/results/clientpositive/spark/groupby6.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/groupby6.q.out b/ql/src/test/results/clientpositive/spark/groupby6.q.out
index c3caccd..bb9b315 100644
--- a/ql/src/test/results/clientpositive/spark/groupby6.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby6.q.out
@@ -1,8 +1,12 @@
-PREHOOK: query: CREATE TABLE dest1(c1 STRING) STORED AS TEXTFILE
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(c1 STRING) STORED AS TEXTFILE
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
 PREHOOK: Output: default@dest1
-POSTHOOK: query: CREATE TABLE dest1(c1 STRING) STORED AS TEXTFILE
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(c1 STRING) STORED AS TEXTFILE
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@dest1
@@ -101,13 +105,13 @@ POSTHOOK: query: SELECT dest1.* FROM dest1
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@dest1
 #### A masked pattern was here ####
-4
-8
-6
 0
+1
 2
-7
+3
+4
 5
+6
+7
+8
 9
-3
-1

http://git-wip-us.apache.org/repos/asf/hive/blob/bb4f5e70/ql/src/test/results/clientpositive/spark/groupby_grouping_id2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/groupby_grouping_id2.q.out b/ql/src/test/results/clientpositive/spark/groupby_grouping_id2.q.out
index 9a5c832..544a7ae 100644
--- a/ql/src/test/results/clientpositive/spark/groupby_grouping_id2.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby_grouping_id2.q.out
@@ -14,25 +14,29 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt
 POSTHOOK: type: LOAD
 #### A masked pattern was here ####
 POSTHOOK: Output: default@t1
-PREHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t1
 #### A masked pattern was here ####
-POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 #### A masked pattern was here ####
-NULL	NULL	0	6
 1	1	3	1
-2	NULL	1	1
-2	2	3	1
-3	3	3	1
-4	NULL	1	1
 1	NULL	1	2
 1	NULL	3	1
+2	2	3	1
+2	NULL	1	1
+3	3	3	1
 3	NULL	1	2
 3	NULL	3	1
 4	5	3	1
+4	NULL	1	1
+NULL	NULL	0	6
 PREHOOK: query: SELECT GROUPING__ID, count(*)
 FROM
 (
@@ -52,8 +56,8 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 #### A masked pattern was here ####
 0	1
-3	6
 1	4
+3	6
 PREHOOK: query: SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1  GROUP BY key,value WITH ROLLUP) t1
 JOIN 
 (SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2
@@ -129,17 +133,17 @@ POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key,
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 #### A masked pattern was here ####
-1	NULL	3	1
+1	1	3	1
 1	NULL	1	2
-NULL	NULL	0	6
-4	5	3	1
-3	NULL	3	1
-3	NULL	1	2
-4	NULL	1	1
-2	NULL	1	1
+1	NULL	3	1
 2	2	3	1
-1	1	3	1
+2	NULL	1	1
 3	3	3	1
+3	NULL	1	2
+3	NULL	3	1
+4	5	3	1
+4	NULL	1	1
+NULL	NULL	0	6
 PREHOOK: query: SELECT GROUPING__ID, count(*)
 FROM
 (
@@ -159,8 +163,8 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 #### A masked pattern was here ####
 0	1
-3	6
 1	4
+3	6
 PREHOOK: query: SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1  GROUP BY key,value WITH ROLLUP) t1
 JOIN 
 (SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2

http://git-wip-us.apache.org/repos/asf/hive/blob/bb4f5e70/ql/src/test/results/clientpositive/spark/groupby_ppr_multi_distinct.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/groupby_ppr_multi_distinct.q.out b/ql/src/test/results/clientpositive/spark/groupby_ppr_multi_distinct.q.out
index 01ea4ea..ef1cba2 100644
--- a/ql/src/test/results/clientpositive/spark/groupby_ppr_multi_distinct.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby_ppr_multi_distinct.q.out
@@ -1,8 +1,12 @@
-PREHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
 PREHOOK: Output: default@dest1
-POSTHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@dest1
@@ -335,12 +339,12 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@dest1
 #### A masked pattern was here ####
 0	1	00.0	0	1
-2	69	251142.0	15780	69
-4	74	4105526.0	30965	74
-6	5	6796.0	331	5
-8	8	81524.0	595	8
 1	71	132828.0	10044	71
+2	69	251142.0	15780	69
 3	62	364008.0	20119	62
+4	74	4105526.0	30965	74
 5	6	5794.0	278	6
+6	5	6796.0	331	5
 7	6	71470.0	447	6
+8	8	81524.0	595	8
 9	7	92094.0	577	7


[45/50] [abbrv] hive git commit: HIVE-11646: CBO: Calcite Operator To Hive Operator (Calcite Return Path): fix multiple window spec for PTF operator (Pengcheng Xiong, reviewed by Jesus Camacho Rodriguez)

Posted by xu...@apache.org.
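
The new cbo_rp_windowing_2.q.out golden file below exercises several window specifications over the same PTF input. In the first test, the s1 column is a running sum per p_mfgr partition with the frame "rows between unbounded preceding and current row". A minimal sketch of that frame's semantics in plain Java (the RunningSumSketch class and its Row record are illustrative, not Hive's PTF API; the sample values are taken from the first three result rows below):

    import java.util.*;

    public class RunningSumSketch {
        record Row(String mfgr, String name, double retailPrice) {}

        // Emulates sum(p_retailprice) over (distribute by p_mfgr sort by p_name
        // rows between unbounded preceding and current row).
        static List<Double> runningSums(List<Row> input) {
            List<Row> rows = new ArrayList<>(input);
            rows.sort(Comparator.comparing(Row::mfgr).thenComparing(Row::name));
            List<Double> sums = new ArrayList<>();
            String partition = null;
            double acc = 0.0;
            for (Row r : rows) {
                if (!r.mfgr().equals(partition)) {
                    partition = r.mfgr(); // a new partition resets the frame
                    acc = 0.0;
                }
                acc += r.retailPrice();
                sums.add(acc);
            }
            return sums;
        }

        public static void main(String[] args) {
            List<Row> rows = List.of(
                new Row("Manufacturer#1", "almond antique burnished rose metallic", 1173.15),
                new Row("Manufacturer#1", "almond antique burnished rose metallic", 1173.15),
                new Row("Manufacturer#1", "almond antique chartreuse lavender yellow", 1753.76));
            // approximately [1173.15, 2346.3, 4100.06], matching the s1 column below
            // (modulo double-precision printing)
            System.out.println(runningSums(rows));
        }
    }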
http://git-wip-us.apache.org/repos/asf/hive/blob/2d3316b9/ql/src/test/results/clientpositive/cbo_rp_windowing_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/cbo_rp_windowing_2.q.out b/ql/src/test/results/clientpositive/cbo_rp_windowing_2.q.out
new file mode 100644
index 0000000..aa34d3d
--- /dev/null
+++ b/ql/src/test/results/clientpositive/cbo_rp_windowing_2.q.out
@@ -0,0 +1,2338 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+-- 1. testWindowing
+select p_mfgr, p_name, p_size,
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s1
+from part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+-- 1. testWindowing
+select p_mfgr, p_name, p_size,
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s1
+from part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+Manufacturer#1	almond antique burnished rose metallic	2	1	1	1173.15
+Manufacturer#1	almond antique burnished rose metallic	2	1	1	2346.3
+Manufacturer#1	almond antique chartreuse lavender yellow	34	3	2	4100.06
+Manufacturer#1	almond antique salmon chartreuse burlywood	6	4	3	5702.650000000001
+Manufacturer#1	almond aquamarine burnished black steel	28	5	4	7117.070000000001
+Manufacturer#1	almond aquamarine pink moccasin thistle	42	6	5	8749.730000000001
+Manufacturer#2	almond antique violet chocolate turquoise	14	1	1	1690.68
+Manufacturer#2	almond antique violet turquoise frosted	40	2	2	3491.38
+Manufacturer#2	almond aquamarine midnight light salmon	2	3	3	5523.360000000001
+Manufacturer#2	almond aquamarine rose maroon antique	25	4	4	7222.02
+Manufacturer#2	almond aquamarine sandy cyan gainsboro	18	5	5	8923.62
+Manufacturer#3	almond antique chartreuse khaki white	17	1	1	1671.68
+Manufacturer#3	almond antique forest lavender goldenrod	14	2	2	2861.95
+Manufacturer#3	almond antique metallic orange dim	19	3	3	4272.34
+Manufacturer#3	almond antique misty red olive	1	4	4	6195.32
+Manufacturer#3	almond antique olive coral navajo	45	5	5	7532.61
+Manufacturer#4	almond antique gainsboro frosted violet	10	1	1	1620.67
+Manufacturer#4	almond antique violet mint lemon	39	2	2	2996.09
+Manufacturer#4	almond aquamarine floral ivory bisque	27	3	3	4202.35
+Manufacturer#4	almond aquamarine yellow dodger mint	7	4	4	6047.27
+Manufacturer#4	almond azure aquamarine papaya violet	12	5	5	7337.620000000001
+Manufacturer#5	almond antique blue firebrick mint	31	1	1	1789.69
+Manufacturer#5	almond antique medium spring khaki	6	2	2	3401.3500000000004
+Manufacturer#5	almond antique sky peru orange	2	3	3	5190.08
+Manufacturer#5	almond aquamarine dodger light gainsboro	46	4	4	6208.18
+Manufacturer#5	almond azure blanched chiffon midnight	23	5	5	7672.66
+PREHOOK: query: -- 2. testGroupByWithPartitioning
+select p_mfgr, p_name, p_size, 
+min(p_retailprice),
+rank() over(distribute by p_mfgr sort by p_name)as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz
+from part
+group by p_mfgr, p_name, p_size
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: -- 2. testGroupByWithPartitioning
+select p_mfgr, p_name, p_size, 
+min(p_retailprice),
+rank() over(distribute by p_mfgr sort by p_name)as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz
+from part
+group by p_mfgr, p_name, p_size
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+Manufacturer#1	almond antique burnished rose metallic	2	1173.15	1	1	2	0
+Manufacturer#1	almond antique chartreuse lavender yellow	34	1753.76	2	2	34	32
+Manufacturer#1	almond antique salmon chartreuse burlywood	6	1602.59	3	3	6	-28
+Manufacturer#1	almond aquamarine burnished black steel	28	1414.42	4	4	28	22
+Manufacturer#1	almond aquamarine pink moccasin thistle	42	1632.66	5	5	42	14
+Manufacturer#2	almond antique violet chocolate turquoise	14	1690.68	1	1	14	0
+Manufacturer#2	almond antique violet turquoise frosted	40	1800.7	2	2	40	26
+Manufacturer#2	almond aquamarine midnight light salmon	2	2031.98	3	3	2	-38
+Manufacturer#2	almond aquamarine rose maroon antique	25	1698.66	4	4	25	23
+Manufacturer#2	almond aquamarine sandy cyan gainsboro	18	1701.6	5	5	18	-7
+Manufacturer#3	almond antique chartreuse khaki white	17	1671.68	1	1	17	0
+Manufacturer#3	almond antique forest lavender goldenrod	14	1190.27	2	2	14	-3
+Manufacturer#3	almond antique metallic orange dim	19	1410.39	3	3	19	5
+Manufacturer#3	almond antique misty red olive	1	1922.98	4	4	1	-18
+Manufacturer#3	almond antique olive coral navajo	45	1337.29	5	5	45	44
+Manufacturer#4	almond antique gainsboro frosted violet	10	1620.67	1	1	10	0
+Manufacturer#4	almond antique violet mint lemon	39	1375.42	2	2	39	29
+Manufacturer#4	almond aquamarine floral ivory bisque	27	1206.26	3	3	27	-12
+Manufacturer#4	almond aquamarine yellow dodger mint	7	1844.92	4	4	7	-20
+Manufacturer#4	almond azure aquamarine papaya violet	12	1290.35	5	5	12	5
+Manufacturer#5	almond antique blue firebrick mint	31	1789.69	1	1	31	0
+Manufacturer#5	almond antique medium spring khaki	6	1611.66	2	2	6	-25
+Manufacturer#5	almond antique sky peru orange	2	1788.73	3	3	2	-4
+Manufacturer#5	almond aquamarine dodger light gainsboro	46	1018.1	4	4	46	44
+Manufacturer#5	almond azure blanched chiffon midnight	23	1464.48	5	5	23	-23
+PREHOOK: query: -- 3. testGroupByHavingWithSWQ
+select p_mfgr, p_name, p_size, min(p_retailprice),
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz
+from part
+group by p_mfgr, p_name, p_size
+having p_size > 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: -- 3. testGroupByHavingWithSWQ
+select p_mfgr, p_name, p_size, min(p_retailprice),
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz
+from part
+group by p_mfgr, p_name, p_size
+having p_size > 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+Manufacturer#1	almond antique burnished rose metallic	2	1173.15	1	1	2	0
+Manufacturer#1	almond antique chartreuse lavender yellow	34	1753.76	2	2	34	32
+Manufacturer#1	almond antique salmon chartreuse burlywood	6	1602.59	3	3	6	-28
+Manufacturer#1	almond aquamarine burnished black steel	28	1414.42	4	4	28	22
+Manufacturer#1	almond aquamarine pink moccasin thistle	42	1632.66	5	5	42	14
+Manufacturer#2	almond antique violet chocolate turquoise	14	1690.68	1	1	14	0
+Manufacturer#2	almond antique violet turquoise frosted	40	1800.7	2	2	40	26
+Manufacturer#2	almond aquamarine midnight light salmon	2	2031.98	3	3	2	-38
+Manufacturer#2	almond aquamarine rose maroon antique	25	1698.66	4	4	25	23
+Manufacturer#2	almond aquamarine sandy cyan gainsboro	18	1701.6	5	5	18	-7
+Manufacturer#3	almond antique chartreuse khaki white	17	1671.68	1	1	17	0
+Manufacturer#3	almond antique forest lavender goldenrod	14	1190.27	2	2	14	-3
+Manufacturer#3	almond antique metallic orange dim	19	1410.39	3	3	19	5
+Manufacturer#3	almond antique misty red olive	1	1922.98	4	4	1	-18
+Manufacturer#3	almond antique olive coral navajo	45	1337.29	5	5	45	44
+Manufacturer#4	almond antique gainsboro frosted violet	10	1620.67	1	1	10	0
+Manufacturer#4	almond antique violet mint lemon	39	1375.42	2	2	39	29
+Manufacturer#4	almond aquamarine floral ivory bisque	27	1206.26	3	3	27	-12
+Manufacturer#4	almond aquamarine yellow dodger mint	7	1844.92	4	4	7	-20
+Manufacturer#4	almond azure aquamarine papaya violet	12	1290.35	5	5	12	5
+Manufacturer#5	almond antique blue firebrick mint	31	1789.69	1	1	31	0
+Manufacturer#5	almond antique medium spring khaki	6	1611.66	2	2	6	-25
+Manufacturer#5	almond antique sky peru orange	2	1788.73	3	3	2	-4
+Manufacturer#5	almond aquamarine dodger light gainsboro	46	1018.1	4	4	46	44
+Manufacturer#5	almond azure blanched chiffon midnight	23	1464.48	5	5	23	-23
+PREHOOK: query: -- 4. testCount
+select p_mfgr, p_name, 
+count(p_size) over(distribute by p_mfgr sort by p_name) as cd 
+from part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: -- 4. testCount
+select p_mfgr, p_name, 
+count(p_size) over(distribute by p_mfgr sort by p_name) as cd 
+from part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+Manufacturer#1	almond antique burnished rose metallic	2
+Manufacturer#1	almond antique burnished rose metallic	2
+Manufacturer#1	almond antique chartreuse lavender yellow	3
+Manufacturer#1	almond antique salmon chartreuse burlywood	4
+Manufacturer#1	almond aquamarine burnished black steel	5
+Manufacturer#1	almond aquamarine pink moccasin thistle	6
+Manufacturer#2	almond antique violet chocolate turquoise	1
+Manufacturer#2	almond antique violet turquoise frosted	2
+Manufacturer#2	almond aquamarine midnight light salmon	3
+Manufacturer#2	almond aquamarine rose maroon antique	4
+Manufacturer#2	almond aquamarine sandy cyan gainsboro	5
+Manufacturer#3	almond antique chartreuse khaki white	1
+Manufacturer#3	almond antique forest lavender goldenrod	2
+Manufacturer#3	almond antique metallic orange dim	3
+Manufacturer#3	almond antique misty red olive	4
+Manufacturer#3	almond antique olive coral navajo	5
+Manufacturer#4	almond antique gainsboro frosted violet	1
+Manufacturer#4	almond antique violet mint lemon	2
+Manufacturer#4	almond aquamarine floral ivory bisque	3
+Manufacturer#4	almond aquamarine yellow dodger mint	4
+Manufacturer#4	almond azure aquamarine papaya violet	5
+Manufacturer#5	almond antique blue firebrick mint	1
+Manufacturer#5	almond antique medium spring khaki	2
+Manufacturer#5	almond antique sky peru orange	3
+Manufacturer#5	almond aquamarine dodger light gainsboro	4
+Manufacturer#5	almond azure blanched chiffon midnight	5
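
The running counts in test 4 follow from Hive's default window frame: with an ORDER BY (here spelled `sort by`) but no explicit frame, the frame defaults to RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, so peer rows that tie on the sort key share one value -- both "almond antique burnished rose metallic" rows report cd = 2. A minimal sketch spelling the implicit frame out over the same part table (Hive accepts partition by/order by and distribute by/sort by interchangeably inside OVER):

select p_mfgr, p_name,
       count(p_size) over (partition by p_mfgr order by p_name
                           range between unbounded preceding and current row) as cd
from part;
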
+PREHOOK: query: -- 5. testCountWithWindowingUDAF
+select p_mfgr, p_name, 
+rank() over(distribute by p_mfgr sort by p_name) as r, 
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr, 
+count(p_size) over(distribute by p_mfgr sort by p_name) as cd, 
+p_retailprice, sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s1, 
+p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz 
+from part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: -- 5. testCountWithWindowingUDAF
+select p_mfgr, p_name, 
+rank() over(distribute by p_mfgr sort by p_name) as r, 
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr, 
+count(p_size) over(distribute by p_mfgr sort by p_name) as cd, 
+p_retailprice, sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s1, 
+p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz 
+from part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+Manufacturer#1	almond antique burnished rose metallic	1	1	2	1173.15	1173.15	2	0
+Manufacturer#1	almond antique burnished rose metallic	1	1	2	1173.15	2346.3	2	0
+Manufacturer#1	almond antique chartreuse lavender yellow	3	2	3	1753.76	4100.06	34	32
+Manufacturer#1	almond antique salmon chartreuse burlywood	4	3	4	1602.59	5702.650000000001	6	-28
+Manufacturer#1	almond aquamarine burnished black steel	5	4	5	1414.42	7117.070000000001	28	22
+Manufacturer#1	almond aquamarine pink moccasin thistle	6	5	6	1632.66	8749.730000000001	42	14
+Manufacturer#2	almond antique violet chocolate turquoise	1	1	1	1690.68	1690.68	14	0
+Manufacturer#2	almond antique violet turquoise frosted	2	2	2	1800.7	3491.38	40	26
+Manufacturer#2	almond aquamarine midnight light salmon	3	3	3	2031.98	5523.360000000001	2	-38
+Manufacturer#2	almond aquamarine rose maroon antique	4	4	4	1698.66	7222.02	25	23
+Manufacturer#2	almond aquamarine sandy cyan gainsboro	5	5	5	1701.6	8923.62	18	-7
+Manufacturer#3	almond antique chartreuse khaki white	1	1	1	1671.68	1671.68	17	0
+Manufacturer#3	almond antique forest lavender goldenrod	2	2	2	1190.27	2861.95	14	-3
+Manufacturer#3	almond antique metallic orange dim	3	3	3	1410.39	4272.34	19	5
+Manufacturer#3	almond antique misty red olive	4	4	4	1922.98	6195.32	1	-18
+Manufacturer#3	almond antique olive coral navajo	5	5	5	1337.29	7532.61	45	44
+Manufacturer#4	almond antique gainsboro frosted violet	1	1	1	1620.67	1620.67	10	0
+Manufacturer#4	almond antique violet mint lemon	2	2	2	1375.42	2996.09	39	29
+Manufacturer#4	almond aquamarine floral ivory bisque	3	3	3	1206.26	4202.35	27	-12
+Manufacturer#4	almond aquamarine yellow dodger mint	4	4	4	1844.92	6047.27	7	-20
+Manufacturer#4	almond azure aquamarine papaya violet	5	5	5	1290.35	7337.620000000001	12	5
+Manufacturer#5	almond antique blue firebrick mint	1	1	1	1789.69	1789.69	31	0
+Manufacturer#5	almond antique medium spring khaki	2	2	2	1611.66	3401.3500000000004	6	-25
+Manufacturer#5	almond antique sky peru orange	3	3	3	1788.73	5190.08	2	-4
+Manufacturer#5	almond aquamarine dodger light gainsboro	4	4	4	1018.1	6208.18	46	44
+Manufacturer#5	almond azure blanched chiffon midnight	5	5	5	1464.48	7672.66	23	-23
+PREHOOK: query: -- 6. testCountInSubQ
+select sub1.r, sub1.dr, sub1.cd, sub1.s1, sub1.deltaSz 
+from (select p_mfgr, p_name, 
+rank() over(distribute by p_mfgr sort by p_name) as r, 
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr, 
+count(p_size) over(distribute by p_mfgr sort by p_name) as cd, 
+p_retailprice, sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s1, 
+p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz 
+from part 
+) sub1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: -- 6. testCountInSubQ
+select sub1.r, sub1.dr, sub1.cd, sub1.s1, sub1.deltaSz 
+from (select p_mfgr, p_name, 
+rank() over(distribute by p_mfgr sort by p_name) as r, 
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr, 
+count(p_size) over(distribute by p_mfgr sort by p_name) as cd, 
+p_retailprice, sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s1, 
+p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz 
+from part 
+) sub1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+1	1	1	1620.67	0
+1	1	1	1671.68	0
+1	1	1	1690.68	0
+1	1	1	1789.69	0
+1	1	2	1173.15	0
+1	1	2	2346.3	0
+2	2	2	2861.95	-3
+2	2	2	2996.09	29
+2	2	2	3401.3500000000004	-25
+2	2	2	3491.38	26
+3	2	3	4100.06	32
+3	3	3	4202.35	-12
+3	3	3	4272.34	5
+3	3	3	5190.08	-4
+3	3	3	5523.360000000001	-38
+4	3	4	5702.650000000001	-28
+4	4	4	6047.27	-20
+4	4	4	6195.32	-18
+4	4	4	6208.18	44
+4	4	4	7222.02	23
+5	4	5	7117.070000000001	22
+5	5	5	7337.620000000001	5
+5	5	5	7532.61	44
+5	5	5	7672.66	-23
+5	5	5	8923.62	-7
+6	5	6	8749.730000000001	14
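
Window functions cannot appear directly in a WHERE clause, so wrapping them in a subquery, as test 6 does, is the standard way to project or filter on their results. A minimal sketch of that idiom; the r <= 3 filter is an illustrative addition, not part of the test above:

select sub.p_name, sub.r
from (select p_name,
             rank() over (partition by p_mfgr order by p_name) as r
      from part) sub
where sub.r <= 3;
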
+PREHOOK: query: -- 7. testJoinWithWindowingAndPTF
+select abc.p_mfgr, abc.p_name, 
+rank() over(distribute by abc.p_mfgr sort by abc.p_name) as r, 
+dense_rank() over(distribute by abc.p_mfgr sort by abc.p_name) as dr, 
+abc.p_retailprice, sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row) as s1, 
+abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over(distribute by abc.p_mfgr sort by abc.p_name) as deltaSz 
+from noop(on part 
+partition by p_mfgr 
+order by p_name 
+) abc join part p1 on abc.p_partkey = p1.p_partkey
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: -- 7. testJoinWithWindowingAndPTF
+select abc.p_mfgr, abc.p_name, 
+rank() over(distribute by abc.p_mfgr sort by abc.p_name) as r, 
+dense_rank() over(distribute by abc.p_mfgr sort by abc.p_name) as dr, 
+abc.p_retailprice, sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row) as s1, 
+abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over(distribute by abc.p_mfgr sort by abc.p_name) as deltaSz 
+from noop(on part 
+partition by p_mfgr 
+order by p_name 
+) abc join part p1 on abc.p_partkey = p1.p_partkey
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+Manufacturer#1	almond antique burnished rose metallic	1	1	1173.15	1173.15	2	0
+Manufacturer#1	almond antique burnished rose metallic	1	1	1173.15	2346.3	2	0
+Manufacturer#1	almond antique burnished rose metallic	1	1	1173.15	3519.4500000000003	2	0
+Manufacturer#1	almond antique burnished rose metallic	1	1	1173.15	4692.6	2	0
+Manufacturer#1	almond antique chartreuse lavender yellow	5	2	1753.76	6446.360000000001	34	32
+Manufacturer#1	almond antique salmon chartreuse burlywood	6	3	1602.59	8048.950000000001	6	-28
+Manufacturer#1	almond aquamarine burnished black steel	7	4	1414.42	9463.37	28	22
+Manufacturer#1	almond aquamarine pink moccasin thistle	8	5	1632.66	11096.03	42	14
+Manufacturer#2	almond antique violet chocolate turquoise	1	1	1690.68	1690.68	14	0
+Manufacturer#2	almond antique violet turquoise frosted	2	2	1800.7	3491.38	40	26
+Manufacturer#2	almond aquamarine midnight light salmon	3	3	2031.98	5523.360000000001	2	-38
+Manufacturer#2	almond aquamarine rose maroon antique	4	4	1698.66	7222.02	25	23
+Manufacturer#2	almond aquamarine sandy cyan gainsboro	5	5	1701.6	8923.62	18	-7
+Manufacturer#3	almond antique chartreuse khaki white	1	1	1671.68	1671.68	17	0
+Manufacturer#3	almond antique forest lavender goldenrod	2	2	1190.27	2861.95	14	-3
+Manufacturer#3	almond antique metallic orange dim	3	3	1410.39	4272.34	19	5
+Manufacturer#3	almond antique misty red olive	4	4	1922.98	6195.32	1	-18
+Manufacturer#3	almond antique olive coral navajo	5	5	1337.29	7532.61	45	44
+Manufacturer#4	almond antique gainsboro frosted violet	1	1	1620.67	1620.67	10	0
+Manufacturer#4	almond antique violet mint lemon	2	2	1375.42	2996.09	39	29
+Manufacturer#4	almond aquamarine floral ivory bisque	3	3	1206.26	4202.35	27	-12
+Manufacturer#4	almond aquamarine yellow dodger mint	4	4	1844.92	6047.27	7	-20
+Manufacturer#4	almond azure aquamarine papaya violet	5	5	1290.35	7337.620000000001	12	5
+Manufacturer#5	almond antique blue firebrick mint	1	1	1789.69	1789.69	31	0
+Manufacturer#5	almond antique medium spring khaki	2	2	1611.66	3401.3500000000004	6	-25
+Manufacturer#5	almond antique sky peru orange	3	3	1788.73	5190.08	2	-4
+Manufacturer#5	almond aquamarine dodger light gainsboro	4	4	1018.1	6208.18	46	44
+Manufacturer#5	almond azure blanched chiffon midnight	5	5	1464.48	7672.66	23	-23
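
noop in test 7 is a pass-through partitioned table function from Hive's PTF test harness, registered earlier in the test setup (not shown in this hunk); it returns its input partition unchanged, so the test exercises windowing layered over a PTF plus a join. Note how the self-join on p_partkey duplicates the two "burnished rose metallic" rows into four: rank() treats them as four ties and jumps from 1 to 5, while dense_rank() continues at 2. The invocation shape, as a sketch (assuming noop has been created as in the windowing test setup):

select abc.p_mfgr,
       rank() over (partition by abc.p_mfgr order by abc.p_name) as r
from noop(on part
          partition by p_mfgr
          order by p_name) abc;
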
+PREHOOK: query: -- 8. testMixedCaseAlias
+select p_mfgr, p_name, p_size, 
+rank() over(distribute by p_mfgr sort by p_name, p_size desc) as R
+from part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: -- 8. testMixedCaseAlias
+select p_mfgr, p_name, p_size, 
+rank() over(distribute by p_mfgr sort by p_name, p_size desc) as R
+from part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+Manufacturer#1	almond antique burnished rose metallic	2	1
+Manufacturer#1	almond antique burnished rose metallic	2	1
+Manufacturer#1	almond antique chartreuse lavender yellow	34	3
+Manufacturer#1	almond antique salmon chartreuse burlywood	6	4
+Manufacturer#1	almond aquamarine burnished black steel	28	5
+Manufacturer#1	almond aquamarine pink moccasin thistle	42	6
+Manufacturer#2	almond antique violet chocolate turquoise	14	1
+Manufacturer#2	almond antique violet turquoise frosted	40	2
+Manufacturer#2	almond aquamarine midnight light salmon	2	3
+Manufacturer#2	almond aquamarine rose maroon antique	25	4
+Manufacturer#2	almond aquamarine sandy cyan gainsboro	18	5
+Manufacturer#3	almond antique chartreuse khaki white	17	1
+Manufacturer#3	almond antique forest lavender goldenrod	14	2
+Manufacturer#3	almond antique metallic orange dim	19	3
+Manufacturer#3	almond antique misty red olive	1	4
+Manufacturer#3	almond antique olive coral navajo	45	5
+Manufacturer#4	almond antique gainsboro frosted violet	10	1
+Manufacturer#4	almond antique violet mint lemon	39	2
+Manufacturer#4	almond aquamarine floral ivory bisque	27	3
+Manufacturer#4	almond aquamarine yellow dodger mint	7	4
+Manufacturer#4	almond azure aquamarine papaya violet	12	5
+Manufacturer#5	almond antique blue firebrick mint	31	1
+Manufacturer#5	almond antique medium spring khaki	6	2
+Manufacturer#5	almond antique sky peru orange	2	3
+Manufacturer#5	almond aquamarine dodger light gainsboro	46	4
+Manufacturer#5	almond azure blanched chiffon midnight	23	5
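
HiveQL identifiers, including column aliases, are case-insensitive, which is what test 8 checks: the alias R resolves the same as r. The ordering also shows the standard rank gap on ties -- the two size-2 rows both rank 1, and the next row ranks 3. A minimal equivalent sketch:

select p_name, p_size,
       rank() over (partition by p_mfgr order by p_name, p_size desc) as R
from part;
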
+PREHOOK: query: -- 9. testHavingWithWindowingNoGBY
+select p_mfgr, p_name, p_size, 
+rank() over(distribute by p_mfgr sort by p_name) as r, 
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr, 
+sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row)  as s1
+from part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: -- 9. testHavingWithWindowingNoGBY
+select p_mfgr, p_name, p_size, 
+rank() over(distribute by p_mfgr sort by p_name) as r, 
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr, 
+sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row)  as s1
+from part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+Manufacturer#1	almond antique burnished rose metallic	2	1	1	1173.15
+Manufacturer#1	almond antique burnished rose metallic	2	1	1	2346.3
+Manufacturer#1	almond antique chartreuse lavender yellow	34	3	2	4100.06
+Manufacturer#1	almond antique salmon chartreuse burlywood	6	4	3	5702.650000000001
+Manufacturer#1	almond aquamarine burnished black steel	28	5	4	7117.070000000001
+Manufacturer#1	almond aquamarine pink moccasin thistle	42	6	5	8749.730000000001
+Manufacturer#2	almond antique violet chocolate turquoise	14	1	1	1690.68
+Manufacturer#2	almond antique violet turquoise frosted	40	2	2	3491.38
+Manufacturer#2	almond aquamarine midnight light salmon	2	3	3	5523.360000000001
+Manufacturer#2	almond aquamarine rose maroon antique	25	4	4	7222.02
+Manufacturer#2	almond aquamarine sandy cyan gainsboro	18	5	5	8923.62
+Manufacturer#3	almond antique chartreuse khaki white	17	1	1	1671.68
+Manufacturer#3	almond antique forest lavender goldenrod	14	2	2	2861.95
+Manufacturer#3	almond antique metallic orange dim	19	3	3	4272.34
+Manufacturer#3	almond antique misty red olive	1	4	4	6195.32
+Manufacturer#3	almond antique olive coral navajo	45	5	5	7532.61
+Manufacturer#4	almond antique gainsboro frosted violet	10	1	1	1620.67
+Manufacturer#4	almond antique violet mint lemon	39	2	2	2996.09
+Manufacturer#4	almond aquamarine floral ivory bisque	27	3	3	4202.35
+Manufacturer#4	almond aquamarine yellow dodger mint	7	4	4	6047.27
+Manufacturer#4	almond azure aquamarine papaya violet	12	5	5	7337.620000000001
+Manufacturer#5	almond antique blue firebrick mint	31	1	1	1789.69
+Manufacturer#5	almond antique medium spring khaki	6	2	2	3401.3500000000004
+Manufacturer#5	almond antique sky peru orange	2	3	3	5190.08
+Manufacturer#5	almond aquamarine dodger light gainsboro	46	4	4	6208.18
+Manufacturer#5	almond azure blanched chiffon midnight	23	5	5	7672.66
+PREHOOK: query: -- 10. testHavingWithWindowingCondRankNoGBY
+select p_mfgr, p_name, p_size, 
+rank() over(distribute by p_mfgr sort by p_name) as r, 
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr, 
+sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s1 
+from part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: -- 10. testHavingWithWindowingCondRankNoGBY
+select p_mfgr, p_name, p_size, 
+rank() over(distribute by p_mfgr sort by p_name) as r, 
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr, 
+sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s1 
+from part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+Manufacturer#1	almond antique burnished rose metallic	2	1	1	1173.15
+Manufacturer#1	almond antique burnished rose metallic	2	1	1	2346.3
+Manufacturer#1	almond antique chartreuse lavender yellow	34	3	2	4100.06
+Manufacturer#1	almond antique salmon chartreuse burlywood	6	4	3	5702.650000000001
+Manufacturer#1	almond aquamarine burnished black steel	28	5	4	7117.070000000001
+Manufacturer#1	almond aquamarine pink moccasin thistle	42	6	5	8749.730000000001
+Manufacturer#2	almond antique violet chocolate turquoise	14	1	1	1690.68
+Manufacturer#2	almond antique violet turquoise frosted	40	2	2	3491.38
+Manufacturer#2	almond aquamarine midnight light salmon	2	3	3	5523.360000000001
+Manufacturer#2	almond aquamarine rose maroon antique	25	4	4	7222.02
+Manufacturer#2	almond aquamarine sandy cyan gainsboro	18	5	5	8923.62
+Manufacturer#3	almond antique chartreuse khaki white	17	1	1	1671.68
+Manufacturer#3	almond antique forest lavender goldenrod	14	2	2	2861.95
+Manufacturer#3	almond antique metallic orange dim	19	3	3	4272.34
+Manufacturer#3	almond antique misty red olive	1	4	4	6195.32
+Manufacturer#3	almond antique olive coral navajo	45	5	5	7532.61
+Manufacturer#4	almond antique gainsboro frosted violet	10	1	1	1620.67
+Manufacturer#4	almond antique violet mint lemon	39	2	2	2996.09
+Manufacturer#4	almond aquamarine floral ivory bisque	27	3	3	4202.35
+Manufacturer#4	almond aquamarine yellow dodger mint	7	4	4	6047.27
+Manufacturer#4	almond azure aquamarine papaya violet	12	5	5	7337.620000000001
+Manufacturer#5	almond antique blue firebrick mint	31	1	1	1789.69
+Manufacturer#5	almond antique medium spring khaki	6	2	2	3401.3500000000004
+Manufacturer#5	almond antique sky peru orange	2	3	3	5190.08
+Manufacturer#5	almond aquamarine dodger light gainsboro	46	4	4	6208.18
+Manufacturer#5	almond azure blanched chiffon midnight	23	5	5	7672.66
+PREHOOK: query: -- 11. testFirstLast   
+select  p_mfgr,p_name, p_size, 
+sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2, 
+first_value(p_size) over w1  as f, 
+last_value(p_size, false) over w1  as l 
+from part 
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: -- 11. testFirstLast   
+select  p_mfgr,p_name, p_size, 
+sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2, 
+first_value(p_size) over w1  as f, 
+last_value(p_size, false) over w1  as l 
+from part 
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+Manufacturer#1	almond antique burnished rose metallic	2	2	2	34
+Manufacturer#1	almond antique burnished rose metallic	2	2	2	6
+Manufacturer#1	almond antique chartreuse lavender yellow	34	34	2	28
+Manufacturer#1	almond antique salmon chartreuse burlywood	6	6	2	42
+Manufacturer#1	almond aquamarine burnished black steel	28	28	34	42
+Manufacturer#1	almond aquamarine pink moccasin thistle	42	42	6	42
+Manufacturer#2	almond antique violet chocolate turquoise	14	14	14	2
+Manufacturer#2	almond antique violet turquoise frosted	40	40	14	25
+Manufacturer#2	almond aquamarine midnight light salmon	2	2	14	18
+Manufacturer#2	almond aquamarine rose maroon antique	25	25	40	18
+Manufacturer#2	almond aquamarine sandy cyan gainsboro	18	18	2	18
+Manufacturer#3	almond antique chartreuse khaki white	17	17	17	19
+Manufacturer#3	almond antique forest lavender goldenrod	14	14	17	1
+Manufacturer#3	almond antique metallic orange dim	19	19	17	45
+Manufacturer#3	almond antique misty red olive	1	1	14	45
+Manufacturer#3	almond antique olive coral navajo	45	45	19	45
+Manufacturer#4	almond antique gainsboro frosted violet	10	10	10	27
+Manufacturer#4	almond antique violet mint lemon	39	39	10	7
+Manufacturer#4	almond aquamarine floral ivory bisque	27	27	10	12
+Manufacturer#4	almond aquamarine yellow dodger mint	7	7	39	12
+Manufacturer#4	almond azure aquamarine papaya violet	12	12	27	12
+Manufacturer#5	almond antique blue firebrick mint	31	31	31	2
+Manufacturer#5	almond antique medium spring khaki	6	6	31	46
+Manufacturer#5	almond antique sky peru orange	2	2	31	23
+Manufacturer#5	almond aquamarine dodger light gainsboro	46	46	6	23
+Manufacturer#5	almond azure blanched chiffon midnight	23	23	2	23
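
first_value and last_value in test 11 are evaluated against the explicit ROWS frame (2 preceding to 2 following), so f and l are the sizes at the frame edges, not the partition edges. The optional second argument to last_value is Hive's skip-nulls flag; false (the default) keeps nulls. A minimal sketch over the same part table:

select p_mfgr, p_name, p_size,
       first_value(p_size) over w1 as f,
       last_value(p_size, false) over w1 as l
from part
window w1 as (partition by p_mfgr order by p_name
              rows between 2 preceding and 2 following);
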
+PREHOOK: query: -- 12. testFirstLastWithWhere
+select  p_mfgr,p_name, p_size, 
+rank() over(distribute by p_mfgr sort by p_name) as r, 
+sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2, 
+first_value(p_size) over w1 as f,  
+last_value(p_size, false) over w1 as l 
+from part 
+where p_mfgr = 'Manufacturer#3'  
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: -- 12. testFirstLastWithWhere
+select  p_mfgr,p_name, p_size, 
+rank() over(distribute by p_mfgr sort by p_name) as r, 
+sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2, 
+first_value(p_size) over w1 as f,  
+last_value(p_size, false) over w1 as l 
+from part 
+where p_mfgr = 'Manufacturer#3'  
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+Manufacturer#3	almond antique chartreuse khaki white	17	1	17	17	19
+Manufacturer#3	almond antique forest lavender goldenrod	14	2	14	17	1
+Manufacturer#3	almond antique metallic orange dim	19	3	19	17	45
+Manufacturer#3	almond antique misty red olive	1	4	1	14	45
+Manufacturer#3	almond antique olive coral navajo	45	5	45	19	45
+PREHOOK: query: -- 13. testSumWindow
+select  p_mfgr,p_name, p_size,  
+sum(p_size) over w1 as s1, 
+sum(p_size) over (distribute by p_mfgr  sort by p_name rows between current row and current row)  as s2 
+from part 
+window w1 as (distribute by p_mfgr  sort by p_name rows between 2 preceding and 2 following)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: -- 13. testSumWindow
+select  p_mfgr,p_name, p_size,  
+sum(p_size) over w1 as s1, 
+sum(p_size) over (distribute by p_mfgr  sort by p_name rows between current row and current row)  as s2 
+from part 
+window w1 as (distribute by p_mfgr  sort by p_name rows between 2 preceding and 2 following)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+Manufacturer#1	almond antique burnished rose metallic	2	38	2
+Manufacturer#1	almond antique burnished rose metallic	2	44	2
+Manufacturer#1	almond antique chartreuse lavender yellow	34	72	34
+Manufacturer#1	almond antique salmon chartreuse burlywood	6	112	6
+Manufacturer#1	almond aquamarine burnished black steel	28	110	28
+Manufacturer#1	almond aquamarine pink moccasin thistle	42	76	42
+Manufacturer#2	almond antique violet chocolate turquoise	14	56	14
+Manufacturer#2	almond antique violet turquoise frosted	40	81	40
+Manufacturer#2	almond aquamarine midnight light salmon	2	99	2
+Manufacturer#2	almond aquamarine rose maroon antique	25	85	25
+Manufacturer#2	almond aquamarine sandy cyan gainsboro	18	45	18
+Manufacturer#3	almond antique chartreuse khaki white	17	50	17
+Manufacturer#3	almond antique forest lavender goldenrod	14	51	14
+Manufacturer#3	almond antique metallic orange dim	19	96	19
+Manufacturer#3	almond antique misty red olive	1	79	1
+Manufacturer#3	almond antique olive coral navajo	45	65	45
+Manufacturer#4	almond antique gainsboro frosted violet	10	76	10
+Manufacturer#4	almond antique violet mint lemon	39	83	39
+Manufacturer#4	almond aquamarine floral ivory bisque	27	95	27
+Manufacturer#4	almond aquamarine yellow dodger mint	7	85	7
+Manufacturer#4	almond azure aquamarine papaya violet	12	46	12
+Manufacturer#5	almond antique blue firebrick mint	31	39	31
+Manufacturer#5	almond antique medium spring khaki	6	85	6
+Manufacturer#5	almond antique sky peru orange	2	108	2
+Manufacturer#5	almond aquamarine dodger light gainsboro	46	77	46
+Manufacturer#5	almond azure blanched chiffon midnight	23	71	23
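
The inline window in test 13 is the degenerate frame ROWS BETWEEN CURRENT ROW AND CURRENT ROW, which contains exactly one row, so s2 simply echoes p_size; only s1, computed over the five-row w1 frame, actually aggregates neighbors. Sketch:

select p_mfgr, p_name, p_size,
       sum(p_size) over (partition by p_mfgr order by p_name
                         rows between current row and current row) as s2  -- equals p_size
from part;
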
+PREHOOK: query: -- 14. testNoSortClause
+select  p_mfgr,p_name, p_size, 
+rank() over(distribute by p_mfgr sort by p_name) as r, dense_rank() over(distribute by p_mfgr sort by p_name) as dr 
+from part  
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: -- 14. testNoSortClause
+select  p_mfgr,p_name, p_size, 
+rank() over(distribute by p_mfgr sort by p_name) as r, dense_rank() over(distribute by p_mfgr sort by p_name) as dr 
+from part  
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+Manufacturer#1	almond antique burnished rose metallic	2	1	1
+Manufacturer#1	almond antique burnished rose metallic	2	1	1
+Manufacturer#1	almond antique chartreuse lavender yellow	34	3	2
+Manufacturer#1	almond antique salmon chartreuse burlywood	6	4	3
+Manufacturer#1	almond aquamarine burnished black steel	28	5	4
+Manufacturer#1	almond aquamarine pink moccasin thistle	42	6	5
+Manufacturer#2	almond antique violet chocolate turquoise	14	1	1
+Manufacturer#2	almond antique violet turquoise frosted	40	2	2
+Manufacturer#2	almond aquamarine midnight light salmon	2	3	3
+Manufacturer#2	almond aquamarine rose maroon antique	25	4	4
+Manufacturer#2	almond aquamarine sandy cyan gainsboro	18	5	5
+Manufacturer#3	almond antique chartreuse khaki white	17	1	1
+Manufacturer#3	almond antique forest lavender goldenrod	14	2	2
+Manufacturer#3	almond antique metallic orange dim	19	3	3
+Manufacturer#3	almond antique misty red olive	1	4	4
+Manufacturer#3	almond antique olive coral navajo	45	5	5
+Manufacturer#4	almond antique gainsboro frosted violet	10	1	1
+Manufacturer#4	almond antique violet mint lemon	39	2	2
+Manufacturer#4	almond aquamarine floral ivory bisque	27	3	3
+Manufacturer#4	almond aquamarine yellow dodger mint	7	4	4
+Manufacturer#4	almond azure aquamarine papaya violet	12	5	5
+Manufacturer#5	almond antique blue firebrick mint	31	1	1
+Manufacturer#5	almond antique medium spring khaki	6	2	2
+Manufacturer#5	almond antique sky peru orange	2	3	3
+Manufacturer#5	almond aquamarine dodger light gainsboro	46	4	4
+Manufacturer#5	almond azure blanched chiffon midnight	23	5	5
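
Test 14 defines window w1 but never references it; the statement is evidently accepted, with the unused definition ignored, and the ranking functions rely on their own inline sort by. A minimal sketch of the same shape (the unused w1 is deliberate):

select p_mfgr, p_name,
       rank() over (partition by p_mfgr order by p_name) as r
from part
window w1 as (partition by p_mfgr order by p_name
              rows between 2 preceding and 2 following);
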
+PREHOOK: query: -- 15. testExpressions
+select  p_mfgr,p_name, p_size,  
+rank() over(distribute by p_mfgr sort by p_name) as r,  
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr, 
+cume_dist() over(distribute by p_mfgr sort by p_name) as cud, 
+percent_rank() over(distribute by p_mfgr sort by p_name) as pr, 
+ntile(3) over(distribute by p_mfgr sort by p_name) as nt, 
+count(p_size) over(distribute by p_mfgr sort by p_name) as ca, 
+avg(p_size) over(distribute by p_mfgr sort by p_name) as avg, 
+stddev(p_size) over(distribute by p_mfgr sort by p_name) as st, 
+first_value(p_size % 5) over(distribute by p_mfgr sort by p_name) as fv, 
+last_value(p_size) over(distribute by p_mfgr sort by p_name) as lv, 
+first_value(p_size) over w1  as fvW1
+from part 
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: -- 15. testExpressions
+select  p_mfgr,p_name, p_size,  
+rank() over(distribute by p_mfgr sort by p_name) as r,  
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr, 
+cume_dist() over(distribute by p_mfgr sort by p_name) as cud, 
+percent_rank() over(distribute by p_mfgr sort by p_name) as pr, 
+ntile(3) over(distribute by p_mfgr sort by p_name) as nt, 
+count(p_size) over(distribute by p_mfgr sort by p_name) as ca, 
+avg(p_size) over(distribute by p_mfgr sort by p_name) as avg, 
+stddev(p_size) over(distribute by p_mfgr sort by p_name) as st, 
+first_value(p_size % 5) over(distribute by p_mfgr sort by p_name) as fv, 
+last_value(p_size) over(distribute by p_mfgr sort by p_name) as lv, 
+first_value(p_size) over w1  as fvW1
+from part 
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+Manufacturer#1	almond antique burnished rose metallic	2	1	1	0	0.0	1	2	2.0	0.0	2	2	2
+Manufacturer#1	almond antique burnished rose metallic	2	1	1	0	0.0	1	2	2.0	0.0	2	2	2
+Manufacturer#1	almond antique chartreuse lavender yellow	34	3	2	0	0.4	2	3	12.666666666666666	15.084944665313014	2	34	2
+Manufacturer#1	almond antique salmon chartreuse burlywood	6	4	3	0	0.6	2	4	11.0	13.379088160259652	2	6	2
+Manufacturer#1	almond aquamarine burnished black steel	28	5	4	0	0.8	3	5	14.4	13.763720427268202	2	28	34
+Manufacturer#1	almond aquamarine pink moccasin thistle	42	6	5	1	1.0	3	6	19.0	16.237815945091466	2	42	6
+Manufacturer#2	almond antique violet chocolate turquoise	14	1	1	0	0.0	1	1	14.0	0.0	4	14	14
+Manufacturer#2	almond antique violet turquoise frosted	40	2	2	0	0.25	1	2	27.0	13.0	4	40	14
+Manufacturer#2	almond aquamarine midnight light salmon	2	3	3	0	0.5	2	3	18.666666666666668	15.86050300449376	4	2	14
+Manufacturer#2	almond aquamarine rose maroon antique	25	4	4	0	0.75	2	4	20.25	14.00669482783144	4	25	40
+Manufacturer#2	almond aquamarine sandy cyan gainsboro	18	5	5	1	1.0	3	5	19.8	12.560254774486067	4	18	2
+Manufacturer#3	almond antique chartreuse khaki white	17	1	1	0	0.0	1	1	17.0	0.0	2	17	17
+Manufacturer#3	almond antique forest lavender goldenrod	14	2	2	0	0.25	1	2	15.5	1.5	2	14	17
+Manufacturer#3	almond antique metallic orange dim	19	3	3	0	0.5	2	3	16.666666666666668	2.0548046676563256	2	19	17
+Manufacturer#3	almond antique misty red olive	1	4	4	0	0.75	2	4	12.75	7.013380069552769	2	1	14
+Manufacturer#3	almond antique olive coral navajo	45	5	5	1	1.0	3	5	19.2	14.344336861632886	2	45	19
+Manufacturer#4	almond antique gainsboro frosted violet	10	1	1	0	0.0	1	1	10.0	0.0	0	10	10
+Manufacturer#4	almond antique violet mint lemon	39	2	2	0	0.25	1	2	24.5	14.5	0	39	10
+Manufacturer#4	almond aquamarine floral ivory bisque	27	3	3	0	0.5	2	3	25.333333333333332	11.897712198383164	0	27	10
+Manufacturer#4	almond aquamarine yellow dodger mint	7	4	4	0	0.75	2	4	20.75	13.007209539328564	0	7	39
+Manufacturer#4	almond azure aquamarine papaya violet	12	5	5	1	1.0	3	5	19.0	12.149074038789951	0	12	27
+Manufacturer#5	almond antique blue firebrick mint	31	1	1	0	0.0	1	1	31.0	0.0	1	31	31
+Manufacturer#5	almond antique medium spring khaki	6	2	2	0	0.25	1	2	18.5	12.5	1	6	31
+Manufacturer#5	almond antique sky peru orange	2	3	3	0	0.5	2	3	13.0	12.832251036613439	1	2	31
+Manufacturer#5	almond aquamarine dodger light gainsboro	46	4	4	0	0.75	2	4	21.25	18.102140757380052	1	46	6
+Manufacturer#5	almond azure blanched chiffon midnight	23	5	5	1	1.0	3	5	21.6	16.206171663906314	1	23	2
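
Among the ranking-family functions in test 15, percent_rank is (rank - 1) / (partition rows - 1) -- e.g. the r = 3 row in the 6-row Manufacturer#1 partition shows 0.4 -- and ntile(3) deals that ordered partition into buckets 1,1,2,2,3,3. A minimal sketch of the two:

select p_mfgr, p_name,
       percent_rank() over (partition by p_mfgr order by p_name) as pr,
       ntile(3) over (partition by p_mfgr order by p_name) as nt
from part;
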
+PREHOOK: query: -- 16. testMultipleWindows
+select  p_mfgr,p_name, p_size,  
+  rank() over(distribute by p_mfgr sort by p_name) as r, 
+  dense_rank() over(distribute by p_mfgr sort by p_name) as dr, 
+cume_dist() over(distribute by p_mfgr sort by p_name) as cud, 
+sum(p_size) over (distribute by p_mfgr sort by p_name range between unbounded preceding and current row) as s1, 
+sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row) as s2, 
+first_value(p_size) over w1  as fv1
+from part 
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: -- 16. testMultipleWindows
+select  p_mfgr,p_name, p_size,  
+  rank() over(distribute by p_mfgr sort by p_name) as r, 
+  dense_rank() over(distribute by p_mfgr sort by p_name) as dr, 
+cume_dist() over(distribute by p_mfgr sort by p_name) as cud, 
+sum(p_size) over (distribute by p_mfgr sort by p_name range between unbounded preceding and current row) as s1, 
+sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row) as s2, 
+first_value(p_size) over w1  as fv1
+from part 
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+Manufacturer#1	almond antique burnished rose metallic	2	1	1	0	4	4	2
+Manufacturer#1	almond antique burnished rose metallic	2	1	1	0	4	4	2
+Manufacturer#1	almond antique chartreuse lavender yellow	34	3	2	0	38	34	2
+Manufacturer#1	almond antique salmon chartreuse burlywood	6	4	3	0	44	10	2
+Manufacturer#1	almond aquamarine burnished black steel	28	5	4	0	72	28	34
+Manufacturer#1	almond aquamarine pink moccasin thistle	42	6	5	1	114	42	6
+Manufacturer#2	almond antique violet chocolate turquoise	14	1	1	0	14	14	14
+Manufacturer#2	almond antique violet turquoise frosted	40	2	2	0	54	40	14
+Manufacturer#2	almond aquamarine midnight light salmon	2	3	3	0	56	2	14
+Manufacturer#2	almond aquamarine rose maroon antique	25	4	4	0	81	25	40
+Manufacturer#2	almond aquamarine sandy cyan gainsboro	18	5	5	1	99	32	2
+Manufacturer#3	almond antique chartreuse khaki white	17	1	1	0	17	31	17
+Manufacturer#3	almond antique forest lavender goldenrod	14	2	2	0	31	14	17
+Manufacturer#3	almond antique metallic orange dim	19	3	3	0	50	50	17
+Manufacturer#3	almond antique misty red olive	1	4	4	0	51	1	14
+Manufacturer#3	almond antique olive coral navajo	45	5	5	1	96	45	19
+Manufacturer#4	almond antique gainsboro frosted violet	10	1	1	0	10	17	10
+Manufacturer#4	almond antique violet mint lemon	39	2	2	0	49	39	10
+Manufacturer#4	almond aquamarine floral ivory bisque	27	3	3	0	76	27	10
+Manufacturer#4	almond aquamarine yellow dodger mint	7	4	4	0	83	7	39
+Manufacturer#4	almond azure aquamarine papaya violet	12	5	5	1	95	29	27
+Manufacturer#5	almond antique blue firebrick mint	31	1	1	0	31	31	31
+Manufacturer#5	almond antique medium spring khaki	6	2	2	0	37	8	31
+Manufacturer#5	almond antique sky peru orange	2	3	3	0	39	2	31
+Manufacturer#5	almond aquamarine dodger light gainsboro	46	4	4	0	85	46	6
+Manufacturer#5	almond azure blanched chiffon midnight	23	5	5	1	108	23	2
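
Test 16 mixes differently framed windows in one select. s2 uses a value-based RANGE frame ordered by p_size: the frame holds the rows whose p_size falls within 5 below the current row's value, not a fixed row count -- e.g. for Manufacturer#1's size-6 row the frame is sizes {2, 2, 6}, giving s2 = 10. Sketch of the value-based frame on its own:

select p_mfgr, p_name, p_size,
       sum(p_size) over (partition by p_mfgr order by p_size
                         range between 5 preceding and current row) as s2
from part;
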
+PREHOOK: query: -- 17. testCountStar
+select  p_mfgr,p_name, p_size,
+count(*) over(distribute by p_mfgr sort by p_name ) as c, 
+count(p_size) over(distribute by p_mfgr sort by p_name) as ca, 
+first_value(p_size) over w1  as fvW1
+from part 
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: -- 17. testCountStar
+select  p_mfgr,p_name, p_size,
+count(*) over(distribute by p_mfgr sort by p_name ) as c, 
+count(p_size) over(distribute by p_mfgr sort by p_name) as ca, 
+first_value(p_size) over w1  as fvW1
+from part 
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+Manufacturer#1	almond antique burnished rose metallic	2	2	2	2
+Manufacturer#1	almond antique burnished rose metallic	2	2	2	2
+Manufacturer#1	almond antique chartreuse lavender yellow	34	3	3	2
+Manufacturer#1	almond antique salmon chartreuse burlywood	6	4	4	2
+Manufacturer#1	almond aquamarine burnished black steel	28	5	5	34
+Manufacturer#1	almond aquamarine pink moccasin thistle	42	6	6	6
+Manufacturer#2	almond antique violet chocolate turquoise	14	1	1	14
+Manufacturer#2	almond antique violet turquoise frosted	40	2	2	14
+Manufacturer#2	almond aquamarine midnight light salmon	2	3	3	14
+Manufacturer#2	almond aquamarine rose maroon antique	25	4	4	40
+Manufacturer#2	almond aquamarine sandy cyan gainsboro	18	5	5	2
+Manufacturer#3	almond antique chartreuse khaki white	17	1	1	17
+Manufacturer#3	almond antique forest lavender goldenrod	14	2	2	17
+Manufacturer#3	almond antique metallic orange dim	19	3	3	17
+Manufacturer#3	almond antique misty red olive	1	4	4	14
+Manufacturer#3	almond antique olive coral navajo	45	5	5	19
+Manufacturer#4	almond antique gainsboro frosted violet	10	1	1	10
+Manufacturer#4	almond antique violet mint lemon	39	2	2	10
+Manufacturer#4	almond aquamarine floral ivory bisque	27	3	3	10
+Manufacturer#4	almond aquamarine yellow dodger mint	7	4	4	39
+Manufacturer#4	almond azure aquamarine papaya violet	12	5	5	27
+Manufacturer#5	almond antique blue firebrick mint	31	1	1	31
+Manufacturer#5	almond antique medium spring khaki	6	2	2	31
+Manufacturer#5	almond antique sky peru orange	2	3	3	31
+Manufacturer#5	almond aquamarine dodger light gainsboro	46	4	4	6
+Manufacturer#5	almond azure blanched chiffon midnight	23	5	5	2
+PREHOOK: query: -- 18. testUDAFs
+select  p_mfgr,p_name, p_size, 
+sum(p_retailprice) over w1 as s, 
+min(p_retailprice) over w1 as mi,
+max(p_retailprice) over w1 as ma,
+avg(p_retailprice) over w1 as ag
+from part
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: -- 18. testUDAFs
+select  p_mfgr,p_name, p_size, 
+sum(p_retailprice) over w1 as s, 
+min(p_retailprice) over w1 as mi,
+max(p_retailprice) over w1 as ma,
+avg(p_retailprice) over w1 as ag
+from part
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+Manufacturer#1	almond antique burnished rose metallic	2	4100.06	1173.15	1753.76	1366.6866666666667
+Manufacturer#1	almond antique burnished rose metallic	2	5702.650000000001	1173.15	1753.76	1425.6625000000001
+Manufacturer#1	almond antique chartreuse lavender yellow	34	7117.070000000001	1173.15	1753.76	1423.4140000000002
+Manufacturer#1	almond antique salmon chartreuse burlywood	6	7576.580000000002	1173.15	1753.76	1515.3160000000003
+Manufacturer#1	almond aquamarine burnished black steel	28	6403.430000000001	1414.42	1753.76	1600.8575000000003
+Manufacturer#1	almond aquamarine pink moccasin thistle	42	4649.670000000001	1414.42	1632.66	1549.8900000000003
+Manufacturer#2	almond antique violet chocolate turquoise	14	5523.360000000001	1690.68	2031.98	1841.1200000000001
+Manufacturer#2	almond antique violet turquoise frosted	40	7222.02	1690.68	2031.98	1805.505
+Manufacturer#2	almond aquamarine midnight light salmon	2	8923.62	1690.68	2031.98	1784.7240000000002
+Manufacturer#2	almond aquamarine rose maroon antique	25	7232.9400000000005	1698.66	2031.98	1808.2350000000001
+Manufacturer#2	almond aquamarine sandy cyan gainsboro	18	5432.240000000001	1698.66	2031.98	1810.746666666667
+Manufacturer#3	almond antique chartreuse khaki white	17	4272.34	1190.27	1671.68	1424.1133333333335
+Manufacturer#3	almond antique forest lavender goldenrod	14	6195.32	1190.27	1922.98	1548.83
+Manufacturer#3	almond antique metallic orange dim	19	7532.61	1190.27	1922.98	1506.522
+Manufacturer#3	almond antique misty red olive	1	5860.929999999999	1190.27	1922.98	1465.2324999999998
+Manufacturer#3	almond antique olive coral navajo	45	4670.66	1337.29	1922.98	1556.8866666666665
+Manufacturer#4	almond antique gainsboro frosted violet	10	4202.35	1206.26	1620.67	1400.7833333333335
+Manufacturer#4	almond antique violet mint lemon	39	6047.27	1206.26	1844.92	1511.8175
+Manufacturer#4	almond aquamarine floral ivory bisque	27	7337.620000000001	1206.26	1844.92	1467.5240000000001
+Manufacturer#4	almond aquamarine yellow dodger mint	7	5716.950000000001	1206.26	1844.92	1429.2375000000002
+Manufacturer#4	almond azure aquamarine papaya violet	12	4341.530000000001	1206.26	1844.92	1447.176666666667
+Manufacturer#5	almond antique blue firebrick mint	31	5190.08	1611.66	1789.69	1730.0266666666666
+Manufacturer#5	almond antique medium spring khaki	6	6208.18	1018.1	1789.69	1552.045
+Manufacturer#5	almond antique sky peru orange	2	7672.66	1018.1	1789.69	1534.532
+Manufacturer#5	almond aquamarine dodger light gainsboro	46	5882.969999999999	1018.1	1788.73	1470.7424999999998
+Manufacturer#5	almond azure blanched chiffon midnight	23	4271.3099999999995	1018.1	1788.73	1423.7699999999998
+PREHOOK: query: -- 19. testUDAFsWithGBY
+select  p_mfgr,p_name, p_size, p_retailprice, 
+sum(p_retailprice) over w1 as s, 
+min(p_retailprice) as mi ,
+max(p_retailprice) as ma ,
+avg(p_retailprice) over w1 as ag
+from part
+group by p_mfgr,p_name, p_size, p_retailprice
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: -- 19. testUDAFsWithGBY
+select  p_mfgr,p_name, p_size, p_retailprice, 
+sum(p_retailprice) over w1 as s, 
+min(p_retailprice) as mi ,
+max(p_retailprice) as ma ,
+avg(p_retailprice) over w1 as ag
+from part
+group by p_mfgr,p_name, p_size, p_retailprice
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+Manufacturer#1	almond antique burnished rose metallic	2	1173.15	4529.5	1173.15	1173.15	1509.8333333333333
+Manufacturer#1	almond antique chartreuse lavender yellow	34	1753.76	5943.92	1753.76	1753.76	1485.98
+Manufacturer#1	almond antique salmon chartreuse burlywood	6	1602.59	7576.58	1602.59	1602.59	1515.316
+Manufacturer#1	almond aquamarine burnished black steel	28	1414.42	6403.43	1414.42	1414.42	1600.8575
+Manufacturer#1	almond aquamarine pink moccasin thistle	42	1632.66	4649.67	1632.66	1632.66	1549.89
+Manufacturer#2	almond antique violet chocolate turquoise	14	1690.68	5523.360000000001	1690.68	1690.68	1841.1200000000001
+Manufacturer#2	almond antique violet turquoise frosted	40	1800.7	7222.02	1800.7	1800.7	1805.505
+Manufacturer#2	almond aquamarine midnight light salmon	2	2031.98	8923.62	2031.98	2031.98	1784.7240000000002
+Manufacturer#2	almond aquamarine rose maroon antique	25	1698.66	7232.9400000000005	1698.66	1698.66	1808.2350000000001
+Manufacturer#2	almond aquamarine sandy cyan gainsboro	18	1701.6	5432.240000000001	1701.6	1701.6	1810.746666666667
+Manufacturer#3	almond antique chartreuse khaki white	17	1671.68	4272.34	1671.68	1671.68	1424.1133333333335
+Manufacturer#3	almond antique forest lavender goldenrod	14	1190.27	6195.32	1190.27	1190.27	1548.83
+Manufacturer#3	almond antique metallic orange dim	19	1410.39	7532.61	1410.39	1410.39	1506.522
+Manufacturer#3	almond antique misty red olive	1	1922.98	5860.929999999999	1922.98	1922.98	1465.2324999999998
+Manufacturer#3	almond antique olive coral navajo	45	1337.29	4670.66	1337.29	1337.29	1556.8866666666665
+Manufacturer#4	almond antique gainsboro frosted violet	10	1620.67	4202.35	1620.67	1620.67	1400.7833333333335
+Manufacturer#4	almond antique violet mint lemon	39	1375.42	6047.27	1375.42	1375.42	1511.8175
+Manufacturer#4	almond aquamarine floral ivory bisque	27	1206.26	7337.620000000001	1206.26	1206.26	1467.5240000000001
+Manufacturer#4	almond aquamarine yellow dodger mint	7	1844.92	5716.950000000001	1844.92	1844.92	1429.2375000000002
+Manufacturer#4	almond azure aquamarine papaya violet	12	1290.35	4341.530000000001	1290.35	1290.35	1447.176666666667
+Manufacturer#5	almond antique blue firebrick mint	31	1789.69	5190.08	1789.69	1789.69	1730.0266666666666
+Manufacturer#5	almond antique medium spring khaki	6	1611.66	6208.18	1611.66	1611.66	1552.045
+Manufacturer#5	almond antique sky peru orange	2	1788.73	7672.66	1788.73	1788.73	1534.532
+Manufacturer#5	almond aquamarine dodger light gainsboro	46	1018.1	5882.969999999999	1018.1	1018.1	1470.7424999999998
+Manufacturer#5	almond azure blanched chiffon midnight	23	1464.48	4271.3099999999995	1464.48	1464.48	1423.7699999999998
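
Test 19 mixes plain GROUP BY aggregates with windowed ones: min and max carry no OVER clause, so they aggregate within each group -- and since p_retailprice is itself a grouping key, mi and ma always equal p_retailprice -- while sum and avg with OVER are computed over the grouped rows. The GROUP BY also collapses the duplicate "burnished rose metallic" rows into one, which is why this output has one row fewer than test 18. Sketch:

select p_mfgr, p_name, p_retailprice,
       min(p_retailprice) as mi,            -- group aggregate (trivially p_retailprice here)
       sum(p_retailprice) over w1 as s      -- window aggregate over the grouped rows
from part
group by p_mfgr, p_name, p_retailprice
window w1 as (partition by p_mfgr order by p_name
              rows between 2 preceding and 2 following);
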
+PREHOOK: query: -- 20. testSTATs
+select  p_mfgr,p_name, p_size, 
+stddev(p_retailprice) over w1 as sdev, 
+stddev_pop(p_retailprice) over w1 as sdev_pop, 
+collect_set(p_size) over w1 as uniq_size, 
+variance(p_retailprice) over w1 as var,
+corr(p_size, p_retailprice) over w1 as cor,
+covar_pop(p_size, p_retailprice) over w1 as covarp
+from part
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: -- 20. testSTATs
+select  p_mfgr,p_name, p_size, 
+stddev(p_retailprice) over w1 as sdev, 
+stddev_pop(p_retailprice) over w1 as sdev_pop, 
+collect_set(p_size) over w1 as uniq_size, 
+variance(p_retailprice) over w1 as var,
+corr(p_size, p_retailprice) over w1 as cor,
+covar_pop(p_size, p_retailprice) over w1 as covarp
+from part
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+Manufacturer#1	almond antique burnished rose metallic	2	258.10677784349235	258.10677784349235	[2,34,6]	66619.10876874991	0.811328754177887	2801.7074999999995
+Manufacturer#1	almond antique burnished rose metallic	2	273.70217881648074	273.70217881648074	[2,34]	74912.8826888888	1.0	4128.782222222221
+Manufacturer#1	almond antique chartreuse lavender yellow	34	230.90151585470358	230.90151585470358	[2,34,6,28]	53315.51002399992	0.695639377397664	2210.7864
+Manufacturer#1	almond antique salmon chartreuse burlywood	6	202.73109328368946	202.73109328368946	[2,34,6,28,42]	41099.896184	0.630785977101214	2009.9536000000007
+Manufacturer#1	almond aquamarine burnished black steel	28	121.6064517973862	121.6064517973862	[34,6,28,42]	14788.129118750014	0.2036684720435979	331.1337500000004
+Manufacturer#1	almond aquamarine pink moccasin thistle	42	96.5751586416853	96.5751586416853	[6,28,42]	9326.761266666683	-1.4442181184933883E-4	-0.20666666666708502
+Manufacturer#2	almond antique violet chocolate turquoise	14	142.2363169751898	142.2363169751898	[14,40,2]	20231.169866666663	-0.49369526554523185	-1113.7466666666658
+Manufacturer#2	almond antique violet turquoise frosted	40	137.76306498840682	137.76306498840682	[14,40,2,25]	18978.662075	-0.5205630897335946	-1004.4812499999995
+Manufacturer#2	almond aquamarine midnight light salmon	2	130.03972279269132	130.03972279269132	[14,40,2,25,18]	16910.329504000005	-0.46908967495720255	-766.1791999999995
+Manufacturer#2	almond aquamarine rose maroon antique	25	135.55100986344584	135.55100986344584	[40,2,25,18]	18374.07627499999	-0.6091405874714462	-1128.1787499999987
+Manufacturer#2	almond aquamarine sandy cyan gainsboro	18	156.44019460768044	156.44019460768044	[2,25,18]	24473.534488888927	-0.9571686373491608	-1441.4466666666676
+Manufacturer#3	almond antique chartreuse khaki white	17	196.7742266885805	196.7742266885805	[17,14,19]	38720.09628888887	0.5557168646224995	224.6944444444446
+Manufacturer#3	almond antique forest lavender goldenrod	14	275.14144189852607	275.14144189852607	[17,14,19,1]	75702.81305	-0.6720833036576083	-1296.9000000000003
+Manufacturer#3	almond antique metallic orange dim	19	260.23473614412046	260.23473614412046	[17,14,19,1,45]	67722.117896	-0.5703526513979519	-2129.0664
+Manufacturer#3	almond antique misty red olive	1	275.9139962356932	275.9139962356932	[14,19,1,45]	76128.53331875012	-0.577476899644802	-2547.7868749999993
+Manufacturer#3	almond antique olive coral navajo	45	260.5815918713796	260.5815918713796	[19,1,45]	67902.76602222225	-0.8710736366736884	-4099.731111111111
+Manufacturer#4	almond antique gainsboro frosted violet	10	170.13011889596618	170.13011889596618	[10,39,27]	28944.25735555559	-0.6656975320098423	-1347.4777777777779
+Manufacturer#4	almond antique violet mint lemon	39	242.26834609323197	242.26834609323197	[10,39,27,7]	58693.95151875002	-0.8051852719193339	-2537.328125
+Manufacturer#4	almond aquamarine floral ivory bisque	27	234.10001662537326	234.10001662537326	[10,39,27,7,12]	54802.817784000035	-0.6046935574240581	-1719.8079999999995
+Manufacturer#4	almond aquamarine yellow dodger mint	7	247.3342714197732	247.3342714197732	[39,27,7,12]	61174.24181875003	-0.5508665654707869	-1719.0368749999975
+Manufacturer#4	almond azure aquamarine papaya violet	12	283.3344330566893	283.3344330566893	[27,7,12]	80278.40095555557	-0.7755740084632333	-1867.4888888888881
+Manufacturer#5	almond antique blue firebrick mint	31	83.69879024746363	83.69879024746363	[31,6,2]	7005.487488888913	0.39004303087285047	418.9233333333353
+Manufacturer#5	almond antique medium spring khaki	6	316.68049612345885	316.68049612345885	[31,6,2,46]	100286.53662500004	-0.713612911776183	-4090.853749999999
+Manufacturer#5	almond antique sky peru orange	2	285.40506298242155	285.40506298242155	[31,6,2,46,23]	81456.04997600002	-0.712858514567818	-3297.2011999999986
+Manufacturer#5	almond aquamarine dodger light gainsboro	46	285.43749038756283	285.43749038756283	[6,2,46,23]	81474.56091875004	-0.984128787153391	-4871.028125000002
+Manufacturer#5	almond azure blanched chiffon midnight	23	315.9225931564038	315.9225931564038	[2,46,23]	99807.08486666664	-0.9978877469246936	-5664.856666666666
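
The statistical UDAFs in test 20 (stddev, stddev_pop, variance, corr, covar_pop) all accept a window spec and are computed here over at most five neighboring rows; collect_set additionally de-duplicates within the frame, so the two duplicate size-2 rows collapse to a single 2 in uniq_size. Sketch:

select p_mfgr, p_name,
       corr(p_size, p_retailprice) over w1 as cor,
       collect_set(p_size) over w1 as uniq_size
from part
window w1 as (partition by p_mfgr order by p_name
              rows between 2 preceding and 2 following);
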
+PREHOOK: query: -- 21. testDISTs
+select  p_mfgr,p_name, p_size, 
+histogram_numeric(p_retailprice, 5) over w1 as hist, 
+percentile(p_partkey, 0.5) over w1 as per,
+row_number() over(distribute by p_mfgr sort by p_mfgr, p_name) as rn
+from part
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: -- 21. testDISTs
+select  p_mfgr,p_name, p_size, 
+histogram_numeric(p_retailprice, 5) over w1 as hist, 
+percentile(p_partkey, 0.5) over w1 as per,
+row_number() over(distribute by p_mfgr sort by p_mfgr, p_name) as rn
+from part
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+Manufacturer#1	almond antique burnished rose metallic	2	[{"x":1173.15,"y":2.0},{"x":1602.59,"y":1.0},{"x":1753.76,"y":1.0}]	115872.0	2
+Manufacturer#1	almond antique burnished rose metallic	2	[{"x":1173.15,"y":2.0},{"x":1753.76,"y":1.0}]	121152.0	1
+Manufacturer#1	almond antique chartreuse lavender yellow	34	[{"x":1173.15,"y":2.0},{"x":1414.42,"y":1.0},{"x":1602.59,"y":1.0},{"x":1753.76,"y":1.0}]	110592.0	3
+Manufacturer#1	almond antique salmon chartreuse burlywood	6	[{"x":1173.15,"y":1.0},{"x":1414.42,"y":1.0},{"x":1602.59,"y":1.0},{"x":1632.66,"y":1.0},{"x":1753.76,"y":1.0}]	86428.0	4
+Manufacturer#1	almond aquamarine burnished black steel	28	[{"x":1414.42,"y":1.0},{"x":1602.59,"y":1.0},{"x":1632.66,"y":1.0},{"x":1753.76,"y":1.0}]	86098.0	5
+Manufacturer#1	almond aquamarine pink moccasin thistle	42	[{"x":1414.42,"y":1.0},{"x":1602.59,"y":1.0},{"x":1632.66,"y":1.0}]	86428.0	6
+Manufacturer#2	almond antique violet chocolate turquoise	14	[{"x":1690.68,"y":1.0},{"x":1800.7,"y":1.0},{"x":2031.98,"y":1.0}]	146985.0	1
+Manufacturer#2	almond antique violet turquoise frosted	40	[{"x":1690.68,"y":1.0},{"x":1698.66,"y":1.0},{"x":1800.7,"y":1.0},{"x":2031.98,"y":1.0}]	139825.5	2
+Manufacturer#2	almond aquamarine midnight light salmon	2	[{"x":1690.68,"y":1.0},{"x":1698.66,"y":1.0},{"x":1701.6,"y":1.0},{"x":1800.7,"y":1.0},{"x":2031.98,"y":1.0}]	146985.0	3
+Manufacturer#2	almond aquamarine rose maroon antique	25	[{"x":1698.66,"y":1.0},{"x":1701.6,"y":1.0},{"x":1800.7,"y":1.0},{"x":2031.98,"y":1.0}]	169347.0	4
+Manufacturer#2	almond aquamarine sandy cyan gainsboro	18	[{"x":1698.66,"y":1.0},{"x":1701.6,"y":1.0},{"x":2031.98,"y":1.0}]	146985.0	5
+Manufacturer#3	almond antique chartreuse khaki white	17	[{"x":1190.27,"y":1.0},{"x":1410.39,"y":1.0},{"x":1671.68,"y":1.0}]	90681.0	1
+Manufacturer#3	almond antique forest lavender goldenrod	14	[{"x":1190.27,"y":1.0},{"x":1410.39,"y":1.0},{"x":1671.68,"y":1.0},{"x":1922.98,"y":1.0}]	65831.5	2
+Manufacturer#3	almond antique metallic orange dim	19	[{"x":1190.27,"y":1.0},{"x":1337.29,"y":1.0},{"x":1410.39,"y":1.0},{"x":1671.68,"y":1.0},{"x":1922.98,"y":1.0}]	90681.0	3
+Manufacturer#3	almond antique misty red olive	1	[{"x":1190.27,"y":1.0},{"x":1337.29,"y":1.0},{"x":1410.39,"y":1.0},{"x":1922.98,"y":1.0}]	76690.0	4
+Manufacturer#3	almond antique olive coral navajo	45	[{"x":1337.29,"y":1.0},{"x":1410.39,"y":1.0},{"x":1922.98,"y":1.0}]	112398.0	5
+Manufacturer#4	almond antique gainsboro frosted violet	10	[{"x":1206.26,"y":1.0},{"x":1375.42,"y":1.0},{"x":1620.67,"y":1.0}]	48427.0	1
+Manufacturer#4	almond antique violet mint lemon	39	[{"x":1206.26,"y":1.0},{"x":1375.42,"y":1.0},{"x":1620.67,"y":1.0},{"x":1844.92,"y":1.0}]	46844.0	2
+Manufacturer#4	almond aquamarine floral ivory bisque	27	[{"x":1206.26,"y":1.0},{"x":1290.35,"y":1.0},{"x":1375.42,"y":1.0},{"x":1620.67,"y":1.0},{"x":1844.92,"y":1.0}]	45261.0	3
+Manufacturer#4	almond aquamarine yellow dodger mint	7	[{"x":1206.26,"y":1.0},{"x":1290.35,"y":1.0},{"x":1375.42,"y":1.0},{"x":1844.92,"y":1.0}]	39309.0	4
+Manufacturer#4	almond azure aquamarine papaya violet	12	[{"x":1206.26,"y":1.0},{"x":1290.35,"y":1.0},{"x":1844.92,"y":1.0}]	33357.0	5
+Manufacturer#5	almond antique blue firebrick mint	31	[{"x":1611.66,"y":1.0},{"x":1788.73,"y":1.0},{"x":1789.69,"y":1.0}]	155733.0	1
+Manufacturer#5	almond antique medium spring khaki	6	[{"x":1018.1,"y":1.0},{"x":1611.66,"y":1.0},{"x":1788.73,"y":1.0},{"x":1789.69,"y":1.0}]	99201.0	2
+Manufacturer#5	almond antique sky peru orange	2	[{"x":1018.1,"y":1.0},{"x":1464.48,"y":1.0},{"x":1611.66,"y":1.0},{"x":1788.73,"y":1.0},{"x":1789.69,"y":1.0}]	78486.0	3
+Manufacturer#5	almond aquamarine dodger light gainsboro	46	[{"x":1018.1,"y":1.0},{"x":1464.48,"y":1.0},{"x":1611.66,"y":1.0},{"x":1788.73,"y":1.0}]	60577.5	4
+Manufacturer#5	almond azure blanched chiffon midnight	23	[{"x":1018.1,"y":1.0},{"x":1464.48,"y":1.0},{"x":1788.73,"y":1.0}]	78486.0	5
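
In test 21, histogram_numeric(col, b) approximates the frame's distribution with up to b (x, y) bin pairs -- with at most five rows in frame most bins have height 1, and the shared 1173.15 price shows up as y = 2.0 -- while percentile(p_partkey, 0.5) yields the frame's median part key (Hive's exact percentile takes an integral column; percentile_approx covers doubles). Sketch:

select p_mfgr, p_name,
       histogram_numeric(p_retailprice, 5) over w1 as hist,
       percentile(p_partkey, 0.5) over w1 as med_key
from part
window w1 as (partition by p_mfgr order by p_name
              rows between 2 preceding and 2 following);
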
+PREHOOK: query: -- 22. testViewAsTableInputWithWindowing
+create view IF NOT EXISTS mfgr_price_view as 
+select p_mfgr, p_brand, 
+round(sum(p_retailprice),2) as s 
+from part 
+group by p_mfgr, p_brand
+PREHOOK: type: CREATEVIEW
+PREHOOK: Input: default@part
+PREHOOK: Output: database:default
+PREHOOK: Output: default@mfgr_price_view
+POSTHOOK: query: -- 22. testViewAsTableInputWithWindowing
+create view IF NOT EXISTS mfgr_price_view as 
+select p_mfgr, p_brand, 
+round(sum(p_retailprice),2) as s 
+from part 
+group by p_mfgr, p_brand
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Input: default@part
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@mfgr_price_view
+PREHOOK: query: select * 
+from (
+select p_mfgr, p_brand, s, 
+round(sum(s) over w1 , 2)  as s1
+from mfgr_price_view 
+window w1 as (distribute by p_mfgr sort by p_mfgr )
+) sq
+order by p_mfgr, p_brand
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mfgr_price_view
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select * 
+from (
+select p_mfgr, p_brand, s, 
+round(sum(s) over w1 , 2)  as s1
+from mfgr_price_view 
+window w1 as (distribute by p_mfgr sort by p_mfgr )
+) sq
+order by p_mfgr, p_brand
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mfgr_price_view
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+Manufacturer#1	Brand#12	4800.84	8749.73
+Manufacturer#1	Brand#14	2346.3	8749.73
+Manufacturer#1	Brand#15	1602.59	8749.73
+Manufacturer#2	Brand#22	3491.38	8923.62
+Manufacturer#2	Brand#23	2031.98	8923.62
+Manufacturer#2	Brand#24	1698.66	8923.62
+Manufacturer#2	Brand#25	1701.6	8923.62
+Manufacturer#3	Brand#31	1671.68	7532.61
+Manufacturer#3	Brand#32	3333.37	7532.61
+Manufacturer#3	Brand#34	1337.29	7532.61
+Manufacturer#3	Brand#35	1190.27	7532.61
+Manufacturer#4	Brand#41	4755.94	7337.62
+Manufacturer#4	Brand#42	2581.68	7337.62
+Manufacturer#5	Brand#51	1611.66	7672.66
+Manufacturer#5	Brand#52	3254.17	7672.66
+Manufacturer#5	Brand#53	2806.83	7672.66
+PREHOOK: query: select p_mfgr, p_brand, s, 
+round(sum(s) over w1 ,2)  as s1
+from mfgr_price_view 
+window w1 as (distribute by p_mfgr sort by p_brand rows between 2 preceding and current row)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mfgr_price_view
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_mfgr, p_brand, s, 
+round(sum(s) over w1 ,2)  as s1
+from mfgr_price_view 
+window w1 as (distribute by p_mfgr sort by p_brand rows between 2 preceding and current row)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mfgr_price_view
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+Manufacturer#1	Brand#12	4800.84	4800.84
+Manufacturer#1	Brand#14	2346.3	7147.14
+Manufacturer#1	Brand#15	1602.59	8749.73
+Manufacturer#2	Brand#22	3491.38	3491.38
+Manufacturer#2	Brand#23	2031.98	5523.36
+Manufacturer#2	Brand#24	1698.66	7222.02
+Manufacturer#2	Brand#25	1701.6	5432.24
+Manufacturer#3	Brand#31	1671.68	1671.68
+Manufacturer#3	Brand#32	3333.37	5005.05
+Manufacturer#3	Brand#34	1337.29	6342.34
+Manufacturer#3	Brand#35	1190.27	5860.93
+Manufacturer#4	Brand#41	4755.94	4755.94
+Manufacturer#4	Brand#42	2581.68	7337.62
+Manufacturer#5	Brand#51	1611.66	1611.66
+Manufacturer#5	Brand#52	3254.17	4865.83
+Manufacturer#5	Brand#53	2806.83	7672.66
+PREHOOK: query: -- 23. testCreateViewWithWindowingQuery
+create view IF NOT EXISTS mfgr_brand_price_view as 
+select p_mfgr, p_brand, 
+sum(p_retailprice) over w1  as s
+from part 
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and current row)
+PREHOOK: type: CREATEVIEW
+PREHOOK: Input: default@part
+PREHOOK: Output: database:default
+PREHOOK: Output: default@mfgr_brand_price_view
+POSTHOOK: query: -- 23. testCreateViewWithWindowingQuery
+create view IF NOT EXISTS mfgr_brand_price_view as 
+select p_mfgr, p_brand, 
+sum(p_retailprice) over w1  as s
+from part 
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and current row)
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Input: default@part
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@mfgr_brand_price_view
+PREHOOK: query: select * from mfgr_brand_price_view
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mfgr_brand_price_view
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select * from mfgr_brand_price_view
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mfgr_brand_price_view
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+Manufacturer#1	Brand#12	4100.06
+Manufacturer#1	Brand#12	4649.670000000001
+Manufacturer#1	Brand#12	4770.77
+Manufacturer#1	Brand#14	1173.15
+Manufacturer#1	Brand#14	2346.3
+Manufacturer#1	Brand#15	4529.5
+Manufacturer#2	Brand#22	1690.68
+Manufacturer#2	Brand#22	3491.38
+Manufacturer#2	Brand#23	5523.360000000001
+Manufacturer#2	Brand#24	5531.34
+Manufacturer#2	Brand#25	5432.240000000001
+Manufacturer#3	Brand#31	1671.68
+Manufacturer#3	Brand#32	4272.34
+Manufacturer#3	Brand#32	4523.639999999999
+Manufacturer#3	Brand#34	4670.66
+Manufacturer#3	Brand#35	2861.95
+Manufacturer#4	Brand#41	1620.67
+Manufacturer#4	Brand#41	4341.530000000001
+Manufacturer#4	Brand#41	4426.6
+Manufacturer#4	Brand#42	2996.09
+Manufacturer#4	Brand#42	4202.35
+Manufacturer#5	Brand#51	3401.3500000000004
+Manufacturer#5	Brand#52	1789.69
+Manufacturer#5	Brand#52	4271.3099999999995
+Manufacturer#5	Brand#53	4418.49
+Manufacturer#5	Brand#53	5190.08
+PREHOOK: query: -- 24. testLateralViews
+select p_mfgr, p_name, 
+lv_col, p_size, sum(p_size) over w1   as s
+from (select p_mfgr, p_name, p_size, array(1,2,3) arr from part) p 
+lateral view explode(arr) part_lv as lv_col
+window w1 as (distribute by p_mfgr sort by p_size, lv_col rows between 2 preceding and current row)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: -- 24. testLateralViews
+select p_mfgr, p_name, 
+lv_col, p_size, sum(p_size) over w1   as s
+from (select p_mfgr, p_name, p_size, array(1,2,3) arr from part) p 
+lateral view explode(arr) part_lv as lv_col
+window w1 as (distribute by p_mfgr sort by p_size, lv_col rows between 2 preceding and current row)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+Manufacturer#1	almond antique burnished rose metallic	1	2	2
+Manufacturer#1	almond antique burnished rose metallic	1	2	4
+Manufacturer#1	almond antique burnished rose metallic	2	2	6
+Manufacturer#1	almond antique burnished rose metallic	2	2	6
+Manufacturer#1	almond antique burnished rose metallic	3	2	6
+Manufacturer#1	almond antique burnished rose metallic	3	2	6
+Manufacturer#1	almond antique chartreuse lavender yellow	1	34	90
+Manufacturer#1	almond antique chartreuse lavender yellow	2	34	96
+Manufacturer#1	almond antique chartreuse lavender yellow	3	34	102
+Manufacturer#1	almond antique salmon chartreuse burlywood	1	6	10
+Manufacturer#1	almond antique salmon chartreuse burlywood	2	6	14
+Manufacturer#1	almond antique salmon chartreuse burlywood	3	6	18
+Manufacturer#1	almond aquamarine burnished black steel	1	28	40
+Manufacturer#1	almond aquamarine burnished black steel	2	28	62
+Manufacturer#1	almond aquamarine burnished black steel	3	28	84
+Manufacturer#1	almond aquamarine pink moccasin thistle	1	42	110
+Manufacturer#1	almond aquamarine pink moccasin thistle	2	42	118
+Manufacturer#1	almond aquamarine pink moccasin thistle	3	42	126
+Manufacturer#2	almond antique violet chocolate turquoise	1	14	18
+Manufacturer#2	almond antique violet chocolate turquoise	2	14	30
+Manufacturer#2	almond antique violet chocolate turquoise	3	14	42
+Manufacturer#2	almond antique violet turquoise frosted	1	40	90
+Manufacturer#2	almond antique violet turquoise frosted	2	40	105
+Manufacturer#2	almond antique violet turquoise frosted	3	40	120
+Manufacturer#2	almond aquamarine midnight light salmon	1	2	2
+Manufacturer#2	almond aquamarine midnight light salmon	2	2	4
+Manufacturer#2	almond aquamarine midnight light salmon	3	2	6
+Manufacturer#2	almond aquamarine rose maroon antique	1	25	61
+Manufacturer#2	almond aquamarine rose maroon antique	2	25	68
+Manufacturer#2	almond aquamarine rose maroon antique	3	25	75
+Manufacturer#2	almond aquamarine sandy cyan gainsboro	1	18	46
+Manufacturer#2	almond aquamarine sandy cyan gainsboro	2	18	50
+Manufacturer#2	almond aquamarine sandy cyan gainsboro	3	18	54
+Manufacturer#3	almond antique chartreuse khaki white	1	17	45
+Manufacturer#3	almond antique chartreuse khaki white	2	17	48
+Manufacturer#3	almond antique chartreuse khaki white	3	17	51
+Manufacturer#3	almond antique forest lavender goldenrod	1	14	16
+Manufacturer#3	almond antique forest lavender goldenrod	2	14	29
+Manufacturer#3	almond antique forest lavender goldenrod	3	14	42
+Manufacturer#3	almond antique metallic orange dim	1	19	53
+Manufacturer#3	almond antique metallic orange dim	2	19	55
+Manufacturer#3	almond antique metallic orange dim	3	19	57
+Manufacturer#3	almond antique misty red olive	1	1	1
+Manufacturer#3	almond antique misty red olive	2	1	2
+Manufacturer#3	almond antique misty red olive	3	1	3
+Manufacturer#3	almond antique olive coral navajo	1	45	83
+Manufacturer#3	almond antique olive coral navajo	2	45	109
+Manufacturer#3	almond antique olive coral navajo	3	45	135
+Manufacturer#4	almond antique gainsboro frosted violet	1	10	24
+Manufacturer#4	almond antique gainsboro frosted violet	2	10	27
+Manufacturer#4	almond antique gainsboro frosted violet	3	10	30
+Manufacturer#4	almond antique violet mint lemon	1	39	93
+Manufacturer#4	almond antique violet mint lemon	2	39	105
+Manufacturer#4	almond antique violet mint lemon	3	39	117
+Manufacturer#4	almond aquamarine floral ivory bisque	1	27	51
+Manufacturer#4	almond aquamarine floral ivory bisque	2	27	66
+Manufacturer#4	almond aquamarine floral ivory bisque	3	27	81
+Manufacturer#4	almond aquamarine yellow dodger mint	1	7	7
+Manufacturer#4	almond aquamarine yellow dodger mint	2	7	14
+Manufacturer#4	almond aquamarine yellow dodger mint	3	7	21
+Manufacturer#4	almond azure aquamarine papaya violet	1	12	32
+Manufacturer#4	almond azure aquamarine papaya violet	2	12	34
+Manufacturer#4	almond azure aquamarine papaya violet	3	12	36
+Manufacturer#5	almond antique blue firebrick mint	1	31	77
+Manufacturer#5	almond antique blue firebrick mint	2	31	85
+Manufacturer#5	almond antique blue firebrick mint	3	31	93
+Manufacturer#5	almond antique medium spring khaki	1	6	10
+Manufacturer#5	almond antique medium spring khaki	2	6	14
+Manufacturer#5	almond antique medium spring khaki	3	6	18
+Manufacturer#5	almond antique sky peru orange	1	2	2
+Manufacturer#5	almond antique sky peru orange	2	2	4
+Manufacturer#5	almond antique sky peru orange	3	2	6
+Manufacturer#5	almond aquamarine dodger light gainsboro	1	46	108
+Manufacturer#5	almond aquamarine dodger light gainsboro	2	46	123
+Manufacturer#5	almond aquamarine dodger light gainsboro	3	46	138
+Manufacturer#5	almond azure blanched chiffon midnight	1	23	35
+Manufacturer#5	almond azure blanched chiffon midnight	2	23	52
+Manufacturer#5	almond azure blanched chiffon midnight	3	23	69
+PREHOOK: query: -- 25. testMultipleInserts3SWQs
+CREATE TABLE part_1( 
+p_mfgr STRING, 
+p_name STRING, 
+p_size INT, 
+r INT, 
+dr INT, 
+s DOUBLE)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@part_1
+POSTHOOK: query: -- 25. testMultipleInserts3SWQs
+CREATE TABLE part_1( 
+p_mfgr STRING, 
+p_name STRING, 
+p_size INT, 
+r INT, 
+dr INT, 
+s DOUBLE)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@part_1
+PREHOOK: query: CREATE TABLE part_2( 
+p_mfgr STRING, 
+p_name STRING, 
+p_size INT, 
+r INT, 
+dr INT, 
+cud INT,  
+s2 DOUBLE, 
+fv1 INT)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@part_2
+POSTHOOK: query: CREATE TABLE part_2( 
+p_mfgr STRING, 
+p_name STRING, 
+p_size INT, 
+r INT, 
+dr INT, 
+cud INT,  
+s2 DOUBLE, 
+fv1 INT)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@part_2
+PREHOOK: query: CREATE TABLE part_3( 
+p_mfgr STRING, 
+p_name STRING, 
+p_size INT, 
+c INT, 
+ca INT, 
+fv INT)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@part_3
+POSTHOOK: query: CREATE TABLE part_3( 
+p_mfgr STRING, 
+p_name STRING, 
+p_size INT, 
+c INT, 
+ca INT, 
+fv INT)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@part_3
+PREHOOK: query: from part 
+INSERT OVERWRITE TABLE part_1 
+select p_mfgr, p_name, p_size, 
+rank() over(distribute by p_mfgr sort by p_name ) as r, 
+dense_rank() over(distribute by p_mfgr sort by p_name ) as dr, 
+sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row)  as s
+INSERT OVERWRITE TABLE part_2 
+select  p_mfgr,p_name, p_size,  
+rank() over(distribute by p_mfgr sort by p_name) as r, 
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr, 
+cume_dist() over(distribute by p_mfgr sort by p_name) as cud, 
+round(sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row),1) as s2, 
+first_value(p_size) over w1  as fv1
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) 
+INSERT OVERWRITE TABLE part_3 
+select  p_mfgr,p_name, p_size,  
+count(*) over(distribute by p_mfgr sort by p_name) as c, 
+count(p_size) over(distribute by p_mfgr sort by p_name) as ca, 
+first_value(p_size) over w1  as fv
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+PREHOOK: Output: default@part_1
+PREHOOK: Output: default@part_2
+PREHOOK: Output: default@part_3
+POSTHOOK: query: from part 
+INSERT OVERWRITE TABLE part_1 
+select p_mfgr, p_name, p_size, 
+rank() over(distribute by p_mfgr sort by p_name ) as r, 
+dense_rank() over(distribute by p_mfgr sort by p_name ) as dr, 
+sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row)  as s
+INSERT OVERWRITE TABLE part_2 
+select  p_mfgr,p_name, p_size,  
+rank() over(distribute by p_mfgr sort by p_name) as r, 
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr, 
+cume_dist() over(distribute by p_mfgr sort by p_name) as cud, 
+round(sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row),1) as s2, 
+first_value(p_size) over w1  as fv1
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) 
+INSERT OVERWRITE TABLE part_3 
+select  p_mfgr,p_name, p_size,  
+count(*) over(distribute by p_mfgr sort by p_name) as c, 
+count(p_size) over(distribute by p_mfgr sort by p_name) as ca, 
+first_value(p_size) over w1  as fv
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+POSTHOOK: Output: default@part_1
+POSTHOOK: Output: default@part_2
+POSTHOOK: Output: default@part_3
+POSTHOOK: Lineage: part_1.dr SCRIPT [(part)part.FieldSchema(name:p_partkey, type:int, comment:null), (part)part.FieldSchema(name:p_name, type:string, comment:null), (part)part.FieldSchema(name:p_mfgr, type:string, comment:null), (part)part.FieldSchema(name:p_brand, type:string, comment:null), (part)part.FieldSchema(name:p_type, type:string, comment:null), (part)part.FieldSchema(name:p_size, type:int, comment:null), (part)part.FieldSchema(name:p_container, type:string, comment:null), (part)part.FieldSchema(name:p_retailprice, type:double, comment:null), (part)part.FieldSchema(name:p_comment, type:string, comment:null), ]
+POSTHOOK: Lineage: part_1.p_mfgr SIMPLE [(part)part.FieldSchema(name:p_mfgr, type:string, comment:null), ]
+POSTHOOK: Lineage: part_1.p_name SIMPLE [(part)part.FieldSchema(name:p_name, type:string, comment:null), ]
+POSTHOOK: Lineage: part_1.p_size SIMPLE [(part)part.FieldSchema(name:p_size, type:int, comment:null), ]
+POSTHOOK: Lineage: part_1.r SCRIPT [(part)part.FieldSchema(name:p_partkey, type:int, comment:null), (part)part.FieldSchema(name:p_name, type:string, comment:null), (part)part.FieldSchema(name:p_mfgr, type:string, comment:null), (part)part.FieldSchema(name:p_brand, type:string, comment:null), (part)part.FieldSchema(name:p_type, type:string, comment:null), (part)part.FieldSchema(name:p_size, type:int, comment:null), (part)part.FieldSchema(name:p_container, type:string, comment:null), (part)part.FieldSchema(name:p_retailprice, type:double, comment:null), (part)part.FieldSchema(name:p_comment, type:string, comment:null), ]
+POSTHOOK: Lineage: part_1.s SCRIPT [(part)part.FieldSchema(name:p_partkey, type:int, comment:null), (part)part.FieldSchema(name:p_name, type:string, comment:null), (part)part.FieldSchema(name:p_mfgr, type:string, comment:null), (part)part.FieldSchema(name:p_brand, type:string, comment:null), (part)part.FieldSchema(name:p_type, type:string, comment:null), (part)part.FieldSchema(name:p_size, type:int, comment:null), (part)part.FieldSchema(name:p_container, type:string, comment:null), (part)part.FieldSchema(name:p_retailprice, type:double, comment:null), (part)part.FieldSchema(name:p_comment, type:string, comment:null), ]
+POSTHOOK: Lineage: part_2.cud SCRIPT [(part)part.FieldSchema(name:p_partkey, type:int, comment:null), (part)part.FieldSchema(name:p_name, type:string, comment:null), (part)part.FieldSchema(name:p_mfgr, type:string, comment:null), (part)part.FieldSchema(name:p_brand, type:string, comment:null), (part)part.FieldSchema(name:p_type, type:string, comment:null), (part)part.FieldSchema(name:p_size, type:int, comment:null), (part)part.FieldSchema(name:p_container, type:string, comment:null), (part)part.FieldSchema(name:p_retailprice, type:double, comment:null), (part)part.FieldSchema(name:p_comment, type:string, comment:null), ]
+POSTHOOK: Lineage: part_2.dr SCRIPT [(part)part.FieldSchema(name:p_partkey, type:int, comment:null), (part)part.FieldSchema(name:p_name, type:string, comment:null), (part)part.FieldSchema(name:p_mfgr, type:string, comment:null), (part)part.FieldSchema(name:p_brand, type:string, comment:null), (part)part.FieldSchema(name:p_type, type:string, comment:null), (part)part.FieldSchema(name:p_size, type:int, comment:null), (part)part.FieldSchema(name:p_container, type:string, comment:null), (part)part.FieldSchema(name:p_retailprice, type:double, comment:null), (part)part.FieldSchema(name:p_comment, type:string, comment:null), ]
+POSTHOOK: Lineage: part_2.fv1 SCRIPT [(part)part.FieldSchema(name:p_partkey, type:int, comment:null), (part)part.FieldSchema(name:p_name, type:string, comment:null), (part)part.FieldSchema(name:p_mfgr, type:string, comment:null), (part)part.FieldSchema(name:p_brand, type:string, comment:null), (part)part.FieldSchema(name:p_type, type:string, comment:null), (part)part.FieldSchema(name:p_size, type:int, comment:null), (part)part.FieldSchema(name:p_container, type:string, comment:null), (part)part.FieldSchema(name:p_retailprice, type:double, comment:null), (part)part.FieldSchema(name:p_comment, type:string, comment:null), ]
+POSTHOOK: Lineage: part_2.p_mfgr SIMPLE [(part)part.FieldSchema(name:p_mfgr, type:string, comment:null), ]
+POSTHOOK: Lineage: part_2.p_name SIMPLE [(part)part.FieldSchema(name:p_name, type:string, comment:null), ]
+POSTHOOK: Lineage: part_2.p_size SIMPLE [(part)part.FieldSchema(name:p_size, type:int, comment:null), ]
+POSTHOOK: Lineage: part_2.r SCRIPT [(part)part.FieldSchema(name:p_partkey, type:int, comment:null), (part)part.FieldSchema(name:p_name, type:string, comment:null), (part)part.FieldSchema(name:p_mfgr, type:string, comment:null), (part)part.FieldSchema(name:p_brand, type:string, comment:null), (part)part.FieldSchema(name:p_type, type:string, comment:null), (part)part.FieldSchema(name:p_size, type:int, comment:null), (part)part.FieldSchema(name:p_container, type:string, comment:null), (part)part.FieldSchema(name:p_retailprice, type:double, comment:null), (part)part.FieldSchema(name:p_comment, type:string, comment:null), ]
+POSTHOOK: Lineage: part_2.s2 SCRIPT [(part)part.FieldSchema(name:p_partkey, type:int, comment:null), (part)part.FieldSchema(name:p_name, type:string, comment:null), (part)part.FieldSchema(name:p_mfgr, type:string, comment:null), (part)part.FieldSchema(name:p_brand, type:string, comment:null), (part)part.FieldSchema(name:p_type, type:string, comment:null), (part)part.FieldSchema(name:p_size, type:int, comment:null), (part)part.FieldSchema(name:p_container, type:string, comment:null), (part)part.FieldSchema(name:p_retailprice, type:double, comment:null), (part)part.FieldSchema(name:p_comment, type:string, comment:null), ]
+POSTHOOK: Lineage: part_3.c SCRIPT [(part)part.FieldSchema(name:p_partkey, type:int, comment:null), (part)part.FieldSchema(name:p_name, type:string, comment:null), (part)part.FieldSchema(name:p_mfgr, type:string, comment:null), (part)part.FieldSchema(name:p_brand, type:string, comment:null), (part)part.FieldSchema(name:p_type, type:string, comment:null), (part)part.FieldSchema(name:p_size, type:int, comment:null), (part)part.FieldSchema(name:p_container, type:string, comment:null), (part)part.FieldSchema(name:p_retailprice, type:double, comment:null), (part)part.FieldSchema(name:p_comment, type:string, comment:null), ]
+POSTHOOK: Lineage: part_3.ca SCRIPT [(part)part.FieldSchema(name:p_partkey, type:int, comment:null), (part)part.FieldSchema(name:p_name, type:string, comment:null), (part)part.FieldSchema(name:p_mfgr, type:string, comment:null), (part)part.FieldSchema(name:p_brand, type:string, comment:null), (part)part.FieldSchema(name:p_type, type:string, comment:null), (part)part.FieldSchema(name:p_size, type:int, comment:null), (part)part.FieldSchema(name:p_container, type:string, comment:null), (part)part.FieldSchema(name:p_retailprice, type:double, comment:null), (part)part.FieldSchema(name:p_comment, type:string, comment:null), ]
+POSTHOOK: Lineage: part_3.fv SCRIPT [(part)part.FieldSchema(name:p_partkey, type:int, comment:null), (part)part.FieldSchema(name:p_name, type:string, comment:null), (part)part.FieldSchema(name:p_mfgr, type:string, comment:null), (part)part.FieldSchema(name:p_brand, type:string, comment:null), (part)part.FieldSchema(name:p_type, type:string, comment:null), (part)part.FieldSchema(name:p_size, type:int, comment:null), (part)part.FieldSchema(name:p_container, type:string, comment:null), (part)part.FieldSchema(name:p_retailprice, type:double, comment:null), (part)part.FieldSchema(name:p_comment, type:string, comment:null), ]
+POSTHOOK: Lineage: part_3.p_mfgr SIMPLE [(part)part.FieldSchema(name:p_mfgr, type:string, comment:null), ]
+POSTHOOK: Lineage: part_3.p_name SIMPLE [(part)part.FieldSchema(name:p_name, type:string, comment:null), ]
+POSTHOOK: Lineage: part_3.p_size SIMPLE [(part)part.FieldSchema(name:p_size, type:int, comment:null), ]
+PREHOOK: query: select * from part_1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part_1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from part_1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part_1
+#### A masked pattern was here ####
+Manufacturer#1	almond antique burnished rose metallic	2	1	1	1173.15
+Manufacturer#1	almond antique burnished rose metallic	2	1	1	2346.3
+Manufacturer#1	almond antique chartreuse lavender yellow	34	3	2	4100.06
+Manufacturer#1	almond antique salmon chartreuse burlywood	6	4	3	5702.650000000001
+Manufacturer#1	almond aquamarine burnished black steel	28	5	4	7117.070000000001
+Manufacturer#1	almond aquamarine pink moccasin thistle	42	6	5	8749.730000000001
+Manufacturer#2	almond antique violet chocolate turquoise	14	1	1	1690.68
+Manufacturer#2	almond antique violet turquoise frosted	40	2	2	3491.38
+Manufacturer#2	almond aquamarine midnight light salmon	2	3	3	5523.360000000001
+Manufacturer#2	almond aquamarine rose maroon antique	25	4	4	7222.02
+Manufacturer#2	almond aquamarine sandy cyan gainsboro	18	5	5	8923.62
+Manufacturer#3	almond antique chartreuse khaki white	17	1	1	1671.68
+Manufacturer#3	almond antique forest lavender goldenrod	14	2	2	2861.95
+Manufacturer#3	almond antique metallic orange dim	19	3	3	4272.34
+Manufacturer#3	almond antique misty red olive	1	4	4	6195.32
+Manufacturer#3	almond antique olive coral navajo	45	5	5	7532.61
+Manufacturer#4	almond antique gainsboro frosted violet	10	1	1	1620.67
+Manufacturer#4	almond antique violet mint lemon	39	2	2	2996.09
+Manufacturer#4	almond aquamarine floral ivory bisque	27	3	3	4202.35
+Manufacturer#4	almond aquamarine yellow dodger mint	7	4	4	6047.27
+Manufacturer#4	almond azure aquamarine papaya violet	12	5	5	7337.620000000001
+Manufacturer#5	almond antique blue firebrick mint	31	1	1	1789.69
+Manufacturer#5	almond antique medium spring khaki	6	2	2	3401.3500000000004
+Manufacturer#5	almond antique sky peru orange	2	3	3	5190.08
+Manufacturer#5	almond aquamarine dodger light gainsboro	46	4	4	6208.18
+Manufacturer#5	almond azure blanched chiffon midnight	23	5	5	7672.66
+PREHOOK: query: select * from part_2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part_2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from part_2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part_2
+#### A masked pattern was here ####
+Manufacturer#1	almond antique burnished rose metallic	2	1	1	0	4.0	2
+Manufacturer#1	almond antique burnished rose metallic	2	1	1	0	4.0	2
+Manufacturer#1	almond antique chartreuse lavender yellow	34	3	2	0	34.0	2
+Manufacturer#1	almond antique salmon chartreuse burlywood	6	4	3	0	10.0	2
+Manufacturer#1	almond aquamarine burnished black steel	28	5	4	0	28.0	34
+Manufacturer#1	almond aquamarine pink moccasin thistle	42	6	5	1	42.0	6
+Manufacturer#2	almond antique violet chocolate turquoise	14	1	1	0	14.0	14
+Manufacturer#2	almond antique violet turquoise frosted	40	2	2	0	40.0	14
+Manufacturer#2	almond aquamarine midnight light salmon	2	3	3	0	2.0	14
+Manufacturer#2	almond aquamarine rose maroon antique	25	4	4	0	25.0	40
+Manufacturer#2	almond aquamarine sandy cyan gainsboro	18	5	5	1	32.0	2
+Manufacturer#3	almond antique chartreuse khaki white	17	1	1	0	31.0	17
+Manufacturer#3	almond antique forest 

<TRUNCATED>

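The truncated golden output above exercises one recurring pattern: a named
WINDOW clause shared across windowing tests (views over windowing queries,
lateral views, and multi-insert statements). A minimal self-contained sketch
of that pattern in HiveQL, assuming the TPC-H style part table these tests
use:

  SELECT p_mfgr, p_name, p_size,
         sum(p_retailprice) OVER w1 AS s
  FROM part
  WINDOW w1 AS (DISTRIBUTE BY p_mfgr SORT BY p_name
                ROWS BETWEEN 2 PRECEDING AND CURRENT ROW);

Each row's s is the running sum of p_retailprice over the current row and the
two preceding rows within the same p_mfgr partition, which is exactly the
frame the part_1/part_2/part_3 outputs above reflect.
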
[49/50] [abbrv] hive git commit: HIVE-11329: Column prefix in key of hbase column prefix map (Wojciech Indyk, reviewed by Sergio Pena)

Posted by xu...@apache.org.
HIVE-11329: Column prefix in key of hbase column prefix map (Wojciech Indyk, reviewed by Sergio Pena)

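For orientation before the diff: this commit introduces a new serde property,
hbase.columns.mapping.prefix.hide, which strips the matched qualifier prefix
from the keys of a Hive MAP column when regex column mapping is enabled. A
minimal sketch of the usage, adapted from the test query added below (the
table and HBase column names are just the test fixtures):

  CREATE EXTERNAL TABLE t_ext_hbase_maps_cut_prefix(
      key STRING,
      string_map_cols MAP<STRING, STRING>,
      simple_string_col STRING)
  STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
  WITH SERDEPROPERTIES (
      "hbase.columns.mapping" = ":key,cf-string:string_.*,cf-string:simple_string_col",
      "hbase.columns.mapping.prefix.hide" = "true")
  TBLPROPERTIES ("hbase.table.name" = "t_hive_maps");

  -- With prefix hiding enabled, an HBase qualifier such as string_col
  -- surfaces in string_map_cols under the key "col" instead of "string_col".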

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d51c62a4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d51c62a4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d51c62a4

Branch: refs/heads/beeline-cli
Commit: d51c62a455eb08ee49f10ea2e117ca90de0bf47b
Parents: 7281a46
Author: Sergio Pena <se...@cloudera.com>
Authored: Tue Sep 8 08:39:10 2015 -0500
Committer: Sergio Pena <se...@cloudera.com>
Committed: Tue Sep 8 08:39:10 2015 -0500

----------------------------------------------------------------------
 .../hadoop/hive/hbase/ColumnMappings.java       |  5 ++
 .../apache/hadoop/hive/hbase/HBaseSerDe.java    | 19 +++++-
 .../hadoop/hive/hbase/HBaseSerDeParameters.java |  8 ++-
 .../hadoop/hive/hbase/HBaseStorageHandler.java  | 13 ++--
 .../hadoop/hive/hbase/LazyHBaseCellMap.java     | 19 +++++-
 .../apache/hadoop/hive/hbase/LazyHBaseRow.java  |  5 +-
 .../hadoop/hive/hbase/LazyHBaseCellMapTest.java | 72 ++++++++++++++++++++
 .../positive/hbase_binary_map_queries_prefix.q  | 15 +++-
 .../hbase_binary_map_queries_prefix.q.out       | 40 +++++++++++
 9 files changed, 177 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/d51c62a4/hbase-handler/src/java/org/apache/hadoop/hive/hbase/ColumnMappings.java
----------------------------------------------------------------------
diff --git a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/ColumnMappings.java b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/ColumnMappings.java
index 5cb3752..d7d4461 100644
--- a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/ColumnMappings.java
+++ b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/ColumnMappings.java
@@ -354,6 +354,7 @@ public class ColumnMappings implements Iterable<ColumnMappings.ColumnMapping> {
     String mappingSpec;
     String qualifierPrefix;
     byte[] qualifierPrefixBytes;
+    boolean doPrefixCut;
 
     public String getColumnName() {
       return columnName;
@@ -399,6 +400,10 @@ public class ColumnMappings implements Iterable<ColumnMappings.ColumnMapping> {
       return qualifierPrefixBytes;
     }
 
+    public boolean isDoPrefixCut() {
+      return doPrefixCut;
+    }
+
     public boolean isCategory(ObjectInspector.Category category) {
       return columnType.getCategory() == category;
     }

http://git-wip-us.apache.org/repos/asf/hive/blob/d51c62a4/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java
----------------------------------------------------------------------
diff --git a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java
index e843bd8..ed2df5f 100644
--- a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java
+++ b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java
@@ -93,6 +93,13 @@ public class HBaseSerDe extends AbstractSerDe {
    **/
   public static final String SERIALIZATION_TYPE = "serialization.type";
 
+  /**
+   * Defines whether the column qualifier prefix from HBase should be hidden in map keys.
+   * It takes effect only when @HBASE_COLUMNS_REGEX_MATCHING is true.
+   * The default value of this parameter is false.
+   */
+  public static final String HBASE_COLUMNS_PREFIX_HIDE = "hbase.columns.mapping.prefix.hide";
+
   private ObjectInspector cachedObjectInspector;
   private LazyHBaseRow cachedHBaseRow;
 
@@ -136,6 +143,11 @@ public class HBaseSerDe extends AbstractSerDe {
       throws SerDeException {
     return parseColumnsMapping(columnsMappingSpec, true);
   }
+
+  public static ColumnMappings parseColumnsMapping(
+      String columnsMappingSpec, boolean doColumnRegexMatching) throws SerDeException {
+    return parseColumnsMapping(columnsMappingSpec, doColumnRegexMatching, false);
+  }
   /**
    * Parses the HBase columns mapping specifier to identify the column families, qualifiers
    * and also caches the byte arrays corresponding to them. One of the Hive table
@@ -143,11 +155,12 @@ public class HBaseSerDe extends AbstractSerDe {
    *
    * @param columnsMappingSpec string hbase.columns.mapping specified when creating table
    * @param doColumnRegexMatching whether to do a regex matching on the columns or not
+   * @param hideColumnPrefix whether to hide the column-mapping qualifier prefix in map key names (takes effect only if @doColumnRegexMatching is true)
    * @return List<ColumnMapping> which contains the column mapping information by position
    * @throws org.apache.hadoop.hive.serde2.SerDeException
    */
   public static ColumnMappings parseColumnsMapping(
-      String columnsMappingSpec, boolean doColumnRegexMatching) throws SerDeException {
+      String columnsMappingSpec, boolean doColumnRegexMatching, boolean hideColumnPrefix) throws SerDeException {
 
     if (columnsMappingSpec == null) {
       throw new SerDeException("Error: hbase.columns.mapping missing for this HBase table.");
@@ -206,6 +219,8 @@ public class HBaseSerDe extends AbstractSerDe {
             // we have a prefix with a wildcard
             columnMapping.qualifierPrefix = parts[1].substring(0, parts[1].length() - 2);
             columnMapping.qualifierPrefixBytes = Bytes.toBytes(columnMapping.qualifierPrefix);
+            // pass the flag that controls prefix hiding
+            columnMapping.doPrefixCut = hideColumnPrefix;
             // we weren't provided any actual qualifier name. Set these to
             // null.
             columnMapping.qualifierName = null;
@@ -214,6 +229,8 @@ public class HBaseSerDe extends AbstractSerDe {
             // set the regular provided qualifier names
             columnMapping.qualifierName = parts[1];
             columnMapping.qualifierNameBytes = Bytes.toBytes(parts[1]);
+            // if there is no prefix, there is nothing to cut
+            columnMapping.doPrefixCut = false;
           }
         } else {
           columnMapping.qualifierName = null;

http://git-wip-us.apache.org/repos/asf/hive/blob/d51c62a4/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java
----------------------------------------------------------------------
diff --git a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java
index 71f5da5..43c1f0c 100644
--- a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java
+++ b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java
@@ -22,6 +22,8 @@ import java.util.ArrayList;
 import java.util.List;
 import java.util.Properties;
 
+import javax.annotation.Nullable;
+
 import org.apache.avro.Schema;
 import org.apache.avro.reflect.ReflectData;
 import org.apache.hadoop.conf.Configuration;
@@ -38,8 +40,6 @@ import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.util.ReflectionUtils;
 
-import javax.annotation.Nullable;
-
 /**
  * HBaseSerDeParameters encapsulates SerDeParameters and additional configurations that are specific for
  * HBaseSerDe.
@@ -57,6 +57,7 @@ public class HBaseSerDeParameters {
   private final String columnMappingString;
   private final ColumnMappings columnMappings;
   private final boolean doColumnRegexMatching;
+  private final boolean doColumnPrefixCut;
 
   private final long putTimestamp;
   private final HBaseKeyFactory keyFactory;
@@ -69,8 +70,9 @@ public class HBaseSerDeParameters {
     columnMappingString = tbl.getProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING);
     doColumnRegexMatching =
         Boolean.valueOf(tbl.getProperty(HBaseSerDe.HBASE_COLUMNS_REGEX_MATCHING, "true"));
+    doColumnPrefixCut = Boolean.valueOf(tbl.getProperty(HBaseSerDe.HBASE_COLUMNS_PREFIX_HIDE, "false"));
     // Parse and initialize the HBase columns mapping
-    columnMappings = HBaseSerDe.parseColumnsMapping(columnMappingString, doColumnRegexMatching);
+    columnMappings = HBaseSerDe.parseColumnsMapping(columnMappingString, doColumnRegexMatching, doColumnPrefixCut);
 
     // Build the type property string if not supplied
     String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);

http://git-wip-us.apache.org/repos/asf/hive/blob/d51c62a4/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java
----------------------------------------------------------------------
diff --git a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java
index c0eff23..cedb6e0 100644
--- a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java
+++ b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java
@@ -44,13 +44,10 @@ import org.apache.hadoop.hbase.mapred.TableOutputFormat;
 import org.apache.hadoop.hbase.mapreduce.TableInputFormatBase;
 import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
 import org.apache.hadoop.hbase.security.User;
-import org.apache.hadoop.hbase.security.token.AuthenticationTokenIdentifier;
-import org.apache.hadoop.hbase.security.token.AuthenticationTokenSelector;
+import org.apache.hadoop.hbase.security.token.TokenUtil;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping;
-import org.apache.hadoop.hbase.zookeeper.ZKClusterId;
-import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
 import org.apache.hadoop.hive.metastore.HiveMetaHook;
 import org.apache.hadoop.hive.metastore.MetaStoreUtils;
 import org.apache.hadoop.hive.metastore.api.MetaException;
@@ -66,15 +63,11 @@ import org.apache.hadoop.hive.ql.plan.TableDesc;
 import org.apache.hadoop.hive.serde2.Deserializer;
 import org.apache.hadoop.hive.serde2.SerDe;
 import org.apache.hadoop.hive.shims.ShimLoader;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.hbase.security.token.TokenUtil;
 import org.apache.hadoop.mapred.InputFormat;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.OutputFormat;
 import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.util.StringUtils;
-import org.apache.zookeeper.KeeperException;
 
 import com.yammer.metrics.core.MetricsRegistry;
 
@@ -348,7 +341,9 @@ public class HBaseStorageHandler extends DefaultStorageHandler
       HBaseSerDe.HBASE_COLUMNS_MAPPING,
       tableProperties.getProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING));
     jobProperties.put(HBaseSerDe.HBASE_COLUMNS_REGEX_MATCHING,
-        tableProperties.getProperty(HBaseSerDe.HBASE_COLUMNS_REGEX_MATCHING, "true"));
+            tableProperties.getProperty(HBaseSerDe.HBASE_COLUMNS_REGEX_MATCHING, "true"));
+    jobProperties.put(HBaseSerDe.HBASE_COLUMNS_PREFIX_HIDE,
+            tableProperties.getProperty(HBaseSerDe.HBASE_COLUMNS_PREFIX_HIDE, "false"));
     jobProperties.put(HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE,
       tableProperties.getProperty(HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE,"string"));
     String scanCache = tableProperties.getProperty(HBaseSerDe.HBASE_SCAN_CACHE);

http://git-wip-us.apache.org/repos/asf/hive/blob/d51c62a4/hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java
----------------------------------------------------------------------
diff --git a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java
index 09cbf52..459d934 100644
--- a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java
+++ b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java
@@ -45,8 +45,9 @@ public class LazyHBaseCellMap extends LazyMap {
   private byte [] columnFamilyBytes;
   private byte[] qualPrefix;
   private List<Boolean> binaryStorage;
+  private boolean hideQualPrefix;
 
-  /**
+	/**
    * Construct a LazyCellMap object with the ObjectInspector.
    * @param oi
    */
@@ -62,15 +63,23 @@ public class LazyHBaseCellMap extends LazyMap {
     init(r, columnFamilyBytes, binaryStorage, null);
   }
 
+  public void init(
+      Result r,
+      byte[] columnFamilyBytes,
+      List<Boolean> binaryStorage, byte[] qualPrefix) {
+    init(r, columnFamilyBytes, binaryStorage, qualPrefix, false);
+  }
+
   public void init(
       Result r,
       byte [] columnFamilyBytes,
-      List<Boolean> binaryStorage, byte[] qualPrefix) {
+      List<Boolean> binaryStorage, byte[] qualPrefix, boolean hideQualPrefix) {
     this.isNull = false;
     this.result = r;
     this.columnFamilyBytes = columnFamilyBytes;
     this.binaryStorage = binaryStorage;
     this.qualPrefix = qualPrefix;
+    this.hideQualPrefix = hideQualPrefix;
     setParsed(false);
   }
 
@@ -106,7 +115,11 @@ public class LazyHBaseCellMap extends LazyMap {
               binaryStorage.get(0));
 
         ByteArrayRef keyRef = new ByteArrayRef();
-        keyRef.setData(e.getKey());
+        if (qualPrefix != null && hideQualPrefix) {
+          keyRef.setData(Bytes.tail(e.getKey(), e.getKey().length - qualPrefix.length)); // cut the prefix from Hive's map key
+        } else {
+          keyRef.setData(e.getKey()); // no prefix to hide
+        }
         key.init(keyRef, 0, keyRef.getData().length);
 
         // Value

http://git-wip-us.apache.org/repos/asf/hive/blob/d51c62a4/hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseRow.java
----------------------------------------------------------------------
diff --git a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseRow.java b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseRow.java
index 868d81f..003dd1c 100644
--- a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseRow.java
+++ b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseRow.java
@@ -22,7 +22,6 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 
-import com.google.common.annotations.VisibleForTesting;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping;
 import org.apache.hadoop.hive.hbase.struct.HBaseValueFactory;
@@ -36,6 +35,8 @@ import org.apache.hadoop.hive.serde2.lazy.LazyTimestamp;
 import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 
+import com.google.common.annotations.VisibleForTesting;
+
 /**
  * LazyObject for storing an HBase row.  The field of an HBase row can be
  * primitive or non-primitive.
@@ -148,7 +149,7 @@ public class LazyHBaseRow extends LazyStruct {
         // qualifier prefix to cherry pick the qualifiers that match the prefix instead of picking
         // up everything
         ((LazyHBaseCellMap) fields[fieldID]).init(
-            result, colMap.familyNameBytes, colMap.binaryStorage, colMap.qualifierPrefixBytes);
+            result, colMap.familyNameBytes, colMap.binaryStorage, colMap.qualifierPrefixBytes, colMap.isDoPrefixCut());
         return fields[fieldID].getObject();
       }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/d51c62a4/hbase-handler/src/test/org/apache/hadoop/hive/hbase/LazyHBaseCellMapTest.java
----------------------------------------------------------------------
diff --git a/hbase-handler/src/test/org/apache/hadoop/hive/hbase/LazyHBaseCellMapTest.java b/hbase-handler/src/test/org/apache/hadoop/hive/hbase/LazyHBaseCellMapTest.java
new file mode 100644
index 0000000..eb13e14
--- /dev/null
+++ b/hbase-handler/src/test/org/apache/hadoop/hive/hbase/LazyHBaseCellMapTest.java
@@ -0,0 +1,72 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.hbase;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.io.Text;
+
+import junit.framework.TestCase;
+
+public class LazyHBaseCellMapTest extends TestCase {
+  public static final byte[] TEST_ROW = Bytes.toBytes("test-row");
+  public static final byte[] COLUMN_FAMILY = Bytes.toBytes("a");
+  public static final String QUAL_PREFIX = "col_";
+
+
+  public void testInitColumnPrefix() throws Exception {
+    Text nullSequence = new Text("\\N");
+    ObjectInspector oi = LazyFactory.createLazyObjectInspector(
+        TypeInfoUtils.getTypeInfosFromTypeString("map<string,string>").get(0),
+        new byte[] { (byte) 1, (byte) 2 }, 0, nullSequence, false, (byte) 0);
+
+    LazyHBaseCellMap b = new LazyHBaseCellMap((LazyMapObjectInspector) oi);
+
+    // Initialize a result with two qualifiers that share QUAL_PREFIX
+    Cell[] cells = new KeyValue[2];
+
+    final String col1 = "1";
+    final String col2 = "2";
+    cells[0] = new KeyValue(TEST_ROW, COLUMN_FAMILY,
+        Bytes.toBytes(QUAL_PREFIX + col1), Bytes.toBytes("cfacol1"));
+    cells[1] = new KeyValue(TEST_ROW, COLUMN_FAMILY,
+        Bytes.toBytes(QUAL_PREFIX + col2), Bytes.toBytes("cfacol2"));
+
+    Result r = Result.create(cells);
+
+    List<Boolean> mapBinaryStorage = new ArrayList<Boolean>();
+    mapBinaryStorage.add(false);
+    mapBinaryStorage.add(false);
+    // with prefix hiding enabled, the map keys should be "1" and "2"
+    b.init(r, COLUMN_FAMILY, mapBinaryStorage, Bytes.toBytes(QUAL_PREFIX), true);
+
+    assertNotNull(b.getMapValueElement(new Text(col1)));
+    assertNotNull(b.getMapValueElement(new Text(col2)));
+
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/d51c62a4/hbase-handler/src/test/queries/positive/hbase_binary_map_queries_prefix.q
----------------------------------------------------------------------
diff --git a/hbase-handler/src/test/queries/positive/hbase_binary_map_queries_prefix.q b/hbase-handler/src/test/queries/positive/hbase_binary_map_queries_prefix.q
index 3d3f5ea..9ff4366 100644
--- a/hbase-handler/src/test/queries/positive/hbase_binary_map_queries_prefix.q
+++ b/hbase-handler/src/test/queries/positive/hbase_binary_map_queries_prefix.q
@@ -49,4 +49,17 @@ TBLPROPERTIES ("hbase.table.name"="t_hive_maps");
 
 SELECT * FROM t_ext_hbase_maps ORDER BY key;
 
-DROP TABLE t_ext_hbase_maps;
\ No newline at end of file
+DROP TABLE t_ext_hbase_maps;
+
+DROP TABLE t_ext_hbase_maps_cut_prefix;
+
+CREATE EXTERNAL TABLE t_ext_hbase_maps_cut_prefix(key STRING,
+                                       string_map_cols MAP<STRING, STRING>, simple_string_col STRING)
+STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+WITH SERDEPROPERTIES ("hbase.columns.mapping"=":key,cf-string:string_.*,cf-string:simple_string_col"
+    ,"hbase.columns.mapping.prefix.hide"="true")
+TBLPROPERTIES ("hbase.table.name"="t_hive_maps");
+
+SELECT * FROM t_ext_hbase_maps_cut_prefix ORDER BY key;
+
+DROP TABLE t_ext_hbase_maps_cut_prefix;

http://git-wip-us.apache.org/repos/asf/hive/blob/d51c62a4/hbase-handler/src/test/results/positive/hbase_binary_map_queries_prefix.q.out
----------------------------------------------------------------------
diff --git a/hbase-handler/src/test/results/positive/hbase_binary_map_queries_prefix.q.out b/hbase-handler/src/test/results/positive/hbase_binary_map_queries_prefix.q.out
index 5d21134..f6432b3 100644
--- a/hbase-handler/src/test/results/positive/hbase_binary_map_queries_prefix.q.out
+++ b/hbase-handler/src/test/results/positive/hbase_binary_map_queries_prefix.q.out
@@ -152,3 +152,43 @@ POSTHOOK: query: DROP TABLE t_ext_hbase_maps
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@t_ext_hbase_maps
 POSTHOOK: Output: default@t_ext_hbase_maps
+PREHOOK: query: DROP TABLE t_ext_hbase_maps_cut_prefix
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE t_ext_hbase_maps_cut_prefix
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE EXTERNAL TABLE t_ext_hbase_maps_cut_prefix(key STRING,
+                                       string_map_cols MAP<STRING, STRING>, simple_string_col STRING)
+STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+WITH SERDEPROPERTIES ("hbase.columns.mapping"=":key,cf-string:string_.*,cf-string:simple_string_col"
+    ,"hbase.columns.mapping.prefix.hide"="true")
+TBLPROPERTIES ("hbase.table.name"="t_hive_maps")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t_ext_hbase_maps_cut_prefix
+POSTHOOK: query: CREATE EXTERNAL TABLE t_ext_hbase_maps_cut_prefix(key STRING,
+                                       string_map_cols MAP<STRING, STRING>, simple_string_col STRING)
+STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+WITH SERDEPROPERTIES ("hbase.columns.mapping"=":key,cf-string:string_.*,cf-string:simple_string_col"
+    ,"hbase.columns.mapping.prefix.hide"="true")
+TBLPROPERTIES ("hbase.table.name"="t_hive_maps")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t_ext_hbase_maps_cut_prefix
+PREHOOK: query: SELECT * FROM t_ext_hbase_maps_cut_prefix ORDER BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_ext_hbase_maps_cut_prefix
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM t_ext_hbase_maps_cut_prefix ORDER BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_ext_hbase_maps_cut_prefix
+#### A masked pattern was here ####
+125	{"col":"val_125"}	val_125
+126	{"col":"val_126"}	val_126
+PREHOOK: query: DROP TABLE t_ext_hbase_maps_cut_prefix
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@t_ext_hbase_maps_cut_prefix
+PREHOOK: Output: default@t_ext_hbase_maps_cut_prefix
+POSTHOOK: query: DROP TABLE t_ext_hbase_maps_cut_prefix
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@t_ext_hbase_maps_cut_prefix
+POSTHOOK: Output: default@t_ext_hbase_maps_cut_prefix


[42/50] [abbrv] hive git commit: HIVE-11593 Add aes_encrypt and aes_decrypt UDFs (Alexander Pivovarov, reviewed by Jason Dere)

Posted by xu...@apache.org.
HIVE-11593 Add aes_encrypt and aes_decrypt UDFs (Alexander Pivovarov, reviewed by Jason Dere)

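For orientation before the diff: the two new generic UDFs wrap the JDK's
javax.crypto AES support (Cipher.getInstance("AES")) and accept 128-, 192-,
or 256-bit keys, with the longer key sizes requiring the JCE Unlimited
Strength policy files. A minimal usage sketch in HiveQL; the literal
16-byte key is only a sample, and the expected output (shown as comments)
assumes the JDK's default AES transform used by this patch:

  SELECT base64(aes_encrypt('ABC', '1234567890123456'));
  -- y6Ss+zCYObpCbgfWfyNWTw==
  SELECT aes_decrypt(unbase64('y6Ss+zCYObpCbgfWfyNWTw=='), '1234567890123456');
  -- ABC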

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f4acb44f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f4acb44f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f4acb44f

Branch: refs/heads/beeline-cli
Commit: f4acb44f67b64e85f53de6d8503ef2ae6e0fa524
Parents: 8bed378
Author: Alexander Pivovarov <ap...@gmail.com>
Authored: Sun Aug 16 18:36:32 2015 -0700
Committer: Alexander Pivovarov <ap...@gmail.com>
Committed: Sat Sep 5 19:24:50 2015 -0700

----------------------------------------------------------------------
 .../hadoop/hive/ql/exec/FunctionRegistry.java   |   2 +
 .../hive/ql/udf/generic/GenericUDFAesBase.java  | 205 ++++++++++++++++
 .../ql/udf/generic/GenericUDFAesDecrypt.java    |  50 ++++
 .../ql/udf/generic/GenericUDFAesEncrypt.java    |  50 ++++
 .../ql/udf/generic/GenericUDFParamUtils.java    |   8 +-
 .../udf/generic/TestGenericUDFAesDecrypt.java   | 233 +++++++++++++++++++
 .../udf/generic/TestGenericUDFAesEncrypt.java   | 228 ++++++++++++++++++
 .../queries/clientpositive/udf_aes_decrypt.q    |  21 ++
 .../queries/clientpositive/udf_aes_encrypt.q    |  21 ++
 .../results/clientpositive/show_functions.q.out |   2 +
 .../clientpositive/udf_aes_decrypt.q.out        |  79 +++++++
 .../clientpositive/udf_aes_encrypt.q.out        |  79 +++++++
 12 files changed, 977 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/f4acb44f/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index 4c1c53e..f1fe30d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -234,6 +234,8 @@ public final class FunctionRegistry {
     system.registerUDF("md5", UDFMd5.class, false);
     system.registerUDF("sha1", UDFSha1.class, false);
     system.registerUDF("sha", UDFSha1.class, false);
+    system.registerGenericUDF("aes_encrypt", GenericUDFAesEncrypt.class);
+    system.registerGenericUDF("aes_decrypt", GenericUDFAesDecrypt.class);
 
     system.registerGenericUDF("encode", GenericUDFEncode.class);
     system.registerGenericUDF("decode", GenericUDFDecode.class);

http://git-wip-us.apache.org/repos/asf/hive/blob/f4acb44f/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAesBase.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAesBase.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAesBase.java
new file mode 100644
index 0000000..66a4457
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAesBase.java
@@ -0,0 +1,205 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping.BINARY_GROUP;
+import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping.STRING_GROUP;
+
+import java.security.GeneralSecurityException;
+import java.security.NoSuchAlgorithmException;
+
+import javax.crypto.Cipher;
+import javax.crypto.NoSuchPaddingException;
+import javax.crypto.SecretKey;
+import javax.crypto.spec.SecretKeySpec;
+
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Text;
+
+/**
+ * GenericUDFAesBase.
+ *
+ */
+public abstract class GenericUDFAesBase extends GenericUDF {
+  protected transient Converter[] converters = new Converter[2];
+  protected transient PrimitiveCategory[] inputTypes = new PrimitiveCategory[2];
+  protected final BytesWritable output = new BytesWritable();
+  protected transient boolean isStr0;
+  protected transient boolean isStr1;
+  protected transient boolean isKeyConstant;
+  protected transient Cipher cipher;
+  protected transient SecretKey secretKey;
+
+  @Override
+  public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
+    checkArgsSize(arguments, 2, 2);
+
+    checkArgPrimitive(arguments, 0);
+    checkArgPrimitive(arguments, 1);
+
+    // the function should support both string and binary input types
+    if (canParam0BeStr()) {
+      checkArgGroups(arguments, 0, inputTypes, STRING_GROUP, BINARY_GROUP);
+    } else {
+      checkArgGroups(arguments, 0, inputTypes, BINARY_GROUP);
+    }
+    checkArgGroups(arguments, 1, inputTypes, STRING_GROUP, BINARY_GROUP);
+
+    if (isStr0 = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(inputTypes[0]) == STRING_GROUP) {
+      obtainStringConverter(arguments, 0, inputTypes, converters);
+    } else {
+      GenericUDFParamUtils.obtainBinaryConverter(arguments, 0, inputTypes, converters);
+    }
+
+    isKeyConstant = arguments[1] instanceof ConstantObjectInspector;
+    byte[] key = null;
+    int keyLength = 0;
+
+    if (isStr1 = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(inputTypes[1]) == STRING_GROUP) {
+      if (isKeyConstant) {
+        String keyStr = getConstantStringValue(arguments, 1);
+        if (keyStr != null) {
+          key = keyStr.getBytes();
+          keyLength = key.length;
+        }
+      } else {
+        obtainStringConverter(arguments, 1, inputTypes, converters);
+      }
+    } else {
+      if (isKeyConstant) {
+        BytesWritable keyWr = GenericUDFParamUtils.getConstantBytesValue(arguments, 1);
+        if (keyWr != null) {
+          key = keyWr.getBytes();
+          keyLength = keyWr.getLength();
+        }
+      } else {
+        GenericUDFParamUtils.obtainBinaryConverter(arguments, 1, inputTypes, converters);
+      }
+    }
+
+    if (key != null) {
+      secretKey = getSecretKey(key, keyLength);
+    }
+
+    try {
+      cipher = Cipher.getInstance("AES");
+    } catch (NoSuchPaddingException | NoSuchAlgorithmException e) {
+      throw new RuntimeException(e);
+    }
+
+    ObjectInspector outputOI = PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
+    return outputOI;
+  }
+
+  @Override
+  public Object evaluate(DeferredObject[] arguments) throws HiveException {
+    byte[] input;
+    int inputLength;
+
+    if (isStr0) {
+      Text n = GenericUDFParamUtils.getTextValue(arguments, 0, converters);
+      if (n == null) {
+        return null;
+      }
+      input = n.getBytes();
+      inputLength = n.getLength();
+    } else {
+      BytesWritable bWr = GenericUDFParamUtils.getBinaryValue(arguments, 0, converters);
+      if (bWr == null) {
+        return null;
+      }
+      input = bWr.getBytes();
+      inputLength = bWr.getLength();
+    }
+
+    if (input == null) {
+      return null;
+    }
+
+    SecretKey secretKey;
+    if (isKeyConstant) {
+      secretKey = this.secretKey;
+    } else {
+      byte[] key;
+      int keyLength;
+      if (isStr1) {
+        Text n = GenericUDFParamUtils.getTextValue(arguments, 1, converters);
+        if (n == null) {
+          return null;
+        }
+        key = n.getBytes();
+        keyLength = n.getLength();
+      } else {
+        BytesWritable bWr = GenericUDFParamUtils.getBinaryValue(arguments, 1, converters);
+        if (bWr == null) {
+          return null;
+        }
+        key = bWr.getBytes();
+        keyLength = bWr.getLength();
+      }
+      secretKey = getSecretKey(key, keyLength);
+    }
+
+    if (secretKey == null) {
+      return null;
+    }
+
+    byte[] res = aesFunction(input, inputLength, secretKey);
+
+    if (res == null) {
+      return null;
+    }
+
+    output.set(res, 0, res.length);
+    return output;
+  }
+
+  protected SecretKey getSecretKey(byte[] key, int keyLength) {
+    if (keyLength == 16 || keyLength == 24 || keyLength == 32) {
+      return new SecretKeySpec(key, 0, keyLength, "AES");
+    }
+    return null;
+  }
+
+  protected byte[] aesFunction(byte[] input, int inputLength, SecretKey secretKey) {
+    try {
+      cipher.init(getCipherMode(), secretKey);
+      byte[] res = cipher.doFinal(input, 0, inputLength);
+      return res;
+    } catch (GeneralSecurityException e) {
+      return null;
+    }
+  }
+
+  protected abstract int getCipherMode();
+
+  protected abstract boolean canParam0BeStr();
+
+  @Override
+  public String getDisplayString(String[] children) {
+    return getStandardDisplayString(getFuncName(), children);
+  }
+}
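
For quick verification outside Hive, the JCE calls wrapped by this base class can be reproduced in a few lines. The sketch below is illustrative only (the class name is invented, and commons-codec is assumed on the classpath, as in the new unit tests); it relies on Cipher.getInstance("AES") resolving to the provider default AES/ECB/PKCS5Padding on standard JREs:

import javax.crypto.Cipher;
import javax.crypto.SecretKey;
import javax.crypto.spec.SecretKeySpec;

import org.apache.commons.codec.binary.Base64;

// Illustrative sketch only -- not part of the commit. Mirrors the
// getSecretKey()/aesFunction() contract of GenericUDFAesBase: raw AES
// with a 16-byte (AES-128) key and the provider's default padding.
public class AesRoundTripSketch {
  public static void main(String[] args) throws Exception {
    SecretKey key = new SecretKeySpec("1234567890123456".getBytes("UTF-8"), "AES");

    Cipher cipher = Cipher.getInstance("AES"); // AES/ECB/PKCS5Padding by default
    cipher.init(Cipher.ENCRYPT_MODE, key);
    byte[] ct = cipher.doFinal("ABC".getBytes("UTF-8"));
    // Same ciphertext the q-file tests assert against:
    System.out.println(Base64.encodeBase64String(ct)); // y6Ss+zCYObpCbgfWfyNWTw==

    cipher.init(Cipher.DECRYPT_MODE, key);
    System.out.println(new String(cipher.doFinal(ct), "UTF-8")); // ABC
  }
}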

http://git-wip-us.apache.org/repos/asf/hive/blob/f4acb44f/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAesDecrypt.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAesDecrypt.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAesDecrypt.java
new file mode 100644
index 0000000..d83fd2d
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAesDecrypt.java
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import javax.crypto.Cipher;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+
+/**
+ * GenericUDFAesDecrypt.
+ *
+ */
+@Description(name = "aes_decrypt", value = "_FUNC_(input binary, key string/binary) - Decrypt input using AES.",
+    extended = "AES (Advanced Encryption Standard) algorithm. "
+    + "Key lengths of 128, 192 or 256 bits can be used. 192 and 256 bits keys can be used if "
+    + "Java Cryptography Extension (JCE) Unlimited Strength Jurisdiction Policy Files are installed. "
+    + "If either argument is NULL or the key length is not one of the permitted values, the return value is NULL.\n"
+    + "Example: > SELECT _FUNC_(unbase64('y6Ss+zCYObpCbgfWfyNWTw=='), '1234567890123456');\n 'ABC'")
+public class GenericUDFAesDecrypt extends GenericUDFAesBase {
+
+  @Override
+  protected int getCipherMode() {
+    return Cipher.DECRYPT_MODE;
+  }
+
+  @Override
+  protected boolean canParam0BeStr() {
+    return false;
+  }
+
+  @Override
+  protected String getFuncName() {
+    return "aes_decrypt";
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/f4acb44f/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAesEncrypt.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAesEncrypt.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAesEncrypt.java
new file mode 100644
index 0000000..d916441
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAesEncrypt.java
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import javax.crypto.Cipher;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+
+/**
+ * GenericUDFAesEncrypt.
+ *
+ */
+@Description(name = "aes_encrypt", value = "_FUNC_(input string/binary, key string/binary) - Encrypt input using AES.",
+    extended = "AES (Advanced Encryption Standard) algorithm. "
+    + "Key lengths of 128, 192 or 256 bits can be used. 192 and 256 bits keys can be used if "
+    + "Java Cryptography Extension (JCE) Unlimited Strength Jurisdiction Policy Files are installed. "
+    + "If either argument is NULL or the key length is not one of the permitted values, the return value is NULL.\n"
+    + "Example: > SELECT base64(_FUNC_('ABC', '1234567890123456'));\n 'y6Ss+zCYObpCbgfWfyNWTw=='")
+public class GenericUDFAesEncrypt extends GenericUDFAesBase {
+
+  @Override
+  protected int getCipherMode() {
+    return Cipher.ENCRYPT_MODE;
+  }
+
+  @Override
+  protected boolean canParam0BeStr() {
+    return true;
+  }
+
+  @Override
+  protected String getFuncName() {
+    return "aes_encrypt";
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/f4acb44f/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFParamUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFParamUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFParamUtils.java
index cdbc6ea..0e7eb8e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFParamUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFParamUtils.java
@@ -21,10 +21,11 @@ package org.apache.hadoop.hive.ql.udf.generic;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
+import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 import org.apache.hadoop.io.BytesWritable;
@@ -68,4 +69,9 @@ public class GenericUDFParamUtils {
     converters[i] = converter;
     inputTypes[i] = inputType;
   }
+
+  public static BytesWritable getConstantBytesValue(ObjectInspector[] arguments, int i) {
+    Object constValue = ((ConstantObjectInspector) arguments[i]).getWritableConstantValue();
+    return (BytesWritable) constValue;
+  }
 }
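
The new helper is meant to pair with a ConstantObjectInspector check so a UDF can resolve a constant binary argument once at plan time rather than converting it on every row. A minimal sketch of that pattern, with invented names, assuming the same package as GenericUDFParamUtils:

import java.util.Arrays;

import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.io.BytesWritable;

// Sketch only: resolve a constant binary argument once in initialize().
class ConstantKeyFragment {
  static byte[] constantKeyOrNull(ObjectInspector[] arguments, int i) {
    if (!(arguments[i] instanceof ConstantObjectInspector)) {
      return null; // not a constant -- caller falls back to per-row conversion
    }
    BytesWritable keyWr = GenericUDFParamUtils.getConstantBytesValue(arguments, i);
    if (keyWr == null) {
      return null; // constant NULL key
    }
    return Arrays.copyOf(keyWr.getBytes(), keyWr.getLength()); // valid region only
  }
}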

http://git-wip-us.apache.org/repos/asf/hive/blob/f4acb44f/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAesDecrypt.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAesDecrypt.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAesDecrypt.java
new file mode 100644
index 0000000..729f2fc
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAesDecrypt.java
@@ -0,0 +1,233 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import static org.junit.Assert.assertEquals;
+
+import java.security.NoSuchAlgorithmException;
+
+import javax.crypto.Cipher;
+
+import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.codec.binary.Hex;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Text;
+import org.junit.Test;
+
+public class TestGenericUDFAesDecrypt {
+
+  @Test
+  public void testAesDec128ConstStr() throws HiveException {
+    GenericUDFAesDecrypt udf = new GenericUDFAesDecrypt();
+    ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
+    Text keyWr = new Text("1234567890123456");
+    ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory
+        .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, keyWr);
+    ObjectInspector[] arguments = { valueOI0, valueOI1 };
+
+    udf.initialize(arguments);
+
+    runAndVerifyStr("y6Ss+zCYObpCbgfWfyNWTw==", keyWr, "ABC", udf);
+    runAndVerifyStr("BQGHoM3lqYcsurCRq3PlUw==", keyWr, "", udf);
+    // null
+    runAndVerifyStr(null, keyWr, null, udf);
+  }
+
+  @Test
+  public void testAesDec256ConstStr() throws HiveException, NoSuchAlgorithmException {
+    int maxKeyLen = Cipher.getMaxAllowedKeyLength("AES");
+    // skip the test if Java Cryptography Extension (JCE) Unlimited Strength
+    // Jurisdiction Policy Files are not installed
+    if (maxKeyLen < 256) {
+      return;
+    }
+    GenericUDFAesDecrypt udf = new GenericUDFAesDecrypt();
+    ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
+    Text keyWr = new Text("1234567890123456" + "1234567890123456");
+    ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory
+        .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, keyWr);
+    ObjectInspector[] arguments = { valueOI0, valueOI1 };
+
+    udf.initialize(arguments);
+
+    runAndVerifyStr("nYfCuJeRd5eD60yXDw7WEA==", keyWr, "ABC", udf);
+    runAndVerifyStr("mVClVqZ6W4VF6b842FOgCA==", keyWr, "", udf);
+    // null
+    runAndVerifyStr(null, keyWr, null, udf);
+  }
+
+  @Test
+  public void testAesDec128Str() throws HiveException {
+    GenericUDFAesDecrypt udf = new GenericUDFAesDecrypt();
+    ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
+    ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+    ObjectInspector[] arguments = { valueOI0, valueOI1 };
+
+    udf.initialize(arguments);
+
+    Text keyWr = new Text("1234567890123456");
+    runAndVerifyStr("y6Ss+zCYObpCbgfWfyNWTw==", keyWr, "ABC", udf);
+    runAndVerifyStr("BQGHoM3lqYcsurCRq3PlUw==", keyWr, "", udf);
+    // null
+    runAndVerifyStr(null, keyWr, null, udf);
+  }
+
+  @Test
+  public void testAesDec128ConstBin() throws HiveException {
+    GenericUDFAesDecrypt udf = new GenericUDFAesDecrypt();
+    ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
+    BytesWritable keyWr = new BytesWritable("1234567890123456".getBytes());
+    ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory
+        .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.binaryTypeInfo, keyWr);
+    ObjectInspector[] arguments = { valueOI0, valueOI1 };
+
+    udf.initialize(arguments);
+
+    runAndVerifyBin("y6Ss+zCYObpCbgfWfyNWTw==", keyWr, "ABC", udf);
+    runAndVerifyBin("BQGHoM3lqYcsurCRq3PlUw==", keyWr, "", udf);
+    // null
+    runAndVerifyBin(null, keyWr, null, udf);
+  }
+
+  @Test
+  public void testAesDec128Bin() throws HiveException {
+    GenericUDFAesDecrypt udf = new GenericUDFAesDecrypt();
+    ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
+    ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
+    ObjectInspector[] arguments = { valueOI0, valueOI1 };
+
+    udf.initialize(arguments);
+
+    BytesWritable keyWr = new BytesWritable("1234567890123456".getBytes());
+    runAndVerifyBin("y6Ss+zCYObpCbgfWfyNWTw==", keyWr, "ABC", udf);
+    runAndVerifyBin("BQGHoM3lqYcsurCRq3PlUw==", keyWr, "", udf);
+    // null
+    runAndVerifyBin(null, keyWr, null, udf);
+  }
+
+  @Test
+  public void testAesDec192Bin() throws HiveException, NoSuchAlgorithmException {
+    int maxKeyLen = Cipher.getMaxAllowedKeyLength("AES");
+    // skip the test if Java Cryptography Extension (JCE) Unlimited Strength
+    // Jurisdiction Policy Files are not installed
+    if (maxKeyLen < 192) {
+      return;
+    }
+    GenericUDFAesDecrypt udf = new GenericUDFAesDecrypt();
+    ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
+    ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
+    ObjectInspector[] arguments = { valueOI0, valueOI1 };
+
+    udf.initialize(arguments);
+
+    BytesWritable keyWr = new BytesWritable(("1234567890123456" + "12345678").getBytes());
+    runAndVerifyBin("ucvvpP9r2/LfQ6BilQuFtA==", keyWr, "ABC", udf);
+    runAndVerifyBin("KqMT3cF6VwSISMaUVUB4Qw==", keyWr, "", udf);
+    // null
+    runAndVerifyBin(null, keyWr, null, udf);
+  }
+
+  @Test
+  public void testAesDecKeyNullConstStr() throws HiveException {
+    GenericUDFAesDecrypt udf = new GenericUDFAesDecrypt();
+    ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
+    Text keyWr = null;
+    ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory
+        .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, keyWr);
+    ObjectInspector[] arguments = { valueOI0, valueOI1 };
+
+    udf.initialize(arguments);
+
+    runAndVerifyStr("y6Ss+zCYObpCbgfWfyNWTw==", keyWr, null, udf);
+  }
+
+  @Test
+  public void testAesDecKeyNullStr() throws HiveException {
+    GenericUDFAesDecrypt udf = new GenericUDFAesDecrypt();
+    ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
+    ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+    ObjectInspector[] arguments = { valueOI0, valueOI1 };
+
+    udf.initialize(arguments);
+
+    Text keyWr = null;
+    runAndVerifyStr("y6Ss+zCYObpCbgfWfyNWTw==", keyWr, null, udf);
+  }
+
+  @Test
+  public void testAesDecKeyNullConstBin() throws HiveException {
+    GenericUDFAesDecrypt udf = new GenericUDFAesDecrypt();
+    ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
+    BytesWritable keyWr = null;
+    ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory
+        .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.binaryTypeInfo, keyWr);
+    ObjectInspector[] arguments = { valueOI0, valueOI1 };
+
+    udf.initialize(arguments);
+
+    runAndVerifyBin("y6Ss+zCYObpCbgfWfyNWTw==", keyWr, null, udf);
+  }
+
+  @Test
+  public void testAesDecKeyNullBin() throws HiveException {
+    GenericUDFAesDecrypt udf = new GenericUDFAesDecrypt();
+    ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
+    ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
+    ObjectInspector[] arguments = { valueOI0, valueOI1 };
+
+    udf.initialize(arguments);
+
+    BytesWritable keyWr = null;
+    runAndVerifyBin("y6Ss+zCYObpCbgfWfyNWTw==", keyWr, null, udf);
+  }
+
+  private void runAndVerifyStr(String strBase64, Text keyWr, String expResult, GenericUDFAesDecrypt udf)
+      throws HiveException {
+    DeferredObject valueObj0 = new DeferredJavaObject(
+        strBase64 != null ? new BytesWritable(Base64.decodeBase64(strBase64)) : null);
+    DeferredObject valueObj1 = new DeferredJavaObject(keyWr);
+    DeferredObject[] args = { valueObj0, valueObj1 };
+    BytesWritable output = (BytesWritable) udf.evaluate(args);
+    String expResultHex = expResult == null ? null : Hex.encodeHexString(expResult.getBytes());
+    assertEquals("aes_decrypt() test ", expResultHex, output != null ? copyBytesAndHex(output) : null);
+  }
+
+  private void runAndVerifyBin(String strBase64, BytesWritable keyWr, String expResult, GenericUDFAesDecrypt udf)
+      throws HiveException {
+    DeferredObject valueObj0 = new DeferredJavaObject(
+        strBase64 != null ? new BytesWritable(Base64.decodeBase64(strBase64)) : null);
+    DeferredObject valueObj1 = new DeferredJavaObject(keyWr);
+    DeferredObject[] args = { valueObj0, valueObj1 };
+    BytesWritable output = (BytesWritable) udf.evaluate(args);
+    String expResultHex = expResult == null ? null : Hex.encodeHexString(expResult.getBytes());
+    assertEquals("aes_decrypt() test ", expResultHex, output != null ? copyBytesAndHex(output) : null);
+  }
+
+  private String copyBytesAndHex(BytesWritable bw) {
+    int size = bw.getLength();
+    byte[] bytes = new byte[size];
+    System.arraycopy(bw.getBytes(), 0, bytes, 0, size);
+    return Hex.encodeHexString(bytes);
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/f4acb44f/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAesEncrypt.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAesEncrypt.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAesEncrypt.java
new file mode 100644
index 0000000..5e5914d
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAesEncrypt.java
@@ -0,0 +1,228 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import static org.junit.Assert.assertEquals;
+
+import java.security.NoSuchAlgorithmException;
+
+import javax.crypto.Cipher;
+
+import org.apache.commons.codec.binary.Base64;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Text;
+import org.junit.Test;
+
+public class TestGenericUDFAesEncrypt {
+
+  @Test
+  public void testAesEnc128ConstStr() throws HiveException {
+    GenericUDFAesEncrypt udf = new GenericUDFAesEncrypt();
+    ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+    Text keyWr = new Text("1234567890123456");
+    ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory
+        .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, keyWr);
+    ObjectInspector[] arguments = { valueOI0, valueOI1 };
+
+    udf.initialize(arguments);
+
+    runAndVerifyStr("ABC", keyWr, "y6Ss+zCYObpCbgfWfyNWTw==", udf);
+    runAndVerifyStr("", keyWr, "BQGHoM3lqYcsurCRq3PlUw==", udf);
+    // null
+    runAndVerifyStr(null, keyWr, null, udf);
+  }
+
+  @Test
+  public void testAesEnc256ConstStr() throws HiveException, NoSuchAlgorithmException {
+    int maxKeyLen = Cipher.getMaxAllowedKeyLength("AES");
+    // skip the test if Java Cryptography Extension (JCE) Unlimited Strength
+    // Jurisdiction Policy Files are not installed
+    if (maxKeyLen < 256) {
+      return;
+    }
+    GenericUDFAesEncrypt udf = new GenericUDFAesEncrypt();
+    ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+    Text keyWr = new Text("1234567890123456" + "1234567890123456");
+    ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory
+        .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, keyWr);
+    ObjectInspector[] arguments = { valueOI0, valueOI1 };
+
+    udf.initialize(arguments);
+
+    runAndVerifyStr("ABC", keyWr, "nYfCuJeRd5eD60yXDw7WEA==", udf);
+    runAndVerifyStr("", keyWr, "mVClVqZ6W4VF6b842FOgCA==", udf);
+    // null
+    runAndVerifyStr(null, keyWr, null, udf);
+  }
+
+  @Test
+  public void testAesEnc128Str() throws HiveException {
+    GenericUDFAesEncrypt udf = new GenericUDFAesEncrypt();
+    ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+    ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+    ObjectInspector[] arguments = { valueOI0, valueOI1 };
+
+    udf.initialize(arguments);
+
+    Text keyWr = new Text("1234567890123456");
+    runAndVerifyStr("ABC", keyWr, "y6Ss+zCYObpCbgfWfyNWTw==", udf);
+    runAndVerifyStr("", keyWr, "BQGHoM3lqYcsurCRq3PlUw==", udf);
+    // null
+    runAndVerifyStr(null, keyWr, null, udf);
+  }
+
+  @Test
+  public void testAesEnc128ConstBin() throws HiveException {
+    GenericUDFAesEncrypt udf = new GenericUDFAesEncrypt();
+    ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
+    BytesWritable keyWr = new BytesWritable("1234567890123456".getBytes());
+    ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory
+        .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.binaryTypeInfo, keyWr);
+    ObjectInspector[] arguments = { valueOI0, valueOI1 };
+
+    udf.initialize(arguments);
+
+    runAndVerifyBin(new byte[] { 65, 66, 67 }, keyWr, "y6Ss+zCYObpCbgfWfyNWTw==", udf);
+    runAndVerifyBin(new byte[0], keyWr, "BQGHoM3lqYcsurCRq3PlUw==", udf);
+    // null
+    runAndVerifyBin(null, keyWr, null, udf);
+  }
+
+  @Test
+  public void testAesEnc128Bin() throws HiveException {
+    GenericUDFAesEncrypt udf = new GenericUDFAesEncrypt();
+    ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
+    ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
+    ObjectInspector[] arguments = { valueOI0, valueOI1 };
+
+    udf.initialize(arguments);
+
+    BytesWritable keyWr = new BytesWritable("1234567890123456".getBytes());
+    runAndVerifyBin(new byte[] { 65, 66, 67 }, keyWr, "y6Ss+zCYObpCbgfWfyNWTw==", udf);
+    runAndVerifyBin(new byte[0], keyWr, "BQGHoM3lqYcsurCRq3PlUw==", udf);
+    // null
+    runAndVerifyBin(null, keyWr, null, udf);
+  }
+
+  @Test
+  public void testAesEnc192Bin() throws HiveException, NoSuchAlgorithmException {
+    int maxKeyLen = Cipher.getMaxAllowedKeyLength("AES");
+    // skip the test if Java Cryptography Extension (JCE) Unlimited Strength
+    // Jurisdiction Policy Files are not installed
+    if (maxKeyLen < 192) {
+      return;
+    }
+    GenericUDFAesEncrypt udf = new GenericUDFAesEncrypt();
+    ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
+    ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
+    ObjectInspector[] arguments = { valueOI0, valueOI1 };
+
+    udf.initialize(arguments);
+
+    BytesWritable keyWr = new BytesWritable(("1234567890123456" + "12345678").getBytes());
+    runAndVerifyBin(new byte[] { 65, 66, 67 }, keyWr, "ucvvpP9r2/LfQ6BilQuFtA==", udf);
+    runAndVerifyBin(new byte[0], keyWr, "KqMT3cF6VwSISMaUVUB4Qw==", udf);
+    // null
+    runAndVerifyBin(null, keyWr, null, udf);
+  }
+
+  @Test
+  public void testAesEncKeyNullConstStr() throws HiveException {
+    GenericUDFAesEncrypt udf = new GenericUDFAesEncrypt();
+    ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+    Text keyWr = null;
+    ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory
+        .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, keyWr);
+    ObjectInspector[] arguments = { valueOI0, valueOI1 };
+
+    udf.initialize(arguments);
+
+    runAndVerifyStr("ABC", keyWr, null, udf);
+  }
+
+  @Test
+  public void testAesEncKeyNullStr() throws HiveException {
+    GenericUDFAesEncrypt udf = new GenericUDFAesEncrypt();
+    ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+    ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+    ObjectInspector[] arguments = { valueOI0, valueOI1 };
+
+    udf.initialize(arguments);
+
+    Text keyWr = null;
+    runAndVerifyStr("ABC", keyWr, null, udf);
+  }
+
+  @Test
+  public void testAesEncKeyNullConstBin() throws HiveException {
+    GenericUDFAesEncrypt udf = new GenericUDFAesEncrypt();
+    ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
+    BytesWritable keyWr = null;
+    ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory
+        .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.binaryTypeInfo, keyWr);
+    ObjectInspector[] arguments = { valueOI0, valueOI1 };
+
+    udf.initialize(arguments);
+
+    runAndVerifyBin(new byte[] { 65, 66, 67 }, keyWr, null, udf);
+  }
+
+  @Test
+  public void testAesEncKeyNullBin() throws HiveException {
+    GenericUDFAesEncrypt udf = new GenericUDFAesEncrypt();
+    ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
+    ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
+    ObjectInspector[] arguments = { valueOI0, valueOI1 };
+
+    udf.initialize(arguments);
+
+    BytesWritable keyWr = null;
+    runAndVerifyBin(new byte[] { 65, 66, 67 }, keyWr, null, udf);
+  }
+
+  private void runAndVerifyStr(String str, Text keyWr, String expResultBase64, GenericUDFAesEncrypt udf)
+      throws HiveException {
+    DeferredObject valueObj0 = new DeferredJavaObject(str != null ? new Text(str) : null);
+    DeferredObject valueObj1 = new DeferredJavaObject(keyWr);
+    DeferredObject[] args = { valueObj0, valueObj1 };
+    BytesWritable output = (BytesWritable) udf.evaluate(args);
+    assertEquals("aes_encrypt() test ", expResultBase64, output != null ? copyBytesAndBase64(output) : null);
+  }
+
+  private void runAndVerifyBin(byte[] b, BytesWritable keyWr, String expResultBase64, GenericUDFAesEncrypt udf)
+      throws HiveException {
+    DeferredObject valueObj0 = new DeferredJavaObject(b != null ? new BytesWritable(b) : null);
+    DeferredObject valueObj1 = new DeferredJavaObject(keyWr);
+    DeferredObject[] args = { valueObj0, valueObj1 };
+    BytesWritable output = (BytesWritable) udf.evaluate(args);
+    assertEquals("aes_encrypt() test ", expResultBase64, output != null ? copyBytesAndBase64(output) : null);
+  }
+
+  private String copyBytesAndBase64(BytesWritable bw) {
+    int size = bw.getLength();
+    byte[] bytes = new byte[size];
+    System.arraycopy(bw.getBytes(), 0, bytes, 0, size);
+    return new String(Base64.encodeBase64(bytes));
+  }
+}
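
One detail both test classes share: the copyBytesAndHex/copyBytesAndBase64 helpers exist because BytesWritable.getBytes() returns the whole backing buffer, which can be longer than the valid region reported by getLength(), so encoding it directly could pick up stale padding bytes. An equivalent one-liner (hypothetical helper, for illustration):

import java.util.Arrays;

import org.apache.hadoop.io.BytesWritable;

// Hypothetical equivalent of the copy helpers above: truncate the
// backing buffer to the valid length before hex/base64 encoding.
final class WritableBytes {
  static byte[] valid(BytesWritable bw) {
    return Arrays.copyOf(bw.getBytes(), bw.getLength());
  }
}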

http://git-wip-us.apache.org/repos/asf/hive/blob/f4acb44f/ql/src/test/queries/clientpositive/udf_aes_decrypt.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/udf_aes_decrypt.q b/ql/src/test/queries/clientpositive/udf_aes_decrypt.q
new file mode 100644
index 0000000..36a0cf9
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/udf_aes_decrypt.q
@@ -0,0 +1,21 @@
+DESCRIBE FUNCTION aes_decrypt;
+DESC FUNCTION EXTENDED aes_decrypt;
+
+explain select aes_decrypt(unbase64("y6Ss+zCYObpCbgfWfyNWTw=="), '1234567890123456');
+
+select
+aes_decrypt(unbase64("y6Ss+zCYObpCbgfWfyNWTw=="), '1234567890123456'),
+aes_decrypt(unbase64("y6Ss+zCYObpCbgfWfyNWTw=="), binary('1234567890123456')),
+aes_decrypt(unbase64("BQGHoM3lqYcsurCRq3PlUw=="), '1234567890123456') = binary(''),
+aes_decrypt(unbase64("BQGHoM3lqYcsurCRq3PlUw=="), binary('1234567890123456')) = binary(''),
+aes_decrypt(cast(null as binary), '1234567890123456'),
+aes_decrypt(cast(null as binary), binary('1234567890123456'));
+
+--bad key
+select
+aes_decrypt(unbase64("y6Ss+zCYObpCbgfWfyNWTw=="), '12345678901234567'),
+aes_decrypt(unbase64("y6Ss+zCYObpCbgfWfyNWTw=="), binary('123456789012345')),
+aes_decrypt(unbase64("y6Ss+zCYObpCbgfWfyNWTw=="), ''),
+aes_decrypt(unbase64("y6Ss+zCYObpCbgfWfyNWTw=="), binary('')),
+aes_decrypt(unbase64("y6Ss+zCYObpCbgfWfyNWTw=="), cast(null as string)),
+aes_decrypt(unbase64("y6Ss+zCYObpCbgfWfyNWTw=="), cast(null as binary));
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/f4acb44f/ql/src/test/queries/clientpositive/udf_aes_encrypt.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/udf_aes_encrypt.q b/ql/src/test/queries/clientpositive/udf_aes_encrypt.q
new file mode 100644
index 0000000..2f03943
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/udf_aes_encrypt.q
@@ -0,0 +1,21 @@
+DESCRIBE FUNCTION aes_encrypt;
+DESC FUNCTION EXTENDED aes_encrypt;
+
+explain select aes_encrypt('ABC', '1234567890123456');
+
+select
+base64(aes_encrypt('ABC', '1234567890123456')),
+base64(aes_encrypt('', '1234567890123456')),
+base64(aes_encrypt(binary('ABC'), binary('1234567890123456'))),
+base64(aes_encrypt(binary(''), binary('1234567890123456'))),
+aes_encrypt(cast(null as string), '1234567890123456'),
+aes_encrypt(cast(null as binary), binary('1234567890123456'));
+
+--bad key
+select
+aes_encrypt('ABC', '12345678901234567'),
+aes_encrypt(binary('ABC'), binary('123456789012345')),
+aes_encrypt('ABC', ''),
+aes_encrypt(binary('ABC'), binary('')),
+aes_encrypt('ABC', cast(null as string)),
+aes_encrypt(binary('ABC'), cast(null as binary));
\ No newline at end of file
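
The "--bad key" blocks in both q-files all reduce to the key-length gate in GenericUDFAesBase.getSecretKey(): any key that is not exactly 16, 24 or 32 bytes yields a null SecretKey, and the UDF then returns NULL. Restated as a standalone sketch (invented class name, illustration only):

import javax.crypto.SecretKey;
import javax.crypto.spec.SecretKeySpec;

// Illustration of the key-length gate; not part of the commit.
final class KeyGate {
  static SecretKey keyOrNull(byte[] key) {
    int len = key.length;
    return (len == 16 || len == 24 || len == 32)
        ? new SecretKeySpec(key, "AES")
        : null;
  }

  public static void main(String[] args) {
    System.out.println(keyOrNull("1234567890123456".getBytes()) != null);  // true: AES-128
    System.out.println(keyOrNull("12345678901234567".getBytes()) != null); // false: 17 bytes -> NULL result
  }
}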

http://git-wip-us.apache.org/repos/asf/hive/blob/f4acb44f/ql/src/test/results/clientpositive/show_functions.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/show_functions.q.out b/ql/src/test/results/clientpositive/show_functions.q.out
index 540079b..fbcd86a 100644
--- a/ql/src/test/results/clientpositive/show_functions.q.out
+++ b/ql/src/test/results/clientpositive/show_functions.q.out
@@ -22,6 +22,8 @@ POSTHOOK: type: SHOWFUNCTIONS
 abs
 acos
 add_months
+aes_decrypt
+aes_encrypt
 and
 array
 array_contains

http://git-wip-us.apache.org/repos/asf/hive/blob/f4acb44f/ql/src/test/results/clientpositive/udf_aes_decrypt.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/udf_aes_decrypt.q.out b/ql/src/test/results/clientpositive/udf_aes_decrypt.q.out
new file mode 100644
index 0000000..83780a9
--- /dev/null
+++ b/ql/src/test/results/clientpositive/udf_aes_decrypt.q.out
@@ -0,0 +1,79 @@
+PREHOOK: query: DESCRIBE FUNCTION aes_decrypt
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION aes_decrypt
+POSTHOOK: type: DESCFUNCTION
+aes_decrypt(input binary, key string/binary) - Decrypt input using AES.
+PREHOOK: query: DESC FUNCTION EXTENDED aes_decrypt
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESC FUNCTION EXTENDED aes_decrypt
+POSTHOOK: type: DESCFUNCTION
+aes_decrypt(input binary, key string/binary) - Decrypt input using AES.
+AES (Advanced Encryption Standard) algorithm. Key lengths of 128, 192 or 256 bits can be used. 192- and 256-bit keys can be used if Java Cryptography Extension (JCE) Unlimited Strength Jurisdiction Policy Files are installed. If either argument is NULL or the key length is not one of the permitted values, the return value is NULL.
+Example: > SELECT aes_decrypt(unbase64('y6Ss+zCYObpCbgfWfyNWTw=='), '1234567890123456');
+ 'ABC'
+PREHOOK: query: explain select aes_decrypt(unbase64("y6Ss+zCYObpCbgfWfyNWTw=="), '1234567890123456')
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select aes_decrypt(unbase64("y6Ss+zCYObpCbgfWfyNWTw=="), '1234567890123456')
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: _dummy_table
+          Row Limit Per Split: 1
+          Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
+          Select Operator
+            expressions: 414243 (type: binary)
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+            ListSink
+
+PREHOOK: query: select
+aes_decrypt(unbase64("y6Ss+zCYObpCbgfWfyNWTw=="), '1234567890123456'),
+aes_decrypt(unbase64("y6Ss+zCYObpCbgfWfyNWTw=="), binary('1234567890123456')),
+aes_decrypt(unbase64("BQGHoM3lqYcsurCRq3PlUw=="), '1234567890123456') = binary(''),
+aes_decrypt(unbase64("BQGHoM3lqYcsurCRq3PlUw=="), binary('1234567890123456')) = binary(''),
+aes_decrypt(cast(null as binary), '1234567890123456'),
+aes_decrypt(cast(null as binary), binary('1234567890123456'))
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select
+aes_decrypt(unbase64("y6Ss+zCYObpCbgfWfyNWTw=="), '1234567890123456'),
+aes_decrypt(unbase64("y6Ss+zCYObpCbgfWfyNWTw=="), binary('1234567890123456')),
+aes_decrypt(unbase64("BQGHoM3lqYcsurCRq3PlUw=="), '1234567890123456') = binary(''),
+aes_decrypt(unbase64("BQGHoM3lqYcsurCRq3PlUw=="), binary('1234567890123456')) = binary(''),
+aes_decrypt(cast(null as binary), '1234567890123456'),
+aes_decrypt(cast(null as binary), binary('1234567890123456'))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+ABC	ABC	true	true	NULL	NULL
+PREHOOK: query: --bad key
+select
+aes_decrypt(unbase64("y6Ss+zCYObpCbgfWfyNWTw=="), '12345678901234567'),
+aes_decrypt(unbase64("y6Ss+zCYObpCbgfWfyNWTw=="), binary('123456789012345')),
+aes_decrypt(unbase64("y6Ss+zCYObpCbgfWfyNWTw=="), ''),
+aes_decrypt(unbase64("y6Ss+zCYObpCbgfWfyNWTw=="), binary('')),
+aes_decrypt(unbase64("y6Ss+zCYObpCbgfWfyNWTw=="), cast(null as string)),
+aes_decrypt(unbase64("y6Ss+zCYObpCbgfWfyNWTw=="), cast(null as binary))
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: --bad key
+select
+aes_decrypt(unbase64("y6Ss+zCYObpCbgfWfyNWTw=="), '12345678901234567'),
+aes_decrypt(unbase64("y6Ss+zCYObpCbgfWfyNWTw=="), binary('123456789012345')),
+aes_decrypt(unbase64("y6Ss+zCYObpCbgfWfyNWTw=="), ''),
+aes_decrypt(unbase64("y6Ss+zCYObpCbgfWfyNWTw=="), binary('')),
+aes_decrypt(unbase64("y6Ss+zCYObpCbgfWfyNWTw=="), cast(null as string)),
+aes_decrypt(unbase64("y6Ss+zCYObpCbgfWfyNWTw=="), cast(null as binary))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+NULL	NULL	NULL	NULL	NULL	NULL

http://git-wip-us.apache.org/repos/asf/hive/blob/f4acb44f/ql/src/test/results/clientpositive/udf_aes_encrypt.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/udf_aes_encrypt.q.out b/ql/src/test/results/clientpositive/udf_aes_encrypt.q.out
new file mode 100644
index 0000000..9e356b2
--- /dev/null
+++ b/ql/src/test/results/clientpositive/udf_aes_encrypt.q.out
@@ -0,0 +1,79 @@
+PREHOOK: query: DESCRIBE FUNCTION aes_encrypt
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION aes_encrypt
+POSTHOOK: type: DESCFUNCTION
+aes_encrypt(input string/binary, key string/binary) - Encrypt input using AES.
+PREHOOK: query: DESC FUNCTION EXTENDED aes_encrypt
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESC FUNCTION EXTENDED aes_encrypt
+POSTHOOK: type: DESCFUNCTION
+aes_encrypt(input string/binary, key string/binary) - Encrypt input using AES.
+AES (Advanced Encryption Standard) algorithm. Key lengths of 128, 192 or 256 bits can be used. 192- and 256-bit keys can be used if Java Cryptography Extension (JCE) Unlimited Strength Jurisdiction Policy Files are installed. If either argument is NULL or the key length is not one of the permitted values, the return value is NULL.
+Example: > SELECT base64(aes_encrypt('ABC', '1234567890123456'));
+ 'y6Ss+zCYObpCbgfWfyNWTw=='
+PREHOOK: query: explain select aes_encrypt('ABC', '1234567890123456')
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select aes_encrypt('ABC', '1234567890123456')
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: _dummy_table
+          Row Limit Per Split: 1
+          Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
+          Select Operator
+            expressions: CBA4ACFB309839BA426E07D67F23564F (type: binary)
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
+            ListSink
+
+PREHOOK: query: select
+base64(aes_encrypt('ABC', '1234567890123456')),
+base64(aes_encrypt('', '1234567890123456')),
+base64(aes_encrypt(binary('ABC'), binary('1234567890123456'))),
+base64(aes_encrypt(binary(''), binary('1234567890123456'))),
+aes_encrypt(cast(null as string), '1234567890123456'),
+aes_encrypt(cast(null as binary), binary('1234567890123456'))
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select
+base64(aes_encrypt('ABC', '1234567890123456')),
+base64(aes_encrypt('', '1234567890123456')),
+base64(aes_encrypt(binary('ABC'), binary('1234567890123456'))),
+base64(aes_encrypt(binary(''), binary('1234567890123456'))),
+aes_encrypt(cast(null as string), '1234567890123456'),
+aes_encrypt(cast(null as binary), binary('1234567890123456'))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+y6Ss+zCYObpCbgfWfyNWTw==	BQGHoM3lqYcsurCRq3PlUw==	y6Ss+zCYObpCbgfWfyNWTw==	BQGHoM3lqYcsurCRq3PlUw==	NULL	NULL
+PREHOOK: query: --bad key
+select
+aes_encrypt('ABC', '12345678901234567'),
+aes_encrypt(binary('ABC'), binary('123456789012345')),
+aes_encrypt('ABC', ''),
+aes_encrypt(binary('ABC'), binary('')),
+aes_encrypt('ABC', cast(null as string)),
+aes_encrypt(binary('ABC'), cast(null as binary))
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: --bad key
+select
+aes_encrypt('ABC', '12345678901234567'),
+aes_encrypt(binary('ABC'), binary('123456789012345')),
+aes_encrypt('ABC', ''),
+aes_encrypt(binary('ABC'), binary('')),
+aes_encrypt('ABC', cast(null as string)),
+aes_encrypt(binary('ABC'), cast(null as binary))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+NULL	NULL	NULL	NULL	NULL	NULL


[25/50] [abbrv] hive git commit: HIVE-11704. Create errata file.

Posted by xu...@apache.org.
HIVE-11704. Create errata file.


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d5977659
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d5977659
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d5977659

Branch: refs/heads/beeline-cli
Commit: d597765937ccf57e32ff4a79abb60ec69dbe84f6
Parents: 9763c9d
Author: Owen O'Malley <om...@apache.org>
Authored: Mon Aug 31 16:33:20 2015 -0700
Committer: Owen O'Malley <om...@apache.org>
Committed: Tue Sep 1 09:18:26 2015 -0700

----------------------------------------------------------------------
 errata.txt | 9 +++++++++
 1 file changed, 9 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/d5977659/errata.txt
----------------------------------------------------------------------
diff --git a/errata.txt b/errata.txt
new file mode 100644
index 0000000..70992ad
--- /dev/null
+++ b/errata.txt
@@ -0,0 +1,9 @@
+Commits with the wrong JIRA referenced:
+
+git commit                               branch     jira       url
+5a576b6fbf1680ab4dd8f275cad484a2614ef2c1 master     HIVE-10391 https://issues.apache.org/jira/browse/HIVE-10391
+582f4e1bc39b9605d11f762480b29561a44688ae llap       HIVE-10217 https://issues.apache.org/jira/browse/HIVE-10217
+8981f365bf0cf921bc0ac2ff8914df44ca2f7de7 master     HIVE-10500 https://issues.apache.org/jira/browse/HIVE-10500
+09100831adff7589ee48e735a4beac6ebb25cb3e master     HIVE-10885 https://issues.apache.org/jira/browse/HIVE-10885
+f3ab5fda6af57afff31c29ad048d906fd095d5fb branch-1.2 HIVE-10885 https://issues.apache.org/jira/browse/HIVE-10885
+dcf21cd6fa98fb5db01ef661bb3b9f94d9ca2d15 master     HIVE-10021 https://issues.apache.org/jira/browse/HIVE-10021


[20/50] [abbrv] hive git commit: HIVE-11629: CBO: Calcite Operator To Hive Operator (Calcite Return Path) : fix the filter expressions for full outer join and right outer join (Pengcheng Xiong, reviewed by Jesus Camacho Rodriguez)

Posted by xu...@apache.org.
HIVE-11629: CBO: Calcite Operator To Hive Operator (Calcite Return Path) : fix the filter expressions for full outer join and right outer join (Pengcheng Xiong, reviewed by Jesus Camacho Rodriguez)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5aa16ecb
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5aa16ecb
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5aa16ecb

Branch: refs/heads/beeline-cli
Commit: 5aa16ecb3aadbeb1770ae08f1f1d476503cbbb6e
Parents: dcf21cd
Author: Pengcheng Xiong <px...@apache.org>
Authored: Sat Aug 29 23:48:09 2015 -0700
Committer: Pengcheng Xiong <px...@apache.org>
Committed: Sat Aug 29 23:48:09 2015 -0700

----------------------------------------------------------------------
 .../calcite/translator/HiveOpConverter.java     |  22 +-
 .../clientpositive/cbo_rp_outer_join_ppr.q      |  40 +
 .../cbo_rp_outer_join_ppr.q.java1.7.out         | 855 +++++++++++++++++++
 3 files changed, 914 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/5aa16ecb/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
index 1931880..9391952 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
@@ -995,7 +995,7 @@ public class HiveOpConverter {
    * to be expressed that way.
    */
   private static int updateExprNode(ExprNodeDesc expr, final Map<String, Byte> reversedExprs,
-          final Map<String, ExprNodeDesc> colExprMap) {
+      final Map<String, ExprNodeDesc> colExprMap) throws SemanticException {
     int inputPos = -1;
     if (expr instanceof ExprNodeGenericFuncDesc) {
       ExprNodeGenericFuncDesc func = (ExprNodeGenericFuncDesc) expr;
@@ -1003,10 +1003,26 @@ public class HiveOpConverter {
       for (ExprNodeDesc functionChild : func.getChildren()) {
         if (functionChild instanceof ExprNodeColumnDesc) {
           String colRef = functionChild.getExprString();
-          inputPos = reversedExprs.get(colRef);
+          int pos = reversedExprs.get(colRef);
+          if (pos != -1) {
+            if (inputPos == -1) {
+              inputPos = pos;
+            } else if (inputPos != pos) {
+              throw new SemanticException(
+                  "UpdateExprNode is expecting only one position for join operator convert. But there are more than one.");
+            }
+          }
           newChildren.add(colExprMap.get(colRef));
         } else {
-          inputPos = updateExprNode(functionChild, reversedExprs, colExprMap);
+          int pos = updateExprNode(functionChild, reversedExprs, colExprMap);
+          if (pos != -1) {
+            if (inputPos == -1) {
+              inputPos = pos;
+            } else if (inputPos != pos) {
+              throw new SemanticException(
+                  "UpdateExprNode is expecting only one position for join operator convert. But there are more than one.");
+            }
+          }
           newChildren.add(functionChild);
         }
       }
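
In short, the patch replaces a blind overwrite of inputPos with a merge: a child that references no join input leaves the position untouched, the first real reference pins it, and a second, different input is rejected with a SemanticException. The rule applied at both call sites, sketched as a hypothetical standalone helper:

import org.apache.hadoop.hive.ql.parse.SemanticException;

// Hypothetical restatement of the merge rule in updateExprNode();
// -1 means "no join input referenced yet".
final class InputPosMerge {
  static int merge(int current, int pos) throws SemanticException {
    if (pos == -1) {
      return current;              // child touched no join input
    }
    if (current == -1 || current == pos) {
      return pos;                  // first input seen, or the same one again
    }
    throw new SemanticException(
        "Join filter expression references more than one input");
  }
}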

http://git-wip-us.apache.org/repos/asf/hive/blob/5aa16ecb/ql/src/test/queries/clientpositive/cbo_rp_outer_join_ppr.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/cbo_rp_outer_join_ppr.q b/ql/src/test/queries/clientpositive/cbo_rp_outer_join_ppr.q
new file mode 100644
index 0000000..8daf718
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/cbo_rp_outer_join_ppr.q
@@ -0,0 +1,40 @@
+set hive.cbo.enable=true;
+set hive.cbo.returnpath.hiveop=true;
+set hive.optimize.ppd=true;
+
+-- SORT_QUERY_RESULTS
+-- JAVA_VERSION_SPECIFIC_OUTPUT
+
+EXPLAIN EXTENDED
+ FROM 
+  src a
+ FULL OUTER JOIN 
+  srcpart b 
+ ON (a.key = b.key AND b.ds = '2008-04-08')
+ SELECT a.key, a.value, b.key, b.value
+ WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25;
+
+ FROM 
+  src a
+ FULL OUTER JOIN 
+  srcpart b 
+ ON (a.key = b.key AND b.ds = '2008-04-08')
+ SELECT a.key, a.value, b.key, b.value
+ WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25;
+
+EXPLAIN EXTENDED
+ FROM 
+  src a
+ FULL OUTER JOIN 
+  srcpart b 
+ ON (a.key = b.key)
+ SELECT a.key, a.value, b.key, b.value
+ WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08';
+
+ FROM 
+  src a
+ FULL OUTER JOIN 
+  srcpart b 
+ ON (a.key = b.key)
+ SELECT a.key, a.value, b.key, b.value
+ WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08';

http://git-wip-us.apache.org/repos/asf/hive/blob/5aa16ecb/ql/src/test/results/clientpositive/cbo_rp_outer_join_ppr.q.java1.7.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/cbo_rp_outer_join_ppr.q.java1.7.out b/ql/src/test/results/clientpositive/cbo_rp_outer_join_ppr.q.java1.7.out
new file mode 100644
index 0000000..c19b47a
--- /dev/null
+++ b/ql/src/test/results/clientpositive/cbo_rp_outer_join_ppr.q.java1.7.out
@@ -0,0 +1,855 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+-- JAVA_VERSION_SPECIFIC_OUTPUT
+
+EXPLAIN EXTENDED
+ FROM 
+  src a
+ FULL OUTER JOIN 
+  srcpart b 
+ ON (a.key = b.key AND b.ds = '2008-04-08')
+ SELECT a.key, a.value, b.key, b.value
+ WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25
+PREHOOK: type: QUERY
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+-- JAVA_VERSION_SPECIFIC_OUTPUT
+
+EXPLAIN EXTENDED
+ FROM 
+  src a
+ FULL OUTER JOIN 
+  srcpart b 
+ ON (a.key = b.key AND b.ds = '2008-04-08')
+ SELECT a.key, a.value, b.key, b.value
+ WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  
+TOK_QUERY
+   TOK_FROM
+      TOK_FULLOUTERJOIN
+         TOK_TABREF
+            TOK_TABNAME
+               src
+            a
+         TOK_TABREF
+            TOK_TABNAME
+               srcpart
+            b
+         AND
+            =
+               .
+                  TOK_TABLE_OR_COL
+                     a
+                  key
+               .
+                  TOK_TABLE_OR_COL
+                     b
+                  key
+            =
+               .
+                  TOK_TABLE_OR_COL
+                     b
+                  ds
+               '2008-04-08'
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            .
+               TOK_TABLE_OR_COL
+                  a
+               key
+         TOK_SELEXPR
+            .
+               TOK_TABLE_OR_COL
+                  a
+               value
+         TOK_SELEXPR
+            .
+               TOK_TABLE_OR_COL
+                  b
+               key
+         TOK_SELEXPR
+            .
+               TOK_TABLE_OR_COL
+                  b
+               value
+      TOK_WHERE
+         AND
+            AND
+               AND
+                  >
+                     .
+                        TOK_TABLE_OR_COL
+                           a
+                        key
+                     10
+                  <
+                     .
+                        TOK_TABLE_OR_COL
+                           a
+                        key
+                     20
+               >
+                  .
+                     TOK_TABLE_OR_COL
+                        b
+                     key
+                  15
+            <
+               .
+                  TOK_TABLE_OR_COL
+                     b
+                  key
+               25
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: a
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: key (type: string)
+                sort order: +
+                Map-reduce partition columns: key (type: string)
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                tag: 0
+                value expressions: value (type: string)
+                auto parallelism: false
+          TableScan
+            alias: b
+            Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Select Operator
+              expressions: key (type: string), value (type: string), ds (type: string)
+              outputColumnNames: key, value, ds
+              Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: key (type: string)
+                sort order: +
+                Map-reduce partition columns: key (type: string)
+                Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+                tag: 1
+                value expressions: value (type: string), ds (type: string)
+                auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: src
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 'default','default'
+              columns.types string:string
+#### A masked pattern was here ####
+              name default.src
+              numFiles 1
+              numRows 500
+              rawDataSize 5312
+              serialization.ddl struct src { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 5812
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                COLUMN_STATS_ACCURATE true
+                bucket_count -1
+                columns key,value
+                columns.comments 'default','default'
+                columns.types string:string
+#### A masked pattern was here ####
+                name default.src
+                numFiles 1
+                numRows 500
+                rawDataSize 5312
+                serialization.ddl struct src { string key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 5812
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.src
+            name: default.src
+#### A masked pattern was here ####
+          Partition
+            base file name: hr=11
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2008-04-08
+              hr 11
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 'default','default'
+              columns.types string:string
+#### A masked pattern was here ####
+              name default.srcpart
+              numFiles 1
+              numRows 500
+              partition_columns ds/hr
+              partition_columns.types string:string
+              rawDataSize 5312
+              serialization.ddl struct srcpart { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 5812
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 'default','default'
+                columns.types string:string
+#### A masked pattern was here ####
+                name default.srcpart
+                partition_columns ds/hr
+                partition_columns.types string:string
+                serialization.ddl struct srcpart { string key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.srcpart
+            name: default.srcpart
+#### A masked pattern was here ####
+          Partition
+            base file name: hr=12
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2008-04-08
+              hr 12
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 'default','default'
+              columns.types string:string
+#### A masked pattern was here ####
+              name default.srcpart
+              numFiles 1
+              numRows 500
+              partition_columns ds/hr
+              partition_columns.types string:string
+              rawDataSize 5312
+              serialization.ddl struct srcpart { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 5812
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 'default','default'
+                columns.types string:string
+#### A masked pattern was here ####
+                name default.srcpart
+                partition_columns ds/hr
+                partition_columns.types string:string
+                serialization.ddl struct srcpart { string key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.srcpart
+            name: default.srcpart
+#### A masked pattern was here ####
+          Partition
+            base file name: hr=11
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2008-04-09
+              hr 11
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 'default','default'
+              columns.types string:string
+#### A masked pattern was here ####
+              name default.srcpart
+              numFiles 1
+              numRows 500
+              partition_columns ds/hr
+              partition_columns.types string:string
+              rawDataSize 5312
+              serialization.ddl struct srcpart { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 5812
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 'default','default'
+                columns.types string:string
+#### A masked pattern was here ####
+                name default.srcpart
+                partition_columns ds/hr
+                partition_columns.types string:string
+                serialization.ddl struct srcpart { string key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.srcpart
+            name: default.srcpart
+#### A masked pattern was here ####
+          Partition
+            base file name: hr=12
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2008-04-09
+              hr 12
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 'default','default'
+              columns.types string:string
+#### A masked pattern was here ####
+              name default.srcpart
+              numFiles 1
+              numRows 500
+              partition_columns ds/hr
+              partition_columns.types string:string
+              rawDataSize 5312
+              serialization.ddl struct srcpart { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 5812
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 'default','default'
+                columns.types string:string
+#### A masked pattern was here ####
+                name default.srcpart
+                partition_columns ds/hr
+                partition_columns.types string:string
+                serialization.ddl struct srcpart { string key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.srcpart
+            name: default.srcpart
+      Truncated Path -> Alias:
+        /src [a]
+        /srcpart/ds=2008-04-08/hr=11 [b]
+        /srcpart/ds=2008-04-08/hr=12 [b]
+        /srcpart/ds=2008-04-09/hr=11 [b]
+        /srcpart/ds=2008-04-09/hr=12 [b]
+      Needs Tagging: true
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Outer Join 0 to 1
+          filter mappings:
+            1 [0, 1]
+          filter predicates:
+            0 
+            1 {(VALUE.ds = '2008-04-08')}
+          keys:
+            0 key (type: string)
+            1 key (type: string)
+          outputColumnNames: key, value, key0, value0
+          Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+          Filter Operator
+            isSamplingPred: false
+            predicate: ((((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) and (UDFToDouble(key0) > 15.0)) and (UDFToDouble(key0) < 25.0)) (type: boolean)
+            Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string), key0 (type: string), value0 (type: string)
+              outputColumnNames: key, value, key0, value0
+              Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    properties:
+                      columns key,value,key0,value0
+                      columns.types string:string:string:string
+                      escape.delim \
+                      hive.serialization.extend.additional.nesting.levels true
+                      serialization.format 1
+                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: FROM 
+  src a
+ FULL OUTER JOIN 
+  srcpart b 
+ ON (a.key = b.key AND b.ds = '2008-04-08')
+ SELECT a.key, a.value, b.key, b.value
+ WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+POSTHOOK: query: FROM 
+  src a
+ FULL OUTER JOIN 
+  srcpart b 
+ ON (a.key = b.key AND b.ds = '2008-04-08')
+ SELECT a.key, a.value, b.key, b.value
+ WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+17	val_17	17	val_17
+17	val_17	17	val_17
+18	val_18	18	val_18
+18	val_18	18	val_18
+18	val_18	18	val_18
+18	val_18	18	val_18
+18	val_18	18	val_18
+18	val_18	18	val_18
+18	val_18	18	val_18
+18	val_18	18	val_18
+19	val_19	19	val_19
+19	val_19	19	val_19
+PREHOOK: query: EXPLAIN EXTENDED
+ FROM 
+  src a
+ FULL OUTER JOIN 
+  srcpart b 
+ ON (a.key = b.key)
+ SELECT a.key, a.value, b.key, b.value
+ WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08'
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN EXTENDED
+ FROM 
+  src a
+ FULL OUTER JOIN 
+  srcpart b 
+ ON (a.key = b.key)
+ SELECT a.key, a.value, b.key, b.value
+ WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08'
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  
+TOK_QUERY
+   TOK_FROM
+      TOK_FULLOUTERJOIN
+         TOK_TABREF
+            TOK_TABNAME
+               src
+            a
+         TOK_TABREF
+            TOK_TABNAME
+               srcpart
+            b
+         =
+            .
+               TOK_TABLE_OR_COL
+                  a
+               key
+            .
+               TOK_TABLE_OR_COL
+                  b
+               key
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            .
+               TOK_TABLE_OR_COL
+                  a
+               key
+         TOK_SELEXPR
+            .
+               TOK_TABLE_OR_COL
+                  a
+               value
+         TOK_SELEXPR
+            .
+               TOK_TABLE_OR_COL
+                  b
+               key
+         TOK_SELEXPR
+            .
+               TOK_TABLE_OR_COL
+                  b
+               value
+      TOK_WHERE
+         AND
+            AND
+               AND
+                  AND
+                     >
+                        .
+                           TOK_TABLE_OR_COL
+                              a
+                           key
+                        10
+                     <
+                        .
+                           TOK_TABLE_OR_COL
+                              a
+                           key
+                        20
+                  >
+                     .
+                        TOK_TABLE_OR_COL
+                           b
+                        key
+                     15
+               <
+                  .
+                     TOK_TABLE_OR_COL
+                        b
+                     key
+                  25
+            =
+               .
+                  TOK_TABLE_OR_COL
+                     b
+                  ds
+               '2008-04-08'
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: a
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
+              Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string), value (type: string)
+                outputColumnNames: key, value
+                Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: key (type: string)
+                  sort order: +
+                  Map-reduce partition columns: key (type: string)
+                  Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                  tag: 0
+                  value expressions: value (type: string)
+                  auto parallelism: false
+          TableScan
+            alias: b
+            Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
+              Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string), value (type: string)
+                outputColumnNames: key, value
+                Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: key (type: string)
+                  sort order: +
+                  Map-reduce partition columns: key (type: string)
+                  Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+                  tag: 1
+                  value expressions: value (type: string)
+                  auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: src
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 'default','default'
+              columns.types string:string
+#### A masked pattern was here ####
+              name default.src
+              numFiles 1
+              numRows 500
+              rawDataSize 5312
+              serialization.ddl struct src { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 5812
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                COLUMN_STATS_ACCURATE true
+                bucket_count -1
+                columns key,value
+                columns.comments 'default','default'
+                columns.types string:string
+#### A masked pattern was here ####
+                name default.src
+                numFiles 1
+                numRows 500
+                rawDataSize 5312
+                serialization.ddl struct src { string key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 5812
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.src
+            name: default.src
+#### A masked pattern was here ####
+          Partition
+            base file name: hr=11
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2008-04-08
+              hr 11
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 'default','default'
+              columns.types string:string
+#### A masked pattern was here ####
+              name default.srcpart
+              numFiles 1
+              numRows 500
+              partition_columns ds/hr
+              partition_columns.types string:string
+              rawDataSize 5312
+              serialization.ddl struct srcpart { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 5812
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 'default','default'
+                columns.types string:string
+#### A masked pattern was here ####
+                name default.srcpart
+                partition_columns ds/hr
+                partition_columns.types string:string
+                serialization.ddl struct srcpart { string key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.srcpart
+            name: default.srcpart
+#### A masked pattern was here ####
+          Partition
+            base file name: hr=12
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2008-04-08
+              hr 12
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 'default','default'
+              columns.types string:string
+#### A masked pattern was here ####
+              name default.srcpart
+              numFiles 1
+              numRows 500
+              partition_columns ds/hr
+              partition_columns.types string:string
+              rawDataSize 5312
+              serialization.ddl struct srcpart { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 5812
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 'default','default'
+                columns.types string:string
+#### A masked pattern was here ####
+                name default.srcpart
+                partition_columns ds/hr
+                partition_columns.types string:string
+                serialization.ddl struct srcpart { string key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.srcpart
+            name: default.srcpart
+      Truncated Path -> Alias:
+        /src [a]
+        /srcpart/ds=2008-04-08/hr=11 [b]
+        /srcpart/ds=2008-04-08/hr=12 [b]
+      Needs Tagging: true
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Right Outer Join0 to 1
+          keys:
+            0 key (type: string)
+            1 key (type: string)
+          outputColumnNames: key, value, key0, value0
+          Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE
+          Filter Operator
+            isSamplingPred: false
+            predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
+            Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string), key0 (type: string), value0 (type: string)
+              outputColumnNames: key, value, key0, value0
+              Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    properties:
+                      columns key,value,key0,value0
+                      columns.types string:string:string:string
+                      escape.delim \
+                      hive.serialization.extend.additional.nesting.levels true
+                      serialization.format 1
+                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: FROM 
+  src a
+ FULL OUTER JOIN 
+  srcpart b 
+ ON (a.key = b.key)
+ SELECT a.key, a.value, b.key, b.value
+ WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+#### A masked pattern was here ####
+POSTHOOK: query: FROM 
+  src a
+ FULL OUTER JOIN 
+  srcpart b 
+ ON (a.key = b.key)
+ SELECT a.key, a.value, b.key, b.value
+ WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+#### A masked pattern was here ####
+17	val_17	17	val_17
+17	val_17	17	val_17
+18	val_18	18	val_18
+18	val_18	18	val_18
+18	val_18	18	val_18
+18	val_18	18	val_18
+18	val_18	18	val_18
+18	val_18	18	val_18
+18	val_18	18	val_18
+18	val_18	18	val_18
+19	val_19	19	val_19
+19	val_19	19	val_19
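
Read side by side, the two EXPLAIN EXTENDED plans above show why the placement of the partition predicate matters for an outer join. With b.ds = '2008-04-08' inside the ON clause, the planner scans all four srcpart partitions and can only apply the predicate as a residual join filter in the reducer (filter predicates: 1 {(VALUE.ds = '2008-04-08')}). With the same predicate moved to the WHERE clause, only the two ds=2008-04-08 partitions are read and the join is planned as a right outer join, yet both queries return the same twelve rows. A minimal HiveQL sketch of the two shapes, merely restating the test queries above in SELECT-first form:

  -- ON-clause form: no partition pruning, predicate kept as a residual
  -- join filter in the reduce-side Join Operator
  SELECT a.key, a.value, b.key, b.value
  FROM src a FULL OUTER JOIN srcpart b
    ON (a.key = b.key AND b.ds = '2008-04-08')
  WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25;

  -- WHERE-clause form: partitions pruned to ds=2008-04-08 and the join
  -- simplified by the optimizer, same result rows
  SELECT a.key, a.value, b.key, b.value
  FROM src a FULL OUTER JOIN srcpart b
    ON (a.key = b.key)
  WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25
    AND b.ds = '2008-04-08';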


[04/50] [abbrv] hive git commit: HIVE-11573: PointLookupOptimizer can be pessimistic at a low nDV (Gopal V, reviewed by Jesus Camacho Rodriguez)

Posted by xu...@apache.org.
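
The subject line refers to Hive's PointLookupOptimizer, and the pointlookup2.q.out golden file below shows the rewrite in question: an OR of conjunctive equality predicates is collapsed into a single IN over a struct of the compared columns, which the first plan's Filter Operator renders as (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')). The patch itself, per the subject, adjusts when that rewrite is considered worthwhile at a low number of distinct values (nDV). A minimal HiveQL sketch of the before/after shapes, taken from the test query below rather than from the optimizer source:

  -- As written in the test (OR of equality conjunctions):
  SELECT key, value, ds
  FROM pcr_t1
  WHERE (ds = '2000-04-08' AND key = 1)
     OR (ds = '2000-04-09' AND key = 2)
  ORDER BY key, value, ds;

  -- Point-lookup form the optimizer produces internally; writing it by
  -- hand should plan the same way, assuming struct-IN support:
  SELECT key, value, ds
  FROM pcr_t1
  WHERE struct(key, ds) IN (struct(1, '2000-04-08'), struct(2, '2000-04-09'))
  ORDER BY key, value, ds;
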
http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/pointlookup2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/pointlookup2.q.out b/ql/src/test/results/clientpositive/pointlookup2.q.out
new file mode 100644
index 0000000..55edd90
--- /dev/null
+++ b/ql/src/test/results/clientpositive/pointlookup2.q.out
@@ -0,0 +1,1647 @@
+PREHOOK: query: drop table pcr_t1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table pcr_t1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table pcr_t2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table pcr_t2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table pcr_t3
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table pcr_t3
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table pcr_t1 (key int, value string) partitioned by (ds string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@pcr_t1
+POSTHOOK: query: create table pcr_t1 (key int, value string) partitioned by (ds string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@pcr_t1
+PREHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-08') select * from src where key < 20 order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@pcr_t1@ds=2000-04-08
+POSTHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-08') select * from src where key < 20 order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@pcr_t1@ds=2000-04-08
+POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-08).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-08).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-09') select * from src where key < 20 order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@pcr_t1@ds=2000-04-09
+POSTHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-09') select * from src where key < 20 order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@pcr_t1@ds=2000-04-09
+POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-09).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-09).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-10') select * from src where key < 20 order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@pcr_t1@ds=2000-04-10
+POSTHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-10') select * from src where key < 20 order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@pcr_t1@ds=2000-04-10
+POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-10).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-10).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: create table pcr_t2 (ds string, key int, value string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@pcr_t2
+POSTHOOK: query: create table pcr_t2 (ds string, key int, value string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@pcr_t2
+PREHOOK: query: from pcr_t1
+insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@pcr_t1
+PREHOOK: Input: default@pcr_t1@ds=2000-04-08
+PREHOOK: Output: default@pcr_t2
+POSTHOOK: query: from pcr_t1
+insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@pcr_t1
+POSTHOOK: Input: default@pcr_t1@ds=2000-04-08
+POSTHOOK: Output: default@pcr_t2
+POSTHOOK: Lineage: pcr_t2.ds SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:ds, type:string, comment:null), ]
+POSTHOOK: Lineage: pcr_t2.key SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: pcr_t2.value SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: from pcr_t1
+insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08' and key=2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@pcr_t1
+PREHOOK: Input: default@pcr_t1@ds=2000-04-08
+PREHOOK: Output: default@pcr_t2
+POSTHOOK: query: from pcr_t1
+insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08' and key=2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@pcr_t1
+POSTHOOK: Input: default@pcr_t1@ds=2000-04-08
+POSTHOOK: Output: default@pcr_t2
+POSTHOOK: Lineage: pcr_t2.ds SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:ds, type:string, comment:null), ]
+POSTHOOK: Lineage: pcr_t2.key SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: pcr_t2.value SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: explain extended
+select key, value, ds
+from pcr_t1
+where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2)
+order by key, value, ds
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select key, value, ds
+from pcr_t1
+where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2)
+order by key, value, ds
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  
+TOK_QUERY
+   TOK_FROM
+      TOK_TABREF
+         TOK_TABNAME
+            pcr_t1
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               key
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               value
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               ds
+      TOK_WHERE
+         or
+            and
+               =
+                  TOK_TABLE_OR_COL
+                     ds
+                  '2000-04-08'
+               =
+                  TOK_TABLE_OR_COL
+                     key
+                  1
+            and
+               =
+                  TOK_TABLE_OR_COL
+                     ds
+                  '2000-04-09'
+               =
+                  TOK_TABLE_OR_COL
+                     key
+                  2
+      TOK_ORDERBY
+         TOK_TABSORTCOLNAMEASC
+            TOK_TABLE_OR_COL
+               key
+         TOK_TABSORTCOLNAMEASC
+            TOK_TABLE_OR_COL
+               value
+         TOK_TABSORTCOLNAMEASC
+            TOK_TABLE_OR_COL
+               ds
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: pcr_t1
+            Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
+              Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: int), value (type: string), ds (type: string)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+                  sort order: +++
+                  Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                  tag: -1
+                  auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2000-04-08
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2000-04-08
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcr_t1
+              numFiles 1
+              numRows 20
+              partition_columns ds
+              partition_columns.types string
+              rawDataSize 160
+              serialization.ddl struct pcr_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                partition_columns ds
+                partition_columns.types string
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t1
+            name: default.pcr_t1
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2000-04-09
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2000-04-09
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcr_t1
+              numFiles 1
+              numRows 20
+              partition_columns ds
+              partition_columns.types string
+              rawDataSize 160
+              serialization.ddl struct pcr_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                partition_columns ds
+                partition_columns.types string
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t1
+            name: default.pcr_t1
+      Truncated Path -> Alias:
+        /pcr_t1/ds=2000-04-08 [pcr_t1]
+        /pcr_t1/ds=2000-04-09 [pcr_t1]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  columns _col0,_col1,_col2
+                  columns.types int:string:string
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain extended
+select *
+from pcr_t1 t1 join pcr_t1 t2
+on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-08'
+order by t1.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select *
+from pcr_t1 t1 join pcr_t1 t2
+on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-08'
+order by t1.key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  
+TOK_QUERY
+   TOK_FROM
+      TOK_JOIN
+         TOK_TABREF
+            TOK_TABNAME
+               pcr_t1
+            t1
+         TOK_TABREF
+            TOK_TABNAME
+               pcr_t1
+            t2
+         and
+            and
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t1
+                     key
+                  .
+                     TOK_TABLE_OR_COL
+                        t2
+                     key
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t1
+                     ds
+                  '2000-04-08'
+            =
+               .
+                  TOK_TABLE_OR_COL
+                     t2
+                  ds
+               '2000-04-08'
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_ALLCOLREF
+      TOK_ORDERBY
+         TOK_TABSORTCOLNAMEASC
+            .
+               TOK_TABLE_OR_COL
+                  t1
+               key
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t1
+            Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: key (type: int)
+                sort order: +
+                Map-reduce partition columns: key (type: int)
+                Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+                tag: 0
+                value expressions: value (type: string)
+                auto parallelism: false
+          TableScan
+            alias: t2
+            Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: key (type: int)
+                sort order: +
+                Map-reduce partition columns: key (type: int)
+                Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+                tag: 1
+                value expressions: value (type: string)
+                auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2000-04-08
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2000-04-08
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcr_t1
+              numFiles 1
+              numRows 20
+              partition_columns ds
+              partition_columns.types string
+              rawDataSize 160
+              serialization.ddl struct pcr_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                partition_columns ds
+                partition_columns.types string
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t1
+            name: default.pcr_t1
+      Truncated Path -> Alias:
+        /pcr_t1/ds=2000-04-08 [t1, t2]
+      Needs Tagging: true
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 key (type: int)
+            1 key (type: int)
+          outputColumnNames: _col0, _col1, _col6, _col7
+          Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: string)
+            outputColumnNames: _col0, _col1, _col3, _col4
+            Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+#### A masked pattern was here ####
+              NumFilesPerFileSink: 1
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  properties:
+                    columns _col0,_col1,_col3,_col4
+                    columns.types int,string,int,string
+                    escape.delim \
+                    serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              key expressions: _col0 (type: int)
+              sort order: +
+              Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col1 (type: string), _col3 (type: int), _col4 (type: string)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10003
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              columns _col0,_col1,_col3,_col4
+              columns.types int,string,int,string
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                columns _col0,_col1,_col3,_col4
+                columns.types int,string,int,string
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), '2000-04-08' (type: string), VALUE._col2 (type: int), VALUE._col3 (type: string), '2000-04-08' (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+          Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  columns _col0,_col1,_col2,_col3,_col4,_col5
+                  columns.types int:string:string:int:string:string
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain extended
+select *
+from pcr_t1 t1 join pcr_t1 t2
+on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-09'
+order by t1.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select *
+from pcr_t1 t1 join pcr_t1 t2
+on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-09'
+order by t1.key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  
+TOK_QUERY
+   TOK_FROM
+      TOK_JOIN
+         TOK_TABREF
+            TOK_TABNAME
+               pcr_t1
+            t1
+         TOK_TABREF
+            TOK_TABNAME
+               pcr_t1
+            t2
+         and
+            and
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t1
+                     key
+                  .
+                     TOK_TABLE_OR_COL
+                        t2
+                     key
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t1
+                     ds
+                  '2000-04-08'
+            =
+               .
+                  TOK_TABLE_OR_COL
+                     t2
+                  ds
+               '2000-04-09'
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_ALLCOLREF
+      TOK_ORDERBY
+         TOK_TABSORTCOLNAMEASC
+            .
+               TOK_TABLE_OR_COL
+                  t1
+               key
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t1
+            Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: key (type: int)
+                sort order: +
+                Map-reduce partition columns: key (type: int)
+                Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+                tag: 0
+                value expressions: value (type: string)
+                auto parallelism: false
+          TableScan
+            alias: t2
+            Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: key (type: int)
+                sort order: +
+                Map-reduce partition columns: key (type: int)
+                Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+                tag: 1
+                value expressions: value (type: string)
+                auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2000-04-08
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2000-04-08
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcr_t1
+              numFiles 1
+              numRows 20
+              partition_columns ds
+              partition_columns.types string
+              rawDataSize 160
+              serialization.ddl struct pcr_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                partition_columns ds
+                partition_columns.types string
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t1
+            name: default.pcr_t1
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2000-04-09
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2000-04-09
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcr_t1
+              numFiles 1
+              numRows 20
+              partition_columns ds
+              partition_columns.types string
+              rawDataSize 160
+              serialization.ddl struct pcr_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                partition_columns ds
+                partition_columns.types string
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t1
+            name: default.pcr_t1
+      Truncated Path -> Alias:
+        /pcr_t1/ds=2000-04-08 [t1]
+        /pcr_t1/ds=2000-04-09 [t2]
+      Needs Tagging: true
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 key (type: int)
+            1 key (type: int)
+          outputColumnNames: _col0, _col1, _col6, _col7
+          Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: string)
+            outputColumnNames: _col0, _col1, _col3, _col4
+            Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+#### A masked pattern was here ####
+              NumFilesPerFileSink: 1
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  properties:
+                    columns _col0,_col1,_col3,_col4
+                    columns.types int,string,int,string
+                    escape.delim \
+                    serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              key expressions: _col0 (type: int)
+              sort order: +
+              Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col1 (type: string), _col3 (type: int), _col4 (type: string)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10003
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              columns _col0,_col1,_col3,_col4
+              columns.types int,string,int,string
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                columns _col0,_col1,_col3,_col4
+                columns.types int,string,int,string
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), '2000-04-08' (type: string), VALUE._col2 (type: int), VALUE._col3 (type: string), '2000-04-09' (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+          Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  columns _col0,_col1,_col2,_col3,_col4,_col5
+                  columns.types int:string:string:int:string:string
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+Warning: Shuffle Join JOIN[4][tables = [t1, t2]] in Stage 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: explain extended
+select *
+from pcr_t1 t1 join pcr_t2 t2
+where (t1.ds='2000-04-08' and t2.key=1) or (t1.ds='2000-04-09' and t2.key=2)
+order by t2.key, t2.value, t1.ds
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select *
+from pcr_t1 t1 join pcr_t2 t2
+where (t1.ds='2000-04-08' and t2.key=1) or (t1.ds='2000-04-09' and t2.key=2)
+order by t2.key, t2.value, t1.ds
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  
+TOK_QUERY
+   TOK_FROM
+      TOK_JOIN
+         TOK_TABREF
+            TOK_TABNAME
+               pcr_t1
+            t1
+         TOK_TABREF
+            TOK_TABNAME
+               pcr_t2
+            t2
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_ALLCOLREF
+      TOK_WHERE
+         or
+            and
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t1
+                     ds
+                  '2000-04-08'
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t2
+                     key
+                  1
+            and
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t1
+                     ds
+                  '2000-04-09'
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t2
+                     key
+                  2
+      TOK_ORDERBY
+         TOK_TABSORTCOLNAMEASC
+            .
+               TOK_TABLE_OR_COL
+                  t2
+               key
+         TOK_TABSORTCOLNAMEASC
+            .
+               TOK_TABLE_OR_COL
+                  t2
+               value
+         TOK_TABSORTCOLNAMEASC
+            .
+               TOK_TABLE_OR_COL
+                  t1
+               ds
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t1
+            Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+              tag: 0
+              value expressions: key (type: int), value (type: string), ds (type: string)
+              auto parallelism: false
+          TableScan
+            alias: t2
+            Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+              tag: 1
+              value expressions: ds (type: string), key (type: int), value (type: string)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2000-04-08
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2000-04-08
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcr_t1
+              numFiles 1
+              numRows 20
+              partition_columns ds
+              partition_columns.types string
+              rawDataSize 160
+              serialization.ddl struct pcr_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                partition_columns ds
+                partition_columns.types string
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t1
+            name: default.pcr_t1
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2000-04-09
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2000-04-09
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcr_t1
+              numFiles 1
+              numRows 20
+              partition_columns ds
+              partition_columns.types string
+              rawDataSize 160
+              serialization.ddl struct pcr_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                partition_columns ds
+                partition_columns.types string
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t1
+            name: default.pcr_t1
+#### A masked pattern was here ####
+          Partition
+            base file name: pcr_t2
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns ds,key,value
+              columns.comments 
+              columns.types string:int:string
+#### A masked pattern was here ####
+              name default.pcr_t2
+              numFiles 1
+              numRows 1
+              rawDataSize 18
+              serialization.ddl struct pcr_t2 { string ds, i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 19
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                COLUMN_STATS_ACCURATE true
+                bucket_count -1
+                columns ds,key,value
+                columns.comments 
+                columns.types string:int:string
+#### A masked pattern was here ####
+                name default.pcr_t2
+                numFiles 1
+                numRows 1
+                rawDataSize 18
+                serialization.ddl struct pcr_t2 { string ds, i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 19
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t2
+            name: default.pcr_t2
+      Truncated Path -> Alias:
+        /pcr_t1/ds=2000-04-08 [t1]
+        /pcr_t1/ds=2000-04-09 [t1]
+        /pcr_t2 [t2]
+      Needs Tagging: true
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 
+            1 
+          outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col8
+          Statistics: Num rows: 44 Data size: 352 Basic stats: COMPLETE Column stats: NONE
+          Filter Operator
+            isSamplingPred: false
+            predicate: ((_col2) IN ('2000-04-08', '2000-04-09') and (struct(_col7,_col2)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09'))) (type: boolean)
+            Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: string)
+              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+              Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    properties:
+                      columns _col0,_col1,_col2,_col3,_col4,_col5
+                      columns.types int,string,string,string,int,string
+                      escape.delim \
+                      serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              key expressions: _col4 (type: int), _col5 (type: string), _col2 (type: string)
+              sort order: +++
+              Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10003
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              columns _col0,_col1,_col2,_col3,_col4,_col5
+              columns.types int,string,string,string,int,string
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                columns _col0,_col1,_col2,_col3,_col4,_col5
+                columns.types int,string,string,string,int,string
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+          Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  columns _col0,_col1,_col2,_col3,_col4,_col5
+                  columns.types int:string:string:string:int:string
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+Warning: Shuffle Join JOIN[4][tables = [t1, t2]] in Stage 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: explain extended
+select *
+from pcr_t1 t1 join pcr_t2 t2
+where (t2.ds='2000-04-08' and t1.key=1) or (t2.ds='2000-04-09' and t1.key=2)
+order by t1.key, t1.value, t2.ds
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select *
+from pcr_t1 t1 join pcr_t2 t2
+where (t2.ds='2000-04-08' and t1.key=1) or (t2.ds='2000-04-09' and t1.key=2)
+order by t1.key, t1.value, t2.ds
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  
+TOK_QUERY
+   TOK_FROM
+      TOK_JOIN
+         TOK_TABREF
+            TOK_TABNAME
+               pcr_t1
+            t1
+         TOK_TABREF
+            TOK_TABNAME
+               pcr_t2
+            t2
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_ALLCOLREF
+      TOK_WHERE
+         or
+            and
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t2
+                     ds
+                  '2000-04-08'
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t1
+                     key
+                  1
+            and
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t2
+                     ds
+                  '2000-04-09'
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t1
+                     key
+                  2
+      TOK_ORDERBY
+         TOK_TABSORTCOLNAMEASC
+            .
+               TOK_TABLE_OR_COL
+                  t1
+               key
+         TOK_TABSORTCOLNAMEASC
+            .
+               TOK_TABLE_OR_COL
+                  t1
+               value
+         TOK_TABSORTCOLNAMEASC
+            .
+               TOK_TABLE_OR_COL
+                  t2
+               ds
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t1
+            Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE
+              tag: 0
+              value expressions: key (type: int), value (type: string), ds (type: string)
+              auto parallelism: false
+          TableScan
+            alias: t2
+            Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+              tag: 1
+              value expressions: ds (type: string), key (type: int), value (type: string)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2000-04-08
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2000-04-08
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcr_t1
+              numFiles 1
+              numRows 20
+              partition_columns ds
+              partition_columns.types string
+              rawDataSize 160
+              serialization.ddl struct pcr_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                partition_columns ds
+                partition_columns.types string
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t1
+            name: default.pcr_t1
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2000-04-09
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2000-04-09
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcr_t1
+              numFiles 1
+              numRows 20
+              partition_columns ds
+              partition_columns.types string
+              rawDataSize 160
+              serialization.ddl struct pcr_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                partition_columns ds
+                partition_columns.types string
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t1
+            name: default.pcr_t1
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2000-04-10
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2000-04-10
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcr_t1
+              numFiles 1
+              numRows 20
+              partition_columns ds
+              partition_columns.types string
+              rawDataSize 160
+              serialization.ddl struct pcr_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                partition_columns ds
+                partition_columns.types string
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t1
+            name: default.pcr_t1
+#### A masked pattern was here ####
+          Partition
+            base file name: pcr_t2
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns ds,key,value
+              columns.comments 
+              columns.types string:int:string
+#### A masked pattern was here ####
+              name default.pcr_t2
+              numFiles 1
+              numRows 1
+              rawDataSize 18
+              serialization.ddl struct pcr_t2 { string ds, i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 19
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                COLUMN_STATS_ACCURATE true
+                bucket_count -1
+                columns ds,key,value
+                columns.comments 
+                columns.types string:int:string
+#### A masked pattern was here ####
+                name default.pcr_t2
+                numFiles 1
+                numRows 1
+                rawDataSize 18
+                serialization.ddl struct pcr_t2 { string ds, i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 19
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t2
+            name: default.pcr_t2
+      Truncated Path -> Alias:
+        /pcr_t1/ds=2000-04-08 [t1]
+        /pcr_t1/ds=2000-04-09 [t1]
+        /pcr_t1/ds=2000-04-10 [t1]
+        /pcr_t2 [t2]
+      Needs Tagging: true
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 
+            1 
+          outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col8
+          Statistics: Num rows: 66 Data size: 528 Basic stats: COMPLETE Column stats: NONE
+          Filter Operator
+            isSamplingPred: false
+            predicate: (struct(_col0,_col6)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
+            Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: string)
+              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+              Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    properties:
+                      columns _col0,_col1,_col2,_col3,_col4,_col5
+                      columns.types int,string,string,string,int,string
+                      escape.delim \
+                      serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              key expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string)
+              sort order: +++
+              Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col2 (type: string), _col4 (type: int), _col5 (type: string)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10003
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              columns _col0,_col1,_col2,_col3,_col4,_col5
+              columns.types int,string,string,string,int,string
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                columns _col0,_col1,_col2,_col3,_col4,_col5
+                columns.types int,string,string,string,int,string
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+          Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  columns _col0,_col1,_col2,_col3,_col4,_col5
+                  columns.types int:string:string:string:int:string
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: drop table pcr_t1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@pcr_t1
+PREHOOK: Output: default@pcr_t1
+POSTHOOK: query: drop table pcr_t1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@pcr_t1
+POSTHOOK: Output: default@pcr_t1
+PREHOOK: query: drop table pcr_t2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@pcr_t2
+PREHOOK: Output: default@pcr_t2
+POSTHOOK: query: drop table pcr_t2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@pcr_t2
+POSTHOOK: Output: default@pcr_t2
+PREHOOK: query: drop table pcr_t3
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table pcr_t3
+POSTHOOK: type: DROPTABLE
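
A note on the plans above: the post-join Filter Operator predicates such as
"(struct(_col7,_col2)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09'))" show the
optimizer packing a disjunction of per-partition conjunctions into a single struct-IN test. A
minimal sketch of the equivalence, assuming (as the plan's column lineage suggests) that _col7
maps to t2.key and _col2 to t1.ds:

    -- predicate as written in the query
    WHERE (t1.ds = '2000-04-08' AND t2.key = 1) OR (t1.ds = '2000-04-09' AND t2.key = 2)
    -- equivalent struct-IN form evaluated by the post-join Filter Operator
    WHERE struct(t2.key, t1.ds) IN (struct(1, '2000-04-08'), struct(2, '2000-04-09'))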

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/ppd_transform.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/ppd_transform.q.out b/ql/src/test/results/clientpositive/ppd_transform.q.out
index f536767..17248e4 100644
--- a/ql/src/test/results/clientpositive/ppd_transform.q.out
+++ b/ql/src/test/results/clientpositive/ppd_transform.q.out
@@ -390,21 +390,21 @@ STAGE PLANS:
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                 Filter Operator
-                  predicate: (_col0) IN ('a', 'b') (type: boolean)
-                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  predicate: ((_col0 = 'a') or (_col0 = 'b')) (type: boolean)
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.TextInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 Filter Operator
-                  predicate: (_col0) IN ('c', 'd') (type: boolean)
-                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  predicate: ((_col0 = 'c') or (_col0 = 'd')) (type: boolean)
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.TextInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/spark/pcr.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/pcr.q.out b/ql/src/test/results/clientpositive/spark/pcr.q.out
index 5aa0df8..fb08f10 100644
--- a/ql/src/test/results/clientpositive/spark/pcr.q.out
+++ b/ql/src/test/results/clientpositive/spark/pcr.q.out
@@ -2534,16 +2534,16 @@ STAGE PLANS:
                   GatherStats: false
                   Filter Operator
                     isSamplingPred: false
-                    predicate: (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
-                    Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                    predicate: (((ds = '2000-04-08') and (key = 1)) or ((ds = '2000-04-09') and (key = 2))) (type: boolean)
+                    Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: int), value (type: string), ds (type: string)
                       outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
                         sort order: +++
-                        Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                         tag: -1
                         auto parallelism: false
             Path -> Alias:
@@ -2648,13 +2648,13 @@ STAGE PLANS:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string)
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
                   GlobalTableId: 0
 #### A masked pattern was here ####
                   NumFilesPerFileSink: 1
-                  Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
 #### A masked pattern was here ####
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/spark/ppd_transform.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/ppd_transform.q.out b/ql/src/test/results/clientpositive/spark/ppd_transform.q.out
index a6e6e38..52a847a 100644
--- a/ql/src/test/results/clientpositive/spark/ppd_transform.q.out
+++ b/ql/src/test/results/clientpositive/spark/ppd_transform.q.out
@@ -405,21 +405,21 @@ STAGE PLANS:
                           serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                       Filter Operator
-                        predicate: (_col0) IN ('a', 'b') (type: boolean)
-                        Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                        predicate: ((_col0 = 'a') or (_col0 = 'b')) (type: boolean)
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                         File Output Operator
                           compressed: false
-                          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                           table:
                               input format: org.apache.hadoop.mapred.TextInputFormat
                               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       Filter Operator
-                        predicate: (_col0) IN ('c', 'd') (type: boolean)
-                        Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                        predicate: ((_col0 = 'c') or (_col0 = 'd')) (type: boolean)
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                         File Output Operator
                           compressed: false
-                          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                           table:
                               input format: org.apache.hadoop.mapred.TextInputFormat
                               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out b/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
index 54003c3..c2250e6 100644
--- a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
@@ -45,7 +45,7 @@ STAGE PLANS:
         TableScan
           alias: alltypesorc
           Filter Operator
-            predicate: (csmallint) IN (418, 12205, 10583) (type: boolean)
+            predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
             Select Operator
               expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string)
               outputColumnNames: _col0, _col1, _col2

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
index e8a9786..9756b0c 100644
--- a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
@@ -2909,7 +2909,7 @@ Stage-0
       Select Operator [SEL_2]
          outputColumnNames:["_col0"]
          Filter Operator [FIL_4]
-            predicate:(c_int) IN (-6, 6) (type: boolean)
+            predicate:((c_int = -6) or (c_int = 6)) (type: boolean)
             TableScan [TS_0]
                alias:cbo_t1
 

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/tez/vectorized_case.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vectorized_case.q.out b/ql/src/test/results/clientpositive/tez/vectorized_case.q.out
index 54003c3..c2250e6 100644
--- a/ql/src/test/results/clientpositive/tez/vectorized_case.q.out
+++ b/ql/src/test/results/clientpositive/tez/vectorized_case.q.out
@@ -45,7 +45,7 @@ STAGE PLANS:
         TableScan
           alias: alltypesorc
           Filter Operator
-            predicate: (csmallint) IN (418, 12205, 10583) (type: boolean)
+            predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
             Select Operator
               expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string)
               outputColumnNames: _col0, _col1, _col2

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/vectorized_case.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vectorized_case.q.out b/ql/src/test/results/clientpositive/vectorized_case.q.out
index 9e47014..73bf12d 100644
--- a/ql/src/test/results/clientpositive/vectorized_case.q.out
+++ b/ql/src/test/results/clientpositive/vectorized_case.q.out
@@ -46,19 +46,20 @@ STAGE PLANS:
             alias: alltypesorc
             Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: (csmallint) IN (418, 12205, 10583) (type: boolean)
-              Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+              predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
+              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string)
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
 
   Stage: Stage-0
     Fetch Operator
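
The golden-file updates in this commit consistently replace single-column IN predicates with the
equivalent chain of equality ORs, and the Statistics lines move with them: the IN form carried a
reduced row estimate (e.g. 6144 of 12288) while the OR form keeps the unreduced estimate,
presumably because the stats annotator applies a selectivity discount to IN that it does not
apply to an OR chain. The two predicates select the same rows; a sketch against the stock
alltypesorc test table:

    -- point-lookup form (old plans)
    SELECT csmallint FROM alltypesorc WHERE csmallint IN (418, 12205, 10583);
    -- expanded form (new plans); semantically identical
    SELECT csmallint FROM alltypesorc
    WHERE csmallint = 418 OR csmallint = 12205 OR csmallint = 10583;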


[26/50] [abbrv] hive git commit: Added HIVE-11536 to errata

Posted by xu...@apache.org.
Added HIVE-11536 to errata


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b22e54ec
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b22e54ec
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b22e54ec

Branch: refs/heads/beeline-cli
Commit: b22e54ecec96d1b86fea1b53dae4c8bb68a11f9c
Parents: d597765
Author: Alan Gates <ga...@hortonworks.com>
Authored: Tue Sep 1 09:49:47 2015 -0700
Committer: Alan Gates <ga...@hortonworks.com>
Committed: Tue Sep 1 09:49:47 2015 -0700

----------------------------------------------------------------------
 errata.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/b22e54ec/errata.txt
----------------------------------------------------------------------
diff --git a/errata.txt b/errata.txt
index 70992ad..e5a1748 100644
--- a/errata.txt
+++ b/errata.txt
@@ -1,4 +1,4 @@
-Commits with the wrong JIRA referenced:
+Commits with the wrong or no JIRA referenced:
 
 git commit                               branch     jira       url
 5a576b6fbf1680ab4dd8f275cad484a2614ef2c1 master     HIVE-10391 https://issues.apache.org/jira/browse/HIVE-10391
@@ -7,3 +7,4 @@ git commit                               branch     jira       url
 09100831adff7589ee48e735a4beac6ebb25cb3e master     HIVE-10885 https://issues.apache.org/jira/browse/HIVE-10885
 f3ab5fda6af57afff31c29ad048d906fd095d5fb branch-1.2 HIVE-10885 https://issues.apache.org/jira/browse/HIVE-10885
 dcf21cd6fa98fb5db01ef661bb3b9f94d9ca2d15 master     HIVE-10021 https://issues.apache.org/jira/browse/HIVE-10021
+9763c9dd31bd5939db3ca50e75bb97955b411f6d master     HIVE-11536 https://issues.apache.org/jira/browse/HIVE-11536


[41/50] [abbrv] hive git commit: HIVE-11600 : Hive Parser to Support multi col in clause (x, y..) in ((..), ..., ()) (Pengcheng Xiong, reviewed by Laljo John Pullokkaran)

Posted by xu...@apache.org.
HIVE-11600 : Hive Parser to Support multi col in clause (x,y..) in ((..),..., ()) (Pengcheng Xiong, reviewed by Laljo John Pullokkaran)
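
A minimal HiveQL sketch of the syntax this patch enables (the table t is hypothetical; the
committed test queries live in ql/src/test/queries/clientpositive/multi_column_in.q):

    CREATE TABLE t (a INT, b STRING);
    -- multi-column IN: keep rows whose (a, b) tuple equals any listed tuple
    SELECT * FROM t WHERE (a, b) IN ((1, 'x'), (2, 'y'));
    -- the parser rewrites this to the equivalent struct form
    SELECT * FROM t WHERE struct(a, b) IN (struct(1, 'x'), struct(2, 'y'));

NOT IN over the same tuple list is handled by the same rule.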


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8bed378e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8bed378e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8bed378e

Branch: refs/heads/beeline-cli
Commit: 8bed378eac538a1bf1f4599b50929abedc735891
Parents: 730a404
Author: Pengcheng Xiong <px...@apache.org>
Authored: Fri Sep 4 10:13:49 2015 -0700
Committer: Pengcheng Xiong <px...@apache.org>
Committed: Fri Sep 4 10:13:49 2015 -0700

----------------------------------------------------------------------
 .../hadoop/hive/ql/parse/IdentifiersParser.g    |  46 ++-
 .../TestSQL11ReservedKeyWordsNegative.java      |  32 +-
 .../TestSQL11ReservedKeyWordsPositive.java      |  23 +-
 ql/src/test/queries/clientpositive/char_udf1.q  |   9 +-
 ql/src/test/queries/clientpositive/keyword_2.q  |  14 +
 .../queries/clientpositive/multi_column_in.q    |  71 ++++
 .../test/queries/clientpositive/varchar_udf1.q  |   6 +-
 .../clientpositive/char_udf1.q.java1.7.out      |  22 +-
 .../test/results/clientpositive/keyword_2.q.out |  51 +++
 .../clientpositive/multi_column_in.q.out        | 410 +++++++++++++++++++
 .../clientpositive/varchar_udf1.q.java1.7.out   |  12 +-
 11 files changed, 663 insertions(+), 33 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/8bed378e/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
index 64af7d1..bac0d22 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
@@ -430,24 +430,31 @@ precedenceEqualOperator
 subQueryExpression 
     : 
     LPAREN! selectStatement[true] RPAREN!     
- ;
- 
+    ;
+
 precedenceEqualExpression
     :
+    (LPAREN precedenceBitwiseOrExpression COMMA) => precedenceEqualExpressionMutiple
+    |
+    precedenceEqualExpressionSingle
+    ;
+
+precedenceEqualExpressionSingle
+    :
     (left=precedenceBitwiseOrExpression -> $left)
     (
        (KW_NOT precedenceEqualNegatableOperator notExpr=precedenceBitwiseOrExpression)
-       -> ^(KW_NOT ^(precedenceEqualNegatableOperator $precedenceEqualExpression $notExpr))
+       -> ^(KW_NOT ^(precedenceEqualNegatableOperator $precedenceEqualExpressionSingle $notExpr))
     | (precedenceEqualOperator equalExpr=precedenceBitwiseOrExpression)
-       -> ^(precedenceEqualOperator $precedenceEqualExpression $equalExpr)
+       -> ^(precedenceEqualOperator $precedenceEqualExpressionSingle $equalExpr)
     | (KW_NOT KW_IN LPAREN KW_SELECT)=>  (KW_NOT KW_IN subQueryExpression) 
-       -> ^(KW_NOT ^(TOK_SUBQUERY_EXPR ^(TOK_SUBQUERY_OP KW_IN) subQueryExpression $precedenceEqualExpression))
+       -> ^(KW_NOT ^(TOK_SUBQUERY_EXPR ^(TOK_SUBQUERY_OP KW_IN) subQueryExpression $precedenceEqualExpressionSingle))
     | (KW_NOT KW_IN expressions)
-       -> ^(KW_NOT ^(TOK_FUNCTION KW_IN $precedenceEqualExpression expressions))
+       -> ^(KW_NOT ^(TOK_FUNCTION KW_IN $precedenceEqualExpressionSingle expressions))
     | (KW_IN LPAREN KW_SELECT)=>  (KW_IN subQueryExpression) 
-       -> ^(TOK_SUBQUERY_EXPR ^(TOK_SUBQUERY_OP KW_IN) subQueryExpression $precedenceEqualExpression)
+       -> ^(TOK_SUBQUERY_EXPR ^(TOK_SUBQUERY_OP KW_IN) subQueryExpression $precedenceEqualExpressionSingle)
     | (KW_IN expressions)
-       -> ^(TOK_FUNCTION KW_IN $precedenceEqualExpression expressions)
+       -> ^(TOK_FUNCTION KW_IN $precedenceEqualExpressionSingle expressions)
     | ( KW_NOT KW_BETWEEN (min=precedenceBitwiseOrExpression) KW_AND (max=precedenceBitwiseOrExpression) )
        -> ^(TOK_FUNCTION Identifier["between"] KW_TRUE $left $min $max)
     | ( KW_BETWEEN (min=precedenceBitwiseOrExpression) KW_AND (max=precedenceBitwiseOrExpression) )
@@ -458,7 +465,22 @@ precedenceEqualExpression
 
 expressions
     :
-    LPAREN expression (COMMA expression)* RPAREN -> expression*
+    LPAREN expression (COMMA expression)* RPAREN -> expression+
+    ;
+
+//we transform the (col0, col1) in ((v00,v01),(v10,v11)) into struct(col0, col1) in (struct(v00,v01),struct(v10,v11))
+precedenceEqualExpressionMutiple
+    :
+    (LPAREN precedenceBitwiseOrExpression (COMMA precedenceBitwiseOrExpression)+ RPAREN -> ^(TOK_FUNCTION Identifier["struct"] precedenceBitwiseOrExpression+))
+    ( (KW_IN LPAREN expressionsToStruct (COMMA expressionsToStruct)+ RPAREN)
+       -> ^(TOK_FUNCTION KW_IN $precedenceEqualExpressionMutiple expressionsToStruct+)
+    | (KW_NOT KW_IN LPAREN expressionsToStruct (COMMA expressionsToStruct)+ RPAREN)
+       -> ^(KW_NOT ^(TOK_FUNCTION KW_IN $precedenceEqualExpressionMutiple expressionsToStruct+)))
+    ;
+
+expressionsToStruct
+    :
+    LPAREN expression (COMMA expression)* RPAREN -> ^(TOK_FUNCTION Identifier["struct"] expression+)
     ;
 
 precedenceNotOperator
@@ -635,7 +657,7 @@ nonReserved
     | KW_MAPJOIN | KW_MATERIALIZED | KW_METADATA | KW_MINUS | KW_MINUTE | KW_MONTH | KW_MSCK | KW_NOSCAN | KW_NO_DROP | KW_OFFLINE
     | KW_OPTION | KW_OUTPUTDRIVER | KW_OUTPUTFORMAT | KW_OVERWRITE | KW_OWNER | KW_PARTITIONED | KW_PARTITIONS | KW_PLUS | KW_PRETTY
     | KW_PRINCIPALS | KW_PROTECTION | KW_PURGE | KW_READ | KW_READONLY | KW_REBUILD | KW_RECORDREADER | KW_RECORDWRITER
-    | KW_REGEXP | KW_RELOAD | KW_RENAME | KW_REPAIR | KW_REPLACE | KW_REPLICATION | KW_RESTRICT | KW_REWRITE | KW_RLIKE
+    | KW_RELOAD | KW_RENAME | KW_REPAIR | KW_REPLACE | KW_REPLICATION | KW_RESTRICT | KW_REWRITE
     | KW_ROLE | KW_ROLES | KW_SCHEMA | KW_SCHEMAS | KW_SECOND | KW_SEMI | KW_SERDE | KW_SERDEPROPERTIES | KW_SERVER | KW_SETS | KW_SHARED
     | KW_SHOW | KW_SHOW_DATABASE | KW_SKEWED | KW_SORT | KW_SORTED | KW_SSL | KW_STATISTICS | KW_STORED
     | KW_STREAMTABLE | KW_STRING | KW_STRUCT | KW_TABLES | KW_TBLPROPERTIES | KW_TEMPORARY | KW_TERMINATED
@@ -668,5 +690,7 @@ sql11ReservedKeywordsUsedAsIdentifier
     | KW_LEFT | KW_LIKE | KW_LOCAL | KW_NONE | KW_NULL | KW_OF | KW_ORDER | KW_OUT | KW_OUTER | KW_PARTITION 
     | KW_PERCENT | KW_PROCEDURE | KW_RANGE | KW_READS | KW_REVOKE | KW_RIGHT 
     | KW_ROLLUP | KW_ROW | KW_ROWS | KW_SET | KW_SMALLINT | KW_TABLE | KW_TIMESTAMP | KW_TO | KW_TRIGGER | KW_TRUE 
-    | KW_TRUNCATE | KW_UNION | KW_UPDATE | KW_USER | KW_USING | KW_VALUES | KW_WITH
+    | KW_TRUNCATE | KW_UNION | KW_UPDATE | KW_USER | KW_USING | KW_VALUES | KW_WITH 
+//The following two keywords come from MySQL. Although they are not keywords in SQL2011, they are reserved keywords in MySQL.    
+    | KW_REGEXP | KW_RLIKE
     ;
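
As the grammar comment says, a multi-column IN predicate is rewritten into an
equivalent struct comparison. A minimal sketch of the transform, with
hypothetical table, column, and value names:

  -- what the user writes
  select * from t where (col0, col1) in ((v00, v01), (v10, v11));

  -- what the parser effectively builds
  select * from t where struct(col0, col1) in (struct(v00, v01), struct(v10, v11));

The syntactic predicate (LPAREN precedenceBitwiseOrExpression COMMA) is what
routes a parenthesized, comma-separated left-hand side into the multi-column
rule rather than precedenceEqualExpressionSingle.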

http://git-wip-us.apache.org/repos/asf/hive/blob/8bed378e/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java
index 61b5892..97ae0d9 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java
@@ -30,7 +30,7 @@ import org.junit.Test;
 
 /**
  * Parser tests for SQL11 Reserved KeyWords. Please find more information in
- * HIVE-6617. Total number : 74
+ * HIVE-6617. Total number : 74 + 2 (MySQL)
  */
 public class TestSQL11ReservedKeyWordsNegative {
   private static HiveConf conf;
@@ -1070,4 +1070,34 @@ public class TestSQL11ReservedKeyWordsNegative {
               ex.getMessage());
     }
   }
+
+  // MySQL reserved keywords.
+  @Test
+  public void testSQL11ReservedKeyWords_RLIKE() {
+    try {
+      parse("CREATE TABLE RLIKE (col STRING)");
+      Assert.assertFalse("Expected ParseException", true);
+    } catch (ParseException ex) {
+      Assert
+          .assertEquals(
+              "Failure didn't match.",
+              "line 1:13 Failed to recognize predicate 'RLIKE'. Failed rule: 'identifier' in table name",
+              ex.getMessage());
+    }
+  }
+
+  @Test
+  public void testSQL11ReservedKeyWords_REGEXP() {
+    try {
+      parse("CREATE TABLE REGEXP (col STRING)");
+      Assert.assertFalse("Expected ParseException", true);
+    } catch (ParseException ex) {
+      Assert
+          .assertEquals(
+              "Failure didn't match.",
+              "line 1:13 Failed to recognize predicate 'REGEXP'. Failed rule: 'identifier' in table name",
+              ex.getMessage());
+    }
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/8bed378e/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsPositive.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsPositive.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsPositive.java
index 4c84e91..2a68899 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsPositive.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsPositive.java
@@ -30,7 +30,7 @@ import org.junit.Test;
 
 /**
  * Parser tests for SQL11 Reserved KeyWords. Please find more information in
- * HIVE-6617. Total number : 74
+ * HIVE-6617. Total number : 74 + 2 (MySQL)
  */
 public class TestSQL11ReservedKeyWordsPositive {
   private static HiveConf conf;
@@ -798,4 +798,25 @@ public class TestSQL11ReservedKeyWordsPositive {
             "(TOK_CREATETABLE (TOK_TABNAME WITH) TOK_LIKETABLE (TOK_TABCOLLIST (TOK_TABCOL col TOK_STRING)))",
             ast.toStringTree());
   }
+
+  // MySQL reserved keywords.
+  @Test
+  public void testSQL11ReservedKeyWords_RLIKE() throws ParseException {
+    ASTNode ast = parse("CREATE TABLE RLIKE (col STRING)");
+    Assert
+        .assertEquals(
+            "AST doesn't match",
+            "(TOK_CREATETABLE (TOK_TABNAME RLIKE) TOK_LIKETABLE (TOK_TABCOLLIST (TOK_TABCOL col TOK_STRING)))",
+            ast.toStringTree());
+  }
+
+  @Test
+  public void testSQL11ReservedKeyWords_REGEXP() throws ParseException {
+    ASTNode ast = parse("CREATE TABLE REGEXP (col STRING)");
+    Assert
+        .assertEquals(
+            "AST doesn't match",
+            "(TOK_CREATETABLE (TOK_TABNAME REGEXP) TOK_LIKETABLE (TOK_TABCOLLIST (TOK_TABCOL col TOK_STRING)))",
+            ast.toStringTree());
+  }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/8bed378e/ql/src/test/queries/clientpositive/char_udf1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/char_udf1.q b/ql/src/test/queries/clientpositive/char_udf1.q
index 8848609..09012b4 100644
--- a/ql/src/test/queries/clientpositive/char_udf1.q
+++ b/ql/src/test/queries/clientpositive/char_udf1.q
@@ -74,10 +74,13 @@ select
   ltrim(c2) = ltrim(c4)
 from char_udf_1 limit 1;
 
+-- In hive wiki page https://cwiki.apache.org/confluence/display/Hive/LanguageManual+UDF
+-- we only allow A regexp B, not regexp (A,B).
+
 select
-  regexp(c2, 'val'),
-  regexp(c4, 'val'),
-  regexp(c2, 'val') = regexp(c4, 'val')
+  c2 regexp 'val',
+  c4 regexp 'val',
+  (c2 regexp 'val') = (c4 regexp 'val')
 from char_udf_1 limit 1;
 
 select

http://git-wip-us.apache.org/repos/asf/hive/blob/8bed378e/ql/src/test/queries/clientpositive/keyword_2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/keyword_2.q b/ql/src/test/queries/clientpositive/keyword_2.q
new file mode 100644
index 0000000..054e26a
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/keyword_2.q
@@ -0,0 +1,14 @@
+set hive.support.sql11.reserved.keywords=false;
+drop table varchar_udf_1;
+
+create table varchar_udf_1 (c1 string, c2 string, c3 varchar(10), c4 varchar(20));
+insert overwrite table varchar_udf_1
+  select key, value, key, value from src where key = '238' limit 1;
+
+select
+  regexp(c2, 'val'),
+  regexp(c4, 'val'),
+  regexp(c2, 'val') = regexp(c4, 'val')
+from varchar_udf_1 limit 1;
+
+drop table varchar_udf_1;
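
keyword_2.q runs with hive.support.sql11.reserved.keywords=false and keeps
coverage for the function-call form regexp(A, B); the comment added to
char_udf1.q above notes that the documented form in the language manual is the
infix A regexp B, which is why that test and varchar_udf1.q below were
rewritten. A sketch of the two forms, assuming a table t with a string column c:

  -- documented infix form
  select c regexp 'val' from t;

  -- function form, exercised by keyword_2.q with the reserved-keywords flag off
  select regexp(c, 'val') from t;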

http://git-wip-us.apache.org/repos/asf/hive/blob/8bed378e/ql/src/test/queries/clientpositive/multi_column_in.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/multi_column_in.q b/ql/src/test/queries/clientpositive/multi_column_in.q
new file mode 100644
index 0000000..18a56cc
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/multi_column_in.q
@@ -0,0 +1,71 @@
+drop table emps;
+
+create table emps (empno int, deptno int, empname string);
+
+insert into table emps values (1,2,"11"),(1,2,"11"),(3,4,"33"),(1,3,"11"),(2,5,"22"),(2,5,"22");
+
+select * from emps;
+
+select * from emps where (int(empno+deptno/2), int(deptno/3)) in ((2,0),(3,2));
+
+select * from emps where (int(empno+deptno/2), int(deptno/3)) not in ((2,0),(3,2));
+
+select * from emps where (empno,deptno) in ((1,2),(3,2));
+
+select * from emps where (empno,deptno) not in ((1,2),(3,2));
+
+select * from emps where (empno,deptno) in ((1,2),(1,3));
+
+select * from emps where (empno,deptno) not in ((1,2),(1,3));
+
+explain
+select * from emps where (empno+1,deptno) in ((1,2),(3,2));
+
+explain 
+select * from emps where (empno+1,deptno) not in ((1,2),(3,2));
+
+select * from emps where empno in (1,2);
+
+select * from emps where empno in (1,2) and deptno > 2;
+
+select * from emps where (empno) in (1,2) and deptno > 2;
+
+select * from emps where ((empno) in (1,2) and deptno > 2);
+
+explain select * from emps where ((empno*2)|1,deptno) in ((empno+1,2),(empno+2,2));
+
+select * from emps where ((empno*2)|1,deptno) in ((empno+1,2),(empno+2,2));
+
+select (empno*2)|1,substr(empname,1,1) from emps;
+
+select * from emps where ((empno*2)|1,substr(empname,1,1)) in ((empno+1,'2'),(empno+2,'2'));
+
+select * from emps where ((empno*2)|1,substr(empname,1,1)) not in ((empno+1,'2'),(empno+2,'2'));
+
+select * from emps where ((empno*2)|1,substr(empname,1,1)) in ((empno+1,'2'),(empno+3,'2'));
+
+select * from emps where ((empno*2)|1,substr(empname,1,1)) not in ((empno+1,'2'),(empno+3,'2'));
+
+
+select sum(empno), empname from emps where ((empno*2)|1,substr(empname,1,1)) in ((empno+1,'2'),(empno+3,'2'))
+group by empname;
+
+select * from emps where ((empno*2)|1,substr(empname,1,1)) in ((empno+1,'2'),(empno+3,'2'))
+union
+select * from emps where (empno,deptno) in ((1,2),(3,2));
+
+drop view v;
+
+create view v as 
+select * from(
+select * from emps where ((empno*2)|1,substr(empname,1,1)) in ((empno+1,'2'),(empno+3,'2'))
+union
+select * from emps where (empno,deptno) in ((1,2),(3,2)))subq order by empno desc;
+
+select * from v;
+
+select subq.e1 from 
+(select (empno*2)|1 as e1, substr(empname,1,1) as n1 from emps)subq
+join
+(select empno as e2 from emps where ((empno*2)|1,substr(empname,1,1)) in ((empno+1,'2'),(empno+3,'2')))subq2
+on e1=e2+1;
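
The explain cases in this file are worth calling out: they should surface the
struct rewrite from the grammar change directly in the filter predicate. In the
golden output below, the first explain shows

  predicate: (struct((empno + 1),deptno)) IN (const struct(1,2), const struct(3,2))

where all-constant tuples are folded into const structs, while the later case
whose tuples reference columns keeps plain struct((empno + 1),2) calls.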

http://git-wip-us.apache.org/repos/asf/hive/blob/8bed378e/ql/src/test/queries/clientpositive/varchar_udf1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/varchar_udf1.q b/ql/src/test/queries/clientpositive/varchar_udf1.q
index 395fb12..ff40b31 100644
--- a/ql/src/test/queries/clientpositive/varchar_udf1.q
+++ b/ql/src/test/queries/clientpositive/varchar_udf1.q
@@ -75,9 +75,9 @@ select
 from varchar_udf_1 limit 1;
 
 select
-  regexp(c2, 'val'),
-  regexp(c4, 'val'),
-  regexp(c2, 'val') = regexp(c4, 'val')
+  c2 regexp 'val',
+  c4 regexp 'val',
+  (c2 regexp 'val') = (c4 regexp 'val')
 from varchar_udf_1 limit 1;
 
 select

http://git-wip-us.apache.org/repos/asf/hive/blob/8bed378e/ql/src/test/results/clientpositive/char_udf1.q.java1.7.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/char_udf1.q.java1.7.out b/ql/src/test/results/clientpositive/char_udf1.q.java1.7.out
index ced0132..bfed116 100644
--- a/ql/src/test/results/clientpositive/char_udf1.q.java1.7.out
+++ b/ql/src/test/results/clientpositive/char_udf1.q.java1.7.out
@@ -219,18 +219,24 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@char_udf_1
 #### A masked pattern was here ####
 val_238	val_238	true
-PREHOOK: query: select
-  regexp(c2, 'val'),
-  regexp(c4, 'val'),
-  regexp(c2, 'val') = regexp(c4, 'val')
+PREHOOK: query: -- In hive wiki page https://cwiki.apache.org/confluence/display/Hive/LanguageManual+UDF
+-- we only allow A regexp B, not regexp (A,B).
+
+select
+  c2 regexp 'val',
+  c4 regexp 'val',
+  (c2 regexp 'val') = (c4 regexp 'val')
 from char_udf_1 limit 1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@char_udf_1
 #### A masked pattern was here ####
-POSTHOOK: query: select
-  regexp(c2, 'val'),
-  regexp(c4, 'val'),
-  regexp(c2, 'val') = regexp(c4, 'val')
+POSTHOOK: query: -- In hive wiki page https://cwiki.apache.org/confluence/display/Hive/LanguageManual+UDF
+-- we only allow A regexp B, not regexp (A,B).
+
+select
+  c2 regexp 'val',
+  c4 regexp 'val',
+  (c2 regexp 'val') = (c4 regexp 'val')
 from char_udf_1 limit 1
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@char_udf_1

http://git-wip-us.apache.org/repos/asf/hive/blob/8bed378e/ql/src/test/results/clientpositive/keyword_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/keyword_2.q.out b/ql/src/test/results/clientpositive/keyword_2.q.out
new file mode 100644
index 0000000..3d248fb
--- /dev/null
+++ b/ql/src/test/results/clientpositive/keyword_2.q.out
@@ -0,0 +1,51 @@
+PREHOOK: query: drop table varchar_udf_1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table varchar_udf_1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table varchar_udf_1 (c1 string, c2 string, c3 varchar(10), c4 varchar(20))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@varchar_udf_1
+POSTHOOK: query: create table varchar_udf_1 (c1 string, c2 string, c3 varchar(10), c4 varchar(20))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@varchar_udf_1
+PREHOOK: query: insert overwrite table varchar_udf_1
+  select key, value, key, value from src where key = '238' limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@varchar_udf_1
+POSTHOOK: query: insert overwrite table varchar_udf_1
+  select key, value, key, value from src where key = '238' limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@varchar_udf_1
+POSTHOOK: Lineage: varchar_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: select
+  regexp(c2, 'val'),
+  regexp(c4, 'val'),
+  regexp(c2, 'val') = regexp(c4, 'val')
+from varchar_udf_1 limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_udf_1
+#### A masked pattern was here ####
+POSTHOOK: query: select
+  regexp(c2, 'val'),
+  regexp(c4, 'val'),
+  regexp(c2, 'val') = regexp(c4, 'val')
+from varchar_udf_1 limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_udf_1
+#### A masked pattern was here ####
+true	true	true
+PREHOOK: query: drop table varchar_udf_1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@varchar_udf_1
+PREHOOK: Output: default@varchar_udf_1
+POSTHOOK: query: drop table varchar_udf_1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@varchar_udf_1
+POSTHOOK: Output: default@varchar_udf_1

http://git-wip-us.apache.org/repos/asf/hive/blob/8bed378e/ql/src/test/results/clientpositive/multi_column_in.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/multi_column_in.q.out b/ql/src/test/results/clientpositive/multi_column_in.q.out
new file mode 100644
index 0000000..e0ec848
--- /dev/null
+++ b/ql/src/test/results/clientpositive/multi_column_in.q.out
@@ -0,0 +1,410 @@
+PREHOOK: query: drop table emps
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table emps
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table emps (empno int, deptno int, empname string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@emps
+POSTHOOK: query: create table emps (empno int, deptno int, empname string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@emps
+PREHOOK: query: insert into table emps values (1,2,"11"),(1,2,"11"),(3,4,"33"),(1,3,"11"),(2,5,"22"),(2,5,"22")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@emps
+POSTHOOK: query: insert into table emps values (1,2,"11"),(1,2,"11"),(3,4,"33"),(1,3,"11"),(2,5,"22"),(2,5,"22")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@emps
+POSTHOOK: Lineage: emps.deptno EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: emps.empname SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: emps.empno EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: select * from emps
+PREHOOK: type: QUERY
+PREHOOK: Input: default@emps
+#### A masked pattern was here ####
+POSTHOOK: query: select * from emps
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@emps
+#### A masked pattern was here ####
+1	2	11
+1	2	11
+3	4	33
+1	3	11
+2	5	22
+2	5	22
+PREHOOK: query: select * from emps where (int(empno+deptno/2), int(deptno/3)) in ((2,0),(3,2))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@emps
+#### A masked pattern was here ####
+POSTHOOK: query: select * from emps where (int(empno+deptno/2), int(deptno/3)) in ((2,0),(3,2))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@emps
+#### A masked pattern was here ####
+1	2	11
+1	2	11
+PREHOOK: query: select * from emps where (int(empno+deptno/2), int(deptno/3)) not in ((2,0),(3,2))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@emps
+#### A masked pattern was here ####
+POSTHOOK: query: select * from emps where (int(empno+deptno/2), int(deptno/3)) not in ((2,0),(3,2))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@emps
+#### A masked pattern was here ####
+3	4	33
+1	3	11
+2	5	22
+2	5	22
+PREHOOK: query: select * from emps where (empno,deptno) in ((1,2),(3,2))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@emps
+#### A masked pattern was here ####
+POSTHOOK: query: select * from emps where (empno,deptno) in ((1,2),(3,2))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@emps
+#### A masked pattern was here ####
+1	2	11
+1	2	11
+PREHOOK: query: select * from emps where (empno,deptno) not in ((1,2),(3,2))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@emps
+#### A masked pattern was here ####
+POSTHOOK: query: select * from emps where (empno,deptno) not in ((1,2),(3,2))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@emps
+#### A masked pattern was here ####
+3	4	33
+1	3	11
+2	5	22
+2	5	22
+PREHOOK: query: select * from emps where (empno,deptno) in ((1,2),(1,3))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@emps
+#### A masked pattern was here ####
+POSTHOOK: query: select * from emps where (empno,deptno) in ((1,2),(1,3))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@emps
+#### A masked pattern was here ####
+1	2	11
+1	2	11
+1	3	11
+PREHOOK: query: select * from emps where (empno,deptno) not in ((1,2),(1,3))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@emps
+#### A masked pattern was here ####
+POSTHOOK: query: select * from emps where (empno,deptno) not in ((1,2),(1,3))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@emps
+#### A masked pattern was here ####
+3	4	33
+2	5	22
+2	5	22
+PREHOOK: query: explain
+select * from emps where (empno+1,deptno) in ((1,2),(3,2))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select * from emps where (empno+1,deptno) in ((1,2),(3,2))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: emps
+            Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (struct((empno + 1),deptno)) IN (const struct(1,2), const struct(3,2)) (type: boolean)
+              Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: empno (type: int), deptno (type: int), empname (type: string)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain 
+select * from emps where (empno+1,deptno) not in ((1,2),(3,2))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain 
+select * from emps where (empno+1,deptno) not in ((1,2),(3,2))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: emps
+            Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (not (struct((empno + 1),deptno)) IN (const struct(1,2), const struct(3,2))) (type: boolean)
+              Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: empno (type: int), deptno (type: int), empname (type: string)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select * from emps where empno in (1,2)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@emps
+#### A masked pattern was here ####
+POSTHOOK: query: select * from emps where empno in (1,2)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@emps
+#### A masked pattern was here ####
+1	2	11
+1	2	11
+1	3	11
+2	5	22
+2	5	22
+PREHOOK: query: select * from emps where empno in (1,2) and deptno > 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@emps
+#### A masked pattern was here ####
+POSTHOOK: query: select * from emps where empno in (1,2) and deptno > 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@emps
+#### A masked pattern was here ####
+1	3	11
+2	5	22
+2	5	22
+PREHOOK: query: select * from emps where (empno) in (1,2) and deptno > 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@emps
+#### A masked pattern was here ####
+POSTHOOK: query: select * from emps where (empno) in (1,2) and deptno > 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@emps
+#### A masked pattern was here ####
+1	3	11
+2	5	22
+2	5	22
+PREHOOK: query: select * from emps where ((empno) in (1,2) and deptno > 2)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@emps
+#### A masked pattern was here ####
+POSTHOOK: query: select * from emps where ((empno) in (1,2) and deptno > 2)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@emps
+#### A masked pattern was here ####
+1	3	11
+2	5	22
+2	5	22
+PREHOOK: query: explain select * from emps where ((empno*2)|1,deptno) in ((empno+1,2),(empno+2,2))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from emps where ((empno*2)|1,deptno) in ((empno+1,2),(empno+2,2))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: emps
+            Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (struct(((empno * 2) | 1),deptno)) IN (struct((empno + 1),2), struct((empno + 2),2)) (type: boolean)
+              Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: empno (type: int), deptno (type: int), empname (type: string)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select * from emps where ((empno*2)|1,deptno) in ((empno+1,2),(empno+2,2))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@emps
+#### A masked pattern was here ####
+POSTHOOK: query: select * from emps where ((empno*2)|1,deptno) in ((empno+1,2),(empno+2,2))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@emps
+#### A masked pattern was here ####
+1	2	11
+1	2	11
+PREHOOK: query: select (empno*2)|1,substr(empname,1,1) from emps
+PREHOOK: type: QUERY
+PREHOOK: Input: default@emps
+#### A masked pattern was here ####
+POSTHOOK: query: select (empno*2)|1,substr(empname,1,1) from emps
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@emps
+#### A masked pattern was here ####
+3	1
+3	1
+7	3
+3	1
+5	2
+5	2
+PREHOOK: query: select * from emps where ((empno*2)|1,substr(empname,1,1)) in ((empno+1,'2'),(empno+2,'2'))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@emps
+#### A masked pattern was here ####
+POSTHOOK: query: select * from emps where ((empno*2)|1,substr(empname,1,1)) in ((empno+1,'2'),(empno+2,'2'))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@emps
+#### A masked pattern was here ####
+PREHOOK: query: select * from emps where ((empno*2)|1,substr(empname,1,1)) not in ((empno+1,'2'),(empno+2,'2'))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@emps
+#### A masked pattern was here ####
+POSTHOOK: query: select * from emps where ((empno*2)|1,substr(empname,1,1)) not in ((empno+1,'2'),(empno+2,'2'))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@emps
+#### A masked pattern was here ####
+1	2	11
+1	2	11
+3	4	33
+1	3	11
+2	5	22
+2	5	22
+PREHOOK: query: select * from emps where ((empno*2)|1,substr(empname,1,1)) in ((empno+1,'2'),(empno+3,'2'))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@emps
+#### A masked pattern was here ####
+POSTHOOK: query: select * from emps where ((empno*2)|1,substr(empname,1,1)) in ((empno+1,'2'),(empno+3,'2'))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@emps
+#### A masked pattern was here ####
+2	5	22
+2	5	22
+PREHOOK: query: select * from emps where ((empno*2)|1,substr(empname,1,1)) not in ((empno+1,'2'),(empno+3,'2'))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@emps
+#### A masked pattern was here ####
+POSTHOOK: query: select * from emps where ((empno*2)|1,substr(empname,1,1)) not in ((empno+1,'2'),(empno+3,'2'))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@emps
+#### A masked pattern was here ####
+1	2	11
+1	2	11
+3	4	33
+1	3	11
+PREHOOK: query: select sum(empno), empname from emps where ((empno*2)|1,substr(empname,1,1)) in ((empno+1,'2'),(empno+3,'2'))
+group by empname
+PREHOOK: type: QUERY
+PREHOOK: Input: default@emps
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(empno), empname from emps where ((empno*2)|1,substr(empname,1,1)) in ((empno+1,'2'),(empno+3,'2'))
+group by empname
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@emps
+#### A masked pattern was here ####
+4	22
+PREHOOK: query: select * from emps where ((empno*2)|1,substr(empname,1,1)) in ((empno+1,'2'),(empno+3,'2'))
+union
+select * from emps where (empno,deptno) in ((1,2),(3,2))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@emps
+#### A masked pattern was here ####
+POSTHOOK: query: select * from emps where ((empno*2)|1,substr(empname,1,1)) in ((empno+1,'2'),(empno+3,'2'))
+union
+select * from emps where (empno,deptno) in ((1,2),(3,2))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@emps
+#### A masked pattern was here ####
+1	2	11
+2	5	22
+PREHOOK: query: drop view v
+PREHOOK: type: DROPVIEW
+POSTHOOK: query: drop view v
+POSTHOOK: type: DROPVIEW
+PREHOOK: query: create view v as 
+select * from(
+select * from emps where ((empno*2)|1,substr(empname,1,1)) in ((empno+1,'2'),(empno+3,'2'))
+union
+select * from emps where (empno,deptno) in ((1,2),(3,2)))subq order by empno desc
+PREHOOK: type: CREATEVIEW
+PREHOOK: Input: default@emps
+PREHOOK: Output: database:default
+PREHOOK: Output: default@v
+POSTHOOK: query: create view v as 
+select * from(
+select * from emps where ((empno*2)|1,substr(empname,1,1)) in ((empno+1,'2'),(empno+3,'2'))
+union
+select * from emps where (empno,deptno) in ((1,2),(3,2)))subq order by empno desc
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Input: default@emps
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@v
+PREHOOK: query: select * from v
+PREHOOK: type: QUERY
+PREHOOK: Input: default@emps
+PREHOOK: Input: default@v
+#### A masked pattern was here ####
+POSTHOOK: query: select * from v
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@emps
+POSTHOOK: Input: default@v
+#### A masked pattern was here ####
+2	5	22
+1	2	11
+PREHOOK: query: select subq.e1 from 
+(select (empno*2)|1 as e1, substr(empname,1,1) as n1 from emps)subq
+join
+(select empno as e2 from emps where ((empno*2)|1,substr(empname,1,1)) in ((empno+1,'2'),(empno+3,'2')))subq2
+on e1=e2+1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@emps
+#### A masked pattern was here ####
+POSTHOOK: query: select subq.e1 from 
+(select (empno*2)|1 as e1, substr(empname,1,1) as n1 from emps)subq
+join
+(select empno as e2 from emps where ((empno*2)|1,substr(empname,1,1)) in ((empno+1,'2'),(empno+3,'2')))subq2
+on e1=e2+1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@emps
+#### A masked pattern was here ####
+3
+3
+3
+3
+3
+3

http://git-wip-us.apache.org/repos/asf/hive/blob/8bed378e/ql/src/test/results/clientpositive/varchar_udf1.q.java1.7.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/varchar_udf1.q.java1.7.out b/ql/src/test/results/clientpositive/varchar_udf1.q.java1.7.out
index 96ba06e..853bc4a 100644
--- a/ql/src/test/results/clientpositive/varchar_udf1.q.java1.7.out
+++ b/ql/src/test/results/clientpositive/varchar_udf1.q.java1.7.out
@@ -220,17 +220,17 @@ POSTHOOK: Input: default@varchar_udf_1
 #### A masked pattern was here ####
 val_238	val_238	true
 PREHOOK: query: select
-  regexp(c2, 'val'),
-  regexp(c4, 'val'),
-  regexp(c2, 'val') = regexp(c4, 'val')
+  c2 regexp 'val',
+  c4 regexp 'val',
+  (c2 regexp 'val') = (c4 regexp 'val')
 from varchar_udf_1 limit 1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@varchar_udf_1
 #### A masked pattern was here ####
 POSTHOOK: query: select
-  regexp(c2, 'val'),
-  regexp(c4, 'val'),
-  regexp(c2, 'val') = regexp(c4, 'val')
+  c2 regexp 'val',
+  c4 regexp 'val',
+  (c2 regexp 'val') = (c4 regexp 'val')
 from varchar_udf_1 limit 1
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@varchar_udf_1


[24/50] [abbrv] hive git commit: Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/hive

Posted by xu...@apache.org.
Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/hive


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9763c9dd
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9763c9dd
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9763c9dd

Branch: refs/heads/beeline-cli
Commit: 9763c9dd31bd5939db3ca50e75bb97955b411f6d
Parents: da95f63 8f930e5
Author: Dmitry Tolpeko <dm...@gmail.com>
Authored: Tue Sep 1 05:00:33 2015 -0700
Committer: Dmitry Tolpeko <dm...@gmail.com>
Committed: Tue Sep 1 05:00:33 2015 -0700

----------------------------------------------------------------------
 .../java/org/apache/hive/beeline/BeeLine.java   |   13 +-
 .../org/apache/hive/beeline/BeeLineOpts.java    |   12 +-
 .../org/apache/hive/beeline/HiveSchemaTool.java |   14 +-
 .../org/apache/hadoop/hive/conf/HiveConf.java   |   76 +-
 data/conf/tez/hive-site.xml                     |    9 +
 .../hive/hbase/HiveHBaseInputFormatUtil.java    |   50 +-
 .../queries/positive/hbase_null_first_col.q     |   22 +
 .../results/positive/hbase_null_first_col.q.out |  109 ++
 .../vectorization/VectorizationBench.java       |   93 +
 .../org/apache/hive/jdbc/HiveConnection.java    |   30 +-
 jdbc/src/java/org/apache/hive/jdbc/Utils.java   |  117 +-
 .../hive/jdbc/ZooKeeperHiveClientHelper.java    |  104 +-
 .../hadoop/hive/metastore/HiveAlterHandler.java |    2 +-
 .../hadoop/hive/metastore/HiveMetaStore.java    |    6 +-
 .../hive/metastore/MetaStoreDirectSql.java      |   40 +-
 .../hadoop/hive/metastore/MetaStoreUtils.java   |   17 +-
 .../hadoop/hive/ql/exec/FunctionRegistry.java   |   61 +-
 .../hadoop/hive/ql/exec/ScriptOperator.java     |   85 +-
 .../hive/ql/exec/tez/TezSessionState.java       |    2 +
 .../ql/exec/vector/VectorHashKeyWrapper.java    |    2 +-
 .../ql/exec/vector/VectorizationContext.java    |    7 +-
 .../expressions/CastStringGroupToString.java    |   40 +
 .../ql/exec/vector/expressions/ColAndCol.java   |   34 +-
 .../ql/exec/vector/expressions/ColOrCol.java    |   42 +-
 .../exec/vector/expressions/CuckooSetBytes.java |    2 +-
 .../hive/ql/exec/vector/expressions/NotCol.java |   14 +-
 ...VectorMapJoinInnerBigOnlyStringOperator.java |    4 +-
 .../VectorMapJoinInnerStringOperator.java       |    4 +-
 .../VectorMapJoinLeftSemiStringOperator.java    |    4 +-
 .../VectorMapJoinOuterStringOperator.java       |    4 +-
 .../apache/hadoop/hive/ql/io/orc/FileDump.java  |   48 +-
 .../hadoop/hive/ql/io/orc/OrcInputFormat.java   |   18 +-
 .../apache/hadoop/hive/ql/io/orc/Reader.java    |    6 +
 .../hadoop/hive/ql/io/orc/ReaderImpl.java       |  281 ++-
 .../hadoop/hive/ql/io/orc/RecordReaderImpl.java |    2 -
 .../hive/ql/io/orc/TreeReaderFactory.java       |   18 +-
 .../hive/ql/io/parquet/LeafFilterFactory.java   |   43 +-
 .../read/ParquetFilterPredicateConverter.java   |   35 +-
 .../hive/ql/io/sarg/ConvertAstToSearchArg.java  |    3 -
 .../hadoop/hive/ql/lib/DefaultGraphWalker.java  |   80 +-
 .../hadoop/hive/ql/lib/ForwardWalker.java       |   33 +-
 .../ql/metadata/SessionHiveMetaStoreClient.java |    2 +-
 .../hadoop/hive/ql/optimizer/ColumnPruner.java  |    6 +-
 .../hive/ql/optimizer/ConstantPropagate.java    |   10 +-
 .../optimizer/ConstantPropagateProcFactory.java |  100 +-
 .../hadoop/hive/ql/optimizer/IndexUtils.java    |   13 +-
 .../hadoop/hive/ql/optimizer/Optimizer.java     |   17 +-
 .../hive/ql/optimizer/PointLookupOptimizer.java |  102 +-
 .../calcite/translator/HiveOpConverter.java     |   53 +-
 .../ql/parse/ColumnStatsSemanticAnalyzer.java   |    6 +-
 .../hadoop/hive/ql/parse/LeadLagInfo.java       |    4 +-
 .../hive/ql/parse/LoadSemanticAnalyzer.java     |   38 +-
 .../hadoop/hive/ql/plan/ExprNodeDesc.java       |   23 +-
 .../hadoop/hive/ql/ppd/ExprWalkerInfo.java      |  136 +-
 .../hive/ql/ppd/ExprWalkerProcFactory.java      |   92 +-
 .../hadoop/hive/ql/ppd/OpProcFactory.java       |   11 +-
 .../hadoop/hive/ql/udf/generic/GenericUDF.java  |   14 +-
 .../ql/udf/generic/GenericUDFBaseNumeric.java   |    4 +-
 .../hive/ql/udf/generic/GenericUDFBasePad.java  |    8 +-
 .../hive/ql/udf/generic/GenericUDFNvl.java      |    2 +-
 .../hive/ql/udf/generic/GenericUDFOPAnd.java    |    4 +
 .../hive/ql/udf/generic/GenericUDFOPEqual.java  |    4 +
 .../generic/GenericUDFOPEqualOrGreaterThan.java |    4 +
 .../generic/GenericUDFOPEqualOrLessThan.java    |    4 +
 .../ql/udf/generic/GenericUDFOPGreaterThan.java |    4 +
 .../ql/udf/generic/GenericUDFOPLessThan.java    |    4 +
 .../ql/udf/generic/GenericUDFOPNotEqual.java    |    5 +
 .../ql/udf/generic/GenericUDFOPNotNull.java     |    4 +
 .../hive/ql/udf/generic/GenericUDFOPNull.java   |    4 +
 .../hive/ql/udf/generic/GenericUDFOPOr.java     |    4 +
 ql/src/main/resources/tez-container-log4j2.xml  |   49 +
 .../apache/hadoop/hive/ql/TestTxnCommands2.java |   85 +-
 .../hadoop/hive/ql/exec/TestOperators.java      |   16 +
 .../hive/ql/io/orc/TestInputOutputFormat.java   |    4 +-
 .../hadoop/hive/ql/io/orc/TestOrcFile.java      |   10 +-
 .../hive/ql/io/orc/TestRecordReaderImpl.java    |   42 +-
 .../parquet/TestParquetRecordReaderWrapper.java |   50 +-
 .../read/TestParquetFilterPredicate.java        |   27 +-
 .../ql/io/sarg/TestConvertAstToSearchArg.java   |  128 +-
 .../hive/ql/io/sarg/TestSearchArgumentImpl.java |   22 +-
 .../queries/clientnegative/load_orc_negative3.q |    6 +
 .../queries/clientnegative/nvl_mismatch_type.q  |   20 +
 .../clientpositive/cbo_rp_outer_join_ppr.q      |   40 +
 .../clientpositive/columnstats_quoting.q        |    8 +
 .../queries/clientpositive/flatten_and_or.q     |    4 +-
 .../queries/clientpositive/folder_predicate.q   |   32 +
 .../test/queries/clientpositive/load_orc_part.q |    4 +
 .../clientpositive/parquet_ppd_partition.q      |    9 +
 .../clientpositive/parquet_predicate_pushdown.q |  297 +++-
 .../test/queries/clientpositive/pointlookup.q   |   59 +
 .../test/queries/clientpositive/pointlookup2.q  |   51 +
 .../queries/clientpositive/selectDistinctStar.q |    2 +
 ql/src/test/queries/clientpositive/structin.q   |    6 +
 .../clientpositive/unionall_unbalancedppd.q     |    3 +
 .../clientpositive/vector_cast_constant.q       |    4 +-
 .../queries/clientpositive/vectorized_casts.q   |    6 +
 .../clientnegative/char_pad_convert_fail0.q.out |    2 +-
 .../clientnegative/char_pad_convert_fail1.q.out |    2 +-
 .../clientnegative/char_pad_convert_fail3.q.out |    2 +-
 .../clientnegative/load_orc_negative3.q.out     |   25 +
 .../clientnegative/nvl_mismatch_type.q.out      |   43 +
 .../alter_partition_coltype.q.out               |   12 +-
 .../clientpositive/annotate_stats_filter.q.out  |   18 +-
 .../cbo_rp_outer_join_ppr.q.java1.7.out         |  855 +++++++++
 .../clientpositive/columnstats_quoting.q.out    |  114 ++
 .../results/clientpositive/decimal_udf.q.out    |   18 +-
 .../clientpositive/filter_cond_pushdown.q.out   |   32 +-
 .../clientpositive/filter_join_breaktask.q.out  |   12 +-
 .../results/clientpositive/flatten_and_or.q.out |    8 +-
 .../test/results/clientpositive/fold_when.q.out |   16 +-
 .../clientpositive/folder_predicate.q.out       |  368 ++++
 .../clientpositive/input_testxpath2.q.out       |    2 +-
 .../list_bucket_query_oneskew_3.q.out           |    6 +-
 .../results/clientpositive/load_orc_part.q.out  |   18 +
 .../clientpositive/parquet_ppd_partition.q.out  |   47 +
 .../parquet_predicate_pushdown.q.out            | 1309 +++++++++++++-
 ql/src/test/results/clientpositive/pcr.q.out    |   12 +-
 .../results/clientpositive/pointlookup.q.out    |  198 +++
 .../results/clientpositive/pointlookup2.q.out   | 1647 ++++++++++++++++++
 .../results/clientpositive/ppd_transform.q.out  |   12 +-
 .../clientpositive/rand_partitionpruner3.q.out  |   12 +-
 .../clientpositive/selectDistinctStar.q.out     |    8 +-
 .../clientpositive/select_unquote_not.q.out     |    8 +-
 .../spark/filter_join_breaktask.q.out           |   12 +-
 .../test/results/clientpositive/spark/pcr.q.out |   12 +-
 .../clientpositive/spark/ppd_transform.q.out    |   12 +-
 .../spark/vector_cast_constant.q.java1.7.out    |   41 +-
 .../clientpositive/spark/vectorized_case.q.out  |    2 +-
 .../test/results/clientpositive/structin.q.out  |   44 +
 .../clientpositive/tez/explainuser_1.q.out      |    2 +-
 .../tez/filter_join_breaktask.q.out             |   12 +-
 .../clientpositive/tez/selectDistinctStar.q.out |    8 +-
 .../tez/vector_cast_constant.q.java1.7.out      |   21 +-
 .../tez/vector_char_mapjoin1.q.out              |    1 +
 .../clientpositive/tez/vector_decimal_udf.q.out |   24 +-
 .../tez/vector_varchar_mapjoin1.q.out           |    1 +
 .../clientpositive/tez/vectorized_case.q.out    |    2 +-
 .../clientpositive/tez/vectorized_casts.q.out   |   99 +-
 .../clientpositive/udf_isnull_isnotnull.q.out   |    2 +-
 .../test/results/clientpositive/udf_size.q.out  |    2 +-
 .../clientpositive/unionall_unbalancedppd.q.out |   14 +-
 .../vector_cast_constant.q.java1.7.out          |   32 +-
 .../clientpositive/vector_char_mapjoin1.q.out   |    1 +
 .../clientpositive/vector_decimal_udf.q.out     |   24 +-
 .../vector_varchar_mapjoin1.q.out               |    1 +
 .../clientpositive/vectorized_case.q.out        |    9 +-
 .../clientpositive/vectorized_casts.q.out       |   66 +-
 .../serde2/avro/AvroLazyObjectInspector.java    |   19 +-
 .../hadoop/hive/serde2/avro/InstanceCache.java  |   17 +-
 .../objectinspector/ObjectInspectorFactory.java |   61 +-
 .../ReflectionStructObjectInspector.java        |   60 +-
 .../StandardStructObjectInspector.java          |    7 +-
 .../ThriftUnionObjectInspector.java             |   28 +-
 .../avro/TestAvroLazyObjectInspector.java       |   59 +
 .../TestReflectionObjectInspectors.java         |   71 +-
 .../apache/hive/service/server/HiveServer2.java |   74 +-
 shims/0.20S/pom.xml                             |    8 +-
 .../hadoop/hive/shims/Hadoop20SShims.java       |   35 +-
 shims/0.23/pom.xml                              |   21 +-
 .../apache/hadoop/hive/shims/Hadoop23Shims.java |   79 +-
 .../apache/hadoop/hive/shims/HadoopShims.java   |   22 +
 .../hadoop/hive/shims/HadoopShimsSecure.java    |   32 +
 .../hadoop/hive/ql/io/sarg/PredicateLeaf.java   |    3 +-
 163 files changed, 7857 insertions(+), 1241 deletions(-)
----------------------------------------------------------------------



[46/50] [abbrv] hive git commit: HIVE-11646: CBO: Calcite Operator To Hive Operator (Calcite Return Path): fix multiple window spec for PTF operator (Pengcheng Xiong, reviewed by Jesus Camacho Rodriguez)

Posted by xu...@apache.org.
HIVE-11646: CBO: Calcite Operator To Hive Operator (Calcite Return Path): fix multiple window spec for PTF operator (Pengcheng Xiong, reviewed by Jesus Camacho Rodriguez)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2d3316b9
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2d3316b9
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2d3316b9

Branch: refs/heads/beeline-cli
Commit: 2d3316b9154cb175b6db15f2ec4551d2d54397d6
Parents: 7aec272
Author: Pengcheng Xiong <px...@hortonworks.com>
Authored: Tue Sep 8 11:33:44 2015 +0100
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Tue Sep 8 11:33:44 2015 +0100

----------------------------------------------------------------------
 .../calcite/translator/ExprNodeConverter.java   |   25 +-
 .../calcite/translator/HiveOpConverter.java     |    5 +-
 .../queries/clientpositive/cbo_rp_windowing_2.q |  439 ++++
 .../clientpositive/cbo_rp_windowing_2.q.out     | 2338 ++++++++++++++++++
 4 files changed, 2798 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/2d3316b9/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java
index ec22f1a..42f1ab6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.translator;
 import java.math.BigDecimal;
 import java.sql.Date;
 import java.sql.Timestamp;
+import java.util.ArrayList;
 import java.util.Calendar;
 import java.util.LinkedList;
 import java.util.List;
@@ -80,11 +81,11 @@ public class ExprNodeConverter extends RexVisitorImpl<ExprNodeDesc> {
   private final String             tabAlias;
   private final String             columnAlias;
   private final RelDataType        inputRowType;
-  private final RelDataType        outputRowType;
   private final ImmutableSet<Integer>       inputVCols;
-  private WindowFunctionSpec wfs;
+  private List<WindowFunctionSpec> windowFunctionSpecs = new ArrayList<>();
   private final RelDataTypeFactory dTFactory;
   protected final Log LOG = LogFactory.getLog(this.getClass().getName());
+  private static long uniqueCounter = 0;
 
   public ExprNodeConverter(String tabAlias, RelDataType inputRowType,
       Set<Integer> vCols, RelDataTypeFactory dTFactory) {
@@ -97,13 +98,12 @@ public class ExprNodeConverter extends RexVisitorImpl<ExprNodeDesc> {
     this.tabAlias = tabAlias;
     this.columnAlias = columnAlias;
     this.inputRowType = inputRowType;
-    this.outputRowType = outputRowType;
     this.inputVCols = ImmutableSet.copyOf(inputVCols);
     this.dTFactory = dTFactory;
   }
 
-  public WindowFunctionSpec getWindowFunctionSpec() {
-    return this.wfs;
+  public List<WindowFunctionSpec> getWindowFunctionSpec() {
+    return this.windowFunctionSpecs;
   }
 
   @Override
@@ -235,7 +235,7 @@ public class ExprNodeConverter extends RexVisitorImpl<ExprNodeDesc> {
     final WindowFrameSpec windowFrameSpec = getWindowRange(window);
     windowSpec.setWindowFrame(windowFrameSpec);
 
-    wfs = new WindowFunctionSpec();
+    WindowFunctionSpec wfs = new WindowFunctionSpec();
     wfs.setWindowSpec(windowSpec);
     final Schema schema = new Schema(tabAlias, inputRowType.getFieldList());
     final ASTNode wUDAFAst = new ASTConverter.RexVisitor(schema).visitOver(over);
@@ -246,10 +246,15 @@ public class ExprNodeConverter extends RexVisitorImpl<ExprNodeDesc> {
       ASTNode child = (ASTNode) wUDAFAst.getChild(i);
       wfs.addArg(child);
     }
+    if (wUDAFAst.getText().equals("TOK_FUNCTIONSTAR")) {
+      wfs.setStar(true);
+    }
+    String columnAlias = getWindowColumnAlias();
     wfs.setAlias(columnAlias);
 
-    RelDataTypeField f = outputRowType.getField(columnAlias, false, false);
-    return new ExprNodeColumnDesc(TypeConverter.convert(f.getType()), columnAlias, tabAlias,
+    this.windowFunctionSpecs.add(wfs);
+
+    return new ExprNodeColumnDesc(TypeConverter.convert(over.getType()), columnAlias, tabAlias,
             false);
   }
 
@@ -343,4 +348,8 @@ public class ExprNodeConverter extends RexVisitorImpl<ExprNodeDesc> {
     return boundarySpec;
   }
 
+  private String getWindowColumnAlias() {
+    return "$win$_col_" + (uniqueCounter++);
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/2d3316b9/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
index 9391952..3f66893 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
@@ -81,6 +81,7 @@ import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.parse.UnparseTranslator;
 import org.apache.hadoop.hive.ql.parse.WindowingComponentizer;
 import org.apache.hadoop.hive.ql.parse.WindowingSpec;
+import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFunctionSpec;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
@@ -284,7 +285,9 @@ public class HiveOpConverter {
       exprCols.add(exprCol);
       //TODO: Cols that come through PTF should it retain (VirtualColumness)?
       if (converter.getWindowFunctionSpec() != null) {
-        windowingSpec.addWindowFunction(converter.getWindowFunctionSpec());
+        for (WindowFunctionSpec wfs : converter.getWindowFunctionSpec()) {
+          windowingSpec.addWindowFunction(wfs);
+        }
       }
     }
     if (windowingSpec.getWindowExpressions() != null
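
The shape of the fix: ExprNodeConverter previously kept a single
WindowFunctionSpec field that was overwritten on every visitOver call, so a
projection with several OVER clauses could lose all but the last spec;
collecting the specs in a list and draining it here lets each windowed column
reach the PTF operator, with a generated alias of the form $win$_col_<n>
keeping the columns distinct. The new q-file below exercises this, e.g.
(abbreviated from its testMultipleWindows case):

  select p_mfgr, p_name,
    rank() over(distribute by p_mfgr sort by p_name) as r,
    sum(p_size) over (distribute by p_mfgr sort by p_name
                      range between unbounded preceding and current row) as s1
  from part;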

http://git-wip-us.apache.org/repos/asf/hive/blob/2d3316b9/ql/src/test/queries/clientpositive/cbo_rp_windowing_2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/cbo_rp_windowing_2.q b/ql/src/test/queries/clientpositive/cbo_rp_windowing_2.q
new file mode 100644
index 0000000..d02c8be
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/cbo_rp_windowing_2.q
@@ -0,0 +1,439 @@
+set hive.cbo.enable=true;
+set hive.cbo.returnpath.hiveop=true;
+set hive.exec.check.crossproducts=false;
+set mapred.reduce.tasks=4;
+-- SORT_QUERY_RESULTS
+
+-- 1. testWindowing
+select p_mfgr, p_name, p_size,
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s1
+from part
+;
+
+-- 2. testGroupByWithPartitioning
+select p_mfgr, p_name, p_size, 
+min(p_retailprice),
+rank() over(distribute by p_mfgr sort by p_name)as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz
+from part
+group by p_mfgr, p_name, p_size
+;
+       
+-- 3. testGroupByHavingWithSWQ
+select p_mfgr, p_name, p_size, min(p_retailprice),
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz
+from part
+group by p_mfgr, p_name, p_size
+having p_size > 0
+;
+
+-- 4. testCount
+select p_mfgr, p_name, 
+count(p_size) over(distribute by p_mfgr sort by p_name) as cd 
+from part 
+;
+
+-- 5. testCountWithWindowingUDAF
+select p_mfgr, p_name, 
+rank() over(distribute by p_mfgr sort by p_name) as r, 
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr, 
+count(p_size) over(distribute by p_mfgr sort by p_name) as cd, 
+p_retailprice, sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s1, 
+p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz 
+from part 
+;
+
+-- 6. testCountInSubQ
+select sub1.r, sub1.dr, sub1.cd, sub1.s1, sub1.deltaSz 
+from (select p_mfgr, p_name, 
+rank() over(distribute by p_mfgr sort by p_name) as r, 
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr, 
+count(p_size) over(distribute by p_mfgr sort by p_name) as cd, 
+p_retailprice, sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s1, 
+p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz 
+from part 
+) sub1;
+
+-- 7. testJoinWithWindowingAndPTF
+select abc.p_mfgr, abc.p_name, 
+rank() over(distribute by abc.p_mfgr sort by abc.p_name) as r, 
+dense_rank() over(distribute by abc.p_mfgr sort by abc.p_name) as dr, 
+abc.p_retailprice, sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row) as s1, 
+abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over(distribute by abc.p_mfgr sort by abc.p_name) as deltaSz 
+from noop(on part 
+partition by p_mfgr 
+order by p_name 
+) abc join part p1 on abc.p_partkey = p1.p_partkey 
+;
+
+-- 8. testMixedCaseAlias
+select p_mfgr, p_name, p_size, 
+rank() over(distribute by p_mfgr sort by p_name, p_size desc) as R
+from part 
+;
+
+-- 9. testHavingWithWindowingNoGBY
+select p_mfgr, p_name, p_size, 
+rank() over(distribute by p_mfgr sort by p_name) as r, 
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr, 
+sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row)  as s1
+from part  
+; 
+
+-- 10. testHavingWithWindowingCondRankNoGBY
+select p_mfgr, p_name, p_size, 
+rank() over(distribute by p_mfgr sort by p_name) as r, 
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr, 
+sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s1 
+from part 
+;
+
+-- 11. testFirstLast   
+select  p_mfgr,p_name, p_size, 
+sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2, 
+first_value(p_size) over w1  as f, 
+last_value(p_size, false) over w1  as l 
+from part 
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following);
+
+-- 12. testFirstLastWithWhere
+select  p_mfgr,p_name, p_size, 
+rank() over(distribute by p_mfgr sort by p_name) as r, 
+sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2, 
+first_value(p_size) over w1 as f,  
+last_value(p_size, false) over w1 as l 
+from part 
+where p_mfgr = 'Manufacturer#3'  
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following);
+
+-- 13. testSumWindow
+select  p_mfgr,p_name, p_size,  
+sum(p_size) over w1 as s1, 
+sum(p_size) over (distribute by p_mfgr  sort by p_name rows between current row and current row)  as s2 
+from part 
+window w1 as (distribute by p_mfgr  sort by p_name rows between 2 preceding and 2 following);
+
+-- 14. testNoSortClause
+select  p_mfgr,p_name, p_size, 
+rank() over(distribute by p_mfgr sort by p_name) as r, dense_rank() over(distribute by p_mfgr sort by p_name) as dr 
+from part  
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following);
+
+-- 15. testExpressions
+select  p_mfgr,p_name, p_size,  
+rank() over(distribute by p_mfgr sort by p_name) as r,  
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr, 
+cume_dist() over(distribute by p_mfgr sort by p_name) as cud, 
+percent_rank() over(distribute by p_mfgr sort by p_name) as pr, 
+ntile(3) over(distribute by p_mfgr sort by p_name) as nt, 
+count(p_size) over(distribute by p_mfgr sort by p_name) as ca, 
+avg(p_size) over(distribute by p_mfgr sort by p_name) as avg, 
+stddev(p_size) over(distribute by p_mfgr sort by p_name) as st, 
+first_value(p_size % 5) over(distribute by p_mfgr sort by p_name) as fv, 
+last_value(p_size) over(distribute by p_mfgr sort by p_name) as lv, 
+first_value(p_size) over w1  as fvW1
+from part 
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following);
+
+-- 16. testMultipleWindows
+select  p_mfgr,p_name, p_size,  
+  rank() over(distribute by p_mfgr sort by p_name) as r, 
+  dense_rank() over(distribute by p_mfgr sort by p_name) as dr, 
+cume_dist() over(distribute by p_mfgr sort by p_name) as cud, 
+sum(p_size) over (distribute by p_mfgr sort by p_name range between unbounded preceding and current row) as s1, 
+sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row) as s2, 
+first_value(p_size) over w1  as fv1
+from part 
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following);
+
+-- 17. testCountStar
+select  p_mfgr,p_name, p_size,
+count(*) over(distribute by p_mfgr sort by p_name ) as c, 
+count(p_size) over(distribute by p_mfgr sort by p_name) as ca, 
+first_value(p_size) over w1  as fvW1
+from part 
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following);
+
+-- 18. testUDAFs
+select  p_mfgr,p_name, p_size, 
+sum(p_retailprice) over w1 as s, 
+min(p_retailprice) over w1 as mi,
+max(p_retailprice) over w1 as ma,
+avg(p_retailprice) over w1 as ag
+from part
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following);
+
+-- 19. testUDAFsWithGBY
+select  p_mfgr,p_name, p_size, p_retailprice, 
+sum(p_retailprice) over w1 as s, 
+min(p_retailprice) as mi,
+max(p_retailprice) as ma,
+avg(p_retailprice) over w1 as ag
+from part
+group by p_mfgr,p_name, p_size, p_retailprice
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following);
+
+-- 20. testSTATs
+select  p_mfgr,p_name, p_size, 
+stddev(p_retailprice) over w1 as sdev, 
+stddev_pop(p_retailprice) over w1 as sdev_pop, 
+collect_set(p_size) over w1 as uniq_size, 
+variance(p_retailprice) over w1 as var,
+corr(p_size, p_retailprice) over w1 as cor,
+covar_pop(p_size, p_retailprice) over w1 as covarp
+from part
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following);
+
+-- 21. testDISTs
+select  p_mfgr,p_name, p_size, 
+histogram_numeric(p_retailprice, 5) over w1 as hist, 
+percentile(p_partkey, 0.5) over w1 as per,
+row_number() over(distribute by p_mfgr sort by p_mfgr, p_name) as rn
+from part
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following);
+
+-- 22. testViewAsTableInputWithWindowing
+create view IF NOT EXISTS mfgr_price_view as 
+select p_mfgr, p_brand, 
+round(sum(p_retailprice),2) as s 
+from part 
+group by p_mfgr, p_brand;
+        
+select * 
+from (
+select p_mfgr, p_brand, s, 
+round(sum(s) over w1, 2) as s1
+from mfgr_price_view 
+window w1 as (distribute by p_mfgr sort by p_mfgr )
+) sq
+order by p_mfgr, p_brand;
+
+select p_mfgr, p_brand, s, 
+round(sum(s) over w1, 2) as s1
+from mfgr_price_view 
+window w1 as (distribute by p_mfgr sort by p_brand rows between 2 preceding and current row);
+
+-- 23. testCreateViewWithWindowingQuery
+create view IF NOT EXISTS mfgr_brand_price_view as 
+select p_mfgr, p_brand, 
+sum(p_retailprice) over w1  as s
+from part 
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and current row);
+        
+select * from mfgr_brand_price_view;        
+        
+-- 24. testLateralViews
+select p_mfgr, p_name, 
+lv_col, p_size, sum(p_size) over w1   as s
+from (select p_mfgr, p_name, p_size, array(1,2,3) arr from part) p 
+lateral view explode(arr) part_lv as lv_col
+window w1 as (distribute by p_mfgr sort by p_size, lv_col rows between 2 preceding and current row);        
+
+-- 25. testMultipleInserts3SWQs
+CREATE TABLE part_1( 
+p_mfgr STRING, 
+p_name STRING, 
+p_size INT, 
+r INT, 
+dr INT, 
+s DOUBLE);
+
+CREATE TABLE part_2( 
+p_mfgr STRING, 
+p_name STRING, 
+p_size INT, 
+r INT, 
+dr INT, 
+cud INT,  
+s2 DOUBLE, 
+fv1 INT);
+
+CREATE TABLE part_3( 
+p_mfgr STRING, 
+p_name STRING, 
+p_size INT, 
+c INT, 
+ca INT, 
+fv INT);
+
+from part 
+INSERT OVERWRITE TABLE part_1 
+select p_mfgr, p_name, p_size, 
+rank() over(distribute by p_mfgr sort by p_name ) as r, 
+dense_rank() over(distribute by p_mfgr sort by p_name ) as dr, 
+sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row)  as s
+INSERT OVERWRITE TABLE part_2 
+select  p_mfgr,p_name, p_size,  
+rank() over(distribute by p_mfgr sort by p_name) as r, 
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr, 
+cume_dist() over(distribute by p_mfgr sort by p_name) as cud, 
+round(sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row),1) as s2, 
+first_value(p_size) over w1  as fv1
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) 
+INSERT OVERWRITE TABLE part_3 
+select  p_mfgr,p_name, p_size,  
+count(*) over(distribute by p_mfgr sort by p_name) as c, 
+count(p_size) over(distribute by p_mfgr sort by p_name) as ca, 
+first_value(p_size) over w1  as fv
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following);
+
+select * from part_1;
+
+select * from part_2;
+
+select * from part_3;
+
+-- 26. testGroupByHavingWithSWQAndAlias
+select p_mfgr, p_name, p_size, min(p_retailprice) as mi,
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz
+from part
+group by p_mfgr, p_name, p_size
+having p_size > 0
+;
+
+-- 27. testMultipleRangeWindows
+select  p_mfgr,p_name, p_size, 
+sum(p_size) over (distribute by p_mfgr sort by p_size range between 10 preceding and current row) as s2, 
+sum(p_size) over (distribute by p_mfgr sort by p_size range between current row and 10 following )  as s1
+from part  
+window w1 as (rows between 2 preceding and 2 following);
+
+-- 28. testPartOrderInUDAFInvoke
+select p_mfgr, p_name, p_size,
+sum(p_size) over (partition by p_mfgr  order by p_name  rows between 2 preceding and 2 following) as s
+from part;
+
+-- 29. testPartOrderInWdwDef
+select p_mfgr, p_name, p_size,
+sum(p_size) over w1 as s
+from part
+window w1 as (partition by p_mfgr  order by p_name  rows between 2 preceding and 2 following);
+
+-- 30. testDefaultPartitioningSpecRules
+select p_mfgr, p_name, p_size,
+sum(p_size) over w1 as s,
+sum(p_size) over w2 as s2
+from part
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following),
+       w2 as (partition by p_mfgr order by p_name);
+       
+-- 31. testWindowCrossReference
+select p_mfgr, p_name, p_size, 
+sum(p_size) over w1 as s1, 
+sum(p_size) over w2 as s2
+from part 
+window w1 as (partition by p_mfgr order by p_name range between 2 preceding and 2 following), 
+       w2 as w1;
+       
+               
+-- 32. testWindowInheritance
+select p_mfgr, p_name, p_size, 
+sum(p_size) over w1 as s1, 
+sum(p_size) over w2 as s2 
+from part 
+window w1 as (partition by p_mfgr order by p_name range between 2 preceding and 2 following), 
+       w2 as (w1 rows between unbounded preceding and current row); 
+
+        
+-- 33. testWindowForwardReference
+select p_mfgr, p_name, p_size, 
+sum(p_size) over w1 as s1, 
+sum(p_size) over w2 as s2,
+sum(p_size) over w3 as s3
+from part 
+window w1 as (distribute by p_mfgr sort by p_name range between 2 preceding and 2 following), 
+       w2 as w3,
+       w3 as (distribute by p_mfgr sort by p_name range between unbounded preceding and current row); 
+
+
+-- 34. testWindowDefinitionPropagation
+select p_mfgr, p_name, p_size, 
+sum(p_size) over w1 as s1, 
+sum(p_size) over w2 as s2,
+sum(p_size) over (w3 rows between 2 preceding and 2 following)  as s3
+from part 
+window w1 as (distribute by p_mfgr sort by p_name range between 2 preceding and 2 following), 
+       w2 as w3,
+       w3 as (distribute by p_mfgr sort by p_name range between unbounded preceding and current row); 
+
+-- 35. testDistinctWithWindowing
+select DISTINCT p_mfgr, p_name, p_size,
+sum(p_size) over w1 as s
+from part
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following);
+
+-- 36. testRankWithPartitioning
+select p_mfgr, p_name, p_size, 
+rank() over (partition by p_mfgr order by p_name )  as r
+from part;    
+
+-- 37. testPartitioningVariousForms
+select p_mfgr,
+round(sum(p_retailprice) over (partition by p_mfgr order by p_mfgr),2) as s1,
+min(p_retailprice) over (partition by p_mfgr) as s2,
+max(p_retailprice) over (distribute by p_mfgr sort by p_mfgr) as s3,
+round(avg(p_retailprice) over (distribute by p_mfgr),2) as s4,
+count(p_retailprice) over (cluster by p_mfgr ) as s5
+from part;
+
+-- 38. testPartitioningVariousForms2
+select p_mfgr, p_name, p_size,
+sum(p_retailprice) over (partition by p_mfgr, p_name order by p_mfgr, p_name rows between unbounded preceding and current row) as s1,
+min(p_retailprice) over (distribute by p_mfgr, p_name sort by p_mfgr, p_name rows between unbounded preceding and current row) as s2,
+max(p_retailprice) over (partition by p_mfgr, p_name order by p_name) as s3
+from part;
+
+-- 39. testUDFOnOrderCols
+select p_mfgr, p_type, substr(p_type, 2) as short_ptype,
+rank() over (partition by p_mfgr order by substr(p_type, 2))  as r
+from part;
+
+-- 40. testNoBetweenForRows
+select p_mfgr, p_name, p_size,
+    sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows unbounded preceding) as s1
+     from part;
+
+-- 41. testNoBetweenForRange
+select p_mfgr, p_name, p_size,
+    sum(p_retailprice) over (distribute by p_mfgr sort by p_size range unbounded preceding) as s1
+     from part;
+
+-- 42. testUnboundedFollowingForRows
+select p_mfgr, p_name, p_size,
+    sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between current row and unbounded following) as s1
+    from part;
+
+-- 43. testUnboundedFollowingForRange
+select p_mfgr, p_name, p_size,
+    sum(p_retailprice) over (distribute by p_mfgr sort by p_size range between current row and unbounded following) as s1
+    from part;
+        
+-- 44. testOverNoPartitionSingleAggregate
+select p_name, p_retailprice,
+round(avg(p_retailprice) over(),2)
+from part
+order by p_name;
+
+-- 45. empty partition test
+select p_mfgr, 
+  sum(p_size) over (partition by p_mfgr order by p_size rows between unbounded preceding and current row) 
+from part 
+where p_mfgr = 'Manufacturer#6'
+;
+
+-- 46. window sz is same as partition sz
+select p_retailprice, avg(p_retailprice) over (partition by p_mfgr order by p_name rows between current row and 6 following), 
+sum(p_retailprice) over (partition by p_mfgr order by p_name rows between current row and 6 following) 
+from part 
+where p_mfgr='Manufacturer#1';
+
+-- 47. empty partition
+select sum(p_size) over (partition by p_mfgr )
+from part where p_mfgr = 'm1';


[08/50] [abbrv] hive git commit: HIVE-11623: CBO: Calcite Operator To Hive Operator (Calcite Return Path): fix the tableAlias for ReduceSink operator (Pengcheng Xiong, reviewed by Jesus Camacho Rodriguez)

Posted by xu...@apache.org.
HIVE-11623: CBO: Calcite Operator To Hive Operator (Calcite Return Path): fix the tableAlias for ReduceSink operator (Pengcheng Xiong, reviewed by Jesus Camacho Rodriguez)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9e85bbf2
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9e85bbf2
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9e85bbf2

Branch: refs/heads/beeline-cli
Commit: 9e85bbf2780510edda79c247248da57619530577
Parents: fb152e4
Author: Pengcheng Xiong <px...@apache.org>
Authored: Thu Aug 27 11:26:25 2015 -0700
Committer: Pengcheng Xiong <px...@apache.org>
Committed: Thu Aug 27 11:26:46 2015 -0700

----------------------------------------------------------------------
 .../calcite/translator/HiveOpConverter.java     | 31 ++++++++++++++++----
 1 file changed, 26 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/9e85bbf2/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
index 4db9863..1931880 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
@@ -686,13 +686,34 @@ public class HiveOpConverter {
       int numReducers, Operation acidOperation, boolean strictMode,
       List<String> keepColNames) throws SemanticException {
     // 1. Generate RS operator
-    if (input.getSchema().getTableNames().size() != 1) {
+    // 1.1 Prune the tableNames: only count tableNames that are not empty strings,
+    // as an empty string table alias is only allowed for virtual columns.
+    String tableAlias = null;
+    Set<String> tableNames = input.getSchema().getTableNames();
+    for (String tableName : tableNames) {
+      if (tableName != null) {
+        if (tableName.length() == 0) {
+          if (tableAlias == null) {
+            tableAlias = tableName;
+          }
+        } else {
+          if (tableAlias == null || tableAlias.length() == 0) {
+            tableAlias = tableName;
+          } else {
+            if (!tableName.equals(tableAlias)) {
+              throw new SemanticException(
+                  "In CBO return path, genReduceSinkAndBacktrackSelect is expecting only one tableAlias but there is more than one");
+            }
+          }
+        }
+      }
+    }
+    if (tableAlias == null) {
       throw new SemanticException(
-          "In CBO return path, genReduceSinkAndBacktrackSelect is expecting only one SelectOp but there is "
-              + input.getSchema().getTableNames().size());
+          "In CBO return path, genReduceSinkAndBacktrackSelect is expecting only one tableAlias but there is none");
     }
-    ReduceSinkOperator rsOp = genReduceSink(input, input.getSchema().getTableNames().iterator()
-        .next(), keys, tag, partitionCols, order, numReducers, acidOperation, strictMode);
+    // 1.2 Now generate RS operator
+    ReduceSinkOperator rsOp = genReduceSink(input, tableAlias, keys, tag, partitionCols, order, numReducers, acidOperation, strictMode);
 
     // 2. Generate backtrack Select operator
     Map<String, ExprNodeDesc> descriptors = buildBacktrackFromReduceSink(keepColNames,
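
The nested conditionals above implement a simple preference rule: take the first non-empty alias, keep an empty alias (used for virtual columns) only as a fallback, and fail if two distinct non-empty aliases appear. A minimal standalone sketch of that rule; the class and method names are illustrative and not part of the committed patch:

    import java.util.Set;

    public final class AliasSelection {
      // Illustrative helper, not the committed method: pick the single table
      // alias for the ReduceSink. Non-empty aliases win; an empty alias
      // (virtual columns) is kept only as a last resort.
      static String selectSingleTableAlias(Set<String> tableNames) {
        String alias = null;
        for (String name : tableNames) {
          if (name == null) {
            continue;
          }
          if (name.isEmpty()) {
            if (alias == null) {
              alias = name;            // empty alias only as a fallback
            }
          } else if (alias == null || alias.isEmpty()) {
            alias = name;              // first non-empty alias wins
          } else if (!name.equals(alias)) {
            throw new IllegalStateException(
                "expected exactly one table alias, found: " + alias + " and " + name);
          }
        }
        if (alias == null) {
          throw new IllegalStateException("expected exactly one table alias, found none");
        }
        return alias;
      }
    }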


[14/50] [abbrv] hive git commit: HIVE-11669: OrcFileDump service should support directories (Prasanth Jayachandran reviewed by Sergey Shelukhin)

Posted by xu...@apache.org.
HIVE-11669: OrcFileDump service should support directories (Prasanth Jayachandran reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8e712da0
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8e712da0
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8e712da0

Branch: refs/heads/beeline-cli
Commit: 8e712da0d8464173e0977b61661bbd00960b08d8
Parents: 2ef40ca
Author: Prasanth Jayachandran <j....@gmail.com>
Authored: Fri Aug 28 13:13:18 2015 -0500
Committer: Prasanth Jayachandran <j....@gmail.com>
Committed: Fri Aug 28 13:13:18 2015 -0500

----------------------------------------------------------------------
 .../apache/hadoop/hive/ql/io/orc/FileDump.java  | 48 ++++++++++++++++++--
 1 file changed, 44 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/8e712da0/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
index 4acb810..76ecb33 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
@@ -22,7 +22,7 @@ import java.io.OutputStreamWriter;
 import java.io.PrintStream;
 import java.text.DecimalFormat;
 import java.util.ArrayList;
-import java.util.Arrays;
+import java.util.Collection;
 import java.util.List;
 import java.util.Map;
 
@@ -32,8 +32,10 @@ import org.apache.commons.cli.HelpFormatter;
 import org.apache.commons.cli.OptionBuilder;
 import org.apache.commons.cli.Options;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.io.filters.BloomFilterIO;
 import org.apache.hadoop.hive.ql.io.orc.OrcProto.RowIndex;
 import org.apache.hadoop.hive.ql.io.orc.OrcProto.RowIndexEntry;
@@ -47,6 +49,9 @@ import org.apache.hadoop.io.LongWritable;
 import org.codehaus.jettison.json.JSONException;
 import org.codehaus.jettison.json.JSONWriter;
 
+import com.google.common.base.Strings;
+import com.google.common.collect.Lists;
+
 /**
  * A tool for printing out the file structure of ORC files.
  */
@@ -86,23 +91,55 @@ public final class FileDump {
       System.err.println("Error : ORC files are not specified");
       return;
     }
+
+    // if the specified path is a directory, iterate through all files and print the file dump
+    List<String> filesInPath = Lists.newArrayList();
+    for (String filename : files) {
+      Path path = new Path(filename);
+      filesInPath.addAll(getAllFilesInPath(path, conf));
+    }
+
     if (dumpData) {
-      printData(Arrays.asList(files), conf);
+      printData(filesInPath, conf);
     } else {
       if (jsonFormat) {
         boolean prettyPrint = cli.hasOption('p');
-        JsonFileDump.printJsonMetaData(Arrays.asList(files), conf, rowIndexCols, prettyPrint,
+        JsonFileDump.printJsonMetaData(filesInPath, conf, rowIndexCols, prettyPrint,
             printTimeZone);
       } else {
-        printMetaData(Arrays.asList(files), conf, rowIndexCols, printTimeZone);
+        printMetaData(filesInPath, conf, rowIndexCols, printTimeZone);
+      }
+    }
+  }
+
+  private static Collection<? extends String> getAllFilesInPath(final Path path,
+      final Configuration conf) throws IOException {
+    List<String> filesInPath = Lists.newArrayList();
+    FileSystem fs = path.getFileSystem(conf);
+    FileStatus fileStatus = fs.getFileStatus(path);
+    if (fileStatus.isDir()) {
+      FileStatus[] fileStatuses = fs.listStatus(path, AcidUtils.hiddenFileFilter);
+      for (FileStatus fileInPath : fileStatuses) {
+        if (fileInPath.isDir()) {
+          filesInPath.addAll(getAllFilesInPath(fileInPath.getPath(), conf));
+        } else {
+          filesInPath.add(fileInPath.getPath().toString());
+        }
       }
+    } else {
+      filesInPath.add(path.toString());
     }
+
+    return filesInPath;
   }
 
   private static void printData(List<String> files, Configuration conf) throws IOException,
       JSONException {
     for (String file : files) {
       printJsonData(conf, file);
+      if (files.size() > 1) {
+        System.out.println(Strings.repeat("=", 80) + "\n");
+      }
     }
   }
 
@@ -204,6 +241,9 @@ public final class FileDump {
       System.out.println("Padding length: " + paddedBytes + " bytes");
       System.out.println("Padding ratio: " + format.format(percentPadding) + "%");
       rows.close();
+      if (files.size() > 1) {
+        System.out.println(Strings.repeat("=", 80) + "\n");
+      }
     }
   }
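
The directory walk above skips bookkeeping files via AcidUtils.hiddenFileFilter. For readers outside the Hive tree, a minimal sketch of an equivalent filter (an illustrative stand-in, not the Hive constant itself): Hadoop convention treats names starting with '_' or '.' as hidden.

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.fs.PathFilter;

    // Illustrative stand-in for AcidUtils.hiddenFileFilter: rejects the
    // '_SUCCESS'/'.crc'-style bookkeeping entries that Hadoop jobs leave behind.
    public final class HiddenFileFilter implements PathFilter {
      @Override
      public boolean accept(Path path) {
        final String name = path.getName();
        return !name.startsWith("_") && !name.startsWith(".");
      }
    }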
 


[38/50] [abbrv] hive git commit: HIVE-11383: Upgrade Hive to Calcite 1.4 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

Posted by xu...@apache.org.
HIVE-11383: Upgrade Hive to Calcite 1.4 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c40382d4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c40382d4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c40382d4

Branch: refs/heads/beeline-cli
Commit: c40382d4c3bd3f0b0c0f1b09affb7d03198f47ba
Parents: 492c8b1
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Thu Sep 3 11:48:07 2015 +0200
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Thu Sep 3 11:48:07 2015 +0200

----------------------------------------------------------------------
 pom.xml                                         |   2 +-
 .../ql/optimizer/calcite/HiveRelOptUtil.java    |  23 --
 .../calcite/reloperators/HiveSort.java          |  29 +-
 .../rules/HiveJoinProjectTransposeRule.java     | 238 ++--------------
 .../hadoop/hive/ql/parse/CalcitePlanner.java    |   5 +-
 .../clientpositive/constprog_partitioner.q.out  |  30 +-
 .../clientpositive/correlationoptimizer10.q.out |  48 ++--
 .../spark/constprog_partitioner.q.out           |  30 +-
 .../clientpositive/spark/subquery_exists.q.out  |  12 +-
 .../clientpositive/spark/subquery_in.q.out      |  36 +--
 .../spark/vector_mapjoin_reduce.q.out           |  22 +-
 .../clientpositive/subquery_exists.q.out        |  12 +-
 .../results/clientpositive/subquery_in.q.out    |  36 +--
 .../clientpositive/subquery_in_having.q.out     |  50 ++--
 .../subquery_unqualcolumnrefs.q.out             |  26 +-
 .../results/clientpositive/subquery_views.q.out |  40 +--
 .../clientpositive/tez/explainuser_1.q.out      | 278 +++++++++----------
 .../clientpositive/tez/subquery_exists.q.out    |  12 +-
 .../clientpositive/tez/subquery_in.q.out        |  36 +--
 .../clientpositive/tez/vector_inner_join.q.out  |  14 +-
 .../tez/vector_mapjoin_reduce.q.out             |  24 +-
 .../clientpositive/vector_inner_join.q.out      |  12 +-
 .../clientpositive/vector_mapjoin_reduce.q.out  |  26 +-
 23 files changed, 431 insertions(+), 610 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 15c2805..b55e86a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -103,7 +103,7 @@
     <antlr.version>3.4</antlr.version>
     <avro.version>1.7.7</avro.version>
     <bonecp.version>0.8.0.RELEASE</bonecp.version>
-    <calcite.version>1.3.0-incubating</calcite.version>
+    <calcite.version>1.4.0-incubating</calcite.version>
     <datanucleus-api-jdo.version>3.2.6</datanucleus-api-jdo.version>
     <datanucleus-core.version>3.2.10</datanucleus-core.version>
     <datanucleus-rdbms.version>3.2.9</datanucleus-rdbms.version>

http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
index 5a5954d..0e282b8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
@@ -278,29 +278,6 @@ public class HiveRelOptUtil extends RelOptUtil {
     }
   }
 
-  private static SqlOperator op(SqlKind kind, SqlOperator operator) {
-    switch (kind) {
-    case EQUALS:
-      return SqlStdOperatorTable.EQUALS;
-    case NOT_EQUALS:
-      return SqlStdOperatorTable.NOT_EQUALS;
-    case GREATER_THAN:
-      return SqlStdOperatorTable.GREATER_THAN;
-    case GREATER_THAN_OR_EQUAL:
-      return SqlStdOperatorTable.GREATER_THAN_OR_EQUAL;
-    case LESS_THAN:
-      return SqlStdOperatorTable.LESS_THAN;
-    case LESS_THAN_OR_EQUAL:
-      return SqlStdOperatorTable.LESS_THAN_OR_EQUAL;
-    case IS_DISTINCT_FROM:
-      return SqlStdOperatorTable.IS_DISTINCT_FROM;
-    case IS_NOT_DISTINCT_FROM:
-      return SqlStdOperatorTable.IS_NOT_DISTINCT_FROM;
-    default:
-      return operator;
-    }
-  }
-
   private static void addJoinKey(
       List<RexNode> joinKeyList,
       RexNode key,

http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSort.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSort.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSort.java
index 18d2838..1df6542 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSort.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSort.java
@@ -22,6 +22,7 @@ import java.util.Map;
 import org.apache.calcite.plan.RelOptCluster;
 import org.apache.calcite.plan.RelTraitSet;
 import org.apache.calcite.rel.RelCollation;
+import org.apache.calcite.rel.RelCollationTraitDef;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.core.RelFactories;
 import org.apache.calcite.rel.core.Sort;
@@ -49,6 +50,24 @@ public class HiveSort extends Sort implements HiveRelNode {
         offset, fetch);
   }
 
+  /**
+   * Creates a HiveSort.
+   *
+   * @param input     Input relational expression
+   * @param collation array of sort specifications
+   * @param offset    Expression for number of rows to discard before returning
+   *                  first row
+   * @param fetch     Expression for number of rows to fetch
+   */
+  public static HiveSort create(RelNode input, RelCollation collation,
+      RexNode offset, RexNode fetch) {
+    RelOptCluster cluster = input.getCluster();
+    collation = RelCollationTraitDef.INSTANCE.canonize(collation);
+    RelTraitSet traitSet =
+        TraitsUtil.getSortTraitSet(cluster, input.getTraitSet(), collation);
+    return new HiveSort(cluster, traitSet, input, collation, offset, fetch);
+  }
+
   @Override
   public HiveSort copy(RelTraitSet traitSet, RelNode newInput, RelCollation newCollation,
       RexNode offset, RexNode fetch) {
@@ -77,9 +96,15 @@ public class HiveSort extends Sort implements HiveRelNode {
   private static class HiveSortRelFactory implements RelFactories.SortFactory {
 
     @Override
-    public RelNode createSort(RelTraitSet traits, RelNode child, RelCollation collation,
+    public RelNode createSort(RelTraitSet traits, RelNode input, RelCollation collation,
         RexNode offset, RexNode fetch) {
-      return new HiveSort(child.getCluster(), traits, child, collation, offset, fetch);
+      return createSort(input, collation, offset, fetch);
+    }
+
+    @Override
+    public RelNode createSort(RelNode input, RelCollation collation, RexNode offset,
+        RexNode fetch) {
+      return create(input, collation, offset, fetch);
     }
   }
 }
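
The new static create factory canonizes the collation trait before constructing the node, so callers no longer assemble trait sets by hand. A hedged usage sketch; the limitSort helper and its arguments are illustrative, not part of the patch:

    import org.apache.calcite.rel.RelCollations;
    import org.apache.calcite.rel.RelFieldCollation;
    import org.apache.calcite.rel.RelNode;
    import org.apache.calcite.rex.RexNode;

    // Sort `input` ascending on field 0 with no OFFSET; `fetch` carries the
    // LIMIT expression (e.g. a RexLiteral). create() canonizes the collation.
    static RelNode limitSort(RelNode input, RexNode fetch) {
      return HiveSort.create(
          input,
          RelCollations.of(new RelFieldCollation(0)),
          null,    // offset: none
          fetch);
    }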

http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java
index fd8f5cb..ac72ee5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java
@@ -49,14 +49,14 @@ public class HiveJoinProjectTransposeRule extends JoinProjectTransposeRule {
               operand(HiveProject.class, any()),
               operand(HiveProject.class, any())),
           "JoinProjectTransposeRule(Project-Project)",
-          HiveProject.DEFAULT_PROJECT_FACTORY);
+          false, HiveProject.DEFAULT_PROJECT_FACTORY);
 
   public static final HiveJoinProjectTransposeRule LEFT_PROJECT =
       new HiveJoinProjectTransposeRule(
           operand(HiveJoin.class,
               some(operand(HiveProject.class, any()))),
           "JoinProjectTransposeRule(Project-Other)",
-          HiveProject.DEFAULT_PROJECT_FACTORY);
+          false, HiveProject.DEFAULT_PROJECT_FACTORY);
 
   public static final HiveJoinProjectTransposeRule RIGHT_PROJECT =
       new HiveJoinProjectTransposeRule(
@@ -65,219 +65,37 @@ public class HiveJoinProjectTransposeRule extends JoinProjectTransposeRule {
               operand(RelNode.class, any()),
               operand(HiveProject.class, any())),
           "JoinProjectTransposeRule(Other-Project)",
-          HiveProject.DEFAULT_PROJECT_FACTORY);
+          false, HiveProject.DEFAULT_PROJECT_FACTORY);
 
-  private final ProjectFactory projectFactory;
-
-
-  private HiveJoinProjectTransposeRule(
-      RelOptRuleOperand operand,
-      String description, ProjectFactory pFactory) {
-    super(operand, description, pFactory);
-    this.projectFactory = pFactory;
-  }
-
-  @Override
-  public void onMatch(RelOptRuleCall call) {
-    Join joinRel = call.rel(0);
-    JoinRelType joinType = joinRel.getJoinType();
-
-    Project leftProj;
-    Project rightProj;
-    RelNode leftJoinChild;
-    RelNode rightJoinChild;
-
-    // see if at least one input's projection doesn't generate nulls
-    if (hasLeftChild(call)) {
-      leftProj = call.rel(1);
-      leftJoinChild = getProjectChild(call, leftProj, true);
-    } else {
-      leftProj = null;
-      leftJoinChild = call.rel(1);
-    }
-    if (hasRightChild(call)) {
-      rightProj = getRightChild(call);
-      rightJoinChild = getProjectChild(call, rightProj, false);
-    } else {
-      rightProj = null;
-      rightJoinChild = joinRel.getRight();
-    }
-    if ((leftProj == null) && (rightProj == null)) {
-      return;
-    }
-
-    // Construct two RexPrograms and combine them.  The bottom program
-    // is a join of the projection expressions from the left and/or
-    // right projects that feed into the join.  The top program contains
-    // the join condition.
-
-    // Create a row type representing a concatenation of the inputs
-    // underneath the projects that feed into the join.  This is the input
-    // into the bottom RexProgram.  Note that the join type is an inner
-    // join because the inputs haven't actually been joined yet.
-    RelDataType joinChildrenRowType =
-        Join.deriveJoinRowType(
-            leftJoinChild.getRowType(),
-            rightJoinChild.getRowType(),
-            JoinRelType.INNER,
-            joinRel.getCluster().getTypeFactory(),
-            null,
-            Collections.<RelDataTypeField>emptyList());
-
-    // Create projection expressions, combining the projection expressions
-    // from the projects that feed into the join.  For the RHS projection
-    // expressions, shift them to the right by the number of fields on
-    // the LHS.  If the join input was not a projection, simply create
-    // references to the inputs.
-    int nProjExprs = joinRel.getRowType().getFieldCount();
-    List<Pair<RexNode, String>> projects =
-        new ArrayList<Pair<RexNode, String>>();
-    RexBuilder rexBuilder = joinRel.getCluster().getRexBuilder();
-
-    createProjectExprs(
-        leftProj,
-        leftJoinChild,
-        0,
-        rexBuilder,
-        joinChildrenRowType.getFieldList(),
-        projects);
-
-    List<RelDataTypeField> leftFields =
-        leftJoinChild.getRowType().getFieldList();
-    int nFieldsLeft = leftFields.size();
-    createProjectExprs(
-        rightProj,
-        rightJoinChild,
-        nFieldsLeft,
-        rexBuilder,
-        joinChildrenRowType.getFieldList(),
-        projects);
-
-    List<RelDataType> projTypes = new ArrayList<RelDataType>();
-    for (int i = 0; i < nProjExprs; i++) {
-      projTypes.add(projects.get(i).left.getType());
-    }
-    RelDataType projRowType =
-        rexBuilder.getTypeFactory().createStructType(
-            projTypes,
-            Pair.right(projects));
-
-    // create the RexPrograms and merge them
-    RexProgram bottomProgram =
-        RexProgram.create(
-            joinChildrenRowType,
-            Pair.left(projects),
-            null,
-            projRowType,
-            rexBuilder);
-    RexProgramBuilder topProgramBuilder =
-        new RexProgramBuilder(
-            projRowType,
-            rexBuilder);
-    topProgramBuilder.addIdentity();
-    topProgramBuilder.addCondition(joinRel.getCondition());
-    RexProgram topProgram = topProgramBuilder.getProgram();
-    RexProgram mergedProgram =
-        RexProgramBuilder.mergePrograms(
-            topProgram,
-            bottomProgram,
-            rexBuilder);
+  public static final HiveJoinProjectTransposeRule BOTH_PROJECT_INCLUDE_OUTER =
+      new HiveJoinProjectTransposeRule(
+          operand(HiveJoin.class,
+              operand(HiveProject.class, any()),
+              operand(HiveProject.class, any())),
+          "Join(IncludingOuter)ProjectTransposeRule(Project-Project)",
+          true, HiveProject.DEFAULT_PROJECT_FACTORY);
 
-    // expand out the join condition and construct a new LogicalJoin that
-    // directly references the join children without the intervening
-    // ProjectRels
-    RexNode newCondition =
-        mergedProgram.expandLocalRef(
-            mergedProgram.getCondition());
-    Join newJoinRel =
-        joinRel.copy(joinRel.getTraitSet(), newCondition,
-            leftJoinChild, rightJoinChild, joinRel.getJoinType(),
-            joinRel.isSemiJoinDone());
+  public static final HiveJoinProjectTransposeRule LEFT_PROJECT_INCLUDE_OUTER =
+      new HiveJoinProjectTransposeRule(
+          operand(HiveJoin.class,
+              some(operand(HiveProject.class, any()))),
+          "Join(IncludingOuter)ProjectTransposeRule(Project-Other)",
+          true, HiveProject.DEFAULT_PROJECT_FACTORY);
 
-    // expand out the new projection expressions; if the join is an
-    // outer join, modify the expressions to reference the join output
-    List<RexNode> newProjExprs = new ArrayList<RexNode>();
-    List<RexLocalRef> projList = mergedProgram.getProjectList();
-    List<RelDataTypeField> newJoinFields =
-        newJoinRel.getRowType().getFieldList();
-    int nJoinFields = newJoinFields.size();
-    int[] adjustments = new int[nJoinFields];
-    for (int i = 0; i < nProjExprs; i++) {
-      RexNode newExpr = mergedProgram.expandLocalRef(projList.get(i));
-      if (joinType != JoinRelType.INNER) {
-        newExpr =
-            newExpr.accept(
-                new RelOptUtil.RexInputConverter(
-                    rexBuilder,
-                    joinChildrenRowType.getFieldList(),
-                    newJoinFields,
-                    adjustments));
-      }
-      newProjExprs.add(newExpr);
-    }
+  public static final HiveJoinProjectTransposeRule RIGHT_PROJECT_INCLUDE_OUTER =
+      new HiveJoinProjectTransposeRule(
+          operand(
+              HiveJoin.class,
+              operand(RelNode.class, any()),
+              operand(HiveProject.class, any())),
+          "Join(IncludingOuter)ProjectTransposeRule(Other-Project)",
+          true, HiveProject.DEFAULT_PROJECT_FACTORY);
 
-    // finally, create the projection on top of the join
-    RelNode newProjRel = projectFactory.createProject(newJoinRel, newProjExprs,
-        joinRel.getRowType().getFieldNames());
 
-    call.transformTo(newProjRel);
+  private HiveJoinProjectTransposeRule(
+      RelOptRuleOperand operand, String description,
+      boolean includeOuter, ProjectFactory projectFactory) {
+    super(operand, description, includeOuter, projectFactory);
   }
 
-  /**
-   * Creates projection expressions corresponding to one of the inputs into
-   * the join
-   *
-   * @param projRel            the projection input into the join (if it exists)
-   * @param joinChild          the child of the projection input (if there is a
-   *                           projection); otherwise, this is the join input
-   * @param adjustmentAmount   the amount the expressions need to be shifted by
-   * @param rexBuilder         rex builder
-   * @param joinChildrenFields concatenation of the fields from the left and
-   *                           right join inputs (once the projections have been
-   *                           removed)
-   * @param projects           Projection expressions &amp; names to be created
-   */
-  private void createProjectExprs(
-      Project projRel,
-      RelNode joinChild,
-      int adjustmentAmount,
-      RexBuilder rexBuilder,
-      List<RelDataTypeField> joinChildrenFields,
-      List<Pair<RexNode, String>> projects) {
-    List<RelDataTypeField> childFields =
-        joinChild.getRowType().getFieldList();
-    if (projRel != null) {
-      List<Pair<RexNode, String>> namedProjects =
-          projRel.getNamedProjects();
-      int nChildFields = childFields.size();
-      int[] adjustments = new int[nChildFields];
-      for (int i = 0; i < nChildFields; i++) {
-        adjustments[i] = adjustmentAmount;
-      }
-      for (Pair<RexNode, String> pair : namedProjects) {
-        RexNode e = pair.left;
-        if (adjustmentAmount != 0) {
-          // shift the references by the adjustment amount
-          e = e.accept(
-              new RelOptUtil.RexInputConverter(
-                  rexBuilder,
-                  childFields,
-                  joinChildrenFields,
-                  adjustments));
-        }
-        projects.add(Pair.of(e, pair.right));
-      }
-    } else {
-      // no projection; just create references to the inputs
-      for (int i = 0; i < childFields.size(); i++) {
-        final RelDataTypeField field = childFields.get(i);
-        projects.add(
-            Pair.of(
-                (RexNode) rexBuilder.makeInputRef(
-                    field.getType(),
-                    i + adjustmentAmount),
-                field.getName()));
-      }
-    }
-  }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index f26d1df..73ae7c4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -914,8 +914,9 @@ public class CalcitePlanner extends SemanticAnalyzer {
       if (HiveConf.getBoolVar(conf, ConfVars.HIVE_CBO_RETPATH_HIVEOP)) {
         // 6.1. Merge join into multijoin operators (if possible)
         calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, true, mdProvider.getMetadataProvider(),
-                HepMatchOrder.BOTTOM_UP, HiveJoinProjectTransposeRule.BOTH_PROJECT,
-                HiveJoinProjectTransposeRule.LEFT_PROJECT, HiveJoinProjectTransposeRule.RIGHT_PROJECT,
+                HepMatchOrder.BOTTOM_UP, HiveJoinProjectTransposeRule.BOTH_PROJECT_INCLUDE_OUTER,
+                HiveJoinProjectTransposeRule.LEFT_PROJECT_INCLUDE_OUTER,
+                HiveJoinProjectTransposeRule.RIGHT_PROJECT_INCLUDE_OUTER,
                 HiveJoinToMultiJoinRule.INSTANCE, HiveProjectMergeRule.INSTANCE);
         // The previous rules can pull up projections through join operators,
         // thus we run the field trimmer again to push them back down
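
For reference, a rough sketch of the kind of HepPlanner pass that Hive's hepPlan helper drives with these rules, using stock Calcite 1.4 APIs; the mergeJoins name is illustrative, and the real helper additionally wires in a metadata provider and program cache that are omitted here:

    import org.apache.calcite.plan.hep.HepMatchOrder;
    import org.apache.calcite.plan.hep.HepPlanner;
    import org.apache.calcite.plan.hep.HepProgram;
    import org.apache.calcite.plan.hep.HepProgramBuilder;
    import org.apache.calcite.rel.RelNode;

    // Build a bottom-up program over the outer-join-aware transpose rules,
    // then let the planner rewrite the plan to a fixpoint.
    static RelNode mergeJoins(RelNode plan) {
      HepProgram program = new HepProgramBuilder()
          .addMatchOrder(HepMatchOrder.BOTTOM_UP)
          .addRuleInstance(HiveJoinProjectTransposeRule.BOTH_PROJECT_INCLUDE_OUTER)
          .addRuleInstance(HiveJoinProjectTransposeRule.LEFT_PROJECT_INCLUDE_OUTER)
          .addRuleInstance(HiveJoinProjectTransposeRule.RIGHT_PROJECT_INCLUDE_OUTER)
          .addRuleInstance(HiveJoinToMultiJoinRule.INSTANCE)
          .addRuleInstance(HiveProjectMergeRule.INSTANCE)
          .build();
      HepPlanner planner = new HepPlanner(program);
      planner.setRoot(plan);
      return planner.findBestExp();
    }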

http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/constprog_partitioner.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/constprog_partitioner.q.out b/ql/src/test/results/clientpositive/constprog_partitioner.q.out
index 6475fa7..e22a4a9 100644
--- a/ql/src/test/results/clientpositive/constprog_partitioner.q.out
+++ b/ql/src/test/results/clientpositive/constprog_partitioner.q.out
@@ -111,42 +111,42 @@ STAGE PLANS:
               predicate: ((l_linenumber = 1) and l_orderkey is not null) (type: boolean)
               Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
               Select Operator
-                expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int), 1 (type: int)
-                outputColumnNames: _col0, _col1, _col2, _col3
+                expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int)
+                outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col0 (type: int), _col3 (type: int)
-                  sort order: ++
-                  Map-reduce partition columns: _col0 (type: int), _col3 (type: int)
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
                   Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col1 (type: int), _col2 (type: int)
           TableScan
             alias: li
             Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: (((l_shipmode = 'AIR') and l_orderkey is not null) and l_linenumber is not null) (type: boolean)
+              predicate: (((l_shipmode = 'AIR') and (l_linenumber = 1)) and l_orderkey is not null) (type: boolean)
               Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
               Select Operator
-                expressions: l_orderkey (type: int), l_linenumber (type: int)
-                outputColumnNames: _col0, _col1
+                expressions: l_orderkey (type: int)
+                outputColumnNames: _col0
                 Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
-                  keys: _col0 (type: int), _col1 (type: int)
+                  keys: _col0 (type: int)
                   mode: hash
-                  outputColumnNames: _col0, _col1
+                  outputColumnNames: _col0
                   Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
-                    key expressions: _col0 (type: int), _col1 (type: int)
-                    sort order: ++
-                    Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
                     Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Join Operator
           condition map:
                Left Semi Join 0 to 1
           keys:
-            0 _col0 (type: int), _col3 (type: int)
-            1 _col0 (type: int), _col1 (type: int)
+            0 _col0 (type: int)
+            1 _col0 (type: int)
           outputColumnNames: _col1, _col2
           Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE
           Select Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/correlationoptimizer10.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/correlationoptimizer10.q.out b/ql/src/test/results/clientpositive/correlationoptimizer10.q.out
index 14a6ade..90fb6f4 100644
--- a/ql/src/test/results/clientpositive/correlationoptimizer10.q.out
+++ b/ql/src/test/results/clientpositive/correlationoptimizer10.q.out
@@ -474,17 +474,17 @@ STAGE PLANS:
             alias: xx
             Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+              predicate: (((UDFToDouble(key) < 200.0) and (UDFToDouble(key) > 20.0)) and key is not null) (type: boolean)
+              Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string), value (type: string)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col1 (type: string)
           TableScan
             Reduce Output Operator
@@ -581,17 +581,17 @@ STAGE PLANS:
             alias: xx
             Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+              predicate: (((UDFToDouble(key) < 200.0) and (UDFToDouble(key) > 20.0)) and key is not null) (type: boolean)
+              Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string), value (type: string)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col1 (type: string)
           TableScan
             alias: x
@@ -625,9 +625,9 @@ STAGE PLANS:
                   Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Demux Operator
-          Statistics: Num rows: 69 Data size: 693 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 57 Data size: 601 Basic stats: COMPLETE Column stats: NONE
           Mux Operator
-            Statistics: Num rows: 69 Data size: 693 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 57 Data size: 601 Basic stats: COMPLETE Column stats: NONE
             Join Operator
               condition map:
                    Left Semi Join 0 to 1
@@ -652,7 +652,7 @@ STAGE PLANS:
             outputColumnNames: _col0
             Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
             Mux Operator
-              Statistics: Num rows: 69 Data size: 693 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 57 Data size: 601 Basic stats: COMPLETE Column stats: NONE
               Join Operator
                 condition map:
                      Left Semi Join 0 to 1
@@ -802,17 +802,17 @@ STAGE PLANS:
             alias: xx
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              predicate: (((UDFToDouble(key) < 200.0) and (UDFToDouble(key) > 180.0)) and key is not null) (type: boolean)
+              Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string), value (type: string)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col1 (type: string)
           TableScan
             Reduce Output Operator
@@ -828,10 +828,10 @@ STAGE PLANS:
             0 _col0 (type: string)
             1 _col0 (type: string)
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 33 Data size: 358 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 33 Data size: 358 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.TextInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -921,17 +921,17 @@ STAGE PLANS:
             alias: xx
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              predicate: (((UDFToDouble(key) < 200.0) and (UDFToDouble(key) > 180.0)) and key is not null) (type: boolean)
+              Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string), value (type: string)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col1 (type: string)
           TableScan
             alias: xx
@@ -965,9 +965,9 @@ STAGE PLANS:
                   Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Demux Operator
-          Statistics: Num rows: 306 Data size: 3250 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 84 Data size: 891 Basic stats: COMPLETE Column stats: NONE
           Mux Operator
-            Statistics: Num rows: 306 Data size: 3250 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 84 Data size: 891 Basic stats: COMPLETE Column stats: NONE
             Join Operator
               condition map:
                    Left Semi Join 0 to 1
@@ -992,7 +992,7 @@ STAGE PLANS:
             outputColumnNames: _col0
             Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
             Mux Operator
-              Statistics: Num rows: 306 Data size: 3250 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 84 Data size: 891 Basic stats: COMPLETE Column stats: NONE
               Join Operator
                 condition map:
                      Left Semi Join 0 to 1

http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out b/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out
index 665bfce..3a3a751 100644
--- a/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out
+++ b/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out
@@ -124,13 +124,13 @@ STAGE PLANS:
                     predicate: ((l_linenumber = 1) and l_orderkey is not null) (type: boolean)
                     Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int), 1 (type: int)
-                      outputColumnNames: _col0, _col1, _col2, _col3
+                      expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int)
+                      outputColumnNames: _col0, _col1, _col2
                       Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
-                        key expressions: _col0 (type: int), _col3 (type: int)
-                        sort order: ++
-                        Map-reduce partition columns: _col0 (type: int), _col3 (type: int)
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
                         Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: int), _col2 (type: int)
         Map 3 
@@ -139,21 +139,21 @@ STAGE PLANS:
                   alias: li
                   Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (((l_shipmode = 'AIR') and l_orderkey is not null) and l_linenumber is not null) (type: boolean)
+                    predicate: (((l_shipmode = 'AIR') and (l_linenumber = 1)) and l_orderkey is not null) (type: boolean)
                     Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: l_orderkey (type: int), l_linenumber (type: int)
-                      outputColumnNames: _col0, _col1
+                      expressions: l_orderkey (type: int)
+                      outputColumnNames: _col0
                       Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
-                        keys: _col0 (type: int), _col1 (type: int)
+                        keys: _col0 (type: int)
                         mode: hash
-                        outputColumnNames: _col0, _col1
+                        outputColumnNames: _col0
                         Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
                         Reduce Output Operator
-                          key expressions: _col0 (type: int), _col1 (type: int)
-                          sort order: ++
-                          Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
                           Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
         Reducer 2 
             Reduce Operator Tree:
@@ -161,8 +161,8 @@ STAGE PLANS:
                 condition map:
                      Left Semi Join 0 to 1
                 keys:
-                  0 _col0 (type: int), _col3 (type: int)
-                  1 _col0 (type: int), _col1 (type: int)
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
                 outputColumnNames: _col1, _col2
                 Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE
                 Select Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/spark/subquery_exists.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/subquery_exists.q.out b/ql/src/test/results/clientpositive/spark/subquery_exists.q.out
index 28eda26..44f467b 100644
--- a/ql/src/test/results/clientpositive/spark/subquery_exists.q.out
+++ b/ql/src/test/results/clientpositive/spark/subquery_exists.q.out
@@ -41,17 +41,17 @@ STAGE PLANS:
                   alias: b
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (value is not null and key is not null) (type: boolean)
-                    Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+                    predicate: ((value > 'val_9') and key is not null) (type: boolean)
+                    Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: string), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col1 (type: string), _col0 (type: string)
                         sort order: ++
                         Map-reduce partition columns: _col1 (type: string), _col0 (type: string)
-                        Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
         Map 3 
             Map Operator Tree:
                 TableScan
@@ -83,10 +83,10 @@ STAGE PLANS:
                   0 _col1 (type: string), _col0 (type: string)
                   1 _col0 (type: string), _col1 (type: string)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/spark/subquery_in.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/subquery_in.q.out b/ql/src/test/results/clientpositive/spark/subquery_in.q.out
index 00b3399..bfcdaa8 100644
--- a/ql/src/test/results/clientpositive/spark/subquery_in.q.out
+++ b/ql/src/test/results/clientpositive/spark/subquery_in.q.out
@@ -31,17 +31,17 @@ STAGE PLANS:
                   alias: src
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    predicate: (key > '9') (type: boolean)
+                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: string), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: string)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: string)
         Map 3 
             Map Operator Tree:
@@ -74,10 +74,10 @@ STAGE PLANS:
                   0 _col0 (type: string)
                   1 _col0 (type: string)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -149,17 +149,17 @@ STAGE PLANS:
                   alias: b
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (key is not null and value is not null) (type: boolean)
-                    Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+                    predicate: ((key > '9') and value is not null) (type: boolean)
+                    Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: string), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: string), _col1 (type: string)
                         sort order: ++
                         Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-                        Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
         Map 3 
             Map Operator Tree:
                 TableScan
@@ -191,10 +191,10 @@ STAGE PLANS:
                   0 _col0 (type: string), _col1 (type: string)
                   1 _col0 (type: string), _col1 (type: string)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -627,17 +627,17 @@ STAGE PLANS:
                   alias: b
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (key is not null and value is not null) (type: boolean)
-                    Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+                    predicate: ((key > '9') and value is not null) (type: boolean)
+                    Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: string), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: string), _col1 (type: string)
                         sort order: ++
                         Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-                        Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
         Map 3 
             Map Operator Tree:
                 TableScan
@@ -669,10 +669,10 @@ STAGE PLANS:
                   0 _col0 (type: string), _col1 (type: string)
                   1 _col0 (type: string), _col1 (type: string)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out b/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out
index a2dd910..be39d0d 100644
--- a/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out
@@ -205,21 +205,21 @@ STAGE PLANS:
                   alias: lineitem
                   Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (((l_shipmode = 'AIR') and l_orderkey is not null) and l_linenumber is not null) (type: boolean)
+                    predicate: (((l_shipmode = 'AIR') and (l_linenumber = 1)) and l_orderkey is not null) (type: boolean)
                     Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: l_orderkey (type: int), l_linenumber (type: int)
-                      outputColumnNames: _col0, _col1
+                      expressions: l_orderkey (type: int)
+                      outputColumnNames: _col0
                       Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
-                        keys: _col0 (type: int), _col1 (type: int)
+                        keys: _col0 (type: int)
                         mode: hash
-                        outputColumnNames: _col0, _col1
+                        outputColumnNames: _col0
                         Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
                         Spark HashTable Sink Operator
                           keys:
-                            0 _col0 (type: int), _col3 (type: int)
-                            1 _col0 (type: int), _col1 (type: int)
+                            0 _col0 (type: int)
+                            1 _col0 (type: int)
             Local Work:
               Map Reduce Local Work
 
@@ -236,15 +236,15 @@ STAGE PLANS:
                     predicate: (((l_linenumber = 1) and l_orderkey is not null) and l_partkey is not null) (type: boolean)
                     Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int), 1 (type: int)
-                      outputColumnNames: _col0, _col1, _col2, _col3
+                      expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int)
+                      outputColumnNames: _col0, _col1, _col2
                       Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
                       Map Join Operator
                         condition map:
                              Left Semi Join 0 to 1
                         keys:
-                          0 _col0 (type: int), _col3 (type: int)
-                          1 _col0 (type: int), _col1 (type: int)
+                          0 _col0 (type: int)
+                          1 _col0 (type: int)
                         outputColumnNames: _col1, _col2
                         input vertices:
                           1 Map 2

http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/subquery_exists.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/subquery_exists.q.out b/ql/src/test/results/clientpositive/subquery_exists.q.out
index 776b145..ace5efe 100644
--- a/ql/src/test/results/clientpositive/subquery_exists.q.out
+++ b/ql/src/test/results/clientpositive/subquery_exists.q.out
@@ -36,17 +36,17 @@ STAGE PLANS:
             alias: b
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: (value is not null and key is not null) (type: boolean)
-              Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+              predicate: ((value > 'val_9') and key is not null) (type: boolean)
+              Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string), value (type: string)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col1 (type: string), _col0 (type: string)
                   sort order: ++
                   Map-reduce partition columns: _col1 (type: string), _col0 (type: string)
-                  Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
           TableScan
             alias: b
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -75,10 +75,10 @@ STAGE PLANS:
             0 _col1 (type: string), _col0 (type: string)
             1 _col0 (type: string), _col1 (type: string)
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.TextInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/subquery_in.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/subquery_in.q.out b/ql/src/test/results/clientpositive/subquery_in.q.out
index 7c53638..f82c799 100644
--- a/ql/src/test/results/clientpositive/subquery_in.q.out
+++ b/ql/src/test/results/clientpositive/subquery_in.q.out
@@ -26,17 +26,17 @@ STAGE PLANS:
             alias: src
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              predicate: (key > '9') (type: boolean)
+              Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string), value (type: string)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col1 (type: string)
           TableScan
             alias: src
@@ -66,10 +66,10 @@ STAGE PLANS:
             0 _col0 (type: string)
             1 _col0 (type: string)
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.TextInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -136,17 +136,17 @@ STAGE PLANS:
             alias: b
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: (key is not null and value is not null) (type: boolean)
-              Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+              predicate: ((key > '9') and value is not null) (type: boolean)
+              Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string), value (type: string)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: string)
                   sort order: ++
                   Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-                  Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
           TableScan
             alias: b
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -175,10 +175,10 @@ STAGE PLANS:
             0 _col0 (type: string), _col1 (type: string)
             1 _col0 (type: string), _col1 (type: string)
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.TextInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -671,17 +671,17 @@ STAGE PLANS:
             alias: b
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: (key is not null and value is not null) (type: boolean)
-              Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+              predicate: ((key > '9') and value is not null) (type: boolean)
+              Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string), value (type: string)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: string)
                   sort order: ++
                   Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-                  Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
           TableScan
             Reduce Output Operator
               key expressions: _col0 (type: string), _col1 (type: string)
@@ -696,10 +696,10 @@ STAGE PLANS:
             0 _col0 (type: string), _col1 (type: string)
             1 _col0 (type: string), _col1 (type: string)
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.TextInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/subquery_in_having.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/subquery_in_having.q.out b/ql/src/test/results/clientpositive/subquery_in_having.q.out
index 8cabfa7..da1da06 100644
--- a/ql/src/test/results/clientpositive/subquery_in_having.q.out
+++ b/ql/src/test/results/clientpositive/subquery_in_having.q.out
@@ -782,17 +782,17 @@ STAGE PLANS:
             alias: b
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              predicate: (key > '8') (type: boolean)
+              Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string), value (type: string)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col1 (type: string)
           TableScan
             alias: b
@@ -822,13 +822,13 @@ STAGE PLANS:
             0 _col0 (type: string)
             1 _col0 (type: string)
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
           Group By Operator
             aggregations: count()
             keys: _col0 (type: string), _col1 (type: string)
             mode: hash
             outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
               compressed: false
               table:
@@ -844,7 +844,7 @@ STAGE PLANS:
               key expressions: _col0 (type: string), _col1 (type: string)
               sort order: ++
               Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
               value expressions: _col2 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
@@ -852,10 +852,10 @@ STAGE PLANS:
           keys: KEY._col0 (type: string), KEY._col1 (type: string)
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
           Filter Operator
             predicate: _col2 is not null (type: boolean)
-            Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
               compressed: false
               table:
@@ -871,7 +871,7 @@ STAGE PLANS:
               key expressions: _col2 (type: bigint)
               sort order: +
               Map-reduce partition columns: _col2 (type: bigint)
-              Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
               value expressions: _col0 (type: string), _col1 (type: string)
           TableScan
             Reduce Output Operator
@@ -887,10 +887,10 @@ STAGE PLANS:
             0 _col2 (type: bigint)
             1 _col0 (type: bigint)
           outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.TextInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -1079,10 +1079,10 @@ STAGE PLANS:
                 0 _col2 (type: bigint)
                 1 _col0 (type: bigint)
               outputColumnNames: _col0, _col1, _col2
-              Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE
               File Output Operator
                 compressed: false
-                Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE
                 table:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -1098,7 +1098,7 @@ STAGE PLANS:
               key expressions: _col2 (type: bigint)
               sort order: +
               Map-reduce partition columns: _col2 (type: bigint)
-              Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
               value expressions: _col0 (type: string), _col1 (type: string)
           TableScan
             Reduce Output Operator
@@ -1114,10 +1114,10 @@ STAGE PLANS:
             0 _col2 (type: bigint)
             1 _col0 (type: bigint)
           outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.TextInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -1158,12 +1158,12 @@ STAGE PLANS:
             alias: b
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              predicate: (key > '8') (type: boolean)
+              Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string), value (type: string)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                 Map Join Operator
                   condition map:
                        Left Semi Join 0 to 1
@@ -1171,18 +1171,18 @@ STAGE PLANS:
                     0 _col0 (type: string)
                     1 _col0 (type: string)
                   outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
                   Group By Operator
                     aggregations: count()
                     keys: _col0 (type: string), _col1 (type: string)
                     mode: hash
                     outputColumnNames: _col0, _col1, _col2
-                    Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: _col0 (type: string), _col1 (type: string)
                       sort order: ++
                       Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-                      Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col2 (type: bigint)
       Local Work:
         Map Reduce Local Work
@@ -1192,10 +1192,10 @@ STAGE PLANS:
           keys: KEY._col0 (type: string), KEY._col1 (type: string)
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
           Filter Operator
             predicate: _col2 is not null (type: boolean)
-            Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
               compressed: false
               table:

http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out b/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out
index 5d6d4a8..76d7503 100644
--- a/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out
+++ b/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out
@@ -52,7 +52,7 @@ STAGE PLANS:
             alias: src11
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             Filter Operator
-              predicate: (key1 is not null and value1 is not null) (type: boolean)
+              predicate: ((key1 > '9') and value1 is not null) (type: boolean)
               Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
               Select Operator
                 expressions: key1 (type: string), value1 (type: string)
@@ -122,17 +122,17 @@ STAGE PLANS:
             alias: a
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: (key is not null and value is not null) (type: boolean)
-              Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+              predicate: ((key > '9') and value is not null) (type: boolean)
+              Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string), value (type: string)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: string)
                   sort order: ++
                   Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-                  Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
           TableScan
             alias: a
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -161,10 +161,10 @@ STAGE PLANS:
             0 _col0 (type: string), _col1 (type: string)
             1 _col0 (type: string), _col1 (type: string)
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.TextInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -588,17 +588,17 @@ STAGE PLANS:
             alias: b
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: (key is not null and value is not null) (type: boolean)
-              Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+              predicate: ((key > '9') and value is not null) (type: boolean)
+              Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string), value (type: string)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: string)
                   sort order: ++
                   Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-                  Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
           TableScan
             Reduce Output Operator
               key expressions: _col0 (type: string), _col1 (type: string)
@@ -613,10 +613,10 @@ STAGE PLANS:
             0 _col0 (type: string), _col1 (type: string)
             1 _col0 (type: string), _col1 (type: string)
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.TextInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/subquery_views.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/subquery_views.q.out b/ql/src/test/results/clientpositive/subquery_views.q.out
index 41834a3..c59d86e 100644
--- a/ql/src/test/results/clientpositive/subquery_views.q.out
+++ b/ql/src/test/results/clientpositive/subquery_views.q.out
@@ -70,7 +70,7 @@ POSTHOOK: Input: default@src
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@cv2
 Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
-Warning: Shuffle Join JOIN[50][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-6:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[52][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-6:MAPRED' is a cross product
 PREHOOK: query: explain
 select * 
 from cv2 where cv2.key in (select key from cv2 c where c.key < '11')
@@ -97,7 +97,7 @@ STAGE PLANS:
             alias: b
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: ((value > 'val_11') and (key is null or value is null)) (type: boolean)
+              predicate: ((value > 'val_11') and (key is null or value is null or key is null)) (type: boolean)
               Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
@@ -135,15 +135,15 @@ STAGE PLANS:
             alias: b
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              predicate: (key < '11') (type: boolean)
+              Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string), value (type: string)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   sort order: 
-                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col0 (type: string), _col1 (type: string)
           TableScan
             Reduce Output Operator
@@ -157,7 +157,7 @@ STAGE PLANS:
             0 
             1 
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
             table:
@@ -173,22 +173,22 @@ STAGE PLANS:
               key expressions: _col0 (type: string), _col1 (type: string), _col0 (type: string)
               sort order: +++
               Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col0 (type: string)
-              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
           TableScan
             alias: b
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: ((value > 'val_11') and key is not null) (type: boolean)
-              Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+              predicate: (((value > 'val_11') and (key < '11')) and key is not null) (type: boolean)
+              Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string), value (type: string), key (type: string)
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
                   sort order: +++
                   Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
-                  Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Join Operator
           condition map:
@@ -197,14 +197,14 @@ STAGE PLANS:
             0 _col0 (type: string), _col1 (type: string), _col0 (type: string)
             1 _col0 (type: string), _col1 (type: string), _col2 (type: string)
           outputColumnNames: _col0, _col1, _col3
-          Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 200 Data size: 2132 Basic stats: COMPLETE Column stats: NONE
           Filter Operator
             predicate: _col3 is null (type: boolean)
-            Statistics: Num rows: 151 Data size: 1606 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 100 Data size: 1066 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: _col0 (type: string), _col1 (type: string)
               outputColumnNames: _col0, _col1
-              Statistics: Num rows: 151 Data size: 1606 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 100 Data size: 1066 Basic stats: COMPLETE Column stats: NONE
               File Output Operator
                 compressed: false
                 table:
@@ -220,7 +220,7 @@ STAGE PLANS:
               key expressions: _col0 (type: string)
               sort order: +
               Map-reduce partition columns: _col0 (type: string)
-              Statistics: Num rows: 151 Data size: 1606 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 100 Data size: 1066 Basic stats: COMPLETE Column stats: NONE
               value expressions: _col1 (type: string)
           TableScan
             Reduce Output Operator
@@ -236,10 +236,10 @@ STAGE PLANS:
             0 _col0 (type: string)
             1 _col0 (type: string)
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 166 Data size: 1766 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 110 Data size: 1172 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 166 Data size: 1766 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 110 Data size: 1172 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.TextInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -252,7 +252,7 @@ STAGE PLANS:
             alias: b
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: ((value > 'val_11') and (key is null or value is null)) (type: boolean)
+              predicate: ((value > 'val_11') and (key is null or value is null or key is null)) (type: boolean)
               Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
@@ -379,7 +379,7 @@ STAGE PLANS:
         ListSink
 
 Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
-Warning: Shuffle Join JOIN[50][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-6:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[52][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-6:MAPRED' is a cross product
 PREHOOK: query: select * 
 from cv2 where cv2.key in (select key from cv2 c where c.key < '11')
 PREHOOK: type: QUERY


[02/50] [abbrv] hive git commit: HIVE-11472: ORC StringDirectTreeReader is thrashing the GC due to byte[] allocation per row (Gopal V, reviewed by Ashutosh Chauhan)

Posted by xu...@apache.org.
HIVE-11472: ORC StringDirectTreeReader is thrashing the GC due to byte[] allocation per row (Gopal V, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/bb7153f9
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/bb7153f9
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/bb7153f9

Branch: refs/heads/beeline-cli
Commit: bb7153f9b1ee2d7e067341d252667edac593e15e
Parents: 3e63fc4
Author: Gopal V <go...@apache.org>
Authored: Tue Aug 25 14:19:36 2015 -0700
Committer: Gopal V <go...@apache.org>
Committed: Tue Aug 25 14:23:02 2015 -0700

----------------------------------------------------------------------
 .../hive/ql/io/orc/TreeReaderFactory.java       | 18 ++++------
 .../apache/hadoop/hive/shims/Hadoop23Shims.java | 38 ++++++++++++++++++++
 .../apache/hadoop/hive/shims/HadoopShims.java   | 22 ++++++++++++
 .../hadoop/hive/shims/HadoopShimsSecure.java    | 32 +++++++++++++++++
 4 files changed, 99 insertions(+), 11 deletions(-)
----------------------------------------------------------------------
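
The gist of the fix: the old reader allocated a fresh byte[] for every string value it decoded, while the new TextReaderShim lets the reader reuse the Text's backing buffer via Text.readWithKnownLength() on Hadoop versions that provide it. A minimal, self-contained sketch of that reuse pattern follows; the stream contents and lengths are made up for illustration, and only Text.readWithKnownLength() itself comes from the patch:

import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.IOException;

import org.apache.hadoop.io.Text;

public class TextReuseSketch {
  public static void main(String[] args) throws IOException {
    // Two 4-byte "rows" packed back to back, the way an ORC DATA stream lays them out.
    byte[] raw = "row1row2".getBytes("UTF-8");
    DataInputStream din = new DataInputStream(new ByteArrayInputStream(raw));

    Text result = new Text(); // reused across rows; no per-row byte[] allocation
    for (int len : new int[] { 4, 4 }) {
      // Grows the Text's internal buffer only when needed, then reads exactly len bytes.
      result.readWithKnownLength(din, len);
      System.out.println(result); // prints "row1", then "row2"
    }
  }
}

For Hadoop versions where readWithKnownLength() is missing, the patch keeps the old allocate-and-copy loop as a fallback (BasicTextReaderShim in HadoopShimsSecure, below).
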


http://git-wip-us.apache.org/repos/asf/hive/blob/bb7153f9/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java
index 9bfe268..6d47532 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java
@@ -47,6 +47,8 @@ import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
 import org.apache.hadoop.hive.serde2.io.ShortWritable;
 import org.apache.hadoop.hive.serde2.io.TimestampWritable;
+import org.apache.hadoop.hive.shims.ShimLoader;
+import org.apache.hadoop.hive.shims.HadoopShims.TextReaderShim;
 import org.apache.hadoop.io.BooleanWritable;
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.FloatWritable;
@@ -1486,6 +1488,7 @@ public class TreeReaderFactory {
    */
   protected static class StringDirectTreeReader extends TreeReader {
     protected InStream stream;
+    protected TextReaderShim data;
     protected IntegerReader lengths;
     private final LongColumnVector scratchlcv;
 
@@ -1500,6 +1503,7 @@ public class TreeReaderFactory {
       this.stream = data;
       if (length != null && encoding != null) {
         this.lengths = createIntegerReader(encoding, length, false, false);
+        this.data = ShimLoader.getHadoopShims().getTextReaderShim(this.stream);
       }
     }
 
@@ -1520,6 +1524,7 @@ public class TreeReaderFactory {
       StreamName name = new StreamName(columnId,
           OrcProto.Stream.Kind.DATA);
       stream = streams.get(name);
+      data = ShimLoader.getHadoopShims().getTextReaderShim(this.stream);
       lengths = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
           streams.get(new StreamName(columnId, OrcProto.Stream.Kind.LENGTH)),
           false, false);
@@ -1534,6 +1539,7 @@ public class TreeReaderFactory {
     public void seek(PositionProvider index) throws IOException {
       super.seek(index);
       stream.seek(index);
+      // don't seek the data shim; seeking the underlying stream above is enough
       lengths.seek(index);
     }
 
@@ -1548,17 +1554,7 @@ public class TreeReaderFactory {
           result = (Text) previous;
         }
         int len = (int) lengths.next();
-        int offset = 0;
-        byte[] bytes = new byte[len];
-        while (len > 0) {
-          int written = stream.read(bytes, offset, len);
-          if (written < 0) {
-            throw new EOFException("Can't finish byte read from " + stream);
-          }
-          len -= written;
-          offset += written;
-        }
-        result.set(bytes);
+        data.read(result, len);
       }
       return result;
     }

http://git-wip-us.apache.org/repos/asf/hive/blob/bb7153f9/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
----------------------------------------------------------------------
diff --git a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
index 29d0f13..3292cb3 100644
--- a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
+++ b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
@@ -17,8 +17,10 @@
  */
 package org.apache.hadoop.hive.shims;
 
+import java.io.DataInputStream;
 import java.io.FileNotFoundException;
 import java.io.IOException;
+import java.io.InputStream;
 import java.lang.reflect.Method;
 import java.net.InetSocketAddress;
 import java.net.MalformedURLException;
@@ -68,7 +70,9 @@ import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.protocol.HdfsLocatedFileStatus;
 import org.apache.hadoop.hdfs.client.HdfsAdmin;
 import org.apache.hadoop.hdfs.protocol.EncryptionZone;
+import org.apache.hadoop.hive.shims.HadoopShims.TextReaderShim;
 import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.ClusterStatus;
 import org.apache.hadoop.mapred.InputSplit;
 import org.apache.hadoop.mapred.JobConf;
@@ -110,10 +114,12 @@ public class Hadoop23Shims extends HadoopShimsSecure {
   HadoopShims.MiniDFSShim cluster = null;
   final boolean zeroCopy;
   final boolean storagePolicy;
+  final boolean fastread;
 
   public Hadoop23Shims() {
     boolean zcr = false;
     boolean storage = false;
+    boolean fastread = false;
     try {
       Class.forName("org.apache.hadoop.fs.CacheFlag", false,
           ShimLoader.class.getClassLoader());
@@ -130,8 +136,18 @@ public class Hadoop23Shims extends HadoopShimsSecure {
       } catch (ClassNotFoundException ce) {
       }
     }
+
+    if (storage) {
+      for (Method m : Text.class.getMethods()) {
+        if ("readWithKnownLength".equals(m.getName())) {
+          fastread = true;
+        }
+      }
+    }
+
     this.storagePolicy = storage;
     this.zeroCopy = zcr;
+    this.fastread = fastread;
   }
 
   @Override
@@ -1409,4 +1425,26 @@ public class Hadoop23Shims extends HadoopShimsSecure {
   public long getFileId(FileSystem fs, String path) throws IOException {
     return ensureDfs(fs).getClient().getFileInfo(path).getFileId();
   }
+
+  private final class FastTextReaderShim implements TextReaderShim {
+    private final DataInputStream din;
+
+    public FastTextReaderShim(InputStream in) {
+      this.din = new DataInputStream(in);
+    }
+
+    @Override
+    public void read(Text txt, int len) throws IOException {
+      txt.readWithKnownLength(din, len);
+    }
+  }
+
+  @Override
+  public TextReaderShim getTextReaderShim(InputStream in) throws IOException {
+    if (!fastread) {
+      return super.getTextReaderShim(in);
+    }
+    return new FastTextReaderShim(in);
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/bb7153f9/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java
----------------------------------------------------------------------
diff --git a/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java b/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java
index 2b6f322..6e2dedb 100644
--- a/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java
+++ b/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java
@@ -18,6 +18,7 @@
 package org.apache.hadoop.hive.shims;
 
 import java.io.IOException;
+import java.io.InputStream;
 import java.net.InetSocketAddress;
 import java.net.MalformedURLException;
 import java.net.URI;
@@ -49,6 +50,7 @@ import org.apache.hadoop.fs.permission.FsAction;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hive.shims.HadoopShims.StoragePolicyValue;
 import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.ClusterStatus;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.JobProfile;
@@ -746,4 +748,24 @@ public interface HadoopShims {
    * @return inode ID of the file.
    */
   long getFileId(FileSystem fs, String path) throws IOException;
+
+  /**
+   * Read data into a Text object in the fastest way possible.
+   */
+  public interface TextReaderShim {
+    /**
+     * @param txt the Text object to fill with the bytes read
+     * @param size the exact number of bytes to read into txt
+     * @throws IOException if the stream ends before {@code size}
+     *           bytes have been read
+     */
+    void read(Text txt, int size) throws IOException;
+  }
+
+  /**
+   * Wrap a TextReaderShim around an input stream. The reader shim does not
+   * buffer reads from the underlying stream and consumes only the bytes
+   * required to satisfy TextReaderShim.read().
+   */
+  public TextReaderShim getTextReaderShim(InputStream input) throws IOException;
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/bb7153f9/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java
----------------------------------------------------------------------
diff --git a/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java b/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java
index 89d7798..c6b7c9d 100644
--- a/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java
+++ b/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java
@@ -19,7 +19,9 @@ package org.apache.hadoop.hive.shims;
 
 import java.io.DataInput;
 import java.io.DataOutput;
+import java.io.EOFException;
 import java.io.IOException;
+import java.io.InputStream;
 import java.lang.reflect.Constructor;
 import java.net.URI;
 import java.security.AccessControlException;
@@ -40,6 +42,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.fs.permission.FsAction;
 import org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil;
+import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.ClusterStatus;
 import org.apache.hadoop.mapred.FileInputFormat;
 import org.apache.hadoop.mapred.InputSplit;
@@ -392,4 +395,33 @@ public abstract class HadoopShimsSecure implements HadoopShims {
 
   @Override
   abstract public void addDelegationTokens(FileSystem fs, Credentials cred, String uname) throws IOException;
+
+  private final class BasicTextReaderShim implements TextReaderShim {
+    private final InputStream in;
+
+    public BasicTextReaderShim(InputStream in) {
+      this.in = in;
+    }
+
+    @Override
+    public void read(Text txt, int len) throws IOException {
+      int offset = 0;
+      byte[] bytes = new byte[len];
+      while (len > 0) {
+        int written = in.read(bytes, offset, len);
+        if (written < 0) {
+          throw new EOFException("Can't finish read from " + in + ": read "
+              + offset + " bytes out of " + bytes.length);
+        }
+        len -= written;
+        offset += written;
+      }
+      txt.set(bytes);
+    }
+  }
+
+  @Override
+  public TextReaderShim getTextReaderShim(InputStream in) throws IOException {
+    return new BasicTextReaderShim(in);
+  }
 }
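
As a side note, the retry loop in BasicTextReaderShim is the standard
read-fully pattern. A hypothetical equivalent (not part of the patch) could
lean on java.io.DataInputStream.readFully(), which performs the same loop
internally and throws EOFException on a short stream:

    import java.io.DataInputStream;
    import java.io.IOException;
    import java.io.InputStream;

    import org.apache.hadoop.io.Text;

    // Hypothetical drop-in for BasicTextReaderShim: readFully() already
    // retries until len bytes arrive or the stream ends early.
    final class ReadFullyTextReaderShim {
      private final DataInputStream din;

      ReadFullyTextReaderShim(InputStream in) {
        this.din = new DataInputStream(in);
      }

      void read(Text txt, int len) throws IOException {
        byte[] bytes = new byte[len];
        din.readFully(bytes); // throws EOFException if the stream ends short
        txt.set(bytes);
      }
    }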


[31/50] [abbrv] hive git commit: HIVE-11698: Add additional test for PointLookupOptimizer (Jesus Camacho Rodriguez, reviewed by Hari Sankar Sivarama Subramaniyan)

Posted by xu...@apache.org.
HIVE-11698: Add additional test for PointLookupOptimizer (Jesus Camacho Rodriguez, reviewed by Hari Sankar Sivarama Subramaniyan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/dbdd6116
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/dbdd6116
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/dbdd6116

Branch: refs/heads/beeline-cli
Commit: dbdd6116bd9e25bdb5112d21fd40ec09d7f39adc
Parents: f2056a1
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Wed Sep 2 08:35:27 2015 +0200
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Wed Sep 2 08:35:37 2015 +0200

----------------------------------------------------------------------
 .../test/queries/clientpositive/pointlookup3.q  |   41 +
 .../results/clientpositive/pointlookup3.q.out   | 1394 ++++++++++++++++++
 2 files changed, 1435 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/dbdd6116/ql/src/test/queries/clientpositive/pointlookup3.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/pointlookup3.q b/ql/src/test/queries/clientpositive/pointlookup3.q
new file mode 100644
index 0000000..3daa94b
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/pointlookup3.q
@@ -0,0 +1,41 @@
+drop table pcr_t1;
+
+create table pcr_t1 (key int, value string) partitioned by (ds1 string, ds2 string);
+insert overwrite table pcr_t1 partition (ds1='2000-04-08', ds2='2001-04-08') select * from src where key < 20 order by key;
+insert overwrite table pcr_t1 partition (ds1='2000-04-09', ds2='2001-04-09') select * from src where key < 20 order by key;
+insert overwrite table pcr_t1 partition (ds1='2000-04-10', ds2='2001-04-10') select * from src where key < 20 order by key;
+
+set hive.optimize.point.lookup.min=2;
+set hive.optimize.point.lookup.extract=true;
+
+explain extended
+select key, value, ds1, ds2
+from pcr_t1
+where (ds1='2000-04-08' and key=1) or (ds1='2000-04-09' and key=2)
+order by key, value, ds1, ds2;
+
+explain extended
+select key, value, ds1, ds2
+from pcr_t1
+where (ds1='2000-04-08' and ds2='2001-04-08' and key=1) or (ds1='2000-04-09' and ds2='2001-04-08' and key=2)
+order by key, value, ds1, ds2;
+
+explain extended
+select *
+from pcr_t1 t1 join pcr_t1 t2
+on t1.key=t2.key and t1.ds1='2000-04-08' and t2.ds2='2001-04-08'
+order by t1.key;
+
+explain extended
+select *
+from pcr_t1 t1 join pcr_t1 t2
+on t1.key=t2.key and t1.ds1='2000-04-08' and t2.ds1='2000-04-09'
+order by t1.key;
+
+explain extended
+select *
+from pcr_t1 t1 join pcr_t1 t2
+where (t1.ds1='2000-04-08' and t2.key=1) or (t1.ds1='2000-04-09' and t2.key=2)
+order by t2.key, t2.value, t1.ds1;
+
+drop table pcr_t1;
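
The interesting predicate in the golden output below is the rewrite of the
OR'd equality pairs into (struct(ds1,key)) IN (const struct('2000-04-08',1),
const struct('2000-04-09',2)). A rough stand-alone model of why that form is
attractive -- one hash probe per row instead of one branch per OR disjunct --
using plain Java collections (the Row class and literal values are
illustrative, not Hive internals):

    import java.util.Arrays;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;

    public class PointLookupModel {

      // Stand-in for a scanned row; not a Hive class.
      static class Row {
        final String ds1;
        final int key;
        Row(String ds1, int key) { this.ds1 = ds1; this.key = key; }
      }

      public static void main(String[] args) {
        List<Row> rows = Arrays.asList(
            new Row("2000-04-08", 1), new Row("2000-04-09", 5), new Row("2000-04-09", 2));

        // The struct-IN form: build the lookup set once, then do a single
        // hash probe per row. Arrays.asList gives element-wise
        // equals/hashCode, so each entry behaves like a struct literal.
        Set<List<Object>> lookup = new HashSet<List<Object>>();
        lookup.add(Arrays.<Object>asList("2000-04-08", 1));
        lookup.add(Arrays.<Object>asList("2000-04-09", 2));

        for (Row r : rows) {
          // Original OR-chain: every disjunct evaluated per row.
          boolean orChain = (r.ds1.equals("2000-04-08") && r.key == 1)
              || (r.ds1.equals("2000-04-09") && r.key == 2);
          // Rewritten form: one probe per row.
          boolean structIn = lookup.contains(Arrays.<Object>asList(r.ds1, r.key));
          System.out.println(orChain + " / " + structIn); // true/true, false/false, true/true
        }
      }
    }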

http://git-wip-us.apache.org/repos/asf/hive/blob/dbdd6116/ql/src/test/results/clientpositive/pointlookup3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/pointlookup3.q.out b/ql/src/test/results/clientpositive/pointlookup3.q.out
new file mode 100644
index 0000000..4cfb97e
--- /dev/null
+++ b/ql/src/test/results/clientpositive/pointlookup3.q.out
@@ -0,0 +1,1394 @@
+PREHOOK: query: drop table pcr_t1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table pcr_t1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table pcr_t1 (key int, value string) partitioned by (ds1 string, ds2 string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@pcr_t1
+POSTHOOK: query: create table pcr_t1 (key int, value string) partitioned by (ds1 string, ds2 string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@pcr_t1
+PREHOOK: query: insert overwrite table pcr_t1 partition (ds1='2000-04-08', ds2='2001-04-08') select * from src where key < 20 order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@pcr_t1@ds1=2000-04-08/ds2=2001-04-08
+POSTHOOK: query: insert overwrite table pcr_t1 partition (ds1='2000-04-08', ds2='2001-04-08') select * from src where key < 20 order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@pcr_t1@ds1=2000-04-08/ds2=2001-04-08
+POSTHOOK: Lineage: pcr_t1 PARTITION(ds1=2000-04-08,ds2=2001-04-08).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: pcr_t1 PARTITION(ds1=2000-04-08,ds2=2001-04-08).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table pcr_t1 partition (ds1='2000-04-09', ds2='2001-04-09') select * from src where key < 20 order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@pcr_t1@ds1=2000-04-09/ds2=2001-04-09
+POSTHOOK: query: insert overwrite table pcr_t1 partition (ds1='2000-04-09', ds2='2001-04-09') select * from src where key < 20 order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@pcr_t1@ds1=2000-04-09/ds2=2001-04-09
+POSTHOOK: Lineage: pcr_t1 PARTITION(ds1=2000-04-09,ds2=2001-04-09).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: pcr_t1 PARTITION(ds1=2000-04-09,ds2=2001-04-09).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table pcr_t1 partition (ds1='2000-04-10', ds2='2001-04-10') select * from src where key < 20 order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@pcr_t1@ds1=2000-04-10/ds2=2001-04-10
+POSTHOOK: query: insert overwrite table pcr_t1 partition (ds1='2000-04-10', ds2='2001-04-10') select * from src where key < 20 order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@pcr_t1@ds1=2000-04-10/ds2=2001-04-10
+POSTHOOK: Lineage: pcr_t1 PARTITION(ds1=2000-04-10,ds2=2001-04-10).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: pcr_t1 PARTITION(ds1=2000-04-10,ds2=2001-04-10).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: explain extended
+select key, value, ds1, ds2
+from pcr_t1
+where (ds1='2000-04-08' and key=1) or (ds1='2000-04-09' and key=2)
+order by key, value, ds1, ds2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select key, value, ds1, ds2
+from pcr_t1
+where (ds1='2000-04-08' and key=1) or (ds1='2000-04-09' and key=2)
+order by key, value, ds1, ds2
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  
+TOK_QUERY
+   TOK_FROM
+      TOK_TABREF
+         TOK_TABNAME
+            pcr_t1
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               key
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               value
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               ds1
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               ds2
+      TOK_WHERE
+         or
+            and
+               =
+                  TOK_TABLE_OR_COL
+                     ds1
+                  '2000-04-08'
+               =
+                  TOK_TABLE_OR_COL
+                     key
+                  1
+            and
+               =
+                  TOK_TABLE_OR_COL
+                     ds1
+                  '2000-04-09'
+               =
+                  TOK_TABLE_OR_COL
+                     key
+                  2
+      TOK_ORDERBY
+         TOK_TABSORTCOLNAMEASC
+            TOK_TABLE_OR_COL
+               key
+         TOK_TABSORTCOLNAMEASC
+            TOK_TABLE_OR_COL
+               value
+         TOK_TABSORTCOLNAMEASC
+            TOK_TABLE_OR_COL
+               ds1
+         TOK_TABSORTCOLNAMEASC
+            TOK_TABLE_OR_COL
+               ds2
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: pcr_t1
+            Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: (struct(ds1,key)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) (type: boolean)
+              Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: int), value (type: string), ds1 (type: string), ds2 (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+                  sort order: ++++
+                  Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                  tag: -1
+                  auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: ds2=2001-04-08
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds1 2000-04-08
+              ds2 2001-04-08
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcr_t1
+              numFiles 1
+              numRows 20
+              partition_columns ds1/ds2
+              partition_columns.types string:string
+              rawDataSize 160
+              serialization.ddl struct pcr_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                partition_columns ds1/ds2
+                partition_columns.types string:string
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t1
+            name: default.pcr_t1
+#### A masked pattern was here ####
+          Partition
+            base file name: ds2=2001-04-09
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds1 2000-04-09
+              ds2 2001-04-09
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcr_t1
+              numFiles 1
+              numRows 20
+              partition_columns ds1/ds2
+              partition_columns.types string:string
+              rawDataSize 160
+              serialization.ddl struct pcr_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                partition_columns ds1/ds2
+                partition_columns.types string:string
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t1
+            name: default.pcr_t1
+      Truncated Path -> Alias:
+        /pcr_t1/ds1=2000-04-08/ds2=2001-04-08 [pcr_t1]
+        /pcr_t1/ds1=2000-04-09/ds2=2001-04-09 [pcr_t1]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  columns _col0,_col1,_col2,_col3
+                  columns.types int:string:string:string
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain extended
+select key, value, ds1, ds2
+from pcr_t1
+where (ds1='2000-04-08' and ds2='2001-04-08' and key=1) or (ds1='2000-04-09' and ds2='2001-04-08' and key=2)
+order by key, value, ds1, ds2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select key, value, ds1, ds2
+from pcr_t1
+where (ds1='2000-04-08' and ds2='2001-04-08' and key=1) or (ds1='2000-04-09' and ds2='2001-04-08' and key=2)
+order by key, value, ds1, ds2
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  
+TOK_QUERY
+   TOK_FROM
+      TOK_TABREF
+         TOK_TABNAME
+            pcr_t1
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               key
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               value
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               ds1
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               ds2
+      TOK_WHERE
+         or
+            and
+               and
+                  =
+                     TOK_TABLE_OR_COL
+                        ds1
+                     '2000-04-08'
+                  =
+                     TOK_TABLE_OR_COL
+                        ds2
+                     '2001-04-08'
+               =
+                  TOK_TABLE_OR_COL
+                     key
+                  1
+            and
+               and
+                  =
+                     TOK_TABLE_OR_COL
+                        ds1
+                     '2000-04-09'
+                  =
+                     TOK_TABLE_OR_COL
+                        ds2
+                     '2001-04-08'
+               =
+                  TOK_TABLE_OR_COL
+                     key
+                  2
+      TOK_ORDERBY
+         TOK_TABSORTCOLNAMEASC
+            TOK_TABLE_OR_COL
+               key
+         TOK_TABSORTCOLNAMEASC
+            TOK_TABLE_OR_COL
+               value
+         TOK_TABSORTCOLNAMEASC
+            TOK_TABLE_OR_COL
+               ds1
+         TOK_TABSORTCOLNAMEASC
+            TOK_TABLE_OR_COL
+               ds2
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: pcr_t1
+            Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: (key = 1) (type: boolean)
+              Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: value (type: string), ds1 (type: string)
+                outputColumnNames: _col1, _col2
+                Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: 1 (type: int), _col1 (type: string), _col2 (type: string), '2001-04-08' (type: string)
+                  sort order: ++++
+                  Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+                  tag: -1
+                  auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: ds2=2001-04-08
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds1 2000-04-08
+              ds2 2001-04-08
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcr_t1
+              numFiles 1
+              numRows 20
+              partition_columns ds1/ds2
+              partition_columns.types string:string
+              rawDataSize 160
+              serialization.ddl struct pcr_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                partition_columns ds1/ds2
+                partition_columns.types string:string
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t1
+            name: default.pcr_t1
+      Truncated Path -> Alias:
+        /pcr_t1/ds1=2000-04-08/ds2=2001-04-08 [pcr_t1]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: 1 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), '2001-04-08' (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  columns _col0,_col1,_col2,_col3
+                  columns.types int:string:string:string
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain extended
+select *
+from pcr_t1 t1 join pcr_t1 t2
+on t1.key=t2.key and t1.ds1='2000-04-08' and t2.ds2='2001-04-08'
+order by t1.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select *
+from pcr_t1 t1 join pcr_t1 t2
+on t1.key=t2.key and t1.ds1='2000-04-08' and t2.ds2='2001-04-08'
+order by t1.key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  
+TOK_QUERY
+   TOK_FROM
+      TOK_JOIN
+         TOK_TABREF
+            TOK_TABNAME
+               pcr_t1
+            t1
+         TOK_TABREF
+            TOK_TABNAME
+               pcr_t1
+            t2
+         and
+            and
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t1
+                     key
+                  .
+                     TOK_TABLE_OR_COL
+                        t2
+                     key
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t1
+                     ds1
+                  '2000-04-08'
+            =
+               .
+                  TOK_TABLE_OR_COL
+                     t2
+                  ds2
+               '2001-04-08'
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_ALLCOLREF
+      TOK_ORDERBY
+         TOK_TABSORTCOLNAMEASC
+            .
+               TOK_TABLE_OR_COL
+                  t1
+               key
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t1
+            Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: key (type: int)
+                sort order: +
+                Map-reduce partition columns: key (type: int)
+                Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+                tag: 0
+                value expressions: value (type: string), ds2 (type: string)
+                auto parallelism: false
+          TableScan
+            alias: t2
+            Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: key (type: int)
+                sort order: +
+                Map-reduce partition columns: key (type: int)
+                Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+                tag: 1
+                value expressions: value (type: string), ds1 (type: string)
+                auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: ds2=2001-04-08
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds1 2000-04-08
+              ds2 2001-04-08
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcr_t1
+              numFiles 1
+              numRows 20
+              partition_columns ds1/ds2
+              partition_columns.types string:string
+              rawDataSize 160
+              serialization.ddl struct pcr_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                partition_columns ds1/ds2
+                partition_columns.types string:string
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t1
+            name: default.pcr_t1
+      Truncated Path -> Alias:
+        /pcr_t1/ds1=2000-04-08/ds2=2001-04-08 [t1, t2]
+      Needs Tagging: true
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 key (type: int)
+            1 key (type: int)
+          outputColumnNames: _col0, _col1, _col3, _col7, _col8, _col9
+          Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string), _col7 (type: int), _col8 (type: string), _col9 (type: string)
+            outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6
+            Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+#### A masked pattern was here ####
+              NumFilesPerFileSink: 1
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  properties:
+                    columns _col0,_col1,_col3,_col4,_col5,_col6
+                    columns.types int,string,string,int,string,string
+                    escape.delim \
+                    serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              key expressions: _col0 (type: int)
+              sort order: +
+              Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col1 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: string), _col6 (type: string)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10003
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              columns _col0,_col1,_col3,_col4,_col5,_col6
+              columns.types int,string,string,int,string,string
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                columns _col0,_col1,_col3,_col4,_col5,_col6
+                columns.types int,string,string,int,string,string
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), '2000-04-08' (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: string), '2001-04-08' (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+          Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7
+                  columns.types int:string:string:string:int:string:string:string
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain extended
+select *
+from pcr_t1 t1 join pcr_t1 t2
+on t1.key=t2.key and t1.ds1='2000-04-08' and t2.ds1='2000-04-09'
+order by t1.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select *
+from pcr_t1 t1 join pcr_t1 t2
+on t1.key=t2.key and t1.ds1='2000-04-08' and t2.ds1='2000-04-09'
+order by t1.key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  
+TOK_QUERY
+   TOK_FROM
+      TOK_JOIN
+         TOK_TABREF
+            TOK_TABNAME
+               pcr_t1
+            t1
+         TOK_TABREF
+            TOK_TABNAME
+               pcr_t1
+            t2
+         and
+            and
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t1
+                     key
+                  .
+                     TOK_TABLE_OR_COL
+                        t2
+                     key
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t1
+                     ds1
+                  '2000-04-08'
+            =
+               .
+                  TOK_TABLE_OR_COL
+                     t2
+                  ds1
+               '2000-04-09'
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_ALLCOLREF
+      TOK_ORDERBY
+         TOK_TABSORTCOLNAMEASC
+            .
+               TOK_TABLE_OR_COL
+                  t1
+               key
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t1
+            Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: key (type: int)
+                sort order: +
+                Map-reduce partition columns: key (type: int)
+                Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+                tag: 0
+                value expressions: value (type: string), ds2 (type: string)
+                auto parallelism: false
+          TableScan
+            alias: t2
+            Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: key (type: int)
+                sort order: +
+                Map-reduce partition columns: key (type: int)
+                Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+                tag: 1
+                value expressions: value (type: string), ds2 (type: string)
+                auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: ds2=2001-04-08
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds1 2000-04-08
+              ds2 2001-04-08
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcr_t1
+              numFiles 1
+              numRows 20
+              partition_columns ds1/ds2
+              partition_columns.types string:string
+              rawDataSize 160
+              serialization.ddl struct pcr_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                partition_columns ds1/ds2
+                partition_columns.types string:string
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t1
+            name: default.pcr_t1
+#### A masked pattern was here ####
+          Partition
+            base file name: ds2=2001-04-09
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds1 2000-04-09
+              ds2 2001-04-09
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcr_t1
+              numFiles 1
+              numRows 20
+              partition_columns ds1/ds2
+              partition_columns.types string:string
+              rawDataSize 160
+              serialization.ddl struct pcr_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                partition_columns ds1/ds2
+                partition_columns.types string:string
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t1
+            name: default.pcr_t1
+      Truncated Path -> Alias:
+        /pcr_t1/ds1=2000-04-08/ds2=2001-04-08 [t1]
+        /pcr_t1/ds1=2000-04-09/ds2=2001-04-09 [t2]
+      Needs Tagging: true
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 key (type: int)
+            1 key (type: int)
+          outputColumnNames: _col0, _col1, _col3, _col7, _col8, _col10
+          Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string), _col7 (type: int), _col8 (type: string), _col10 (type: string)
+            outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col7
+            Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+#### A masked pattern was here ####
+              NumFilesPerFileSink: 1
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  properties:
+                    columns _col0,_col1,_col3,_col4,_col5,_col7
+                    columns.types int,string,string,int,string,string
+                    escape.delim \
+                    serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              key expressions: _col0 (type: int)
+              sort order: +
+              Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col1 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: string), _col7 (type: string)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10003
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              columns _col0,_col1,_col3,_col4,_col5,_col7
+              columns.types int,string,string,int,string,string
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                columns _col0,_col1,_col3,_col4,_col5,_col7
+                columns.types int,string,string,int,string,string
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), '2000-04-08' (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), '2000-04-09' (type: string), VALUE._col6 (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+          Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7
+                  columns.types int:string:string:string:int:string:string:string
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+Warning: Shuffle Join JOIN[4][tables = [t1, t2]] in Stage 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: explain extended
+select *
+from pcr_t1 t1 join pcr_t1 t2
+where (t1.ds1='2000-04-08' and t2.key=1) or (t1.ds1='2000-04-09' and t2.key=2)
+order by t2.key, t2.value, t1.ds1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select *
+from pcr_t1 t1 join pcr_t1 t2
+where (t1.ds1='2000-04-08' and t2.key=1) or (t1.ds1='2000-04-09' and t2.key=2)
+order by t2.key, t2.value, t1.ds1
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  
+TOK_QUERY
+   TOK_FROM
+      TOK_JOIN
+         TOK_TABREF
+            TOK_TABNAME
+               pcr_t1
+            t1
+         TOK_TABREF
+            TOK_TABNAME
+               pcr_t1
+            t2
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_ALLCOLREF
+      TOK_WHERE
+         or
+            and
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t1
+                     ds1
+                  '2000-04-08'
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t2
+                     key
+                  1
+            and
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t1
+                     ds1
+                  '2000-04-09'
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t2
+                     key
+                  2
+      TOK_ORDERBY
+         TOK_TABSORTCOLNAMEASC
+            .
+               TOK_TABLE_OR_COL
+                  t2
+               key
+         TOK_TABSORTCOLNAMEASC
+            .
+               TOK_TABLE_OR_COL
+                  t2
+               value
+         TOK_TABSORTCOLNAMEASC
+            .
+               TOK_TABLE_OR_COL
+                  t1
+               ds1
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t1
+            Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+              tag: 0
+              value expressions: key (type: int), value (type: string), ds1 (type: string), ds2 (type: string)
+              auto parallelism: false
+          TableScan
+            alias: t2
+            Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE
+              tag: 1
+              value expressions: key (type: int), value (type: string), ds1 (type: string), ds2 (type: string)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: ds2=2001-04-08
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds1 2000-04-08
+              ds2 2001-04-08
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcr_t1
+              numFiles 1
+              numRows 20
+              partition_columns ds1/ds2
+              partition_columns.types string:string
+              rawDataSize 160
+              serialization.ddl struct pcr_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                partition_columns ds1/ds2
+                partition_columns.types string:string
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t1
+            name: default.pcr_t1
+#### A masked pattern was here ####
+          Partition
+            base file name: ds2=2001-04-09
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds1 2000-04-09
+              ds2 2001-04-09
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcr_t1
+              numFiles 1
+              numRows 20
+              partition_columns ds1/ds2
+              partition_columns.types string:string
+              rawDataSize 160
+              serialization.ddl struct pcr_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                partition_columns ds1/ds2
+                partition_columns.types string:string
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t1
+            name: default.pcr_t1
+#### A masked pattern was here ####
+          Partition
+            base file name: ds2=2001-04-10
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds1 2000-04-10
+              ds2 2001-04-10
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcr_t1
+              numFiles 1
+              numRows 20
+              partition_columns ds1/ds2
+              partition_columns.types string:string
+              rawDataSize 160
+              serialization.ddl struct pcr_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                partition_columns ds1/ds2
+                partition_columns.types string:string
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t1
+            name: default.pcr_t1
+      Truncated Path -> Alias:
+        /pcr_t1/ds1=2000-04-08/ds2=2001-04-08 [t1, t2]
+        /pcr_t1/ds1=2000-04-09/ds2=2001-04-09 [t1, t2]
+        /pcr_t1/ds1=2000-04-10/ds2=2001-04-10 [t2]
+      Needs Tagging: true
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 
+            1 
+          outputColumnNames: _col0, _col1, _col2, _col3, _col7, _col8, _col9, _col10
+          Statistics: Num rows: 66 Data size: 528 Basic stats: COMPLETE Column stats: NONE
+          Filter Operator
+            isSamplingPred: false
+            predicate: ((_col2) IN ('2000-04-08', '2000-04-09') and (struct(_col7,_col2)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09'))) (type: boolean)
+            Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col7 (type: int), _col8 (type: string), _col9 (type: string), _col10 (type: string)
+              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+              Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    properties:
+                      columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7
+                      columns.types int,string,string,string,int,string,string,string
+                      escape.delim \
+                      serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              key expressions: _col4 (type: int), _col5 (type: string), _col2 (type: string)
+              sort order: +++
+              Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string), _col6 (type: string), _col7 (type: string)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10003
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7
+              columns.types int,string,string,string,int,string,string,string
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7
+                columns.types int,string,string,string,int,string,string,string
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col3 (type: string), VALUE._col4 (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+          Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7
+                  columns.types int:string:string:string:int:string:string:string
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: drop table pcr_t1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@pcr_t1
+PREHOOK: Output: default@pcr_t1
+POSTHOOK: query: drop table pcr_t1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@pcr_t1
+POSTHOOK: Output: default@pcr_t1


[10/50] [abbrv] hive git commit: HIVE-11627: Reduce the number of accesses to hashmaps in PPD (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

Posted by xu...@apache.org.
HIVE-11627: Reduce the number of accesses to hashmaps in PPD (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d147a79c
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d147a79c
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d147a79c

Branch: refs/heads/beeline-cli
Commit: d147a79c13a9fdde83372c740167236eb80962de
Parents: 607b0e8
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Thu Aug 27 18:07:48 2015 +0200
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Fri Aug 28 10:09:38 2015 +0200

----------------------------------------------------------------------
 .../hadoop/hive/ql/ppd/ExprWalkerInfo.java      | 127 ++++---------------
 .../hive/ql/ppd/ExprWalkerProcFactory.java      |  92 +++++++++-----
 .../hadoop/hive/ql/ppd/OpProcFactory.java       |  11 +-
 3 files changed, 93 insertions(+), 137 deletions(-)
----------------------------------------------------------------------
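
The refactor below applies one pattern throughout: instead of separate accessors (getConvertedNode, isCandidate, getAlias, ...) that each perform their own exprInfoMap lookup, callers fetch a single mutable ExprInfo record once and then read or write its fields directly. A minimal sketch of the idea, using simplified stand-in types rather than the actual Hive classes:

  import java.util.IdentityHashMap;
  import java.util.Map;

  class WalkerInfoSketch {
    // mirrors ExprWalkerInfo.ExprInfo: one mutable record per expression node
    static final class Info {
      boolean candidate;
      String alias;
    }

    private final Map<Object, Info> infoMap = new IdentityHashMap<>();

    // before: each question about a node costs a separate hashmap access
    boolean isCandidate(Object node) {
      Info i = infoMap.get(node);            // lookup #1
      return i != null && i.candidate;
    }
    String getAlias(Object node) {
      Info i = infoMap.get(node);            // lookup #2 for the same node
      return i == null ? null : i.alias;
    }

    // after: one lookup, then plain field reads and writes on the record
    Info addOrGetInfo(Object node) {
      Info i = infoMap.get(node);
      if (i == null) {
        i = new Info();
        infoMap.put(node, i);
      }
      return i;
    }
  }

With addOrGetInfo, a processor that previously called setIsCandidate and addAlias (two lookups plus two internal get-or-create checks) pays for a single map access per node.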


http://git-wip-us.apache.org/repos/asf/hive/blob/d147a79c/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java
index f23facf..e4b768e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java
@@ -38,29 +38,21 @@ import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 public class ExprWalkerInfo implements NodeProcessorCtx {
 
   /** Information maintained for an expr while walking an expr tree. */
-  private static class ExprInfo {
+  protected class ExprInfo {
     /**
      * true if expr rooted at this node doesn't contain more than one table.
      * alias
      */
-    public boolean isCandidate = false;
+    protected boolean isCandidate = false;
     /** alias that this expression refers to. */
-    public String alias = null;
+    protected String alias = null;
     /** new expr for this expression. */
-    public ExprNodeDesc convertedExpr = null;
+    protected ExprNodeDesc convertedExpr = null;
 
-    public ExprInfo() {
-    }
 
-    public ExprInfo(boolean isCandidate, String alias, ExprNodeDesc replacedNode) {
-      this.isCandidate = isCandidate;
-      this.alias = alias;
-      convertedExpr = replacedNode;
-    }
   }
 
-  protected static final Log LOG = LogFactory.getLog(OpProcFactory.class
-      .getName());;
+  protected static final Log LOG = LogFactory.getLog(OpProcFactory.class.getName());
   private Operator<? extends OperatorDesc> op = null;
 
   /**
@@ -127,105 +119,33 @@ public class ExprWalkerInfo implements NodeProcessorCtx {
   }
 
   /**
-   * @return converted expression for give node. If there is none then returns
-   *         null.
-   */
-  public ExprNodeDesc getConvertedNode(ExprNodeDesc nd) {
-    ExprInfo ei = exprInfoMap.get(nd);
-    if (ei == null) {
-      return null;
-    }
-    return ei.convertedExpr;
-  }
-
-  /**
-   * adds a replacement node for this expression.
-   *
-   * @param oldNode
-   *          original node
-   * @param newNode
-   *          new node
+   * Get additional info for a given expression node
    */
-  public void addConvertedNode(ExprNodeDesc oldNode, ExprNodeDesc newNode) {
-    ExprInfo ei = exprInfoMap.get(oldNode);
-    if (ei == null) {
-      ei = new ExprInfo();
-      exprInfoMap.put(oldNode, ei);
-    }
-    ei.convertedExpr = newNode;
-    exprInfoMap.put(newNode, new ExprInfo(ei.isCandidate, ei.alias, null));
+  public ExprInfo getExprInfo(ExprNodeDesc expr) {
+    return exprInfoMap.get(expr);
   }
 
   /**
-   * Returns true if the specified expression is pushdown candidate else false.
-   *
-   * @param expr
-   * @return true or false
+   * Create a new additional info for a given expression node
+   * and store it, replacing any existing entry
    */
-  public boolean isCandidate(ExprNodeDesc expr) {
-    ExprInfo ei = exprInfoMap.get(expr);
-    if (ei == null) {
-      return false;
-    }
-    return ei.isCandidate;
+  public ExprInfo addExprInfo(ExprNodeDesc expr) {
+    ExprInfo exprInfo = new ExprInfo();
+    exprInfoMap.put(expr, exprInfo);
+    return exprInfo;
   }
 
   /**
-   * Marks the specified expr to the specified value.
-   *
-   * @param expr
-   * @param b
-   *          can
+   * Get additional info for a given expression node if it
+   * exists, or create a new one and store it if it does not
    */
-  public void setIsCandidate(ExprNodeDesc expr, boolean b) {
-    ExprInfo ei = exprInfoMap.get(expr);
-    if (ei == null) {
-      ei = new ExprInfo();
-      exprInfoMap.put(expr, ei);
+  public ExprInfo addOrGetExprInfo(ExprNodeDesc expr) {
+    ExprInfo exprInfo = exprInfoMap.get(expr);
+    if (exprInfo == null) {
+      exprInfo = new ExprInfo();
+      exprInfoMap.put(expr, exprInfo);
     }
-    ei.isCandidate = b;
-  }
-
-  /**
-   * Returns the alias of the specified expr.
-   *
-   * @param expr
-   * @return The alias of the expression
-   */
-  public String getAlias(ExprNodeDesc expr) {
-    ExprInfo ei = exprInfoMap.get(expr);
-    if (ei == null) {
-      return null;
-    }
-    return ei.alias;
-  }
-
-  /**
-   * Adds the specified alias to the specified expr.
-   *
-   * @param expr
-   * @param alias
-   */
-  public void addAlias(ExprNodeDesc expr, String alias) {
-    if (alias == null) {
-      return;
-    }
-    ExprInfo ei = exprInfoMap.get(expr);
-    if (ei == null) {
-      ei = new ExprInfo();
-      exprInfoMap.put(expr, ei);
-    }
-    ei.alias = alias;
-  }
-
-  /**
-   * Adds the specified expr as the top-most pushdown expr (ie all its children
-   * can be pushed).
-   *
-   * @param expr
-   */
-  public void addFinalCandidate(ExprNodeDesc expr) {
-    addFinalCandidate(getAlias(expr), expr);
+    return exprInfo;
   }
 
   public void addFinalCandidate(String alias, ExprNodeDesc expr) {
@@ -278,8 +198,7 @@ public class ExprWalkerInfo implements NodeProcessorCtx {
    *
    * @param expr
    */
-  public void addNonFinalCandidate(ExprNodeDesc expr) {
-    String alias = getAlias(expr);
+  public void addNonFinalCandidate(String alias, ExprNodeDesc expr) {
     if (nonFinalPreds.get(alias) == null) {
       nonFinalPreds.put(alias, new ArrayList<ExprNodeDesc>());
     }

http://git-wip-us.apache.org/repos/asf/hive/blob/d147a79c/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java
index 6a1bef9..64efbdd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java
@@ -38,8 +38,6 @@ import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
 import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
 import org.apache.hadoop.hive.ql.lib.Rule;
-import org.apache.hadoop.hive.ql.lib.RuleExactMatch;
-import org.apache.hadoop.hive.ql.lib.RuleRegExp;
 import org.apache.hadoop.hive.ql.lib.TypeRule;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
@@ -47,6 +45,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.ppd.ExprWalkerInfo.ExprInfo;
 
 /**
  * Expression factory for predicate pushdown processing. Each processor
@@ -55,8 +54,7 @@ import org.apache.hadoop.hive.ql.plan.OperatorDesc;
  */
 public final class ExprWalkerProcFactory {
 
-  private static final Log LOG = LogFactory
-      .getLog(ExprWalkerProcFactory.class.getName());
+  private static final Log LOG = LogFactory.getLog(ExprWalkerProcFactory.class.getName());
 
   /**
    * ColumnExprProcessor.
@@ -80,6 +78,7 @@ public final class ExprWalkerProcFactory {
         tabAlias = ci.getTabAlias();
       }
 
+      ExprInfo colExprInfo = null;
       boolean isCandidate = true;
       if (op.getColumnExprMap() != null) {
         // replace the output expression with the input expression so that
@@ -88,7 +87,8 @@ public final class ExprWalkerProcFactory {
         if (exp == null) {
           // means that expression can't be pushed either because it is value in
           // group by
-          ctx.setIsCandidate(colref, false);
+          colExprInfo = ctx.addOrGetExprInfo(colref);
+          colExprInfo.isCandidate = false;
           return false;
         } else {
           if (exp instanceof ExprNodeGenericFuncDesc) {
@@ -99,16 +99,25 @@ public final class ExprWalkerProcFactory {
             tabAlias = column.getTabAlias();
           }
         }
-        ctx.addConvertedNode(colref, exp);
-        ctx.setIsCandidate(exp, isCandidate);
-        ctx.addAlias(exp, tabAlias);
+        colExprInfo = ctx.addOrGetExprInfo(colref);
+        colExprInfo.convertedExpr = exp;
+        ExprInfo expInfo = ctx.addExprInfo(exp);
+        expInfo.isCandidate = isCandidate;
+        if (tabAlias != null) {
+          expInfo.alias = tabAlias;
+        } else {
+          expInfo.alias = colExprInfo.alias;
+        }
       } else {
         if (ci == null) {
           return false;
         }
-        ctx.addAlias(colref, tabAlias);
+        colExprInfo = ctx.addOrGetExprInfo(colref);
+        if (tabAlias != null) {
+          colExprInfo.alias = tabAlias;
+        }
       }
-      ctx.setIsCandidate(colref, isCandidate);
+      colExprInfo.isCandidate = isCandidate;
       return isCandidate;
     }
 
@@ -127,30 +136,37 @@ public final class ExprWalkerProcFactory {
       String alias = null;
       ExprNodeFieldDesc expr = (ExprNodeFieldDesc) nd;
 
-      boolean isCandidate = true;
       assert (nd.getChildren().size() == 1);
       ExprNodeDesc ch = (ExprNodeDesc) nd.getChildren().get(0);
-      ExprNodeDesc newCh = ctx.getConvertedNode(ch);
+      ExprInfo chExprInfo = ctx.getExprInfo(ch);
+      ExprNodeDesc newCh = chExprInfo != null ? chExprInfo.convertedExpr : null;
       if (newCh != null) {
         expr.setDesc(newCh);
         ch = newCh;
+        chExprInfo = ctx.getExprInfo(ch);
       }
-      String chAlias = ctx.getAlias(ch);
 
-      isCandidate = isCandidate && ctx.isCandidate(ch);
+      boolean isCandidate;
+      String chAlias;
+      if (chExprInfo != null) {
+        chAlias = chExprInfo.alias;
+        isCandidate = chExprInfo.isCandidate;
+      } else {
+        chAlias = null;
+        isCandidate = false;
+      }
       // need to iterate through all children even if one is found to be not a
       // candidate
       // in case the other children can be individually pushed up
       if (isCandidate && chAlias != null) {
-        if (alias == null) {
-          alias = chAlias;
-        } else if (!chAlias.equalsIgnoreCase(alias)) {
-          isCandidate = false;
-        }
+        alias = chAlias;
       }
 
-      ctx.addAlias(expr, alias);
-      ctx.setIsCandidate(expr, isCandidate);
+      ExprInfo exprInfo = ctx.addOrGetExprInfo(expr);
+      if (alias != null) {
+        exprInfo.alias = alias;
+      }
+      exprInfo.isCandidate = isCandidate;
       return isCandidate;
     }
 
@@ -172,7 +188,8 @@ public final class ExprWalkerProcFactory {
 
       if (!FunctionRegistry.isDeterministic(expr.getGenericUDF())) {
         // this GenericUDF can't be pushed down
-        ctx.setIsCandidate(expr, false);
+        ExprInfo exprInfo = ctx.addOrGetExprInfo(expr);
+        exprInfo.isCandidate = false;
         ctx.setDeterministic(false);
         return false;
       }
@@ -180,14 +197,22 @@ public final class ExprWalkerProcFactory {
       boolean isCandidate = true;
       for (int i = 0; i < nd.getChildren().size(); i++) {
         ExprNodeDesc ch = (ExprNodeDesc) nd.getChildren().get(i);
-        ExprNodeDesc newCh = ctx.getConvertedNode(ch);
+        ExprInfo chExprInfo = ctx.getExprInfo(ch);
+        ExprNodeDesc newCh = chExprInfo != null ? chExprInfo.convertedExpr : null;
         if (newCh != null) {
           expr.getChildren().set(i, newCh);
           ch = newCh;
+          chExprInfo = ctx.getExprInfo(ch);
         }
-        String chAlias = ctx.getAlias(ch);
 
-        isCandidate = isCandidate && ctx.isCandidate(ch);
+        String chAlias;
+        if (chExprInfo != null) {
+          chAlias = chExprInfo.alias;
+          isCandidate = isCandidate && chExprInfo.isCandidate;
+        } else {
+          chAlias = null;
+          isCandidate = false;
+        }
         // need to iterate through all children even if one is found to be not a
         // candidate
        // in case the other children can be individually pushed up
@@ -203,8 +228,11 @@ public final class ExprWalkerProcFactory {
           break;
         }
       }
-      ctx.addAlias(expr, alias);
-      ctx.setIsCandidate(expr, isCandidate);
+      ExprInfo exprInfo = ctx.addOrGetExprInfo(expr);
+      if (alias != null) {
+        exprInfo.alias = alias;
+      }
+      exprInfo.isCandidate = isCandidate;
       return isCandidate;
     }
 
@@ -219,7 +247,8 @@ public final class ExprWalkerProcFactory {
     public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
         Object... nodeOutputs) throws SemanticException {
       ExprWalkerInfo ctx = (ExprWalkerInfo) procCtx;
-      ctx.setIsCandidate((ExprNodeDesc) nd, true);
+      ExprInfo exprInfo = ctx.addOrGetExprInfo((ExprNodeDesc) nd);
+      exprInfo.isCandidate = true;
       return true;
     }
   }
@@ -324,12 +353,13 @@ public final class ExprWalkerProcFactory {
       return;
     }
 
-    if (ctx.isCandidate(expr)) {
-      ctx.addFinalCandidate(expr);
+    ExprInfo exprInfo = ctx.getExprInfo(expr);
+    if (exprInfo != null && exprInfo.isCandidate) {
+      ctx.addFinalCandidate(exprInfo.alias, expr);
       return;
     } else if (!FunctionRegistry.isOpAnd(expr) &&
         HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEPPDREMOVEDUPLICATEFILTERS)) {
-      ctx.addNonFinalCandidate(expr);
+      ctx.addNonFinalCandidate(exprInfo != null ? exprInfo.alias : null, expr);
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/d147a79c/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
index 6f9df53..dbd021b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
@@ -66,6 +66,7 @@ import org.apache.hadoop.hive.ql.plan.ptf.ValueBoundaryDef;
 import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef;
 import org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef;
 import org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef;
+import org.apache.hadoop.hive.ql.ppd.ExprWalkerInfo.ExprInfo;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFDenseRank.GenericUDAFDenseRankEvaluator;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFLead.GenericUDAFLeadEvaluator;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFRank.GenericUDAFRankEvaluator;
@@ -483,8 +484,14 @@ public final class OpProcFactory {
             prunePreds.getFinalCandidates().get(alias)) {
             // add expr to the list of predicates rejected from further pushing
             // so that we know to add it in createFilter()
-            prunePreds.addAlias(expr, alias);
-            prunePreds.addNonFinalCandidate(expr);
+            ExprInfo exprInfo;
+            if (alias != null) {
+              exprInfo = prunePreds.addOrGetExprInfo(expr);
+              exprInfo.alias = alias;
+            } else {
+              exprInfo = prunePreds.getExprInfo(expr);
+            }
+            prunePreds.addNonFinalCandidate(exprInfo != null ? exprInfo.alias : null, expr);
           }
           prunePreds.getFinalCandidates().remove(alias);
         }


[18/50] [abbrv] hive git commit: HIVE-11652: Avoid expensive call to removeAll in DefaultGraphWalker (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan/Hari Sankar Sivarama Subramaniyan)

Posted by xu...@apache.org.
HIVE-11652: Avoid expensive call to removeAll in DefaultGraphWalker (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan/Hari Sankar Sivarama Subramaniyan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/af91308e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/af91308e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/af91308e

Branch: refs/heads/beeline-cli
Commit: af91308e5b6573ea6dc793912bcc628a5a40c000
Parents: 22fa921
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Sat Aug 29 11:40:03 2015 +0200
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Sat Aug 29 11:42:59 2015 +0200

----------------------------------------------------------------------
 .../hadoop/hive/ql/lib/DefaultGraphWalker.java  | 80 ++++++++++++++------
 .../hadoop/hive/ql/lib/ForwardWalker.java       | 33 ++++----
 .../hadoop/hive/ql/optimizer/ColumnPruner.java  |  6 +-
 .../hive/ql/optimizer/ConstantPropagate.java    | 10 +--
 4 files changed, 79 insertions(+), 50 deletions(-)
----------------------------------------------------------------------
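
The hot spot here was DefaultGraphWalker.walk() rebuilding the pending list on every visit: toWalk.removeAll(nd.getChildren()) scans the whole ArrayList once per child, so deep or wide operator trees paid a near-quadratic cost. The rewritten walk() keeps an explicit stack and pushes at most one undispatched child per iteration, then drains opQueue to collect results. A rough sketch of that control flow, assuming a generic node type and caller-supplied children/dispatch hooks in place of Hive's Node and Dispatcher interfaces:

  import java.util.ArrayDeque;
  import java.util.Deque;
  import java.util.HashSet;
  import java.util.List;
  import java.util.Set;
  import java.util.function.Consumer;
  import java.util.function.Function;

  final class IterativeWalkSketch<N> {
    // dispatch every node after all of its children, with no removeAll calls
    void walk(N root, Function<N, List<N>> children, Consumer<N> dispatch) {
      Set<N> dispatched = new HashSet<>();
      Deque<N> stack = new ArrayDeque<>();
      stack.push(root);
      while (!stack.isEmpty()) {
        N node = stack.peek();
        List<N> kids = children.apply(node);
        if (kids == null || dispatched.containsAll(kids)) {
          if (dispatched.add(node)) {
            dispatch.accept(node);     // all children done: dispatch once
          }
          stack.pop();
          continue;
        }
        for (N kid : kids) {           // push one pending child, then retry
          if (!dispatched.contains(kid)) {
            stack.push(kid);
            break;
          }
        }
      }
    }
  }

Each iteration touches at most one child, so the per-node cost no longer includes scanning and splicing the whole pending list.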


http://git-wip-us.apache.org/repos/asf/hive/blob/af91308e/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java b/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java
index 583c113..07d2734 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java
@@ -22,7 +22,9 @@ import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.IdentityHashMap;
+import java.util.LinkedList;
 import java.util.List;
+import java.util.Queue;
 import java.util.Set;
 import java.util.Stack;
 
@@ -36,7 +38,21 @@ import org.apache.hadoop.hive.ql.parse.SemanticException;
  */
 public class DefaultGraphWalker implements GraphWalker {
 
-  protected Stack<Node> opStack;
+  /**
+   * opStack keeps the nodes that have been visited, but have not been
+   * dispatched yet
+   */
+  protected final Stack<Node> opStack;
+  /**
+   * opQueue keeps the nodes in the order that they were dispatched.
+   * Then it is used to go through the processed nodes and store
+   * the results that the dispatcher has produced (if any)
+   */
+  protected final Queue<Node> opQueue;
+  /**
+   * toWalk stores the starting nodes for the graph that needs to be
+   * traversed
+   */
   protected final List<Node> toWalk = new ArrayList<Node>();
   protected final IdentityHashMap<Node, Object> retMap = new  IdentityHashMap<Node, Object>();
   protected final Dispatcher dispatcher;
@@ -50,13 +66,7 @@ public class DefaultGraphWalker implements GraphWalker {
   public DefaultGraphWalker(Dispatcher disp) {
     dispatcher = disp;
     opStack = new Stack<Node>();
-  }
-
-  /**
-   * @return the toWalk
-   */
-  public List<Node> getToWalk() {
-    return toWalk;
+    opQueue = new LinkedList<Node>();
   }
 
   /**
@@ -108,10 +118,22 @@ public class DefaultGraphWalker implements GraphWalker {
     while (toWalk.size() > 0) {
       Node nd = toWalk.remove(0);
       walk(nd);
+      // Some walkers extending DefaultGraphWalker, e.g. ForwardWalker,
+      // do not use opQueue and rely solely on the toWalk structure;
+      // thus we store the results produced by the dispatcher here
+      // TODO: rewrite the logic of those walkers to use opQueue
       if (nodeOutput != null && getDispatchedList().contains(nd)) {
         nodeOutput.put(nd, retMap.get(nd));
       }
     }
+
+    // Store the results produced by the dispatcher
+    while (!opQueue.isEmpty()) {
+      Node node = opQueue.poll();
+      if (nodeOutput != null && getDispatchedList().contains(node)) {
+        nodeOutput.put(node, retMap.get(node));
+      }
+    }
   }
 
   /**
@@ -121,23 +143,33 @@ public class DefaultGraphWalker implements GraphWalker {
    *          current operator in the graph
    * @throws SemanticException
    */
-  public void walk(Node nd) throws SemanticException {
-    if (opStack.empty() || nd != opStack.peek()) {
-      opStack.push(nd);
-    }
+  public void walk(Node nd) throws SemanticException {
+    // Push the node in the stack
+    opStack.push(nd);
+
+    // While there are still nodes to dispatch...
+    while (!opStack.empty()) {
+      Node node = opStack.peek();
 
-    if ((nd.getChildren() == null)
-        || getDispatchedList().containsAll(nd.getChildren())) {
-      // all children are done or no need to walk the children
-      if (!getDispatchedList().contains(nd)) {
-        dispatch(nd, opStack);
+      if (node.getChildren() == null ||
+              getDispatchedList().containsAll(node.getChildren())) {
+        // Dispatch current node
+        if (!getDispatchedList().contains(node)) {
+          dispatch(node, opStack);
+          opQueue.add(node);
+        }
+        opStack.pop();
+        continue;
       }
-      opStack.pop();
-      return;
-    }
-    // add children, self to the front of the queue in that order
-    getToWalk().add(0, nd);
-    getToWalk().removeAll(nd.getChildren());
-    getToWalk().addAll(0, nd.getChildren());
+
+      // Add a single child and restart the loop
+      for (Node childNode : node.getChildren()) {
+        if (!getDispatchedList().contains(childNode)) {
+          opStack.push(childNode);
+          break;
+        }
+      }
+    } // end while
   }
+
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/af91308e/ql/src/java/org/apache/hadoop/hive/ql/lib/ForwardWalker.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lib/ForwardWalker.java b/ql/src/java/org/apache/hadoop/hive/ql/lib/ForwardWalker.java
index a2db3b5..67b4700 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/lib/ForwardWalker.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/lib/ForwardWalker.java
@@ -19,20 +19,17 @@
 package org.apache.hadoop.hive.ql.lib;
 
 import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
-import org.apache.hadoop.hive.ql.lib.Dispatcher;
-import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 
 public class ForwardWalker extends DefaultGraphWalker {
 
   /**
-* Constructor.
-*
-* @param disp
-* dispatcher to call for each op encountered
-*/
+   * Constructor.
+   *
+   * @param disp
+   * dispatcher to call for each op encountered
+   */
   public ForwardWalker(Dispatcher disp) {
     super(disp);
   }
@@ -54,17 +51,17 @@ public class ForwardWalker extends DefaultGraphWalker {
   @SuppressWarnings("unchecked")
   protected void addAllParents(Node nd) {
     Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) nd;
-    getToWalk().removeAll(op.getParentOperators());
-    getToWalk().addAll(0, op.getParentOperators());
+    toWalk.removeAll(op.getParentOperators());
+    toWalk.addAll(0, op.getParentOperators());
   }
 
   /**
-* walk the current operator and its descendants.
-*
-* @param nd
-* current operator in the graph
-* @throws SemanticException
-*/
+   * walk the current operator and its descendants.
+   *
+   * @param nd
+   * current operator in the graph
+   * @throws SemanticException
+   */
   @Override
   public void walk(Node nd) throws SemanticException {
     if (opStack.empty() || nd != opStack.peek()) {
@@ -73,14 +70,14 @@ public class ForwardWalker extends DefaultGraphWalker {
     if (allParentsDispatched(nd)) {
      // all parents are done, so this node can be dispatched
       if (!getDispatchedList().contains(nd)) {
-        getToWalk().addAll(nd.getChildren());
+        toWalk.addAll(nd.getChildren());
         dispatch(nd, opStack);
       }
       opStack.pop();
       return;
     }
    // add parents, self to the front of the queue in that order
-    getToWalk().add(0, nd);
+    toWalk.add(0, nd);
     addAllParents(nd);
   }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/af91308e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java
index 9a45458..735b448 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java
@@ -174,10 +174,10 @@ public class ColumnPruner implements Transform {
         return;
       }
      // move all the children to the front of the queue
-      getToWalk().removeAll(nd.getChildren());
-      getToWalk().addAll(0, nd.getChildren());
+      toWalk.removeAll(nd.getChildren());
+      toWalk.addAll(0, nd.getChildren());
       // add self to the end of the queue
-      getToWalk().add(nd);
+      toWalk.add(nd);
       opStack.pop();
     }
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/af91308e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java
index dd53ced..b6f1f27 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java
@@ -151,17 +151,17 @@ public class ConstantPropagate implements Transform {
         dispatch(nd, opStack);
         opStack.pop();
       } else {
-        getToWalk().removeAll(parents);
-        getToWalk().add(0, nd);
-        getToWalk().addAll(0, parents);
+        toWalk.removeAll(parents);
+        toWalk.add(0, nd);
+        toWalk.addAll(0, parents);
         return;
       }
 
      // move all the children to the end of the queue
       List<? extends Node> children = nd.getChildren();
       if (children != null) {
-        getToWalk().removeAll(children);
-        getToWalk().addAll(children);
+        toWalk.removeAll(children);
+        toWalk.addAll(children);
       }
     }
   }


[05/50] [abbrv] hive git commit: HIVE-11573: PointLookupOptimizer can be pessimistic at a low nDV (Gopal V, reviewed by Jesus Camacho Rodriguez)

Posted by xu...@apache.org.
HIVE-11573: PointLookupOptimizer can be pessimistic at a low nDV (Gopal V, reviewed by Jesus Camacho Rodriguez)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b247cac4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b247cac4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b247cac4

Branch: refs/heads/beeline-cli
Commit: b247cac4fc3814e422d4f5d5aad96a1c6e385a7b
Parents: 037fb02
Author: Gopal V <go...@apache.org>
Authored: Thu Aug 27 09:50:08 2015 +0200
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Thu Aug 27 09:50:08 2015 +0200

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/conf/HiveConf.java   |    4 +
 .../hadoop/hive/ql/optimizer/Optimizer.java     |   17 +-
 .../hive/ql/optimizer/PointLookupOptimizer.java |  102 +-
 .../queries/clientpositive/flatten_and_or.q     |    4 +-
 .../test/queries/clientpositive/pointlookup.q   |   59 +
 .../test/queries/clientpositive/pointlookup2.q  |   51 +
 .../alter_partition_coltype.q.out               |   12 +-
 .../clientpositive/annotate_stats_filter.q.out  |    8 +-
 .../results/clientpositive/flatten_and_or.q.out |    8 +-
 ql/src/test/results/clientpositive/pcr.q.out    |   12 +-
 .../results/clientpositive/pointlookup.q.out    |  198 +++
 .../results/clientpositive/pointlookup2.q.out   | 1647 ++++++++++++++++++
 .../results/clientpositive/ppd_transform.q.out  |   12 +-
 .../test/results/clientpositive/spark/pcr.q.out |   12 +-
 .../clientpositive/spark/ppd_transform.q.out    |   12 +-
 .../clientpositive/spark/vectorized_case.q.out  |    2 +-
 .../clientpositive/tez/explainuser_1.q.out      |    2 +-
 .../clientpositive/tez/vectorized_case.q.out    |    2 +-
 .../clientpositive/vectorized_case.q.out        |    9 +-
 19 files changed, 2118 insertions(+), 55 deletions(-)
----------------------------------------------------------------------
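
Two knobs are introduced: hive.optimize.point.lookup.min gates the OR-to-IN rewrite on a minimum number of disjuncts (so a low-nDV two-way OR is left alone), and hive.optimize.point.lookup.extract prepends per-column partial IN clauses to the tuple IN so partition pruning and per-column filters can still fire. Per the diff, a partial clause is only kept when the column is a partition/virtual column or its value set is smaller than the number of disjuncts. A sketch of the textual shape of the extraction, using plain strings in place of Hive's ExprNodeDesc trees (the helper below is illustrative, not Hive API):

  import java.util.LinkedHashSet;
  import java.util.List;
  import java.util.Set;
  import java.util.StringJoiner;

  final class ExtractSketch {
    // (a,b) IN ((1,2),(2,3))  ->
    //   a IN (1, 2) AND b IN (2, 3) AND (a,b) IN ((1,2),(2,3))
    static String extract(List<String> cols, List<List<String>> tuples) {
      StringBuilder sb = new StringBuilder();
      for (int c = 0; c < cols.size(); c++) {
        Set<String> values = new LinkedHashSet<>();  // dedup, like the wrapper set
        for (List<String> t : tuples) {
          values.add(t.get(c));
        }
        sb.append(cols.get(c))
          .append(" IN (").append(String.join(", ", values)).append(") AND ");
      }
      StringJoiner tupleList = new StringJoiner(", ");
      for (List<String> t : tuples) {
        tupleList.add("(" + String.join(",", t) + ")");
      }
      return sb.append("(").append(String.join(",", cols)).append(") IN (")
               .append(tupleList).append(")").toString();
    }

    public static void main(String[] args) {
      System.out.println(extract(List.of("a", "b"),
          List.of(List.of("1", "2"), List.of("2", "3"))));
    }
  }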


http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 8706a2d..8a00079 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1190,6 +1190,10 @@ public class HiveConf extends Configuration {
         "Whether to push predicates down into storage handlers.  Ignored when hive.optimize.ppd is false."),
     HIVEPOINTLOOKUPOPTIMIZER("hive.optimize.point.lookup", true,
          "Whether to transform OR clauses in Filter operators into IN clauses"),
+    HIVEPOINTLOOKUPOPTIMIZERMIN("hive.optimize.point.lookup.min", 31,
+         "Minimum number of OR clauses needed to transform into IN clauses"),
+    HIVEPOINTLOOKUPOPTIMIZEREXTRACT("hive.optimize.point.lookup.extract", true,
+         "Extract partial expressions when optimizing point lookup IN clauses"),
     // Constant propagation optimizer
     HIVEOPTCONSTANTPROPAGATION("hive.optimize.constant.propagation", true, "Whether to enable constant propagation optimizer"),
     HIVEIDENTITYPROJECTREMOVER("hive.optimize.remove.identity.project", true, "Removes identity project from operator tree"),

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
index 14f362f..439f616 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
@@ -68,6 +68,18 @@ public class Optimizer {
 
     // Add the transformation that computes the lineage information.
     transformations.add(new Generator());
+
+    // Try to transform OR predicates in Filter into simpler IN clauses first
+    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER)) {
+      final int min = HiveConf.getIntVar(hiveConf,
+          HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZERMIN);
+      final boolean extract = HiveConf.getBoolVar(hiveConf,
+          HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZEREXTRACT);
+      final boolean testMode = HiveConf.getBoolVar(hiveConf,
+          HiveConf.ConfVars.HIVE_IN_TEST);
+      transformations.add(new PointLookupOptimizer(min, extract, testMode));
+    }
+
     if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTPPD)) {
       transformations.add(new PredicateTransitivePropagate());
       if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION)) {
@@ -82,11 +94,6 @@ public class Optimizer {
         transformations.add(new ConstantPropagate());
     }
 
-    // Try to transform OR predicates in Filter into IN clauses.
-    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER)) {
-      transformations.add(new PointLookupOptimizer());
-    }
-
     if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTPPD)) {
       transformations.add(new PartitionPruner());
       transformations.add(new PartitionConditionRemover());

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java
index 6a8acec..d83636d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java
@@ -18,10 +18,14 @@
 package org.apache.hadoop.hive.ql.optimizer;
 
 import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Comparator;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.Stack;
 
 import org.apache.calcite.util.Pair;
@@ -46,15 +50,18 @@ import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc.ExprNodeDescEqualityWrapper;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 
 import com.google.common.collect.ArrayListMultimap;
+import com.google.common.collect.ImmutableSortedSet;
 import com.google.common.collect.ListMultimap;
 
 /**
@@ -71,7 +78,49 @@ public class PointLookupOptimizer implements Transform {
           GenericUDFIn.class.getAnnotation(Description.class).name();
   private static final String STRUCT_UDF =
           GenericUDFStruct.class.getAnnotation(Description.class).name();
+  private static final String AND_UDF =
+      GenericUDFOPAnd.class.getAnnotation(Description.class).name();
+
+  // these are closure-bound for all the walkers in context
+  public final int minOrExpr;
+  public final boolean extract;
+  public final boolean testMode;
+
+  /*
+   * Pass in the configs that parameterize this optimizer
+   */
+  public PointLookupOptimizer(final int min, final boolean extract, final boolean testMode) {
+    this.minOrExpr = min;
+    this.extract = extract;
+    this.testMode = testMode;
+  }
+
+  // HashSet iteration isn't ordered, so force string sorted order
+  // to get a consistent test run.
+  private Collection<ExprNodeDescEqualityWrapper> sortForTests(
+      Set<ExprNodeDescEqualityWrapper> valuesExpr) {
+    if (!testMode) {
+      // normal case - sorting is wasted for an IN()
+      return valuesExpr;
+    }
+    final Collection<ExprNodeDescEqualityWrapper> sortedValues;
+
+    sortedValues = ImmutableSortedSet.copyOf(
+        new Comparator<ExprNodeDescEqualityWrapper>() {
+          @Override
+          public int compare(ExprNodeDescEqualityWrapper w1,
+              ExprNodeDescEqualityWrapper w2) {
+            // fail if you find nulls (this is a test-code section)
+            if (w1.equals(w2)) {
+              return 0;
+            }
+            return w1.getExprNodeDesc().getExprString()
+                .compareTo(w2.getExprNodeDesc().getExprString());
+          }
+        }, valuesExpr);
 
+    return sortedValues;
+  }
 
   @Override
   public ParseContext transform(ParseContext pctx) throws SemanticException {
@@ -103,7 +152,9 @@ public class PointLookupOptimizer implements Transform {
         if (LOG.isDebugEnabled()) {
           LOG.debug("Generated new predicate with IN clause: " + newPredicate);
         }
-        filterOp.getConf().setOrigPredicate(predicate);
+        if (!extract) {
+          filterOp.getConf().setOrigPredicate(predicate);
+        }
         filterOp.getConf().setPredicate(newPredicate);
       }
 
@@ -140,8 +191,11 @@ public class PointLookupOptimizer implements Transform {
         return null;
       }
 
-      // 2. It is an OR operator
+      // 2. It is an OR operator with enough children
       List<ExprNodeDesc> children = fd.getChildren();
+      if (children.size() < minOrExpr) {
+        return null;
+      }
       ListMultimap<String,Pair<ExprNodeColumnDesc, ExprNodeConstantDesc>> columnConstantsMap =
               ArrayListMultimap.create();
       boolean modeAnd = false;
@@ -272,6 +326,50 @@ public class PointLookupOptimizer implements Transform {
       newPredicate = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
               FunctionRegistry.getFunctionInfo(IN_UDF).getGenericUDF(), newChildren);
 
+      if (extract && columns.size() > 1) {
+        final List<ExprNodeDesc> subExpr = new ArrayList<ExprNodeDesc>(columns.size()+1);
+
+        // extract pre-conditions for the tuple expressions
+        // (a,b) IN ((1,2),(2,3)) ->
+        //          (a IN (1,2) AND b IN (2,3)) AND (a,b) IN ((1,2),(2,3))
+
+        for (String keyString : columnConstantsMap.keySet()) {
+          final Set<ExprNodeDescEqualityWrapper> valuesExpr = 
+              new HashSet<ExprNodeDescEqualityWrapper>(children.size());
+          final List<Pair<ExprNodeColumnDesc, ExprNodeConstantDesc>> partial = 
+              columnConstantsMap.get(keyString);
+          for (int i = 0; i < children.size(); i++) {
+            Pair<ExprNodeColumnDesc, ExprNodeConstantDesc> columnConstant = partial
+                .get(i);
+            valuesExpr
+                .add(new ExprNodeDescEqualityWrapper(columnConstant.right));
+          }
+          ExprNodeColumnDesc lookupCol = partial.get(0).left;
+          // generate a partial IN clause, if the column is a partition column
+          if (lookupCol.getIsPartitionColOrVirtualCol()
+              || valuesExpr.size() < children.size()) {
+            // optimize only nDV reductions
+            final List<ExprNodeDesc> inExpr = new ArrayList<ExprNodeDesc>();
+            inExpr.add(lookupCol);
+            for (ExprNodeDescEqualityWrapper value : sortForTests(valuesExpr)) {
+              inExpr.add(value.getExprNodeDesc());
+            }
+            subExpr.add(new ExprNodeGenericFuncDesc(
+                TypeInfoFactory.booleanTypeInfo, FunctionRegistry
+                    .getFunctionInfo(IN_UDF).getGenericUDF(), inExpr));
+          }
+        }
+        // loop complete, inspect the sub expressions generated
+        if (subExpr.size() > 0) {
+          // add the newPredicate to the end & produce an AND clause
+          subExpr.add(newPredicate);
+          newPredicate = new ExprNodeGenericFuncDesc(
+              TypeInfoFactory.booleanTypeInfo, FunctionRegistry
+                  .getFunctionInfo(AND_UDF).getGenericUDF(), subExpr);
+        }
+        // else, newPredicate is unmodified
+      }
+
       return newPredicate;
     }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/queries/clientpositive/flatten_and_or.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/flatten_and_or.q b/ql/src/test/queries/clientpositive/flatten_and_or.q
index 6d65225..6c6e0f9 100644
--- a/ql/src/test/queries/clientpositive/flatten_and_or.q
+++ b/ql/src/test/queries/clientpositive/flatten_and_or.q
@@ -1,3 +1,5 @@
+set hive.optimize.point.lookup=false;
+
 explain
 SELECT key
 FROM src
@@ -14,4 +16,4 @@ WHERE
    AND value = '1') OR (key = '9'
    AND value = '1') OR (key = '10'
    AND value = '3'))
-;
\ No newline at end of file
+;

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/queries/clientpositive/pointlookup.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/pointlookup.q b/ql/src/test/queries/clientpositive/pointlookup.q
new file mode 100644
index 0000000..1aef2ef
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/pointlookup.q
@@ -0,0 +1,59 @@
+explain
+SELECT key
+FROM src
+WHERE
+   ((key = '0'
+   AND value = '8') OR (key = '1'
+   AND value = '5') OR (key = '2'
+   AND value = '6') OR (key = '3'
+   AND value = '8') OR (key = '4'
+   AND value = '1') OR (key = '5'
+   AND value = '6') OR (key = '6'
+   AND value = '1') OR (key = '7'
+   AND value = '1') OR (key = '8'
+   AND value = '1') OR (key = '9'
+   AND value = '1') OR (key = '10'
+   AND value = '3'))
+;
+
+
+set hive.optimize.point.lookup.min=3;
+set hive.optimize.point.lookup.extract=false;
+
+explain
+SELECT key
+FROM src
+WHERE
+   ((key = '0'
+   AND value = '8') OR (key = '1'
+   AND value = '5') OR (key = '2'
+   AND value = '6') OR (key = '3'
+   AND value = '8') OR (key = '4'
+   AND value = '1') OR (key = '5'
+   AND value = '6') OR (key = '6'
+   AND value = '1') OR (key = '7'
+   AND value = '1') OR (key = '8'
+   AND value = '1') OR (key = '9'
+   AND value = '1') OR (key = '10'
+   AND value = '3'))
+;
+
+set hive.optimize.point.lookup.extract=true;
+
+explain
+SELECT key
+FROM src
+WHERE
+   ((key = '0'
+   AND value = '8') OR (key = '1'
+   AND value = '5') OR (key = '2'
+   AND value = '6') OR (key = '3'
+   AND value = '8') OR (key = '4'
+   AND value = '1') OR (key = '5'
+   AND value = '6') OR (key = '6'
+   AND value = '1') OR (key = '7'
+   AND value = '1') OR (key = '8'
+   AND value = '1') OR (key = '9'
+   AND value = '1') OR (key = '10'
+   AND value = '3'))
+;

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/queries/clientpositive/pointlookup2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/pointlookup2.q b/ql/src/test/queries/clientpositive/pointlookup2.q
new file mode 100644
index 0000000..31bebbb
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/pointlookup2.q
@@ -0,0 +1,51 @@
+drop table pcr_t1;
+drop table pcr_t2;
+drop table pcr_t3;
+
+create table pcr_t1 (key int, value string) partitioned by (ds string);
+insert overwrite table pcr_t1 partition (ds='2000-04-08') select * from src where key < 20 order by key;
+insert overwrite table pcr_t1 partition (ds='2000-04-09') select * from src where key < 20 order by key;
+insert overwrite table pcr_t1 partition (ds='2000-04-10') select * from src where key < 20 order by key;
+
+create table pcr_t2 (ds string, key int, value string);
+from pcr_t1
+insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08';
+from pcr_t1
+insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08' and key=2;
+
+set hive.optimize.point.lookup.min=2;
+set hive.optimize.point.lookup.extract=true;
+
+explain extended
+select key, value, ds
+from pcr_t1
+where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2)
+order by key, value, ds;
+
+explain extended
+select *
+from pcr_t1 t1 join pcr_t1 t2
+on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-08'
+order by t1.key;
+
+explain extended
+select *
+from pcr_t1 t1 join pcr_t1 t2
+on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-09'
+order by t1.key;
+
+explain extended
+select *
+from pcr_t1 t1 join pcr_t2 t2
+where (t1.ds='2000-04-08' and t2.key=1) or (t1.ds='2000-04-09' and t2.key=2)
+order by t2.key, t2.value, t1.ds;
+
+explain extended
+select *
+from pcr_t1 t1 join pcr_t2 t2
+where (t2.ds='2000-04-08' and t1.key=1) or (t2.ds='2000-04-09' and t1.key=2)
+order by t1.key, t1.value, t2.ds;
+
+drop table pcr_t1;
+drop table pcr_t2;
+drop table pcr_t3;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/alter_partition_coltype.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/alter_partition_coltype.q.out b/ql/src/test/results/clientpositive/alter_partition_coltype.q.out
index 06515da..9fc3c8d 100644
--- a/ql/src/test/results/clientpositive/alter_partition_coltype.q.out
+++ b/ql/src/test/results/clientpositive/alter_partition_coltype.q.out
@@ -1134,15 +1134,11 @@ STAGE PLANS:
           alias: alterdynamic_part_table
           Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
           GatherStats: false
-          Filter Operator
-            isSamplingPred: false
-            predicate: (struct(partcol1,partcol2)) IN (const struct(2,'1'), const struct(1,'__HIVE_DEFAULT_PARTITION__')) (type: boolean)
+          Select Operator
+            expressions: intcol (type: string)
+            outputColumnNames: _col0
             Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: intcol (type: string)
-              outputColumnNames: _col0
-              Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
-              ListSink
+            ListSink
 
 PREHOOK: query: select intcol from pt.alterdynamic_part_table where (partcol1='2' and partcol2='1')or (partcol1='1' and partcol2='__HIVE_DEFAULT_PARTITION__')
 PREHOOK: type: QUERY

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
index 9e0e78a..054b573 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
@@ -678,15 +678,15 @@ STAGE PLANS:
             alias: loc_orc
             Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
             Filter Operator
-              predicate: (state) IN ('OH', 'CA') (type: boolean)
-              Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
+              predicate: ((state = 'OH') or (state = 'CA')) (type: boolean)
+              Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
               Select Operator
                 expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/flatten_and_or.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/flatten_and_or.q.out b/ql/src/test/results/clientpositive/flatten_and_or.q.out
index 5f25daa..9c51ff3 100644
--- a/ql/src/test/results/clientpositive/flatten_and_or.q.out
+++ b/ql/src/test/results/clientpositive/flatten_and_or.q.out
@@ -44,15 +44,15 @@ STAGE PLANS:
             alias: src
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: (struct(key,value)) IN (const struct('0','8'), const struct('1','5'), const struct('2','6'), const struct('3','8'), const struct('4','1'), const struct('5','6'), const struct('6','1'), const struct('7','1'), const struct('8','1'), const struct('9','1'), const struct('10','3')) (type: boolean)
-              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              predicate: (((key = '0') and (value = '8')) or ((key = '1') and (value = '5')) or ((key = '2') and (value = '6')) or ((key = '3') and (value = '8')) or ((key = '4') and (value = '1')) or ((key = '5') and (value = '6')) or ((key = '6') and (value = '1')) or ((key = '7') and (value = '1')) or ((key = '8') and (value = '1')) or ((key = '9') and (value = '1')) or ((key = '10') and (value = '3'))) (type: boolean)
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string)
                 outputColumnNames: _col0
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/pcr.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/pcr.q.out b/ql/src/test/results/clientpositive/pcr.q.out
index 4c9ea77..d7c40a3 100644
--- a/ql/src/test/results/clientpositive/pcr.q.out
+++ b/ql/src/test/results/clientpositive/pcr.q.out
@@ -2475,16 +2475,16 @@ STAGE PLANS:
             GatherStats: false
             Filter Operator
               isSamplingPred: false
-              predicate: (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
-              Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+              predicate: (((ds = '2000-04-08') and (key = 1)) or ((ds = '2000-04-09') and (key = 2))) (type: boolean)
+              Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: int), value (type: string), ds (type: string)
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
                   sort order: +++
-                  Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                   tag: -1
                   auto parallelism: false
       Path -> Alias:
@@ -2588,13 +2588,13 @@ STAGE PLANS:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string)
           outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
             GlobalTableId: 0
 #### A masked pattern was here ####
             NumFilesPerFileSink: 1
-            Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
 #### A masked pattern was here ####
             table:
                 input format: org.apache.hadoop.mapred.TextInputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/pointlookup.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/pointlookup.q.out b/ql/src/test/results/clientpositive/pointlookup.q.out
new file mode 100644
index 0000000..7e19be4
--- /dev/null
+++ b/ql/src/test/results/clientpositive/pointlookup.q.out
@@ -0,0 +1,198 @@
+PREHOOK: query: explain
+SELECT key
+FROM src
+WHERE
+   ((key = '0'
+   AND value = '8') OR (key = '1'
+   AND value = '5') OR (key = '2'
+   AND value = '6') OR (key = '3'
+   AND value = '8') OR (key = '4'
+   AND value = '1') OR (key = '5'
+   AND value = '6') OR (key = '6'
+   AND value = '1') OR (key = '7'
+   AND value = '1') OR (key = '8'
+   AND value = '1') OR (key = '9'
+   AND value = '1') OR (key = '10'
+   AND value = '3'))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+SELECT key
+FROM src
+WHERE
+   ((key = '0'
+   AND value = '8') OR (key = '1'
+   AND value = '5') OR (key = '2'
+   AND value = '6') OR (key = '3'
+   AND value = '8') OR (key = '4'
+   AND value = '1') OR (key = '5'
+   AND value = '6') OR (key = '6'
+   AND value = '1') OR (key = '7'
+   AND value = '1') OR (key = '8'
+   AND value = '1') OR (key = '9'
+   AND value = '1') OR (key = '10'
+   AND value = '3'))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (((key = '0') and (value = '8')) or ((key = '1') and (value = '5')) or ((key = '2') and (value = '6')) or ((key = '3') and (value = '8')) or ((key = '4') and (value = '1')) or ((key = '5') and (value = '6')) or ((key = '6') and (value = '1')) or ((key = '7') and (value = '1')) or ((key = '8') and (value = '1')) or ((key = '9') and (value = '1')) or ((key = '10') and (value = '3'))) (type: boolean)
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain
+SELECT key
+FROM src
+WHERE
+   ((key = '0'
+   AND value = '8') OR (key = '1'
+   AND value = '5') OR (key = '2'
+   AND value = '6') OR (key = '3'
+   AND value = '8') OR (key = '4'
+   AND value = '1') OR (key = '5'
+   AND value = '6') OR (key = '6'
+   AND value = '1') OR (key = '7'
+   AND value = '1') OR (key = '8'
+   AND value = '1') OR (key = '9'
+   AND value = '1') OR (key = '10'
+   AND value = '3'))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+SELECT key
+FROM src
+WHERE
+   ((key = '0'
+   AND value = '8') OR (key = '1'
+   AND value = '5') OR (key = '2'
+   AND value = '6') OR (key = '3'
+   AND value = '8') OR (key = '4'
+   AND value = '1') OR (key = '5'
+   AND value = '6') OR (key = '6'
+   AND value = '1') OR (key = '7'
+   AND value = '1') OR (key = '8'
+   AND value = '1') OR (key = '9'
+   AND value = '1') OR (key = '10'
+   AND value = '3'))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (struct(key,value)) IN (const struct('0','8'), const struct('1','5'), const struct('2','6'), const struct('3','8'), const struct('4','1'), const struct('5','6'), const struct('6','1'), const struct('7','1'), const struct('8','1'), const struct('9','1'), const struct('10','3')) (type: boolean)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain
+SELECT key
+FROM src
+WHERE
+   ((key = '0'
+   AND value = '8') OR (key = '1'
+   AND value = '5') OR (key = '2'
+   AND value = '6') OR (key = '3'
+   AND value = '8') OR (key = '4'
+   AND value = '1') OR (key = '5'
+   AND value = '6') OR (key = '6'
+   AND value = '1') OR (key = '7'
+   AND value = '1') OR (key = '8'
+   AND value = '1') OR (key = '9'
+   AND value = '1') OR (key = '10'
+   AND value = '3'))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+SELECT key
+FROM src
+WHERE
+   ((key = '0'
+   AND value = '8') OR (key = '1'
+   AND value = '5') OR (key = '2'
+   AND value = '6') OR (key = '3'
+   AND value = '8') OR (key = '4'
+   AND value = '1') OR (key = '5'
+   AND value = '6') OR (key = '6'
+   AND value = '1') OR (key = '7'
+   AND value = '1') OR (key = '8'
+   AND value = '1') OR (key = '9'
+   AND value = '1') OR (key = '10'
+   AND value = '3'))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: ((value) IN ('1', '3', '5', '6', '8') and (struct(key,value)) IN (const struct('0','8'), const struct('1','5'), const struct('2','6'), const struct('3','8'), const struct('4','1'), const struct('5','6'), const struct('6','1'), const struct('7','1'), const struct('8','1'), const struct('9','1'), const struct('10','3'))) (type: boolean)
+              Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
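
Read together, the three plans above show the same 11-way OR in three forms: (a) left flat, (b) collapsed into a single IN over struct(key,value), and (c) with an additional IN on value layered in front as a cheap pre-filter (presumably under different optimizer settings, which the corresponding .q file would show). The struct form turns the predicate into one set-membership probe per row instead of evaluating every AND/OR clause. A toy illustration of the idea, not Hive code:

    import java.util.Arrays;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;

    public class PointLookupSketch {
      public static void main(String[] args) {
        // composite (key, value) tuples stand in for the struct() constants
        Set<List<String>> wanted = new HashSet<>(Arrays.asList(
            Arrays.asList("0", "8"),
            Arrays.asList("1", "5"),
            Arrays.asList("10", "3")));   // three of the eleven pairs above

        // one hash probe replaces the chain of AND/OR evaluations
        System.out.println(wanted.contains(Arrays.asList("1", "5")));  // true
        System.out.println(wanted.contains(Arrays.asList("1", "8")));  // false
      }
    }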


[28/50] [abbrv] hive git commit: HIVE-11689 : minor flow changes to ORC split generation (Sergey Shelukhin, reviewed by Prasanth Jayachandran and Swarnim Kulkarni)

Posted by xu...@apache.org.
HIVE-11689 : minor flow changes to ORC split generation (Sergey Shelukhin, reviewed by Prasanth Jayachandran and Swarnim Kulkarni)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f530f44d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f530f44d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f530f44d

Branch: refs/heads/beeline-cli
Commit: f530f44d1d95c2da2485d53f0855f8f8e0646005
Parents: c0690a6
Author: Sergey Shelukhin <se...@apache.org>
Authored: Tue Sep 1 11:23:14 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Tue Sep 1 11:23:14 2015 -0700

----------------------------------------------------------------------
 .../hadoop/hive/ql/io/orc/OrcInputFormat.java   | 169 +++++++++++--------
 .../hive/ql/io/orc/TestInputOutputFormat.java   |  13 +-
 2 files changed, 107 insertions(+), 75 deletions(-)
----------------------------------------------------------------------
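
The flow change worth calling out: getSplits() previously blocked on each directory future in submission order, while with an ExecutorCompletionService it consumes AcidDirInfo results as they finish, and the split-strategy decision moves out of FileGenerator into the synchronous determineSplitStrategy(). A minimal, self-contained sketch of the completion-service pattern (names illustrative, not from the patch):

    import java.util.concurrent.ExecutorCompletionService;
    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;

    public class CompletionOrderSketch {
      public static void main(String[] args) throws Exception {
        ExecutorService pool = Executors.newFixedThreadPool(4);
        ExecutorCompletionService<String> ecs =
            new ExecutorCompletionService<>(pool);
        int dirs = 8;
        for (int i = 0; i < dirs; i++) {
          final int id = i;
          // each task stands in for one directory listing of variable latency
          ecs.submit(() -> {
            Thread.sleep((long) (Math.random() * 100));
            return "dir-" + id;
          });
        }
        // take() yields futures in completion order, so one slow listing
        // no longer blocks split generation for all the fast ones
        for (int i = 0; i < dirs; i++) {
          System.out.println("ready: " + ecs.take().get());
        }
        pool.shutdown();
      }
    }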


http://git-wip-us.apache.org/repos/asf/hive/blob/f530f44d/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 8c138b9..05efc5f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -28,6 +28,7 @@ import java.util.Map;
 import java.util.NavigableMap;
 import java.util.TreeMap;
 import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutorCompletionService;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
@@ -51,6 +52,7 @@ import org.apache.hadoop.hive.ql.io.AcidInputFormat;
 import org.apache.hadoop.hive.ql.io.AcidInputFormat.DeltaMetaData;
 import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
+import org.apache.hadoop.hive.ql.io.AcidUtils.Directory;
 import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
 import org.apache.hadoop.hive.ql.io.InputFormatChecker;
 import org.apache.hadoop.hive.ql.io.RecordIdentifier;
@@ -371,6 +373,7 @@ public class OrcInputFormat  implements InputFormat<NullWritable, OrcStruct>,
     private final Configuration conf;
     private static Cache<Path, FileInfo> footerCache;
     private static ExecutorService threadPool = null;
+    private static ExecutorCompletionService<AcidDirInfo> ecs = null;
     private final int numBuckets;
     private final long maxSize;
     private final long minSize;
@@ -416,6 +419,7 @@ public class OrcInputFormat  implements InputFormat<NullWritable, OrcStruct>,
           threadPool = Executors.newFixedThreadPool(numThreads,
               new ThreadFactoryBuilder().setDaemon(true)
                   .setNameFormat("ORC_GET_SPLITS #%d").build());
+          ecs = new ExecutorCompletionService<AcidDirInfo>(threadPool);
         }
 
         if (footerCache == null && cacheStripeDetails) {
@@ -433,10 +437,34 @@ public class OrcInputFormat  implements InputFormat<NullWritable, OrcStruct>,
     }
   }
 
+  /**
+   * The full ACID directory information needed for splits; no more calls to HDFS needed.
+   * We could just live with AcidUtils.Directory but...
+   * 1) That doesn't have base files.
+   * 2) We save fs for convenience to avoid getting it twice.
+   */
+  @VisibleForTesting
+  static final class AcidDirInfo {
+    public AcidDirInfo(FileSystem fs, Path splitPath, Directory acidInfo,
+        List<HdfsFileStatusWithId> baseOrOriginalFiles) {
+      this.splitPath = splitPath;
+      this.acidInfo = acidInfo;
+      this.baseOrOriginalFiles = baseOrOriginalFiles;
+      this.fs = fs;
+    }
+
+    final FileSystem fs;
+    final Path splitPath;
+    final AcidUtils.Directory acidInfo;
+    final List<HdfsFileStatusWithId> baseOrOriginalFiles;
+  }
+
+  @VisibleForTesting
   interface SplitStrategy<T> {
     List<T> getSplits() throws IOException;
   }
 
+  @VisibleForTesting
   static final class SplitInfo extends ACIDSplitStrategy {
     private final Context context;
     private final FileSystem fs;
@@ -638,7 +666,7 @@ public class OrcInputFormat  implements InputFormat<NullWritable, OrcStruct>,
    * Given a directory, get the list of files and blocks in those files.
    * To parallelize file generator use "mapreduce.input.fileinputformat.list-status.num-threads"
    */
-  static final class FileGenerator implements Callable<SplitStrategy> {
+  static final class FileGenerator implements Callable<AcidDirInfo> {
     private final Context context;
     private final FileSystem fs;
     private final Path dir;
@@ -652,69 +680,14 @@ public class OrcInputFormat  implements InputFormat<NullWritable, OrcStruct>,
     }
 
     @Override
-    public SplitStrategy call() throws IOException {
-      final SplitStrategy splitStrategy;
+    public AcidDirInfo call() throws IOException {
       AcidUtils.Directory dirInfo = AcidUtils.getAcidState(dir,
           context.conf, context.transactionList, useFileIds);
-      List<DeltaMetaData> deltas = AcidUtils.serializeDeltas(dirInfo.getCurrentDirectories());
       Path base = dirInfo.getBaseDirectory();
-      List<HdfsFileStatusWithId> original = dirInfo.getOriginalFiles();
-      boolean[] covered = new boolean[context.numBuckets];
-      boolean isOriginal = base == null;
-
-      // if we have a base to work from
-      if (base != null || !original.isEmpty()) {
-
-        // find the base files (original or new style)
-        List<HdfsFileStatusWithId> children = original;
-        if (base != null) {
-          children = findBaseFiles(base, useFileIds);
-        }
-
-        long totalFileSize = 0;
-        for (HdfsFileStatusWithId child : children) {
-          totalFileSize += child.getFileStatus().getLen();
-          AcidOutputFormat.Options opts = AcidUtils.parseBaseBucketFilename
-              (child.getFileStatus().getPath(), context.conf);
-          int b = opts.getBucket();
-          // If the bucket is in the valid range, mark it as covered.
-          // I wish Hive actually enforced bucketing all of the time.
-          if (b >= 0 && b < covered.length) {
-            covered[b] = true;
-          }
-        }
-
-        int numFiles = children.size();
-        long avgFileSize = totalFileSize / numFiles;
-        int totalFiles = context.numFilesCounter.addAndGet(numFiles);
-        switch(context.splitStrategyKind) {
-          case BI:
-            // BI strategy requested through config
-            splitStrategy = new BISplitStrategy(context, fs, dir, children, isOriginal,
-                deltas, covered);
-            break;
-          case ETL:
-            // ETL strategy requested through config
-            splitStrategy = new ETLSplitStrategy(context, fs, dir, children, isOriginal,
-                deltas, covered);
-            break;
-          default:
-            // HYBRID strategy
-            if (avgFileSize > context.maxSize || totalFiles <= context.minSplits) {
-              splitStrategy = new ETLSplitStrategy(context, fs, dir, children, isOriginal, deltas,
-                  covered);
-            } else {
-              splitStrategy = new BISplitStrategy(context, fs, dir, children, isOriginal, deltas,
-                  covered);
-            }
-            break;
-        }
-      } else {
-        // no base, only deltas
-        splitStrategy = new ACIDSplitStrategy(dir, context.numBuckets, deltas, covered);
-      }
-
-      return splitStrategy;
+      // find the base files (original or new style)
+      List<HdfsFileStatusWithId> children = (base == null)
+          ? dirInfo.getOriginalFiles() : findBaseFiles(base, useFileIds);
+      return new AcidDirInfo(fs, dir, dirInfo, children);
     }
 
     private List<HdfsFileStatusWithId> findBaseFiles(
@@ -1052,21 +1025,24 @@ public class OrcInputFormat  implements InputFormat<NullWritable, OrcStruct>,
     // use threads to resolve directories into splits
     Context context = new Context(conf, numSplits);
     List<OrcSplit> splits = Lists.newArrayList();
-    List<Future<?>> pathFutures = Lists.newArrayList();
-    List<Future<?>> splitFutures = Lists.newArrayList();
+    List<Future<AcidDirInfo>> pathFutures = Lists.newArrayList();
+    List<Future<List<OrcSplit>>> splitFutures = Lists.newArrayList();
 
     // multi-threaded file statuses and split strategy
     boolean useFileIds = HiveConf.getBoolVar(conf, ConfVars.HIVE_ORC_INCLUDE_FILE_ID_IN_SPLITS);
-    for (Path dir : getInputPaths(conf)) {
+    Path[] paths = getInputPaths(conf);
+    for (Path dir : paths) {
       FileSystem fs = dir.getFileSystem(conf);
       FileGenerator fileGenerator = new FileGenerator(context, fs, dir, useFileIds);
-      pathFutures.add(context.threadPool.submit(fileGenerator));
+      pathFutures.add(Context.ecs.submit(fileGenerator));
     }
 
     // complete path futures and schedule split generation
     try {
-      for (Future<?> pathFuture : pathFutures) {
-        SplitStrategy splitStrategy = (SplitStrategy) pathFuture.get();
+      for (int notIndex = 0; notIndex < paths.length; ++notIndex) {
+        AcidDirInfo adi = Context.ecs.take().get();
+        SplitStrategy splitStrategy = determineSplitStrategy(
+            context, adi.fs, adi.splitPath, adi.acidInfo, adi.baseOrOriginalFiles);
 
         if (isDebugEnabled) {
           LOG.debug(splitStrategy);
@@ -1075,7 +1051,7 @@ public class OrcInputFormat  implements InputFormat<NullWritable, OrcStruct>,
         if (splitStrategy instanceof ETLSplitStrategy) {
           List<SplitInfo> splitInfos = splitStrategy.getSplits();
           for (SplitInfo splitInfo : splitInfos) {
-            splitFutures.add(context.threadPool.submit(new SplitGenerator(splitInfo)));
+            splitFutures.add(Context.threadPool.submit(new SplitGenerator(splitInfo)));
           }
         } else {
           splits.addAll(splitStrategy.getSplits());
@@ -1083,8 +1059,8 @@ public class OrcInputFormat  implements InputFormat<NullWritable, OrcStruct>,
       }
 
       // complete split futures
-      for (Future<?> splitFuture : splitFutures) {
-        splits.addAll((Collection<? extends OrcSplit>) splitFuture.get());
+      for (Future<List<OrcSplit>> splitFuture : splitFutures) {
+        splits.addAll(splitFuture.get());
       }
     } catch (Exception e) {
       cancelFutures(pathFutures);
@@ -1106,8 +1082,8 @@ public class OrcInputFormat  implements InputFormat<NullWritable, OrcStruct>,
     return splits;
   }
 
-  private static void cancelFutures(List<Future<?>> futures) {
-    for (Future future : futures) {
+  private static <T> void cancelFutures(List<Future<T>> futures) {
+    for (Future<T> future : futures) {
       future.cancel(true);
     }
   }
@@ -1375,6 +1351,55 @@ public class OrcInputFormat  implements InputFormat<NullWritable, OrcStruct>,
         directory);
   }
 
+
+  @VisibleForTesting
+  static SplitStrategy determineSplitStrategy(Context context, FileSystem fs, Path dir,
+      AcidUtils.Directory dirInfo, List<HdfsFileStatusWithId> baseOrOriginalFiles) {
+    Path base = dirInfo.getBaseDirectory();
+    List<HdfsFileStatusWithId> original = dirInfo.getOriginalFiles();
+    List<DeltaMetaData> deltas = AcidUtils.serializeDeltas(dirInfo.getCurrentDirectories());
+    boolean[] covered = new boolean[context.numBuckets];
+    boolean isOriginal = base == null;
+
+    // if we have a base to work from
+    if (base != null || !original.isEmpty()) {
+      long totalFileSize = 0;
+      for (HdfsFileStatusWithId child : baseOrOriginalFiles) {
+        totalFileSize += child.getFileStatus().getLen();
+        AcidOutputFormat.Options opts = AcidUtils.parseBaseBucketFilename
+            (child.getFileStatus().getPath(), context.conf);
+        int b = opts.getBucket();
+        // If the bucket is in the valid range, mark it as covered.
+        // I wish Hive actually enforced bucketing all of the time.
+        if (b >= 0 && b < covered.length) {
+          covered[b] = true;
+        }
+      }
+
+      int numFiles = baseOrOriginalFiles.size();
+      long avgFileSize = totalFileSize / numFiles;
+      int totalFiles = context.numFilesCounter.addAndGet(numFiles);
+      switch(context.splitStrategyKind) {
+        case BI:
+          // BI strategy requested through config
+          return new BISplitStrategy(context, fs, dir, baseOrOriginalFiles, isOriginal, deltas, covered);
+        case ETL:
+          // ETL strategy requested through config
+          return new ETLSplitStrategy(context, fs, dir, baseOrOriginalFiles, isOriginal, deltas, covered);
+        default:
+          // HYBRID strategy
+          if (avgFileSize > context.maxSize || totalFiles <= context.minSplits) {
+            return new ETLSplitStrategy(context, fs, dir, baseOrOriginalFiles, isOriginal, deltas, covered);
+          } else {
+            return new BISplitStrategy(context, fs, dir, baseOrOriginalFiles, isOriginal, deltas, covered);
+          }
+      }
+    } else {
+      // no base, only deltas
+      return new ACIDSplitStrategy(dir, context.numBuckets, deltas, covered);
+    }
+  }
+
   @Override
   public RawReader<OrcStruct> getRawReader(Configuration conf,
                                            boolean collapseEvents,

http://git-wip-us.apache.org/repos/asf/hive/blob/f530f44d/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
index ce86cd8..8ba4d2e 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
@@ -484,7 +484,7 @@ public class TestInputOutputFormat {
               conf, n);
           OrcInputFormat.FileGenerator gen = new OrcInputFormat.FileGenerator(
               context, fs, new MockPath(fs, "mock:/a/b"), false);
-          final SplitStrategy splitStrategy = gen.call();
+          final SplitStrategy splitStrategy = createSplitStrategy(context, gen);
           assertTrue(
               String.format(
                   "Split strategy for %d files x %d size for %d splits", c, s,
@@ -508,7 +508,7 @@ public class TestInputOutputFormat {
     OrcInputFormat.FileGenerator gen =
       new OrcInputFormat.FileGenerator(context, fs,
           new MockPath(fs, "mock:/a/b"), false);
-    SplitStrategy splitStrategy = gen.call();
+    OrcInputFormat.SplitStrategy splitStrategy = createSplitStrategy(context, gen);
     assertEquals(true, splitStrategy instanceof OrcInputFormat.BISplitStrategy);
 
     conf.set("mapreduce.input.fileinputformat.split.maxsize", "500");
@@ -521,11 +521,18 @@ public class TestInputOutputFormat {
         new MockFile("mock:/a/b/part-04", 1000, new byte[1000]));
     gen = new OrcInputFormat.FileGenerator(context, fs,
             new MockPath(fs, "mock:/a/b"), false);
-    splitStrategy = gen.call();
+    splitStrategy = createSplitStrategy(context, gen);
     assertEquals(true, splitStrategy instanceof OrcInputFormat.ETLSplitStrategy);
 
   }
 
+  private OrcInputFormat.SplitStrategy createSplitStrategy(
+      OrcInputFormat.Context context, OrcInputFormat.FileGenerator gen) throws IOException {
+    OrcInputFormat.AcidDirInfo adi = gen.call();
+    return OrcInputFormat.determineSplitStrategy(
+        context, adi.fs, adi.splitPath, adi.acidInfo, adi.baseOrOriginalFiles);
+  }
+
   public static class MockBlock {
     int offset;
     int length;


[12/50] [abbrv] hive git commit: HIVE-11659: Make Vectorization use the fast StringExpr (Gopal V, reviewed by Matt McCline)

Posted by xu...@apache.org.
HIVE-11659: Make Vectorization use the fast StringExpr (Gopal V, reviewed by Matt McCline)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ce258168
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ce258168
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ce258168

Branch: refs/heads/beeline-cli
Commit: ce2581680f1c109ea0a43868e0345a15b06b41c8
Parents: b6d1143
Author: Gopal V <go...@apache.org>
Authored: Fri Aug 28 01:24:32 2015 -0700
Committer: Gopal V <go...@apache.org>
Committed: Fri Aug 28 01:24:32 2015 -0700

----------------------------------------------------------------------
 .../apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java  | 2 +-
 .../hadoop/hive/ql/exec/vector/expressions/CuckooSetBytes.java   | 2 +-
 .../vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java  | 4 ++--
 .../ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java | 4 ++--
 .../exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java | 4 ++--
 .../ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java | 4 ++--
 6 files changed, 10 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
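
The substitution is mechanical: every StringExpr.compare(...) != 0 that was used purely as an equality test becomes !StringExpr.equal(...). The gain comes from equality being allowed to reject on a length mismatch before reading any bytes, which an ordering comparison cannot do. A rough sketch of the distinction (a toy restatement, not the actual StringExpr source):

    final class BytesCmpSketch {
      // equality: length check first, then bail at the first differing byte
      static boolean equal(byte[] a, int aStart, int aLen,
                           byte[] b, int bStart, int bLen) {
        if (aLen != bLen) {
          return false;            // cheap rejection compare() can never make
        }
        for (int i = 0; i < aLen; i++) {
          if (a[aStart + i] != b[bStart + i]) {
            return false;
          }
        }
        return true;
      }

      // ordering: must scan the common prefix to decide <, =, or >
      static int compare(byte[] a, int aStart, int aLen,
                         byte[] b, int bStart, int bLen) {
        int n = Math.min(aLen, bLen);
        for (int i = 0; i < n; i++) {
          int c = (a[aStart + i] & 0xff) - (b[bStart + i] & 0xff);
          if (c != 0) {
            return c;
          }
        }
        return aLen - bLen;
      }
    }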


http://git-wip-us.apache.org/repos/asf/hive/blob/ce258168/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java
index 626cea5..aff3551 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java
@@ -143,7 +143,7 @@ public class VectorHashKeyWrapper extends KeyWrapper {
     for (int i = 0; i < byteValues.length; ++i) {
       // the byte comparison is potentially expensive so is better to branch on null
       if (!isNull[longValues.length + doubleValues.length + i]) {
-        if (0 != StringExpr.compare(
+        if (!StringExpr.equal(
             byteValues[i],
             byteStarts[i],
             byteLengths[i],

http://git-wip-us.apache.org/repos/asf/hive/blob/ce258168/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetBytes.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetBytes.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetBytes.java
index a21162b..6383e8a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetBytes.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetBytes.java
@@ -82,7 +82,7 @@ public class CuckooSetBytes {
   }
 
   private static boolean entryEqual(byte[][] t, int hash, byte[] b, int start, int len) {
-    return t[hash] != null && StringExpr.compare(t[hash], 0, t[hash].length, b, start, len) == 0;
+    return t[hash] != null && StringExpr.equal(t[hash], 0, t[hash].length, b, start, len);
   }
 
   public void insert(byte[] x) {

http://git-wip-us.apache.org/repos/asf/hive/blob/ce258168/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java
index 87a11c0..9f2d4c3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java
@@ -234,8 +234,8 @@ public class VectorMapJoinInnerBigOnlyStringOperator extends VectorMapJoinInnerB
            */
 
           if (!haveSaveKey ||
-              StringExpr.compare(vector[saveKeyBatchIndex], start[saveKeyBatchIndex], length[saveKeyBatchIndex],
-                                 vector[batchIndex], start[batchIndex], length[batchIndex]) != 0) {
+              StringExpr.equal(vector[saveKeyBatchIndex], start[saveKeyBatchIndex], length[saveKeyBatchIndex],
+                                 vector[batchIndex], start[batchIndex], length[batchIndex]) == false) {
 
             // New key.
 

http://git-wip-us.apache.org/repos/asf/hive/blob/ce258168/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java
index 9f10ff1..5a5d54f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java
@@ -229,8 +229,8 @@ public class VectorMapJoinInnerStringOperator extends VectorMapJoinInnerGenerate
            */
 
           if (!haveSaveKey ||
-              StringExpr.compare(vector[saveKeyBatchIndex], start[saveKeyBatchIndex], length[saveKeyBatchIndex],
-                                 vector[batchIndex], start[batchIndex], length[batchIndex]) != 0) {
+              StringExpr.equal(vector[saveKeyBatchIndex], start[saveKeyBatchIndex], length[saveKeyBatchIndex],
+                                 vector[batchIndex], start[batchIndex], length[batchIndex]) == false) {
 
             // New key.
 

http://git-wip-us.apache.org/repos/asf/hive/blob/ce258168/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java
index 9ff1141..e9ce739 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java
@@ -230,8 +230,8 @@ public class VectorMapJoinLeftSemiStringOperator extends VectorMapJoinLeftSemiGe
            */
 
           if (!haveSaveKey ||
-              StringExpr.compare(vector[saveKeyBatchIndex], start[saveKeyBatchIndex], length[saveKeyBatchIndex],
-                                 vector[batchIndex], start[batchIndex], length[batchIndex]) != 0) {
+              StringExpr.equal(vector[saveKeyBatchIndex], start[saveKeyBatchIndex], length[saveKeyBatchIndex],
+                                 vector[batchIndex], start[batchIndex], length[batchIndex]) == false) {
 
             // New key.
 

http://git-wip-us.apache.org/repos/asf/hive/blob/ce258168/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java
index 49efe1a..dfdd6d7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java
@@ -290,8 +290,8 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
              */
 
             if (!haveSaveKey ||
-                StringExpr.compare(vector[saveKeyBatchIndex], start[saveKeyBatchIndex], length[saveKeyBatchIndex],
-                                   vector[batchIndex], start[batchIndex], length[batchIndex]) != 0) {
+                StringExpr.equal(vector[saveKeyBatchIndex], start[saveKeyBatchIndex], length[saveKeyBatchIndex],
+                                   vector[batchIndex], start[batchIndex], length[batchIndex]) == false) {
               // New key.
 
               if (haveSaveKey) {


[23/50] [abbrv] hive git commit: HIVE-11504: Predicate pushing down doesn't work for float type for Parquet (Ferdinand Xu, reviewed by Sergio Pena and Owen O'Malley)

Posted by xu...@apache.org.
HIVE-11504: Predicate pushing down doesn't work for float type for Parquet (Ferdinand Xu, reviewed by Sergio Pena and Owen O'Malley)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8f930e58
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8f930e58
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8f930e58

Branch: refs/heads/beeline-cli
Commit: 8f930e588efd6ec937b9ad20fcf09030ae210ec3
Parents: a338f33
Author: Ferdinand Xu <ch...@intel.com>
Authored: Mon Aug 31 21:07:10 2015 -0400
Committer: Ferdinand Xu <ch...@intel.com>
Committed: Mon Aug 31 21:07:10 2015 -0400

----------------------------------------------------------------------
 .../hive/ql/io/parquet/LeafFilterFactory.java   |   29 +-
 .../read/TestParquetFilterPredicate.java        |   21 +
 .../clientpositive/parquet_ppd_partition.q      |    9 +
 .../clientpositive/parquet_predicate_pushdown.q |  297 +++-
 .../clientpositive/parquet_ppd_partition.q.out  |   47 +
 .../parquet_predicate_pushdown.q.out            | 1309 +++++++++++++++++-
 6 files changed, 1660 insertions(+), 52 deletions(-)
----------------------------------------------------------------------
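
The root cause: Hive's PredicateLeaf.Type.FLOAT covers both float and double columns, but Parquet filter predicates are bound to the file's physical type, so building a doubleColumn predicate against a FLOAT column means the pushed-down filter cannot match. The fix dispatches on the Parquet primitive type. A condensed restatement of that dispatch (helper and class names are illustrative):

    import static org.apache.parquet.filter2.predicate.FilterApi.doubleColumn;
    import static org.apache.parquet.filter2.predicate.FilterApi.eq;
    import static org.apache.parquet.filter2.predicate.FilterApi.floatColumn;

    import org.apache.parquet.filter2.predicate.FilterPredicate;
    import org.apache.parquet.schema.PrimitiveType;

    final class FloatingEq {
      // pick the column factory matching the physical type in the file schema
      static FilterPredicate buildEq(String column,
          PrimitiveType.PrimitiveTypeName physical, Number constant) {
        if (physical == PrimitiveType.PrimitiveTypeName.FLOAT) {
          return eq(floatColumn(column),
              constant == null ? null : constant.floatValue());
        }
        return eq(doubleColumn(column),
            constant == null ? null : constant.doubleValue());
      }
    }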


http://git-wip-us.apache.org/repos/asf/hive/blob/8f930e58/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java
index 1ceea6e..3e00612 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java
@@ -31,6 +31,7 @@ import static org.apache.parquet.filter2.predicate.FilterApi.ltEq;
 import static org.apache.parquet.filter2.predicate.FilterApi.binaryColumn;
 import static org.apache.parquet.filter2.predicate.FilterApi.booleanColumn;
 import static org.apache.parquet.filter2.predicate.FilterApi.doubleColumn;
+import static org.apache.parquet.filter2.predicate.FilterApi.floatColumn;
 import static org.apache.parquet.filter2.predicate.FilterApi.intColumn;
 
 public class LeafFilterFactory {
@@ -83,6 +84,25 @@ public class LeafFilterFactory {
     }
   }
 
+  class FloatFilterPredicateLeafBuilder extends FilterPredicateLeafBuilder {
+    @Override
+    public FilterPredicate buildPredict(Operator op, Object constant, String columnName) {
+      switch (op) {
+      case LESS_THAN:
+        return lt(floatColumn(columnName), ((Number) constant).floatValue());
+      case IS_NULL:
+      case EQUALS:
+      case NULL_SAFE_EQUALS:
+        return eq(floatColumn(columnName),
+            (constant == null) ? null : ((Number) constant).floatValue());
+      case LESS_THAN_EQUALS:
+        return ltEq(FilterApi.floatColumn(columnName), ((Number) constant).floatValue());
+      default:
+        throw new RuntimeException("Unknown PredicateLeaf Operator type: " + op);
+      }
+    }
+  }
+
   class DoubleFilterPredicateLeafBuilder extends FilterPredicateLeafBuilder {
 
     @Override
@@ -158,8 +178,13 @@ public class LeafFilterFactory {
         } else {
           return new LongFilterPredicateLeafBuilder();
         }
-      case FLOAT:   // float and double
-        return new DoubleFilterPredicateLeafBuilder();
+      case FLOAT:
+        if (parquetType.asPrimitiveType().getPrimitiveTypeName() ==
+            PrimitiveType.PrimitiveTypeName.FLOAT) {
+          return new FloatFilterPredicateLeafBuilder();
+        } else {
+          return new DoubleFilterPredicateLeafBuilder();
+        }
       case STRING:  // string, char, varchar
         return new BinaryFilterPredicateLeafBuilder();
       case BOOLEAN:

http://git-wip-us.apache.org/repos/asf/hive/blob/8f930e58/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java
index ac5c1a0..2be2596 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java
@@ -48,4 +48,25 @@ public class TestParquetFilterPredicate {
     String expected = "and(not(eq(a, null)), not(eq(a, Binary{\"stinger\"})))";
     assertEquals(expected, p.toString());
   }
+
+  @Test
+  public void testFilterFloatColumns() {
+    MessageType schema =
+        MessageTypeParser.parseMessageType("message test {  required float a; required int32 b; }");
+    SearchArgument sarg = SearchArgumentFactory.newBuilder()
+        .startNot()
+        .startOr()
+        .isNull("a", PredicateLeaf.Type.FLOAT)
+        .between("a", PredicateLeaf.Type.FLOAT, 10.2, 20.3)
+        .in("b", PredicateLeaf.Type.LONG, 1L, 2L, 3L)
+        .end()
+        .end()
+        .build();
+
+    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
+
+    String expected =
+        "and(and(not(eq(a, null)), not(and(lt(a, 20.3), not(lteq(a, 10.2))))), not(or(or(eq(b, 1), eq(b, 2)), eq(b, 3))))";
+    assertEquals(expected, p.toString());
+  }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/8f930e58/ql/src/test/queries/clientpositive/parquet_ppd_partition.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/parquet_ppd_partition.q b/ql/src/test/queries/clientpositive/parquet_ppd_partition.q
new file mode 100644
index 0000000..08af84f
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/parquet_ppd_partition.q
@@ -0,0 +1,9 @@
+SET hive.optimize.index.filter=true;
+SET hive.optimize.ppd=true;
+
+-- Test predicate with partitioned columns
+CREATE TABLE part1 (id int, content string) PARTITIONED BY (p string) STORED AS PARQUET;
+ALTER TABLE part1 ADD PARTITION (p='p1');
+INSERT INTO TABLE part1 PARTITION (p='p1') VALUES (1, 'a'), (2, 'b');
+SELECT * FROM part1 WHERE p='p1';
+DROP TABLE part1 PURGE;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/8f930e58/ql/src/test/queries/clientpositive/parquet_predicate_pushdown.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/parquet_predicate_pushdown.q b/ql/src/test/queries/clientpositive/parquet_predicate_pushdown.q
index 08af84f..32767e8 100644
--- a/ql/src/test/queries/clientpositive/parquet_predicate_pushdown.q
+++ b/ql/src/test/queries/clientpositive/parquet_predicate_pushdown.q
@@ -1,9 +1,292 @@
-SET hive.optimize.index.filter=true;
 SET hive.optimize.ppd=true;
 
--- Test predicate with partitioned columns
-CREATE TABLE part1 (id int, content string) PARTITIONED BY (p string) STORED AS PARQUET;
-ALTER TABLE part1 ADD PARTITION (p='p1');
-INSERT INTO TABLE part1 PARTITION (p='p1') VALUES (1, 'a'), (2, 'b');
-SELECT * FROM part1 WHERE p='p1';
-DROP TABLE part1 PURGE;
\ No newline at end of file
+-- SORT_QUERY_RESULTS
+CREATE TABLE tbl_pred(t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+STORED AS PARQUET;
+
+CREATE TABLE staging(t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE staging;
+
+INSERT INTO TABLE tbl_pred select * from staging;
+
+-- no predicate case. the explain plan should not have filter expression in table scan operator
+
+SELECT SUM(HASH(t)) FROM tbl_pred;
+
+SET hive.optimize.index.filter=true;
+SELECT SUM(HASH(t)) FROM tbl_pred;
+SET hive.optimize.index.filter=false;
+
+EXPLAIN SELECT SUM(HASH(t)) FROM tbl_pred;
+
+SET hive.optimize.index.filter=true;
+EXPLAIN SELECT SUM(HASH(t)) FROM tbl_pred;
+SET hive.optimize.index.filter=false;
+
+-- all the following queries have predicates which are pushed down to table scan operator if
+-- hive.optimize.index.filter is set to true. the explain plan should show filter expression
+-- in table scan operator.
+
+SELECT * FROM tbl_pred WHERE t<2 limit 1;
+SET hive.optimize.index.filter=true;
+SELECT * FROM tbl_pred WHERE t<2 limit 1;
+SET hive.optimize.index.filter=false;
+
+SELECT * FROM tbl_pred WHERE t>2 limit 1;
+SET hive.optimize.index.filter=true;
+SELECT * FROM tbl_pred WHERE t>2 limit 1;
+SET hive.optimize.index.filter=false;
+
+SELECT SUM(HASH(t)) FROM tbl_pred
+  WHERE t IS NOT NULL
+  AND t < 0
+  AND t > -2;
+
+SET hive.optimize.index.filter=true;
+SELECT SUM(HASH(t)) FROM tbl_pred
+  WHERE t IS NOT NULL
+  AND t < 0
+  AND t > -2;
+SET hive.optimize.index.filter=false;
+
+EXPLAIN SELECT SUM(HASH(t)) FROM tbl_pred
+  WHERE t IS NOT NULL
+  AND t < 0
+  AND t > -2;
+
+SET hive.optimize.index.filter=true;
+EXPLAIN SELECT SUM(HASH(t)) FROM tbl_pred
+  WHERE t IS NOT NULL
+  AND t < 0
+  AND t > -2;
+SET hive.optimize.index.filter=false;
+
+SELECT t, s FROM tbl_pred
+  WHERE t <=> -1
+  AND s IS NOT NULL
+  AND s LIKE 'bob%'
+  ;
+
+SET hive.optimize.index.filter=true;
+SELECT t, s FROM tbl_pred
+  WHERE t <=> -1
+  AND s IS NOT NULL
+  AND s LIKE 'bob%'
+  ;
+SET hive.optimize.index.filter=false;
+
+EXPLAIN SELECT t, s FROM tbl_pred
+  WHERE t <=> -1
+  AND s IS NOT NULL
+  AND s LIKE 'bob%'
+  ;
+
+SET hive.optimize.index.filter=true;
+EXPLAIN SELECT t, s FROM tbl_pred
+  WHERE t <=> -1
+  AND s IS NOT NULL
+  AND s LIKE 'bob%'
+  ;
+SET hive.optimize.index.filter=false;
+
+SELECT t, s FROM tbl_pred
+  WHERE s IS NOT NULL
+  AND s LIKE 'bob%'
+  AND t NOT IN (-1,-2,-3)
+  AND t BETWEEN 25 AND 30
+  SORT BY t,s;
+
+set hive.optimize.index.filter=true;
+SELECT t, s FROM tbl_pred
+  WHERE s IS NOT NULL
+  AND s LIKE 'bob%'
+  AND t NOT IN (-1,-2,-3)
+  AND t BETWEEN 25 AND 30
+  SORT BY t,s;
+set hive.optimize.index.filter=false;
+
+EXPLAIN SELECT t, s FROM tbl_pred
+  WHERE s IS NOT NULL
+  AND s LIKE 'bob%'
+  AND t NOT IN (-1,-2,-3)
+  AND t BETWEEN 25 AND 30
+  SORT BY t,s;
+
+SET hive.optimize.index.filter=true;
+EXPLAIN SELECT t, s FROM tbl_pred
+  WHERE s IS NOT NULL
+  AND s LIKE 'bob%'
+  AND t NOT IN (-1,-2,-3)
+  AND t BETWEEN 25 AND 30
+  SORT BY t,s;
+SET hive.optimize.index.filter=false;
+
+SELECT t, si, d, s FROM tbl_pred
+  WHERE d >= ROUND(9.99)
+  AND d < 12
+  AND t IS NOT NULL
+  AND s LIKE '%son'
+  AND s NOT LIKE '%car%'
+  AND t > 0
+  AND si BETWEEN 300 AND 400
+  ORDER BY s DESC
+  LIMIT 3;
+
+SET hive.optimize.index.filter=true;
+SELECT t, si, d, s FROM tbl_pred
+  WHERE d >= ROUND(9.99)
+  AND d < 12
+  AND t IS NOT NULL
+  AND s LIKE '%son'
+  AND s NOT LIKE '%car%'
+  AND t > 0
+  AND si BETWEEN 300 AND 400
+  ORDER BY s DESC
+  LIMIT 3;
+SET hive.optimize.index.filter=false;
+
+EXPLAIN SELECT t, si, d, s FROM tbl_pred
+  WHERE d >= ROUND(9.99)
+  AND d < 12
+  AND t IS NOT NULL
+  AND s LIKE '%son'
+  AND s NOT LIKE '%car%'
+  AND t > 0
+  AND si BETWEEN 300 AND 400
+  ORDER BY s DESC
+  LIMIT 3;
+
+SET hive.optimize.index.filter=true;
+EXPLAIN SELECT t, si, d, s FROM tbl_pred
+  WHERE d >= ROUND(9.99)
+  AND d < 12
+  AND t IS NOT NULL
+  AND s LIKE '%son'
+  AND s NOT LIKE '%car%'
+  AND t > 0
+  AND si BETWEEN 300 AND 400
+  ORDER BY s DESC
+  LIMIT 3;
+SET hive.optimize.index.filter=false;
+
+SELECT t, si, d, s FROM tbl_pred
+  WHERE t > 10
+  AND t <> 101
+  AND d >= ROUND(9.99)
+  AND d < 12
+  AND t IS NOT NULL
+  AND s LIKE '%son'
+  AND s NOT LIKE '%car%'
+  AND t > 0
+  AND si BETWEEN 300 AND 400
+  SORT BY s DESC
+  LIMIT 3;
+
+SET hive.optimize.index.filter=true;
+SELECT t, si, d, s FROM tbl_pred
+  WHERE t > 10
+  AND t <> 101
+  AND d >= ROUND(9.99)
+  AND d < 12
+  AND t IS NOT NULL
+  AND s LIKE '%son'
+  AND s NOT LIKE '%car%'
+  AND t > 0
+  AND si BETWEEN 300 AND 400
+  SORT BY s DESC
+  LIMIT 3;
+SET hive.optimize.index.filter=false;
+
+SET hive.optimize.index.filter=true;
+SELECT f, i, b FROM tbl_pred
+  WHERE f IS NOT NULL
+  AND f < 123.2
+  AND f > 1.92
+  AND f >= 9.99
+  AND f BETWEEN 1.92 AND 123.2
+  AND i IS NOT NULL
+  AND i < 67627
+  AND i > 60627
+  AND i >= 60626
+  AND i BETWEEN 60626 AND 67627
+  AND b IS NOT NULL
+  AND b < 4294967861
+  AND b > 4294967261
+  AND b >= 4294967260
+  AND b BETWEEN 4294967261 AND 4294967861
+  SORT BY f DESC
+  LIMIT 3;
+SET hive.optimize.index.filter=false;
+
+EXPLAIN SELECT t, si, d, s FROM tbl_pred
+  WHERE t > 10
+  AND t <> 101
+  AND d >= ROUND(9.99)
+  AND d < 12
+  AND t IS NOT NULL
+  AND s LIKE '%son'
+  AND s NOT LIKE '%car%'
+  AND t > 0
+  AND si BETWEEN 300 AND 400
+  SORT BY s DESC
+  LIMIT 3;
+
+SET hive.optimize.index.filter=true;
+EXPLAIN SELECT t, si, d, s FROM tbl_pred
+  WHERE t > 10
+  AND t <> 101
+  AND d >= ROUND(9.99)
+  AND d < 12
+  AND t IS NOT NULL
+  AND s LIKE '%son'
+  AND s NOT LIKE '%car%'
+  AND t > 0
+  AND si BETWEEN 300 AND 400
+  SORT BY s DESC
+  LIMIT 3;
+SET hive.optimize.index.filter=false;
+
+
+SET hive.optimize.index.filter=true;
+EXPLAIN SELECT f, i, b FROM tbl_pred
+  WHERE f IS NOT NULL
+  AND f < 123.2
+  AND f > 1.92
+  AND f >= 9.99
+  AND f BETWEEN 1.92 AND 123.2
+  AND i IS NOT NULL
+  AND i < 67627
+  AND i > 60627
+  AND i >= 60626
+  AND i BETWEEN 60626 AND 67627
+  AND b IS NOT NULL
+  AND b < 4294967861
+  AND b > 4294967261
+  AND b >= 4294967260
+  AND b BETWEEN 4294967261 AND 4294967861
+  SORT BY f DESC
+  LIMIT 3;
+SET hive.optimize.index.filter=false;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/8f930e58/ql/src/test/results/clientpositive/parquet_ppd_partition.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/parquet_ppd_partition.q.out b/ql/src/test/results/clientpositive/parquet_ppd_partition.q.out
new file mode 100644
index 0000000..4186618
--- /dev/null
+++ b/ql/src/test/results/clientpositive/parquet_ppd_partition.q.out
@@ -0,0 +1,47 @@
+PREHOOK: query: -- Test predicate with partitioned columns
+CREATE TABLE part1 (id int, content string) PARTITIONED BY (p string) STORED AS PARQUET
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@part1
+POSTHOOK: query: -- Test predicate with partitioned columns
+CREATE TABLE part1 (id int, content string) PARTITIONED BY (p string) STORED AS PARQUET
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@part1
+PREHOOK: query: ALTER TABLE part1 ADD PARTITION (p='p1')
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Output: default@part1
+POSTHOOK: query: ALTER TABLE part1 ADD PARTITION (p='p1')
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Output: default@part1
+POSTHOOK: Output: default@part1@p=p1
+PREHOOK: query: INSERT INTO TABLE part1 PARTITION (p='p1') VALUES (1, 'a'), (2, 'b')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@part1@p=p1
+POSTHOOK: query: INSERT INTO TABLE part1 PARTITION (p='p1') VALUES (1, 'a'), (2, 'b')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@part1@p=p1
+POSTHOOK: Lineage: part1 PARTITION(p=p1).content SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: part1 PARTITION(p=p1).id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: SELECT * FROM part1 WHERE p='p1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part1
+PREHOOK: Input: default@part1@p=p1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM part1 WHERE p='p1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part1
+POSTHOOK: Input: default@part1@p=p1
+#### A masked pattern was here ####
+1	a	p1
+2	b	p1
+PREHOOK: query: DROP TABLE part1 PURGE
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@part1
+PREHOOK: Output: default@part1
+POSTHOOK: query: DROP TABLE part1 PURGE
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@part1
+POSTHOOK: Output: default@part1

http://git-wip-us.apache.org/repos/asf/hive/blob/8f930e58/ql/src/test/results/clientpositive/parquet_predicate_pushdown.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/parquet_predicate_pushdown.q.out b/ql/src/test/results/clientpositive/parquet_predicate_pushdown.q.out
index 4186618..1dc2937 100644
--- a/ql/src/test/results/clientpositive/parquet_predicate_pushdown.q.out
+++ b/ql/src/test/results/clientpositive/parquet_predicate_pushdown.q.out
@@ -1,47 +1,1270 @@
-PREHOOK: query: -- Test predicate with partitioned columns
-CREATE TABLE part1 (id int, content string) PARTITIONED BY (p string) STORED AS PARQUET
+PREHOOK: query: -- SORT_QUERY_RESULTS
+CREATE TABLE tbl_pred(t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+STORED AS PARQUET
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
-PREHOOK: Output: default@part1
-POSTHOOK: query: -- Test predicate with partitioned columns
-CREATE TABLE part1 (id int, content string) PARTITIONED BY (p string) STORED AS PARQUET
+PREHOOK: Output: default@tbl_pred
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+CREATE TABLE tbl_pred(t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+STORED AS PARQUET
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
-POSTHOOK: Output: default@part1
-PREHOOK: query: ALTER TABLE part1 ADD PARTITION (p='p1')
-PREHOOK: type: ALTERTABLE_ADDPARTS
-PREHOOK: Output: default@part1
-POSTHOOK: query: ALTER TABLE part1 ADD PARTITION (p='p1')
-POSTHOOK: type: ALTERTABLE_ADDPARTS
-POSTHOOK: Output: default@part1
-POSTHOOK: Output: default@part1@p=p1
-PREHOOK: query: INSERT INTO TABLE part1 PARTITION (p='p1') VALUES (1, 'a'), (2, 'b')
-PREHOOK: type: QUERY
-PREHOOK: Input: default@values__tmp__table__1
-PREHOOK: Output: default@part1@p=p1
-POSTHOOK: query: INSERT INTO TABLE part1 PARTITION (p='p1') VALUES (1, 'a'), (2, 'b')
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@values__tmp__table__1
-POSTHOOK: Output: default@part1@p=p1
-POSTHOOK: Lineage: part1 PARTITION(p=p1).content SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
-POSTHOOK: Lineage: part1 PARTITION(p=p1).id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
-PREHOOK: query: SELECT * FROM part1 WHERE p='p1'
-PREHOOK: type: QUERY
-PREHOOK: Input: default@part1
-PREHOOK: Input: default@part1@p=p1
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT * FROM part1 WHERE p='p1'
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@part1
-POSTHOOK: Input: default@part1@p=p1
-#### A masked pattern was here ####
-1	a	p1
-2	b	p1
-PREHOOK: query: DROP TABLE part1 PURGE
-PREHOOK: type: DROPTABLE
-PREHOOK: Input: default@part1
-PREHOOK: Output: default@part1
-POSTHOOK: query: DROP TABLE part1 PURGE
-POSTHOOK: type: DROPTABLE
-POSTHOOK: Input: default@part1
-POSTHOOK: Output: default@part1
+POSTHOOK: Output: default@tbl_pred
+PREHOOK: query: CREATE TABLE staging(t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@staging
+POSTHOOK: query: CREATE TABLE staging(t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@staging
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE staging
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@staging
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE staging
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@staging
+PREHOOK: query: INSERT INTO TABLE tbl_pred select * from staging
+PREHOOK: type: QUERY
+PREHOOK: Input: default@staging
+PREHOOK: Output: default@tbl_pred
+POSTHOOK: query: INSERT INTO TABLE tbl_pred select * from staging
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@staging
+POSTHOOK: Output: default@tbl_pred
+POSTHOOK: Lineage: tbl_pred.b SIMPLE [(staging)staging.FieldSchema(name:b, type:bigint, comment:null), ]
+POSTHOOK: Lineage: tbl_pred.bin SIMPLE [(staging)staging.FieldSchema(name:bin, type:binary, comment:null), ]
+POSTHOOK: Lineage: tbl_pred.bo SIMPLE [(staging)staging.FieldSchema(name:bo, type:boolean, comment:null), ]
+POSTHOOK: Lineage: tbl_pred.d SIMPLE [(staging)staging.FieldSchema(name:d, type:double, comment:null), ]
+POSTHOOK: Lineage: tbl_pred.dec SIMPLE [(staging)staging.FieldSchema(name:dec, type:decimal(4,2), comment:null), ]
+POSTHOOK: Lineage: tbl_pred.f SIMPLE [(staging)staging.FieldSchema(name:f, type:float, comment:null), ]
+POSTHOOK: Lineage: tbl_pred.i SIMPLE [(staging)staging.FieldSchema(name:i, type:int, comment:null), ]
+POSTHOOK: Lineage: tbl_pred.s SIMPLE [(staging)staging.FieldSchema(name:s, type:string, comment:null), ]
+POSTHOOK: Lineage: tbl_pred.si SIMPLE [(staging)staging.FieldSchema(name:si, type:smallint, comment:null), ]
+POSTHOOK: Lineage: tbl_pred.t SIMPLE [(staging)staging.FieldSchema(name:t, type:tinyint, comment:null), ]
+POSTHOOK: Lineage: tbl_pred.ts SIMPLE [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ]
+PREHOOK: query: -- no predicate case. the explain plan should not have filter expression in table scan operator
+
+SELECT SUM(HASH(t)) FROM tbl_pred
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+POSTHOOK: query: -- no predicate case. the explain plan should not have filter expression in table scan operator
+
+SELECT SUM(HASH(t)) FROM tbl_pred
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+62430
+PREHOOK: query: SELECT SUM(HASH(t)) FROM tbl_pred
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT SUM(HASH(t)) FROM tbl_pred
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+62430
+PREHOOK: query: EXPLAIN SELECT SUM(HASH(t)) FROM tbl_pred
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT SUM(HASH(t)) FROM tbl_pred
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: tbl_pred
+            Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: hash(t) (type: int)
+              outputColumnNames: _col0
+              Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: sum(_col0)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN SELECT SUM(HASH(t)) FROM tbl_pred
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT SUM(HASH(t)) FROM tbl_pred
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: tbl_pred
+            Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: hash(t) (type: int)
+              outputColumnNames: _col0
+              Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: sum(_col0)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: -- all the following queries have predicates which are pushed down to table scan operator if
+-- hive.optimize.index.filter is set to true. the explain plan should show filter expression
+-- in table scan operator.
+
+SELECT * FROM tbl_pred WHERE t<2 limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+POSTHOOK: query: -- all the following queries have predicates which are pushed down to table scan operator if
+-- hive.optimize.index.filter is set to true. the explain plan should show filter expression
+-- in table scan operator.
+
+SELECT * FROM tbl_pred WHERE t<2 limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+-3	467	65575	4294967437	81.64	23.53	true	tom hernandez	2013-03-01 09:11:58.703188	32.85	study skills
+PREHOOK: query: SELECT * FROM tbl_pred WHERE t<2 limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM tbl_pred WHERE t<2 limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+-3	467	65575	4294967437	81.64	23.53	true	tom hernandez	2013-03-01 09:11:58.703188	32.85	study skills
+PREHOOK: query: SELECT * FROM tbl_pred WHERE t>2 limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM tbl_pred WHERE t>2 limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+124	336	65664	4294967435	74.72	42.47	true	bob davidson	2013-03-01 09:11:58.703302	45.4	yard duty
+PREHOOK: query: SELECT * FROM tbl_pred WHERE t>2 limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM tbl_pred WHERE t>2 limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+124	336	65664	4294967435	74.72	42.47	true	bob davidson	2013-03-01 09:11:58.703302	45.4	yard duty
+PREHOOK: query: SELECT SUM(HASH(t)) FROM tbl_pred
+  WHERE t IS NOT NULL
+  AND t < 0
+  AND t > -2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT SUM(HASH(t)) FROM tbl_pred
+  WHERE t IS NOT NULL
+  AND t < 0
+  AND t > -2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+-8
+PREHOOK: query: SELECT SUM(HASH(t)) FROM tbl_pred
+  WHERE t IS NOT NULL
+  AND t < 0
+  AND t > -2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT SUM(HASH(t)) FROM tbl_pred
+  WHERE t IS NOT NULL
+  AND t < 0
+  AND t > -2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+-8
+PREHOOK: query: EXPLAIN SELECT SUM(HASH(t)) FROM tbl_pred
+  WHERE t IS NOT NULL
+  AND t < 0
+  AND t > -2
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT SUM(HASH(t)) FROM tbl_pred
+  WHERE t IS NOT NULL
+  AND t < 0
+  AND t > -2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: tbl_pred
+            Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: ((t < 0) and (UDFToInteger(t) > -2)) (type: boolean)
+              Statistics: Num rows: 116 Data size: 1276 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: hash(t) (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 116 Data size: 1276 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: sum(_col0)
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN SELECT SUM(HASH(t)) FROM tbl_pred
+  WHERE t IS NOT NULL
+  AND t < 0
+  AND t > -2
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT SUM(HASH(t)) FROM tbl_pred
+  WHERE t IS NOT NULL
+  AND t < 0
+  AND t > -2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: tbl_pred
+            filterExpr: ((t < 0) and (UDFToInteger(t) > -2)) (type: boolean)
+            Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: ((t < 0) and (UDFToInteger(t) > -2)) (type: boolean)
+              Statistics: Num rows: 116 Data size: 1276 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: hash(t) (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 116 Data size: 1276 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: sum(_col0)
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT t, s FROM tbl_pred
+  WHERE t <=> -1
+  AND s IS NOT NULL
+  AND s LIKE 'bob%'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT t, s FROM tbl_pred
+  WHERE t <=> -1
+  AND s IS NOT NULL
+  AND s LIKE 'bob%'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+-1	bob laertes
+-1	bob young
+PREHOOK: query: SELECT t, s FROM tbl_pred
+  WHERE t <=> -1
+  AND s IS NOT NULL
+  AND s LIKE 'bob%'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT t, s FROM tbl_pred
+  WHERE t <=> -1
+  AND s IS NOT NULL
+  AND s LIKE 'bob%'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+-1	bob laertes
+-1	bob young
+PREHOOK: query: EXPLAIN SELECT t, s FROM tbl_pred
+  WHERE t <=> -1
+  AND s IS NOT NULL
+  AND s LIKE 'bob%'
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT t, s FROM tbl_pred
+  WHERE t <=> -1
+  AND s IS NOT NULL
+  AND s LIKE 'bob%'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: tbl_pred
+            Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (((t = -1) and s is not null) and (s like 'bob%')) (type: boolean)
+              Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: -1 (type: tinyint), s (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN SELECT t, s FROM tbl_pred
+  WHERE t <=> -1
+  AND s IS NOT NULL
+  AND s LIKE 'bob%'
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT t, s FROM tbl_pred
+  WHERE t <=> -1
+  AND s IS NOT NULL
+  AND s LIKE 'bob%'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: tbl_pred
+            filterExpr: (((t = -1) and s is not null) and (s like 'bob%')) (type: boolean)
+            Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (((t = -1) and s is not null) and (s like 'bob%')) (type: boolean)
+              Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: -1 (type: tinyint), s (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT t, s FROM tbl_pred
+  WHERE s IS NOT NULL
+  AND s LIKE 'bob%'
+  AND t NOT IN (-1,-2,-3)
+  AND t BETWEEN 25 AND 30
+  SORT BY t,s
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT t, s FROM tbl_pred
+  WHERE s IS NOT NULL
+  AND s LIKE 'bob%'
+  AND t NOT IN (-1,-2,-3)
+  AND t BETWEEN 25 AND 30
+  SORT BY t,s
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+26	bob ovid
+26	bob quirinius
+27	bob ovid
+PREHOOK: query: SELECT t, s FROM tbl_pred
+  WHERE s IS NOT NULL
+  AND s LIKE 'bob%'
+  AND t NOT IN (-1,-2,-3)
+  AND t BETWEEN 25 AND 30
+  SORT BY t,s
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT t, s FROM tbl_pred
+  WHERE s IS NOT NULL
+  AND s LIKE 'bob%'
+  AND t NOT IN (-1,-2,-3)
+  AND t BETWEEN 25 AND 30
+  SORT BY t,s
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+26	bob ovid
+26	bob quirinius
+27	bob ovid
+PREHOOK: query: EXPLAIN SELECT t, s FROM tbl_pred
+  WHERE s IS NOT NULL
+  AND s LIKE 'bob%'
+  AND t NOT IN (-1,-2,-3)
+  AND t BETWEEN 25 AND 30
+  SORT BY t,s
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT t, s FROM tbl_pred
+  WHERE s IS NOT NULL
+  AND s LIKE 'bob%'
+  AND t NOT IN (-1,-2,-3)
+  AND t BETWEEN 25 AND 30
+  SORT BY t,s
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: tbl_pred
+            Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (((s is not null and (s like 'bob%')) and (not (t) IN (-1, -2, -3))) and t BETWEEN 25 AND 30) (type: boolean)
+              Statistics: Num rows: 65 Data size: 715 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: t (type: tinyint), s (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 65 Data size: 715 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: tinyint), _col1 (type: string)
+                  sort order: ++
+                  Statistics: Num rows: 65 Data size: 715 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: string)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 65 Data size: 715 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 65 Data size: 715 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN SELECT t, s FROM tbl_pred
+  WHERE s IS NOT NULL
+  AND s LIKE 'bob%'
+  AND t NOT IN (-1,-2,-3)
+  AND t BETWEEN 25 AND 30
+  SORT BY t,s
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT t, s FROM tbl_pred
+  WHERE s IS NOT NULL
+  AND s LIKE 'bob%'
+  AND t NOT IN (-1,-2,-3)
+  AND t BETWEEN 25 AND 30
+  SORT BY t,s
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: tbl_pred
+            filterExpr: (((s is not null and (s like 'bob%')) and (not (t) IN (-1, -2, -3))) and t BETWEEN 25 AND 30) (type: boolean)
+            Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (((s is not null and (s like 'bob%')) and (not (t) IN (-1, -2, -3))) and t BETWEEN 25 AND 30) (type: boolean)
+              Statistics: Num rows: 65 Data size: 715 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: t (type: tinyint), s (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 65 Data size: 715 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: tinyint), _col1 (type: string)
+                  sort order: ++
+                  Statistics: Num rows: 65 Data size: 715 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: string)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 65 Data size: 715 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 65 Data size: 715 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT t, si, d, s FROM tbl_pred
+  WHERE d >= ROUND(9.99)
+  AND d < 12
+  AND t IS NOT NULL
+  AND s LIKE '%son'
+  AND s NOT LIKE '%car%'
+  AND t > 0
+  AND si BETWEEN 300 AND 400
+  ORDER BY s DESC
+  LIMIT 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT t, si, d, s FROM tbl_pred
+  WHERE d >= ROUND(9.99)
+  AND d < 12
+  AND t IS NOT NULL
+  AND s LIKE '%son'
+  AND s NOT LIKE '%car%'
+  AND t > 0
+  AND si BETWEEN 300 AND 400
+  ORDER BY s DESC
+  LIMIT 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+101	327	11.48	gabriella ellison
+15	334	11.12	jessica robinson
+7	320	11.54	bob ellison
+PREHOOK: query: SELECT t, si, d, s FROM tbl_pred
+  WHERE d >= ROUND(9.99)
+  AND d < 12
+  AND t IS NOT NULL
+  AND s LIKE '%son'
+  AND s NOT LIKE '%car%'
+  AND t > 0
+  AND si BETWEEN 300 AND 400
+  ORDER BY s DESC
+  LIMIT 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT t, si, d, s FROM tbl_pred
+  WHERE d >= ROUND(9.99)
+  AND d < 12
+  AND t IS NOT NULL
+  AND s LIKE '%son'
+  AND s NOT LIKE '%car%'
+  AND t > 0
+  AND si BETWEEN 300 AND 400
+  ORDER BY s DESC
+  LIMIT 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+101	327	11.48	gabriella ellison
+15	334	11.12	jessica robinson
+7	320	11.54	bob ellison
+PREHOOK: query: EXPLAIN SELECT t, si, d, s FROM tbl_pred
+  WHERE d >= ROUND(9.99)
+  AND d < 12
+  AND t IS NOT NULL
+  AND s LIKE '%son'
+  AND s NOT LIKE '%car%'
+  AND t > 0
+  AND si BETWEEN 300 AND 400
+  ORDER BY s DESC
+  LIMIT 3
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT t, si, d, s FROM tbl_pred
+  WHERE d >= ROUND(9.99)
+  AND d < 12
+  AND t IS NOT NULL
+  AND s LIKE '%son'
+  AND s NOT LIKE '%car%'
+  AND t > 0
+  AND si BETWEEN 300 AND 400
+  ORDER BY s DESC
+  LIMIT 3
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: tbl_pred
+            Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: ((((((d >= 10.0) and (d < 12.0)) and (s like '%son')) and (not (s like '%car%'))) and (t > 0)) and si BETWEEN 300 AND 400) (type: boolean)
+              Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col3 (type: string)
+                  sort order: -
+                  Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 3
+            Statistics: Num rows: 3 Data size: 33 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 3 Data size: 33 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 3
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN SELECT t, si, d, s FROM tbl_pred
+  WHERE d >= ROUND(9.99)
+  AND d < 12
+  AND t IS NOT NULL
+  AND s LIKE '%son'
+  AND s NOT LIKE '%car%'
+  AND t > 0
+  AND si BETWEEN 300 AND 400
+  ORDER BY s DESC
+  LIMIT 3
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT t, si, d, s FROM tbl_pred
+  WHERE d >= ROUND(9.99)
+  AND d < 12
+  AND t IS NOT NULL
+  AND s LIKE '%son'
+  AND s NOT LIKE '%car%'
+  AND t > 0
+  AND si BETWEEN 300 AND 400
+  ORDER BY s DESC
+  LIMIT 3
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: tbl_pred
+            filterExpr: ((((((d >= 10.0) and (d < 12.0)) and (s like '%son')) and (not (s like '%car%'))) and (t > 0)) and si BETWEEN 300 AND 400) (type: boolean)
+            Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: ((((((d >= 10.0) and (d < 12.0)) and (s like '%son')) and (not (s like '%car%'))) and (t > 0)) and si BETWEEN 300 AND 400) (type: boolean)
+              Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col3 (type: string)
+                  sort order: -
+                  Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 3
+            Statistics: Num rows: 3 Data size: 33 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 3 Data size: 33 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 3
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT t, si, d, s FROM tbl_pred
+  WHERE t > 10
+  AND t <> 101
+  AND d >= ROUND(9.99)
+  AND d < 12
+  AND t IS NOT NULL
+  AND s LIKE '%son'
+  AND s NOT LIKE '%car%'
+  AND t > 0
+  AND si BETWEEN 300 AND 400
+  SORT BY s DESC
+  LIMIT 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT t, si, d, s FROM tbl_pred
+  WHERE t > 10
+  AND t <> 101
+  AND d >= ROUND(9.99)
+  AND d < 12
+  AND t IS NOT NULL
+  AND s LIKE '%son'
+  AND s NOT LIKE '%car%'
+  AND t > 0
+  AND si BETWEEN 300 AND 400
+  SORT BY s DESC
+  LIMIT 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+15	334	11.12	jessica robinson
+PREHOOK: query: SELECT t, si, d, s FROM tbl_pred
+  WHERE t > 10
+  AND t <> 101
+  AND d >= ROUND(9.99)
+  AND d < 12
+  AND t IS NOT NULL
+  AND s LIKE '%son'
+  AND s NOT LIKE '%car%'
+  AND t > 0
+  AND si BETWEEN 300 AND 400
+  SORT BY s DESC
+  LIMIT 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT t, si, d, s FROM tbl_pred
+  WHERE t > 10
+  AND t <> 101
+  AND d >= ROUND(9.99)
+  AND d < 12
+  AND t IS NOT NULL
+  AND s LIKE '%son'
+  AND s NOT LIKE '%car%'
+  AND t > 0
+  AND si BETWEEN 300 AND 400
+  SORT BY s DESC
+  LIMIT 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+15	334	11.12	jessica robinson
+PREHOOK: query: SELECT f, i, b FROM tbl_pred
+  WHERE f IS NOT NULL
+  AND f < 123.2
+  AND f > 1.92
+  AND f >= 9.99
+  AND f BETWEEN 1.92 AND 123.2
+  AND i IS NOT NULL
+  AND i < 67627
+  AND i > 60627
+  AND i >= 60626
+  AND i BETWEEN 60626 AND 67627
+  AND b IS NOT NULL
+  AND b < 4294967861
+  AND b > 4294967261
+  AND b >= 4294967260
+  AND b BETWEEN 4294967261 AND 4294967861
+  SORT BY f DESC
+  LIMIT 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT f, i, b FROM tbl_pred
+  WHERE f IS NOT NULL
+  AND f < 123.2
+  AND f > 1.92
+  AND f >= 9.99
+  AND f BETWEEN 1.92 AND 123.2
+  AND i IS NOT NULL
+  AND i < 67627
+  AND i > 60627
+  AND i >= 60626
+  AND i BETWEEN 60626 AND 67627
+  AND b IS NOT NULL
+  AND b < 4294967861
+  AND b > 4294967261
+  AND b >= 4294967260
+  AND b BETWEEN 4294967261 AND 4294967861
+  SORT BY f DESC
+  LIMIT 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+99.68	65658	4294967503
+99.91	65763	4294967324
+99.92	65661	4294967404
+PREHOOK: query: EXPLAIN SELECT t, si, d, s FROM tbl_pred
+  WHERE t > 10
+  AND t <> 101
+  AND d >= ROUND(9.99)
+  AND d < 12
+  AND t IS NOT NULL
+  AND s LIKE '%son'
+  AND s NOT LIKE '%car%'
+  AND t > 0
+  AND si BETWEEN 300 AND 400
+  SORT BY s DESC
+  LIMIT 3
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT t, si, d, s FROM tbl_pred
+  WHERE t > 10
+  AND t <> 101
+  AND d >= ROUND(9.99)
+  AND d < 12
+  AND t IS NOT NULL
+  AND s LIKE '%son'
+  AND s NOT LIKE '%car%'
+  AND t > 0
+  AND si BETWEEN 300 AND 400
+  SORT BY s DESC
+  LIMIT 3
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: tbl_pred
+            Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: ((((((((t > 10) and (t <> 101)) and (d >= 10.0)) and (d < 12.0)) and (s like '%son')) and (not (s like '%car%'))) and (t > 0)) and si BETWEEN 300 AND 400) (type: boolean)
+              Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col3 (type: string)
+                  sort order: -
+                  Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 3
+            Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col3 (type: string)
+              sort order: -
+              Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 3
+            Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 3
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN SELECT t, si, d, s FROM tbl_pred
+  WHERE t > 10
+  AND t <> 101
+  AND d >= ROUND(9.99)
+  AND d < 12
+  AND t IS NOT NULL
+  AND s LIKE '%son'
+  AND s NOT LIKE '%car%'
+  AND t > 0
+  AND si BETWEEN 300 AND 400
+  SORT BY s DESC
+  LIMIT 3
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT t, si, d, s FROM tbl_pred
+  WHERE t > 10
+  AND t <> 101
+  AND d >= ROUND(9.99)
+  AND d < 12
+  AND t IS NOT NULL
+  AND s LIKE '%son'
+  AND s NOT LIKE '%car%'
+  AND t > 0
+  AND si BETWEEN 300 AND 400
+  SORT BY s DESC
+  LIMIT 3
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: tbl_pred
+            filterExpr: ((((((((t > 10) and (t <> 101)) and (d >= 10.0)) and (d < 12.0)) and (s like '%son')) and (not (s like '%car%'))) and (t > 0)) and si BETWEEN 300 AND 400) (type: boolean)
+            Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: ((((((((t > 10) and (t <> 101)) and (d >= 10.0)) and (d < 12.0)) and (s like '%son')) and (not (s like '%car%'))) and (t > 0)) and si BETWEEN 300 AND 400) (type: boolean)
+              Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col3 (type: string)
+                  sort order: -
+                  Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 3
+            Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col3 (type: string)
+              sort order: -
+              Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 3
+            Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 3
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN SELECT f, i, b FROM tbl_pred
+  WHERE f IS NOT NULL
+  AND f < 123.2
+  AND f > 1.92
+  AND f >= 9.99
+  AND f BETWEEN 1.92 AND 123.2
+  AND i IS NOT NULL
+  AND i < 67627
+  AND i > 60627
+  AND i >= 60626
+  AND i BETWEEN 60626 AND 67627
+  AND b IS NOT NULL
+  AND b < 4294967861
+  AND b > 4294967261
+  AND b >= 4294967260
+  AND b BETWEEN 4294967261 AND 4294967861
+  SORT BY f DESC
+  LIMIT 3
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT f, i, b FROM tbl_pred
+  WHERE f IS NOT NULL
+  AND f < 123.2
+  AND f > 1.92
+  AND f >= 9.99
+  AND f BETWEEN 1.92 AND 123.2
+  AND i IS NOT NULL
+  AND i < 67627
+  AND i > 60627
+  AND i >= 60626
+  AND i BETWEEN 60626 AND 67627
+  AND b IS NOT NULL
+  AND b < 4294967861
+  AND b > 4294967261
+  AND b >= 4294967260
+  AND b BETWEEN 4294967261 AND 4294967861
+  SORT BY f DESC
+  LIMIT 3
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: tbl_pred
+            filterExpr: ((((((((((((f < 123.2) and (f > 1.92)) and (f >= 9.99)) and f BETWEEN 1.92 AND 123.2) and (i < 67627)) and (i > 60627)) and (i >= 60626)) and i BETWEEN 60626 AND 67627) and (b < 4294967861)) and (b > 4294967261)) and (b >= 4294967260)) and b BETWEEN 4294967261 AND 4294967861) (type: boolean)
+            Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: ((((((((((((f < 123.2) and (f > 1.92)) and (f >= 9.99)) and f BETWEEN 1.92 AND 123.2) and (i < 67627)) and (i > 60627)) and (i >= 60626)) and i BETWEEN 60626 AND 67627) and (b < 4294967861)) and (b > 4294967261)) and (b >= 4294967260)) and b BETWEEN 4294967261 AND 4294967861) (type: boolean)
+              Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: f (type: float), i (type: int), b (type: bigint)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: float)
+                  sort order: -
+                  Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: int), _col2 (type: bigint)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: float), VALUE._col0 (type: int), VALUE._col1 (type: bigint)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 3
+            Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: float)
+              sort order: -
+              Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: int), _col2 (type: bigint)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: float), VALUE._col0 (type: int), VALUE._col1 (type: bigint)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 3
+            Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 3
+      Processor Tree:
+        ListSink
+
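
The golden file above pivots on hive.optimize.index.filter: with the flag off, the plan carries only a Filter Operator; with it on, the same predicate also appears as filterExpr on the TableScan, meaning it is handed down to the Parquet reader. A minimal JDBC sketch of that check, assuming a HiveServer2 at localhost:10000, the hive-jdbc driver on the classpath, and the tbl_pred table loaded as in the test (the class name and connection details are illustrative, not part of the patch):

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.ResultSet;
    import java.sql.Statement;

    public class PushdownExplainCheck {
      public static void main(String[] args) throws Exception {
        Class.forName("org.apache.hive.jdbc.HiveDriver"); // hive-jdbc must be on the classpath
        try (Connection conn = DriverManager.getConnection(
                 "jdbc:hive2://localhost:10000/default", "", "");
             Statement stmt = conn.createStatement()) {
          // Flag off: the plan shows a Filter Operator but no filterExpr on the TableScan.
          stmt.execute("SET hive.optimize.index.filter=false");
          printPlan(stmt);
          // Flag on: the same predicate should also appear as filterExpr on the TableScan,
          // i.e. it is pushed down to the Parquet record reader.
          stmt.execute("SET hive.optimize.index.filter=true");
          printPlan(stmt);
        }
      }

      private static void printPlan(Statement stmt) throws Exception {
        try (ResultSet rs = stmt.executeQuery(
            "EXPLAIN SELECT t, s FROM tbl_pred WHERE t <=> -1 AND s LIKE 'bob%'")) {
          while (rs.next()) {
            System.out.println(rs.getString(1));
          }
        }
      }
    }

Both plans should contain the Filter Operator; only the second should add filterExpr to the TableScan, matching the paired EXPLAIN outputs in the golden file.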


[40/50] [abbrv] hive git commit: HIVE-11657 : HIVE-2573 introduces some issues during metastore init (and CLI init) (Sergey Shelukhin, reviewed by Sushanth Sowmyan)

Posted by xu...@apache.org.
HIVE-11657 : HIVE-2573 introduces some issues during metastore init (and CLI init) (Sergey Shelukhin, reviewed by Sushanth Sowmyan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/730a4040
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/730a4040
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/730a4040

Branch: refs/heads/beeline-cli
Commit: 730a40401c81e18fe773ac41012125184f776a04
Parents: bb4f5e7
Author: Sergey Shelukhin <se...@apache.org>
Authored: Thu Sep 3 19:57:40 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Thu Sep 3 19:57:40 2015 -0700

----------------------------------------------------------------------
 .../hive/metastore/RetryingMetaStoreClient.java | 32 +++++++----
 .../hadoop/hive/ql/exec/FunctionTask.java       |  2 +-
 .../apache/hadoop/hive/ql/metadata/Hive.java    | 56 ++++++++++++++++----
 3 files changed, 70 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/730a4040/metastore/src/java/org/apache/hadoop/hive/metastore/RetryingMetaStoreClient.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/RetryingMetaStoreClient.java b/metastore/src/java/org/apache/hadoop/hive/metastore/RetryingMetaStoreClient.java
index e282981..5087098 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/RetryingMetaStoreClient.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/RetryingMetaStoreClient.java
@@ -161,16 +161,27 @@ public class RetryingMetaStoreClient implements InvocationHandler {
       } catch (UndeclaredThrowableException e) {
         throw e.getCause();
       } catch (InvocationTargetException e) {
-        if ((e.getCause() instanceof TApplicationException) ||
-            (e.getCause() instanceof TProtocolException) ||
-            (e.getCause() instanceof TTransportException)) {
-          caughtException = (TException) e.getCause();
-        } else if ((e.getCause() instanceof MetaException) &&
-            e.getCause().getMessage().matches
-            ("(?s).*(JDO[a-zA-Z]*|TApplication|TProtocol|TTransport)Exception.*")) {
-          caughtException = (MetaException) e.getCause();
+        Throwable t = e.getCause();
+        if (t instanceof TApplicationException) {
+          TApplicationException tae = (TApplicationException)t;
+          switch (tae.getType()) {
+          case TApplicationException.UNSUPPORTED_CLIENT_TYPE:
+          case TApplicationException.UNKNOWN_METHOD:
+          case TApplicationException.WRONG_METHOD_NAME:
+          case TApplicationException.INVALID_PROTOCOL:
+            throw t;
+          default:
+            // TODO: most other options are probably unrecoverable... throw?
+            caughtException = tae;
+          }
+        } else if ((t instanceof TProtocolException) || (t instanceof TTransportException)) {
+          // TODO: most protocol exceptions are probably unrecoverable... throw?
+          caughtException = (TException)t;
+        } else if ((t instanceof MetaException) && t.getMessage().matches(
+            "(?s).*(JDO[a-zA-Z]*|TProtocol|TTransport)Exception.*")) {
+          caughtException = (MetaException)t;
         } else {
-          throw e.getCause();
+          throw t;
         }
       } catch (MetaException e) {
         if (e.getMessage().matches("(?s).*(IO|TTransport)Exception.*")) {
@@ -180,7 +191,8 @@ public class RetryingMetaStoreClient implements InvocationHandler {
         }
       }
 
-      if (retriesMade >=  retryLimit) {
+
+      if (retriesMade >= retryLimit) {
         throw caughtException;
       }
       retriesMade++;

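The rewritten catch block above stops retrying Thrift failures that can never succeed (client/server method or protocol mismatches) while still retrying transport-level hiccups. A standalone sketch of that classification, assuming this is the intended distinction (the name below is illustrative; the real logic lives inside RetryingMetaStoreClient.invoke):

    import org.apache.thrift.TApplicationException;
    import org.apache.thrift.protocol.TProtocolException;
    import org.apache.thrift.transport.TTransportException;

    final class RetryClassifierSketch {
      /** True if the call is worth retrying, false if the failure should propagate. */
      static boolean isRetryable(Throwable t) {
        if (t instanceof TApplicationException) {
          switch (((TApplicationException) t).getType()) {
            case TApplicationException.UNSUPPORTED_CLIENT_TYPE:
            case TApplicationException.UNKNOWN_METHOD:
            case TApplicationException.WRONG_METHOD_NAME:
            case TApplicationException.INVALID_PROTOCOL:
              return false; // client/server mismatch: no number of retries can fix this
            default:
              return true;  // the patch retries the rest, with a TODO questioning even that
          }
        }
        // Protocol/transport hiccups may be transient, e.g. a dropped connection.
        return (t instanceof TProtocolException) || (t instanceof TTransportException);
      }
    }
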
http://git-wip-us.apache.org/repos/asf/hive/blob/730a4040/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionTask.java
index 6390740..7671d29 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionTask.java
@@ -98,7 +98,7 @@ public class FunctionTask extends Task<FunctionWork> {
 
     if (work.getReloadFunctionDesc() != null) {
       try {
-        Hive.reloadFunctions();
+        Hive.get().reloadFunctions();
       } catch (Exception e) {
         setException(e);
         LOG.error(stringifyException(e));

http://git-wip-us.apache.org/repos/asf/hive/blob/730a4040/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index 396c070..c449aee 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -113,6 +113,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Set;
+import java.util.concurrent.atomic.AtomicInteger;
 
 import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE;
 import static org.apache.hadoop.hive.serde.serdeConstants.COLLECTION_DELIM;
@@ -160,24 +161,61 @@ public class Hive {
     }
   };
 
+  // Note that while this is an improvement over static initialization, it is still not,
+  // technically, valid, because nothing prevents us from connecting to several metastores
+  // in the same process. This will still only get the functions from the first metastore.
+  private final static AtomicInteger didRegisterAllFuncs = new AtomicInteger(0);
+  private final static int REG_FUNCS_NO = 0, REG_FUNCS_DONE = 2, REG_FUNCS_PENDING = 1;
+
   // register all permanent functions. need improvement
-  static {
+  private void registerAllFunctionsOnce() {
+    boolean breakLoop = false;
+    while (!breakLoop) {
+      int val = didRegisterAllFuncs.get();
+      switch (val) {
+      case REG_FUNCS_NO: {
+        if (didRegisterAllFuncs.compareAndSet(val, REG_FUNCS_PENDING)) {
+          breakLoop = true;
+          break;
+        }
+        continue;
+      }
+      case REG_FUNCS_PENDING: {
+        synchronized (didRegisterAllFuncs) {
+          try {
+            didRegisterAllFuncs.wait(100);
+          } catch (InterruptedException e) {
+            Thread.currentThread().interrupt();
+            return;
+          }
+        }
+        continue;
+      }
+      case REG_FUNCS_DONE: return;
+      default: throw new AssertionError(val);
+      }
+    }
     try {
       reloadFunctions();
     } catch (Exception e) {
-      LOG.warn("Failed to access metastore. This class should not accessed in runtime.",e);
+      LOG.warn("Failed to register all functions.", e);
+    } finally {
+      boolean result = didRegisterAllFuncs.compareAndSet(REG_FUNCS_PENDING, REG_FUNCS_DONE);
+      assert result;
+      synchronized (didRegisterAllFuncs) {
+        didRegisterAllFuncs.notifyAll();
+      }
     }
   }
 
-  public static void reloadFunctions() throws HiveException {
-    Hive db = Hive.get();
-    for (Function function : db.getAllFunctions()) {
+  public void reloadFunctions() throws HiveException {
+    for (Function function : getAllFunctions()) {
       String functionName = function.getFunctionName();
       try {
         LOG.info("Registering function " + functionName + " " + function.getClassName());
-        FunctionRegistry.registerPermanentFunction(
-                FunctionUtils.qualifyFunctionName(functionName, function.getDbName()), function.getClassName(),
-                false, FunctionTask.toFunctionResource(function.getResourceUris()));
+        FunctionRegistry.registerPermanentFunction(FunctionUtils.qualifyFunctionName(
+                    functionName, function.getDbName()), function.getClassName(), false,
+                    FunctionTask.toFunctionResource(function.getResourceUris()));
       } catch (Exception e) {
         LOG.warn("Failed to register persistent function " +
                 functionName + ":" + function.getClassName() + ". Ignore and continue.");
@@ -268,6 +306,7 @@ public class Hive {
    */
   private Hive(HiveConf c) throws HiveException {
     conf = c;
+    registerAllFunctionsOnce();
   }
 
 
@@ -2675,7 +2714,6 @@ private void constructOneLBLocationMap(FileStatus fSta,
     }
 
     //Check if different encryption zones
-    HadoopShims.HdfsFileStatus destStatus = null;
     HadoopShims.HdfsEncryptionShim hdfsEncryptionShim = SessionState.get().getHdfsEncryptionShim();
     return hdfsEncryptionShim != null && (hdfsEncryptionShim.isPathEncrypted(srcf) || hdfsEncryptionShim.isPathEncrypted(destf))
       && !hdfsEncryptionShim.arePathsOnSameEncryptionZone(srcf, destf);

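The registerAllFunctionsOnce() added above replaces a static initializer with a small three-state latch: the first caller flips NO to PENDING via compareAndSet and does the work; later callers park on the AtomicInteger until it reads DONE. A self-contained sketch of the same pattern, with illustrative names rather than the Hive class itself:

    import java.util.concurrent.atomic.AtomicInteger;

    final class OnceLatchSketch {
      private static final int NO = 0, PENDING = 1, DONE = 2;
      private static final AtomicInteger state = new AtomicInteger(NO);

      /** Runs init exactly once across threads; late callers block until it finishes. */
      static void runOnce(Runnable init) {
        while (true) {
          int s = state.get();
          if (s == DONE) {
            return;
          }
          if (s == NO && state.compareAndSet(NO, PENDING)) {
            try {
              init.run(); // the patch logs a failure here and continues; this sketch propagates
            } finally {
              state.set(DONE);
              synchronized (state) {
                state.notifyAll();
              }
            }
            return;
          }
          if (s == PENDING) {
            synchronized (state) {
              try {
                state.wait(100); // timed, so a notify missed between get() and wait() costs at most 100 ms
              } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                return;
              }
            }
          }
        }
      }
    }

The 100 ms timed wait mirrors the patch: waiters re-check the state periodically instead of relying solely on notifyAll, which keeps the scheme safe even though the wait is not guarded by a condition re-check inside the monitor.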

[07/50] [abbrv] hive git commit: HIVE-11123 : Fix how the RDBMS product name is determined at the Metastore. (Shinichi Yamashita, reviewed by Sergey Shelukhin and Deepesh Khandelwal)

Posted by xu...@apache.org.
HIVE-11123 : Fix how the RDBMS product name is determined at the Metastore. (Shinichi Yamashita, reviewed by Sergey Shelukhin and Deepesh Khandelwal)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/fb152e45
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/fb152e45
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/fb152e45

Branch: refs/heads/beeline-cli
Commit: fb152e45064fcb2846b198ba14e7f7cc13ada4bb
Parents: 9670a2b
Author: Sergey Shelukhin <se...@apache.org>
Authored: Thu Aug 27 10:54:58 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Thu Aug 27 10:54:58 2015 -0700

----------------------------------------------------------------------
 .../hive/metastore/MetaStoreDirectSql.java      | 40 +++++++-------------
 1 file changed, 13 insertions(+), 27 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/fb152e45/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
index 5776ec6..522fcc2 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
@@ -148,16 +148,16 @@ class MetaStoreDirectSql {
 
   private DB determineDbType() {
     DB dbType = DB.OTHER;
-    if (runDbCheck("SET @@session.sql_mode=ANSI_QUOTES", "MySql")) {
-      dbType = DB.MYSQL;
-    } else if (runDbCheck("SELECT version FROM v$instance", "Oracle")) {
-      dbType = DB.ORACLE;
-    } else if (runDbCheck("SELECT @@version", "MSSQL")) {
-      dbType = DB.MSSQL;
-    } else {
-      // TODO: maybe we should use getProductName to identify all the DBs
-      String productName = getProductName();
-      if (productName != null && productName.toLowerCase().contains("derby")) {
+    String productName = getProductName();
+    if (productName != null) {
+      productName = productName.toLowerCase();
+      if (productName.contains("mysql")) {
+        dbType = DB.MYSQL;
+      } else if (productName.contains("oracle")) {
+        dbType = DB.ORACLE;
+      } else if (productName.contains("microsoft sql server")) {
+        dbType = DB.MSSQL;
+      } else if (productName.contains("derby")) {
         dbType = DB.DERBY;
       }
     }
@@ -210,6 +210,9 @@ class MetaStoreDirectSql {
 
   private boolean runTestQuery() {
     Transaction tx = pm.currentTransaction();
+    if (!tx.isActive()) {
+      tx.begin();
+    }
     Query query = null;
     // Run a self-test query. If it doesn't work, we will self-disable. What a PITA...
     String selfTestQuery = "select \"DB_ID\" from \"DBS\"";
@@ -261,23 +264,6 @@ class MetaStoreDirectSql {
     }
   }
 
-  private boolean runDbCheck(String queryText, String name) {
-    Transaction tx = pm.currentTransaction();
-    if (!tx.isActive()) {
-      tx.begin();
-    }
-    try {
-      executeNoResult(queryText);
-      return true;
-    } catch (Throwable t) {
-      LOG.debug(name + " check failed, assuming we are not on " + name + ": " + t.getMessage());
-      tx.rollback();
-      tx = pm.currentTransaction();
-      tx.begin();
-      return false;
-    }
-  }
-
   public Database getDatabase(String dbName) throws MetaException{
     Query queryDbSelector = null;
     Query queryDbParams = null;
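
The patch replaces per-vendor probe queries with a single product-name lookup.
A minimal standalone sketch of the same idea, assuming the patch's
getProductName() ultimately delegates to JDBC's DatabaseMetaData (the enum and
the raw java.sql.Connection parameter here are illustrative, not Hive's exact
API):

    import java.sql.Connection;
    import java.sql.SQLException;

    public class DbTypeSniffer {
      enum DB { MYSQL, ORACLE, MSSQL, DERBY, OTHER }

      static DB determineDbType(Connection conn) {
        try {
          // One metadata call instead of one trial query per vendor.
          String productName = conn.getMetaData().getDatabaseProductName();
          if (productName == null) {
            return DB.OTHER;
          }
          productName = productName.toLowerCase();
          if (productName.contains("mysql")) return DB.MYSQL;
          if (productName.contains("oracle")) return DB.ORACLE;
          if (productName.contains("microsoft sql server")) return DB.MSSQL;
          if (productName.contains("derby")) return DB.DERBY;
          return DB.OTHER;
        } catch (SQLException e) {
          return DB.OTHER; // mirror the patch's fall-through to DB.OTHER
        }
      }
    }

Besides being simpler, this avoids the side effects of the old probes: the
removed MySQL check actually executed "SET @@session.sql_mode=ANSI_QUOTES"
against whatever database happened to be connected.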


[19/50] [abbrv] hive git commit: "Alter index rebuild" statements submitted through HiveServer2 fail when Sentry is enabled (Aihua Xu, reviewed by Chao Sun and Ashutosh Chauhan)

Posted by xu...@apache.org.
"Alter index rebuild" statements submitted through HiveServer2 fail when Sentry is enabled (Aihua Xu, reviewed by Chao Sun and Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/dcf21cd6
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/dcf21cd6
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/dcf21cd6

Branch: refs/heads/beeline-cli
Commit: dcf21cd6fa98fb5db01ef661bb3b9f94d9ca2d15
Parents: af91308
Author: Aihua Xu <ai...@gmail.com>
Authored: Sat Aug 29 12:57:52 2015 -0700
Committer: Chao Sun <su...@apache.org>
Committed: Sat Aug 29 12:57:52 2015 -0700

----------------------------------------------------------------------
 .../apache/hadoop/hive/ql/optimizer/IndexUtils.java    | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/dcf21cd6/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IndexUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IndexUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IndexUtils.java
index 92cae67..0b30258 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IndexUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IndexUtils.java
@@ -49,6 +49,7 @@ import org.apache.hadoop.hive.ql.optimizer.physical.index.IndexWhereProcessor;
 import org.apache.hadoop.hive.ql.parse.ParseContext;
 import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.session.SessionState;
 
 /**
  * Utility class for index support.
@@ -213,13 +214,17 @@ public final class IndexUtils {
     return hive.getIndexes(table.getTTable().getDbName(), table.getTTable().getTableName(), max);
   }
 
-  public static Task<?> createRootTask(HiveConf builderConf, Set<ReadEntity> inputs,
-      Set<WriteEntity> outputs, StringBuilder command,
+  public static Task<?> createRootTask(
+      HiveConf builderConf,
+      Set<ReadEntity> inputs,
+      Set<WriteEntity> outputs,
+      StringBuilder command,
       LinkedHashMap<String, String> partSpec,
-      String indexTableName, String dbName){
+      String indexTableName,
+      String dbName){
     // Don't try to index optimize the query to build the index
     HiveConf.setBoolVar(builderConf, HiveConf.ConfVars.HIVEOPTINDEXFILTER, false);
-    Driver driver = new Driver(builderConf);
+    Driver driver = new Driver(builderConf, SessionState.get().getUserName());
     driver.compile(command.toString(), false);
 
     Task<?> rootTask = driver.getPlan().getRootTasks().get(0);


[33/50] [abbrv] hive git commit: HIVE-11604 : HIVE returns wrong results in some queries with a PTF function (Yongzhi Chen via Szehon)

Posted by xu...@apache.org.
HIVE-11604 : HIVE returns wrong results in some queries with a PTF function (Yongzhi Chen via Szehon)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/308ae90a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/308ae90a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/308ae90a

Branch: refs/heads/beeline-cli
Commit: 308ae90aa396a2d99660f6ccf931c031ce9aa8a1
Parents: 3ff3c6f
Author: Szehon Ho <sz...@cloudera.com>
Authored: Wed Sep 2 11:49:04 2015 -0700
Committer: Szehon Ho <sz...@cloudera.com>
Committed: Wed Sep 2 11:49:04 2015 -0700

----------------------------------------------------------------------
 .../ql/optimizer/IdentityProjectRemover.java    |  15 +
 .../queries/clientpositive/ptfgroupbyjoin.q     |  61 +++
 .../results/clientpositive/ptfgroupbyjoin.q.out | 519 +++++++++++++++++++
 .../clientpositive/tez/explainuser_1.q.out      |  31 +-
 4 files changed, 612 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/308ae90a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IdentityProjectRemover.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IdentityProjectRemover.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IdentityProjectRemover.java
index e3d3ce6..135b90b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IdentityProjectRemover.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IdentityProjectRemover.java
@@ -30,8 +30,10 @@ import com.google.common.collect.Iterators;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
 import org.apache.hadoop.hive.ql.exec.LateralViewForwardOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.PTFOperator;
 import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
 import org.apache.hadoop.hive.ql.exec.SelectOperator;
 import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
@@ -111,6 +113,19 @@ public class IdentityProjectRemover implements Transform {
         // For RS-SEL-RS case. reducer operator in reducer task cannot be null in task compiler
         return null;
       }
+      List<Operator<? extends OperatorDesc>> ancestorList = new ArrayList<Operator<? extends OperatorDesc>>();
+      ancestorList.addAll(sel.getParentOperators());
+      while (!ancestorList.isEmpty()) {
+        Operator<? extends OperatorDesc> curParent = ancestorList.remove(0);
+        // PTF needs a SelectOp.
+        if ((curParent instanceof PTFOperator)) {
+          return null;
+        }
+        if ((curParent instanceof FilterOperator) && curParent.getParentOperators() != null) {
+          ancestorList.addAll(curParent.getParentOperators());
+        }
+      }
+
       if(sel.isIdentitySelect()) {
         parent.removeChildAndAdoptItsChildren(sel);
         LOG.debug("Identity project remover optimization removed : " + sel);

http://git-wip-us.apache.org/repos/asf/hive/blob/308ae90a/ql/src/test/queries/clientpositive/ptfgroupbyjoin.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/ptfgroupbyjoin.q b/ql/src/test/queries/clientpositive/ptfgroupbyjoin.q
new file mode 100644
index 0000000..61d034e
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/ptfgroupbyjoin.q
@@ -0,0 +1,61 @@
+create table tlb1 (id int, fkey int, val string);
+create table tlb2 (fid int, name string);
+insert into table tlb1 values(100,1,'abc');
+insert into table tlb1 values(200,1,'efg');
+insert into table tlb2 values(1, 'key1');
+
+explain 
+select ddd.id, ddd.fkey, aaa.name
+from (
+    select id, fkey, 
+    row_number() over (partition by id, fkey) as rnum
+    from tlb1 group by id, fkey
+ ) ddd 
+inner join tlb2 aaa on aaa.fid = ddd.fkey;
+
+select ddd.id, ddd.fkey, aaa.name
+from (
+    select id, fkey, 
+    row_number() over (partition by id, fkey) as rnum
+    from tlb1 group by id, fkey
+ ) ddd 
+inner join tlb2 aaa on aaa.fid = ddd.fkey;
+
+explain
+select ddd.id, ddd.fkey, aaa.name, ddd.rnum
+from (
+    select id, fkey,
+    row_number() over (partition by id, fkey) as rnum
+    from tlb1 group by id, fkey
+ ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey;
+
+select ddd.id, ddd.fkey, aaa.name, ddd.rnum
+from (
+    select id, fkey,
+    row_number() over (partition by id, fkey) as rnum
+    from tlb1 group by id, fkey
+ ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey;
+
+
+set hive.optimize.ppd=false;
+
+explain 
+select ddd.id, ddd.fkey, aaa.name
+from (
+    select id, fkey,
+    row_number() over (partition by id, fkey) as rnum
+    from tlb1 group by id, fkey
+ ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey;
+
+select ddd.id, ddd.fkey, aaa.name
+from (
+    select id, fkey, 
+    row_number() over (partition by id, fkey) as rnum
+    from tlb1 group by id, fkey
+ ) ddd 
+inner join tlb2 aaa on aaa.fid = ddd.fkey;
+
+

http://git-wip-us.apache.org/repos/asf/hive/blob/308ae90a/ql/src/test/results/clientpositive/ptfgroupbyjoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/ptfgroupbyjoin.q.out b/ql/src/test/results/clientpositive/ptfgroupbyjoin.q.out
new file mode 100644
index 0000000..9368df9
--- /dev/null
+++ b/ql/src/test/results/clientpositive/ptfgroupbyjoin.q.out
@@ -0,0 +1,519 @@
+PREHOOK: query: create table tlb1 (id int, fkey int, val string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tlb1
+POSTHOOK: query: create table tlb1 (id int, fkey int, val string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tlb1
+PREHOOK: query: create table tlb2 (fid int, name string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tlb2
+POSTHOOK: query: create table tlb2 (fid int, name string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tlb2
+PREHOOK: query: insert into table tlb1 values(100,1,'abc')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@tlb1
+POSTHOOK: query: insert into table tlb1 values(100,1,'abc')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@tlb1
+POSTHOOK: Lineage: tlb1.fkey EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: tlb1.id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: tlb1.val SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+PREHOOK: query: insert into table tlb1 values(200,1,'efg')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@tlb1
+POSTHOOK: query: insert into table tlb1 values(200,1,'efg')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@tlb1
+POSTHOOK: Lineage: tlb1.fkey EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: tlb1.id EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: tlb1.val SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+PREHOOK: query: insert into table tlb2 values(1, 'key1')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__3
+PREHOOK: Output: default@tlb2
+POSTHOOK: query: insert into table tlb2 values(1, 'key1')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__3
+POSTHOOK: Output: default@tlb2
+POSTHOOK: Lineage: tlb2.fid EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: tlb2.name SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: explain 
+select ddd.id, ddd.fkey, aaa.name
+from (
+    select id, fkey, 
+    row_number() over (partition by id, fkey) as rnum
+    from tlb1 group by id, fkey
+ ) ddd 
+inner join tlb2 aaa on aaa.fid = ddd.fkey
+PREHOOK: type: QUERY
+POSTHOOK: query: explain 
+select ddd.id, ddd.fkey, aaa.name
+from (
+    select id, fkey, 
+    row_number() over (partition by id, fkey) as rnum
+    from tlb1 group by id, fkey
+ ) ddd 
+inner join tlb2 aaa on aaa.fid = ddd.fkey
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: tlb1
+            Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: id (type: int), fkey (type: int)
+              outputColumnNames: id, fkey
+              Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                keys: id (type: int), fkey (type: int)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int), _col1 (type: int)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                  Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: int), KEY._col1 (type: int)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col0: int, _col1: int
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: _col0, _col1
+                  partition by: _col0, _col1
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: row_number_window_0
+                        name: row_number
+                        window function: GenericUDAFRowNumberEvaluator
+                        window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                        isPivotResult: true
+            Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: _col1 is not null (type: boolean)
+              Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: _col0 (type: int), _col1 (type: int)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col1 (type: int)
+              sort order: +
+              Map-reduce partition columns: _col1 (type: int)
+              Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: int)
+          TableScan
+            alias: aaa
+            Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: fid is not null (type: boolean)
+              Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: fid (type: int)
+                sort order: +
+                Map-reduce partition columns: fid (type: int)
+                Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                value expressions: name (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 _col1 (type: int)
+            1 fid (type: int)
+          outputColumnNames: _col0, _col1, _col4
+          Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int), _col4 (type: string)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select ddd.id, ddd.fkey, aaa.name
+from (
+    select id, fkey, 
+    row_number() over (partition by id, fkey) as rnum
+    from tlb1 group by id, fkey
+ ) ddd 
+inner join tlb2 aaa on aaa.fid = ddd.fkey
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tlb1
+PREHOOK: Input: default@tlb2
+#### A masked pattern was here ####
+POSTHOOK: query: select ddd.id, ddd.fkey, aaa.name
+from (
+    select id, fkey, 
+    row_number() over (partition by id, fkey) as rnum
+    from tlb1 group by id, fkey
+ ) ddd 
+inner join tlb2 aaa on aaa.fid = ddd.fkey
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tlb1
+POSTHOOK: Input: default@tlb2
+#### A masked pattern was here ####
+200	1	key1
+100	1	key1
+PREHOOK: query: explain
+select ddd.id, ddd.fkey, aaa.name, ddd.rnum
+from (
+    select id, fkey,
+    row_number() over (partition by id, fkey) as rnum
+    from tlb1 group by id, fkey
+ ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select ddd.id, ddd.fkey, aaa.name, ddd.rnum
+from (
+    select id, fkey,
+    row_number() over (partition by id, fkey) as rnum
+    from tlb1 group by id, fkey
+ ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: tlb1
+            Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: id (type: int), fkey (type: int)
+              outputColumnNames: id, fkey
+              Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                keys: id (type: int), fkey (type: int)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int), _col1 (type: int)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                  Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: int), KEY._col1 (type: int)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col0: int, _col1: int
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: _col0, _col1
+                  partition by: _col0, _col1
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: row_number_window_0
+                        name: row_number
+                        window function: GenericUDAFRowNumberEvaluator
+                        window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                        isPivotResult: true
+            Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: _col1 is not null (type: boolean)
+              Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: _col0 (type: int), _col1 (type: int), row_number_window_0 (type: int)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col1 (type: int)
+              sort order: +
+              Map-reduce partition columns: _col1 (type: int)
+              Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: int), _col2 (type: int)
+          TableScan
+            alias: aaa
+            Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: fid is not null (type: boolean)
+              Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: fid (type: int)
+                sort order: +
+                Map-reduce partition columns: fid (type: int)
+                Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                value expressions: name (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 _col1 (type: int)
+            1 fid (type: int)
+          outputColumnNames: _col0, _col1, _col2, _col4
+          Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int), _col4 (type: string), _col2 (type: int)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select ddd.id, ddd.fkey, aaa.name, ddd.rnum
+from (
+    select id, fkey,
+    row_number() over (partition by id, fkey) as rnum
+    from tlb1 group by id, fkey
+ ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tlb1
+PREHOOK: Input: default@tlb2
+#### A masked pattern was here ####
+POSTHOOK: query: select ddd.id, ddd.fkey, aaa.name, ddd.rnum
+from (
+    select id, fkey,
+    row_number() over (partition by id, fkey) as rnum
+    from tlb1 group by id, fkey
+ ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tlb1
+POSTHOOK: Input: default@tlb2
+#### A masked pattern was here ####
+200	1	key1	1
+100	1	key1	1
+PREHOOK: query: explain 
+select ddd.id, ddd.fkey, aaa.name
+from (
+    select id, fkey,
+    row_number() over (partition by id, fkey) as rnum
+    from tlb1 group by id, fkey
+ ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey
+PREHOOK: type: QUERY
+POSTHOOK: query: explain 
+select ddd.id, ddd.fkey, aaa.name
+from (
+    select id, fkey,
+    row_number() over (partition by id, fkey) as rnum
+    from tlb1 group by id, fkey
+ ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: tlb1
+            Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: id (type: int), fkey (type: int)
+              outputColumnNames: id, fkey
+              Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                keys: id (type: int), fkey (type: int)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int), _col1 (type: int)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                  Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: int), KEY._col1 (type: int)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col0: int, _col1: int
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: _col0, _col1
+                  partition by: _col0, _col1
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: row_number_window_0
+                        name: row_number
+                        window function: GenericUDAFRowNumberEvaluator
+                        window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                        isPivotResult: true
+            Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: int)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+              Filter Operator
+                predicate: _col1 is not null (type: boolean)
+                Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col1 (type: int)
+              sort order: +
+              Map-reduce partition columns: _col1 (type: int)
+              Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: int)
+          TableScan
+            alias: aaa
+            Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: fid is not null (type: boolean)
+              Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: fid (type: int)
+                sort order: +
+                Map-reduce partition columns: fid (type: int)
+                Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                value expressions: name (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 _col1 (type: int)
+            1 fid (type: int)
+          outputColumnNames: _col0, _col1, _col4
+          Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int), _col4 (type: string)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select ddd.id, ddd.fkey, aaa.name
+from (
+    select id, fkey, 
+    row_number() over (partition by id, fkey) as rnum
+    from tlb1 group by id, fkey
+ ) ddd 
+inner join tlb2 aaa on aaa.fid = ddd.fkey
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tlb1
+PREHOOK: Input: default@tlb2
+#### A masked pattern was here ####
+POSTHOOK: query: select ddd.id, ddd.fkey, aaa.name
+from (
+    select id, fkey, 
+    row_number() over (partition by id, fkey) as rnum
+    from tlb1 group by id, fkey
+ ) ddd 
+inner join tlb2 aaa on aaa.fid = ddd.fkey
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tlb1
+POSTHOOK: Input: default@tlb2
+#### A masked pattern was here ####
+200	1	key1
+100	1	key1

http://git-wip-us.apache.org/repos/asf/hive/blob/308ae90a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
index 9756b0c..4d797f2 100644
--- a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
@@ -6967,22 +6967,25 @@ Stage-0
                               Map-reduce partition columns:rand() (type: double)
                               sort order:+++
                               Statistics:Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
-                              PTF Operator [PTF_3]
-                                 Function definitions:[{"Input definition":{"type:":"TABLE"}},{"Partition table definition":{"partition by:":"_col2","name:":"noop","order by:":"_col1"}}]
+                              Select Operator [SEL_4]
+                                 outputColumnNames:["_col1","_col2","_col5"]
                                  Statistics:Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
-                                 Select Operator [SEL_2]
-                                 |  outputColumnNames:["_col1","_col2","_col5"]
-                                 |  Statistics:Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
-                                 |<-Map 1 [SIMPLE_EDGE]
-                                    Reduce Output Operator [RS_1]
-                                       key expressions:p_mfgr (type: string), p_name (type: string)
-                                       Map-reduce partition columns:p_mfgr (type: string)
-                                       sort order:++
-                                       Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE
-                                       value expressions:p_size (type: int)
-                                       TableScan [TS_0]
-                                          alias:part
+                                 PTF Operator [PTF_3]
+                                    Function definitions:[{"Input definition":{"type:":"TABLE"}},{"Partition table definition":{"partition by:":"_col2","name:":"noop","order by:":"_col1"}}]
+                                    Statistics:Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
+                                    Select Operator [SEL_2]
+                                    |  outputColumnNames:["_col1","_col2","_col5"]
+                                    |  Statistics:Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
+                                    |<-Map 1 [SIMPLE_EDGE]
+                                       Reduce Output Operator [RS_1]
+                                          key expressions:p_mfgr (type: string), p_name (type: string)
+                                          Map-reduce partition columns:p_mfgr (type: string)
+                                          sort order:++
                                           Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE
+                                          value expressions:p_size (type: int)
+                                          TableScan [TS_0]
+                                             alias:part
+                                             Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE
 
 PREHOOK: query: explain
 select abc.* 


[13/50] [abbrv] hive git commit: Revert "HIVE-10175: DynamicPartitionPruning lacks a fast-path exit for large IN() queries (Gopal V, reviewed by Jesus Camacho Rodriguez)"

Posted by xu...@apache.org.
Revert "HIVE-10175: DynamicPartitionPruning lacks a fast-path exit for large IN() queries (Gopal V, reviewed by Jesus Camacho Rodriguez)"

This reverts commit b6d1143aa7aaa20de035898f34df2d6b581895b6.


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2ef40ca6
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2ef40ca6
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2ef40ca6

Branch: refs/heads/beeline-cli
Commit: 2ef40ca66ab0b9fbcf9bca5e6b8c5d7bd6d580c6
Parents: ce25816
Author: Gopal V <go...@apache.org>
Authored: Fri Aug 28 02:43:44 2015 -0700
Committer: Gopal V <go...@apache.org>
Committed: Fri Aug 28 02:43:44 2015 -0700

----------------------------------------------------------------------
 .../optimizer/DynamicPartitionPruningOptimization.java  | 12 ------------
 1 file changed, 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/2ef40ca6/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
index 5ebd28a..f475926 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
@@ -189,18 +189,6 @@ public class DynamicPartitionPruningOptimization implements NodeProcessor {
       LOG.debug("TableScan: " + ts);
     }
 
-    if (ts == null) {
-      // could be a reduce sink
-      LOG.warn("Could not find the table scan for " + filter); 
-      return null;
-    } else {
-      Table table = ts.getConf().getTableMetadata();
-      if (table != null && !table.isPartitioned()) {
-        // table is not partitioned, skip optimizer
-        return null;
-      }
-    }
-
     // collect the dynamic pruning conditions
     removerContext.dynLists.clear();
     walkExprTree(desc.getPredicate(), removerContext);


[22/50] [abbrv] hive git commit: HIVE-11701 : Make tez tests' AM logs work with the new log4j2 changes (Sergey Shelukhin, reviewed by Prasanth Jayachandran)

Posted by xu...@apache.org.
HIVE-11701 : Make tez tests' AM logs work with the new log4j2 changes (Sergey Shelukhin, reviewed by Prasanth Jayachandran)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a338f33c
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a338f33c
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a338f33c

Branch: refs/heads/beeline-cli
Commit: a338f33cc2b17d90d391466090839f76bebc1163
Parents: 78e7015
Author: Sergey Shelukhin <se...@apache.org>
Authored: Mon Aug 31 15:54:08 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Mon Aug 31 15:54:08 2015 -0700

----------------------------------------------------------------------
 data/conf/tez/hive-site.xml | 5 +++++
 1 file changed, 5 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/a338f33c/data/conf/tez/hive-site.xml
----------------------------------------------------------------------
diff --git a/data/conf/tez/hive-site.xml b/data/conf/tez/hive-site.xml
index 2f9415a..b4abe90 100644
--- a/data/conf/tez/hive-site.xml
+++ b/data/conf/tez/hive-site.xml
@@ -258,4 +258,9 @@
   <value> -Dlog4j.configurationFile=tez-container-log4j2.xml -Dtez.container.log.level=INFO -Dtez.container.root.logger=CLA </value>
 </property>
 
+<property>
+  <name>tez.am.launch.cmd-opts</name>
+  <value> -Dlog4j.configurationFile=tez-container-log4j2.xml -Dtez.container.log.level=INFO -Dtez.container.root.logger=CLA </value>
+</property>
+
 </configuration>


[27/50] [abbrv] hive git commit: HIVE-11536 follow-up: the earlier commit seems to have had strange line endings

Posted by xu...@apache.org.
HIVE-11536 follow-up: the earlier commit seems to have had strange line endings


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c0690a69
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c0690a69
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c0690a69

Branch: refs/heads/beeline-cli
Commit: c0690a69dcd4976435f8b33084f9b9b3c0c16889
Parents: b22e54e
Author: Alan Gates <ga...@hortonworks.com>
Authored: Tue Sep 1 09:57:24 2015 -0700
Committer: Alan Gates <ga...@hortonworks.com>
Committed: Tue Sep 1 09:57:24 2015 -0700

----------------------------------------------------------------------
 .../src/test/queries/db/rowtype_attribute.sql   |  42 ++++----
 hplsql/src/test/queries/db/type_attribute.sql   |  14 +--
 .../local/create_procedure_no_params.sql        |  36 +++----
 .../test/queries/offline/create_table_ora.sql   | 104 +++++++++----------
 4 files changed, 98 insertions(+), 98 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/c0690a69/hplsql/src/test/queries/db/rowtype_attribute.sql
----------------------------------------------------------------------
diff --git a/hplsql/src/test/queries/db/rowtype_attribute.sql b/hplsql/src/test/queries/db/rowtype_attribute.sql
index 6a84b57..2fc65ce 100644
--- a/hplsql/src/test/queries/db/rowtype_attribute.sql
+++ b/hplsql/src/test/queries/db/rowtype_attribute.sql
@@ -1,22 +1,22 @@
-DECLARE
-  v1 default.src%ROWTYPE;
-  v2 src %ROWTYPE;
-  v3 src % ROWTYPE;  
-  CURSOR c1 IS SELECT 'A' AS key, 'B' AS value FROM src LIMIT 1;
-BEGIN
-  SELECT 'A' AS key, 'B' AS value INTO v1 FROM src LIMIT 1;
-  PRINT v1.key || v1.value;
-  
-  OPEN c1;
-  FETCH c1 INTO v2;
-  PRINT v2.key || v2.value;
-  CLOSE c1;
-  
-  FOR rec IN (SELECT 'A' AS key, 'B' AS value FROM src LIMIT 1)
-  LOOP
-    PRINT rec.key || rec.value;
-  END LOOP; 
-  
-  EXECUTE IMMEDIATE 'SELECT ''A'' AS key, ''B'' AS value FROM src LIMIT 1' INTO v3;
-  PRINT v3.key || v3.value; 
+DECLARE
+  v1 default.src%ROWTYPE;
+  v2 src %ROWTYPE;
+  v3 src % ROWTYPE;  
+  CURSOR c1 IS SELECT 'A' AS key, 'B' AS value FROM src LIMIT 1;
+BEGIN
+  SELECT 'A' AS key, 'B' AS value INTO v1 FROM src LIMIT 1;
+  PRINT v1.key || v1.value;
+  
+  OPEN c1;
+  FETCH c1 INTO v2;
+  PRINT v2.key || v2.value;
+  CLOSE c1;
+  
+  FOR rec IN (SELECT 'A' AS key, 'B' AS value FROM src LIMIT 1)
+  LOOP
+    PRINT rec.key || rec.value;
+  END LOOP; 
+  
+  EXECUTE IMMEDIATE 'SELECT ''A'' AS key, ''B'' AS value FROM src LIMIT 1' INTO v3;
+  PRINT v3.key || v3.value; 
 END
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/c0690a69/hplsql/src/test/queries/db/type_attribute.sql
----------------------------------------------------------------------
diff --git a/hplsql/src/test/queries/db/type_attribute.sql b/hplsql/src/test/queries/db/type_attribute.sql
index 2d93bfd..059c291 100644
--- a/hplsql/src/test/queries/db/type_attribute.sql
+++ b/hplsql/src/test/queries/db/type_attribute.sql
@@ -1,8 +1,8 @@
-DECLARE
-  v1 default.src.key%TYPE;
-  v2 src.Key %TYPE;
-  v3 src.key3 % TYPE;
-BEGIN
-  SELECT 'A', 'B', 1 INTO v1, v2, v3 FROM src LIMIT 1;
-  PRINT v1 || v2 || v3;
+DECLARE
+  v1 default.src.key%TYPE;
+  v2 src.Key %TYPE;
+  v3 src.key3 % TYPE;
+BEGIN
+  SELECT 'A', 'B', 1 INTO v1, v2, v3 FROM src LIMIT 1;
+  PRINT v1 || v2 || v3;
 END
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/c0690a69/hplsql/src/test/queries/local/create_procedure_no_params.sql
----------------------------------------------------------------------
diff --git a/hplsql/src/test/queries/local/create_procedure_no_params.sql b/hplsql/src/test/queries/local/create_procedure_no_params.sql
index 535ba98..c4c2992 100644
--- a/hplsql/src/test/queries/local/create_procedure_no_params.sql
+++ b/hplsql/src/test/queries/local/create_procedure_no_params.sql
@@ -1,19 +1,19 @@
-create procedure sp1
-begin
-  print 'a';
-end;
-
-create procedure sp2()
-begin
-  print 'b';
-end;
-
-call sp1;
-call sp1();
-sp1;
-sp1();
-
-call sp2;
-call sp2();
-sp2;
+create procedure sp1
+begin
+  print 'a';
+end;
+
+create procedure sp2()
+begin
+  print 'b';
+end;
+
+call sp1;
+call sp1();
+sp1;
+sp1();
+
+call sp2;
+call sp2();
+sp2;
 sp2();
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/c0690a69/hplsql/src/test/queries/offline/create_table_ora.sql
----------------------------------------------------------------------
diff --git a/hplsql/src/test/queries/offline/create_table_ora.sql b/hplsql/src/test/queries/offline/create_table_ora.sql
index bb1d9c7..40a543a 100644
--- a/hplsql/src/test/queries/offline/create_table_ora.sql
+++ b/hplsql/src/test/queries/offline/create_table_ora.sql
@@ -1,53 +1,53 @@
-CREATE TABLE ora_t1 (
-  n1   NUMBER(3,0),
-  v1   VARCHAR2(10)
-);
-
-CREATE TABLE "USER"."EMP" 
-   (    "EMPNO" NUMBER(4,0), 
-        "ENAME" VARCHAR2(10 BYTE), 
-        "JOB" VARCHAR2(9 BYTE), 
-        "MGR" NUMBER(4,0), 
-        "HIREDATE" DATE, 
-        "SAL" NUMBER(7,2), 
-        "COMM" NUMBER(7,2), 
-        "DEPTNO" NUMBER(2,0)
-   ) SEGMENT CREATION IMMEDIATE 
-  PCTFREE 10 PCTUSED 40 INITRANS 1 MAXTRANS 255 NOCOMPRESS LOGGING
-  STORAGE(INITIAL 65536 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645
-  PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL DEFAULT FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT)
-  TABLESPACE "USERS" ;
-  
-CREATE TABLE language (
-  id              NUMBER(7)     NOT NULL PRIMARY KEY,
-  cd              CHAR(2)       NOT NULL,
-  description     VARCHAR2(50)
-);
-CREATE TABLE author (
-  id              NUMBER(7)     NOT NULL PRIMARY KEY,
-  first_name      VARCHAR2(50),
-  last_name       VARCHAR2(50)  NOT NULL,
-  date_of_birth   DATE,
-  year_of_birth   NUMBER(7),
-  distinguished   NUMBER(1)
-);
-CREATE TABLE book (
-  id              NUMBER(7)     NOT NULL PRIMARY KEY,
-  author_id       NUMBER(7)     NOT NULL,
-  title           VARCHAR2(400) NOT NULL,
-  published_in    NUMBER(7)     NOT NULL,
-  language_id     NUMBER(7)     NOT NULL,
-  CONSTRAINT fk_book_author     FOREIGN KEY (author_id)   REFERENCES author(id),
-  CONSTRAINT fk_book_language   FOREIGN KEY (language_id) REFERENCES language(id)
-);
-CREATE TABLE book_store (
-  name            VARCHAR2(400) NOT NULL UNIQUE
-);
-CREATE TABLE book_to_book_store (
-  name            VARCHAR2(400) NOT NULL,
-  book_id         INTEGER       NOT NULL,
-  stock           INTEGER,
-  PRIMARY KEY(name, book_id),
-  CONSTRAINT fk_b2bs_book_store FOREIGN KEY (name)        REFERENCES book_store (name) ON DELETE CASCADE,
-  CONSTRAINT fk_b2bs_book       FOREIGN KEY (book_id)     REFERENCES book (id)         ON DELETE CASCADE
+CREATE TABLE ora_t1 (
+  n1   NUMBER(3,0),
+  v1   VARCHAR2(10)
+);
+
+CREATE TABLE "USER"."EMP" 
+   (    "EMPNO" NUMBER(4,0), 
+        "ENAME" VARCHAR2(10 BYTE), 
+        "JOB" VARCHAR2(9 BYTE), 
+        "MGR" NUMBER(4,0), 
+        "HIREDATE" DATE, 
+        "SAL" NUMBER(7,2), 
+        "COMM" NUMBER(7,2), 
+        "DEPTNO" NUMBER(2,0)
+   ) SEGMENT CREATION IMMEDIATE 
+  PCTFREE 10 PCTUSED 40 INITRANS 1 MAXTRANS 255 NOCOMPRESS LOGGING
+  STORAGE(INITIAL 65536 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645
+  PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL DEFAULT FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT)
+  TABLESPACE "USERS" ;
+  
+CREATE TABLE language (
+  id              NUMBER(7)     NOT NULL PRIMARY KEY,
+  cd              CHAR(2)       NOT NULL,
+  description     VARCHAR2(50)
+);
+CREATE TABLE author (
+  id              NUMBER(7)     NOT NULL PRIMARY KEY,
+  first_name      VARCHAR2(50),
+  last_name       VARCHAR2(50)  NOT NULL,
+  date_of_birth   DATE,
+  year_of_birth   NUMBER(7),
+  distinguished   NUMBER(1)
+);
+CREATE TABLE book (
+  id              NUMBER(7)     NOT NULL PRIMARY KEY,
+  author_id       NUMBER(7)     NOT NULL,
+  title           VARCHAR2(400) NOT NULL,
+  published_in    NUMBER(7)     NOT NULL,
+  language_id     NUMBER(7)     NOT NULL,
+  CONSTRAINT fk_book_author     FOREIGN KEY (author_id)   REFERENCES author(id),
+  CONSTRAINT fk_book_language   FOREIGN KEY (language_id) REFERENCES language(id)
+);
+CREATE TABLE book_store (
+  name            VARCHAR2(400) NOT NULL UNIQUE
+);
+CREATE TABLE book_to_book_store (
+  name            VARCHAR2(400) NOT NULL,
+  book_id         INTEGER       NOT NULL,
+  stock           INTEGER,
+  PRIMARY KEY(name, book_id),
+  CONSTRAINT fk_b2bs_book_store FOREIGN KEY (name)        REFERENCES book_store (name) ON DELETE CASCADE,
+  CONSTRAINT fk_b2bs_book       FOREIGN KEY (book_id)     REFERENCES book (id)         ON DELETE CASCADE
 );
\ No newline at end of file


[03/50] [abbrv] hive git commit: HIVE-11638: ExprNodeDesc hashMap accidentally degrades into O(N) instead of O(1) (Gopal V, reviewed by Jesus Camacho Rodriguez)

Posted by xu...@apache.org.
HIVE-11638: ExprNodeDesc hashMap accidentally degrades into O(N) instead of O(1) (Gopal V, reviewed by Jesus Camacho Rodriguez)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/037fb02a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/037fb02a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/037fb02a

Branch: refs/heads/beeline-cli
Commit: 037fb02a8edc1266d0beb02eee1fb90737fc1ef7
Parents: bb7153f
Author: Gopal V <go...@apache.org>
Authored: Thu Aug 27 02:27:35 2015 +0200
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Thu Aug 27 02:27:35 2015 +0200

----------------------------------------------------------------------
 .../java/org/apache/hadoop/hive/ql/parse/LeadLagInfo.java   | 4 ++--
 .../java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java   | 8 ++++++++
 .../java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java  | 9 +++++----
 3 files changed, 15 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/037fb02a/ql/src/java/org/apache/hadoop/hive/ql/parse/LeadLagInfo.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/LeadLagInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/LeadLagInfo.java
index 366b74b..6a81170 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/LeadLagInfo.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/LeadLagInfo.java
@@ -19,7 +19,7 @@
 package org.apache.hadoop.hive.ql.parse;
 
 import java.util.ArrayList;
-import java.util.HashMap;
+import java.util.IdentityHashMap;
 import java.util.List;
 import java.util.Map;
 
@@ -53,7 +53,7 @@ public class LeadLagInfo {
   public void addLLFuncExprForTopExpr(ExprNodeDesc topExpr, ExprNodeGenericFuncDesc llFuncExpr) {
     addLeadLagExpr(llFuncExpr);
     mapTopExprToLLFunExprs = mapTopExprToLLFunExprs == null ?
-        new HashMap<ExprNodeDesc, List<ExprNodeGenericFuncDesc>>() : mapTopExprToLLFunExprs;
+        new IdentityHashMap<ExprNodeDesc, List<ExprNodeGenericFuncDesc>>() : mapTopExprToLLFunExprs;
     List<ExprNodeGenericFuncDesc> funcList = mapTopExprToLLFunExprs.get(topExpr);
     if (funcList == null) {
       funcList = new ArrayList<ExprNodeGenericFuncDesc>();

http://git-wip-us.apache.org/repos/asf/hive/blob/037fb02a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java
index 15267b9..328bd86 100755
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java
@@ -60,6 +60,14 @@ public abstract class ExprNodeDesc implements Serializable, Node {
     return typeInfo.hashCode();
   }
 
+  @Override
+  public final boolean equals(Object o) {
+    // prevent equals from being overridden in sub-classes
+    // always use ExprNodeDescEqualityWrapper
+    // if you need any other equality than Object.equals()
+    return (o == this);
+  }
+
   public TypeInfo getTypeInfo() {
     return typeInfo;
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/037fb02a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java
index 33ad3e8..f23facf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.ppd;
 
 import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.IdentityHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
@@ -98,17 +99,17 @@ public class ExprWalkerInfo implements NodeProcessorCtx {
   public ExprWalkerInfo() {
     pushdownPreds = new HashMap<String, List<ExprNodeDesc>>();
     nonFinalPreds = new HashMap<String, List<ExprNodeDesc>>();
-    exprInfoMap = new HashMap<ExprNodeDesc, ExprInfo>();
-    newToOldExprMap = new HashMap<ExprNodeDesc, ExprNodeDesc>();
+    exprInfoMap = new IdentityHashMap<ExprNodeDesc, ExprInfo>();
+    newToOldExprMap = new IdentityHashMap<ExprNodeDesc, ExprNodeDesc>();
   }
 
   public ExprWalkerInfo(Operator<? extends OperatorDesc> op) {
     this.op = op;
 
     pushdownPreds = new HashMap<String, List<ExprNodeDesc>>();
-    exprInfoMap = new HashMap<ExprNodeDesc, ExprInfo>();
+    exprInfoMap = new IdentityHashMap<ExprNodeDesc, ExprInfo>();
     nonFinalPreds = new HashMap<String, List<ExprNodeDesc>>();
-    newToOldExprMap = new HashMap<ExprNodeDesc, ExprNodeDesc>();
+    newToOldExprMap = new IdentityHashMap<ExprNodeDesc, ExprNodeDesc>();
   }
 
   /**
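
To see why switching these maps to IdentityHashMap matters once
ExprNodeDesc.equals() is pinned to reference equality, here is a small
self-contained demo (plain JDK, no Hive classes) contrasting the two map
types:

    import java.util.HashMap;
    import java.util.IdentityHashMap;
    import java.util.Map;

    public class IdentityMapDemo {
      public static void main(String[] args) {
        String a = new String("key");
        String b = new String("key"); // equal by value, distinct by reference

        Map<String, Integer> byValue = new HashMap<>();
        byValue.put(a, 1);
        byValue.put(b, 2);            // a.equals(b), so this overwrites

        Map<String, Integer> byReference = new IdentityHashMap<>();
        byReference.put(a, 1);
        byReference.put(b, 2);        // a != b, so both entries survive

        System.out.println(byValue.size());     // prints 1
        System.out.println(byReference.size()); // prints 2
      }
    }

With equals() now final and identity-based while hashCode() is still derived
from the expression's type info, many distinct descriptors can land in the
same HashMap bucket and every lookup scans that bucket linearly.
IdentityHashMap hashes on System.identityHashCode instead, which spreads the
keys and restores the O(1) lookups the commit title refers to.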


[37/50] [abbrv] hive git commit: HIVE-11383: Upgrade Hive to Calcite 1.4 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

Posted by xu...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
index 4d797f2..7595c3e 100644
--- a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
@@ -2713,62 +2713,62 @@ Stage-0
       limit:-1
       Stage-1
          Reducer 6
-         File Output Operator [FS_50]
+         File Output Operator [FS_53]
             compressed:false
             Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE
             table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
-            Select Operator [SEL_49]
+            Select Operator [SEL_52]
             |  outputColumnNames:["_col0","_col1","_col2"]
             |  Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE
             |<-Reducer 5 [SIMPLE_EDGE]
-               Reduce Output Operator [RS_48]
+               Reduce Output Operator [RS_51]
                   key expressions:_col1 (type: bigint), _col0 (type: string)
                   sort order:++
                   Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE
                   value expressions:_col2 (type: bigint)
-                  Group By Operator [GBY_46]
+                  Group By Operator [GBY_49]
                   |  aggregations:["count(VALUE._col0)"]
                   |  keys:KEY._col0 (type: string), KEY._col1 (type: bigint)
                   |  outputColumnNames:["_col0","_col1","_col2"]
                   |  Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE
                   |<-Reducer 4 [SIMPLE_EDGE]
-                     Reduce Output Operator [RS_45]
+                     Reduce Output Operator [RS_48]
                         key expressions:_col0 (type: string), _col1 (type: bigint)
                         Map-reduce partition columns:_col0 (type: string), _col1 (type: bigint)
                         sort order:++
                         Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions:_col2 (type: bigint)
-                        Group By Operator [GBY_44]
+                        Group By Operator [GBY_47]
                            aggregations:["count()"]
                            keys:_col0 (type: string), _col1 (type: bigint)
                            outputColumnNames:["_col0","_col1","_col2"]
                            Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE
-                           Merge Join Operator [MERGEJOIN_63]
+                           Merge Join Operator [MERGEJOIN_66]
                            |  condition map:[{"":"Left Semi Join 0 to 1"},{"":"Left Semi Join 0 to 2"}]
                            |  keys:{"2":"_col0 (type: string)","1":"_col0 (type: string)","0":"_col0 (type: string)"}
                            |  outputColumnNames:["_col0","_col1"]
                            |  Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE
                            |<-Map 10 [SIMPLE_EDGE]
-                           |  Reduce Output Operator [RS_41]
+                           |  Reduce Output Operator [RS_44]
                            |     key expressions:_col0 (type: string)
                            |     Map-reduce partition columns:_col0 (type: string)
                            |     sort order:+
-                           |     Statistics:Num rows: 6 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE
-                           |     Group By Operator [GBY_35]
+                           |     Statistics:Num rows: 3 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE
+                           |     Group By Operator [GBY_38]
                            |        keys:_col0 (type: string)
                            |        outputColumnNames:["_col0"]
-                           |        Statistics:Num rows: 6 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE
-                           |        Select Operator [SEL_31]
+                           |        Statistics:Num rows: 3 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE
+                           |        Select Operator [SEL_34]
                            |           outputColumnNames:["_col0"]
-                           |           Statistics:Num rows: 18 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE
-                           |           Filter Operator [FIL_61]
-                           |              predicate:key is not null (type: boolean)
-                           |              Statistics:Num rows: 18 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE
-                           |              TableScan [TS_30]
+                           |           Statistics:Num rows: 6 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE
+                           |           Filter Operator [FIL_64]
+                           |              predicate:(UDFToDouble(key) > 0.0) (type: boolean)
+                           |              Statistics:Num rows: 6 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE
+                           |              TableScan [TS_32]
                            |                 alias:cbo_t3
                            |                 Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE
                            |<-Reducer 3 [SIMPLE_EDGE]
-                           |  Reduce Output Operator [RS_37]
+                           |  Reduce Output Operator [RS_40]
                            |     key expressions:_col0 (type: string)
                            |     Map-reduce partition columns:_col0 (type: string)
                            |     sort order:+
@@ -2783,16 +2783,16 @@ Stage-0
                            |           sort order:-+
                            |           Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE
                            |           value expressions:_col0 (type: string)
-                           |           Filter Operator [FIL_57]
+                           |           Filter Operator [FIL_60]
                            |              predicate:(((_col1 + 1) >= 0) and ((_col1 > 0) or (UDFToDouble(_col0) >= 0.0))) (type: boolean)
                            |              Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE
                            |              Select Operator [SEL_9]
                            |                 outputColumnNames:["_col0","_col1","_col2","_col3"]
                            |                 Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE
-                           |                 Filter Operator [FIL_58]
+                           |                 Filter Operator [FIL_61]
                            |                    predicate:(((UDFToDouble(_col2) + UDFToDouble(_col3)) >= 0.0) and ((UDFToDouble(_col2) >= 1.0) or (_col3 >= 1))) (type: boolean)
                            |                    Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE
-                           |                    Select Operator [SEL_62]
+                           |                    Select Operator [SEL_65]
                            |                       outputColumnNames:["_col1","_col2","_col3"]
                            |                       Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE
                            |                       Group By Operator [GBY_8]
@@ -2815,19 +2815,19 @@ Stage-0
                            |                                Select Operator [SEL_2]
                            |                                   outputColumnNames:["_col0","_col1","_col2"]
                            |                                   Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE
-                           |                                   Filter Operator [FIL_59]
+                           |                                   Filter Operator [FIL_62]
                            |                                      predicate:((((((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and (c_float > 0.0)) and ((c_int >= 1) or (c_float >= 1.0))) and ((UDFToFloat(c_int) + c_float) >= 0.0)) and (UDFToDouble(key) > 0.0)) and key is not null) (type: boolean)
                            |                                      Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE
                            |                                      TableScan [TS_0]
                            |                                         alias:cbo_t1
                            |                                         Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE
                            |<-Reducer 9 [SIMPLE_EDGE]
-                              Reduce Output Operator [RS_39]
+                              Reduce Output Operator [RS_42]
                                  key expressions:_col0 (type: string)
                                  Map-reduce partition columns:_col0 (type: string)
                                  sort order:+
                                  Statistics:Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE
-                                 Group By Operator [GBY_33]
+                                 Group By Operator [GBY_36]
                                     keys:_col0 (type: string)
                                     outputColumnNames:["_col0"]
                                     Statistics:Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE
@@ -2862,8 +2862,8 @@ Stage-0
                                                       Select Operator [SEL_19]
                                                          outputColumnNames:["_col0","_col1","_col2"]
                                                          Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE
-                                                         Filter Operator [FIL_60]
-                                                            predicate:(((((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and (c_float > 0.0)) and ((c_int >= 1) or (c_float >= 1.0))) and ((UDFToFloat(c_int) + c_float) >= 0.0)) and key is not null) (type: boolean)
+                                                         Filter Operator [FIL_63]
+                                                            predicate:((((((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and (c_float > 0.0)) and ((c_int >= 1) or (c_float >= 1.0))) and ((UDFToFloat(c_int) + c_float) >= 0.0)) and (UDFToDouble(key) > 0.0)) and key is not null) (type: boolean)
                                                             Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE
                                                             TableScan [TS_17]
                                                                alias:cbo_t2
@@ -3181,47 +3181,47 @@ Stage-0
       limit:-1
       Stage-1
          Reducer 2
-         File Output Operator [FS_14]
+         File Output Operator [FS_16]
             compressed:false
-            Statistics:Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics:Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE
             table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
-            Merge Join Operator [MERGEJOIN_19]
+            Merge Join Operator [MERGEJOIN_21]
             |  condition map:[{"":"Left Semi Join 0 to 1"}]
             |  keys:{"1":"_col0 (type: string), _col1 (type: string)","0":"_col1 (type: string), _col0 (type: string)"}
             |  outputColumnNames:["_col0","_col1"]
-            |  Statistics:Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
+            |  Statistics:Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE
             |<-Map 1 [SIMPLE_EDGE]
-            |  Reduce Output Operator [RS_9]
+            |  Reduce Output Operator [RS_11]
             |     key expressions:_col1 (type: string), _col0 (type: string)
             |     Map-reduce partition columns:_col1 (type: string), _col0 (type: string)
             |     sort order:++
-            |     Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+            |     Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
             |     Select Operator [SEL_2]
             |        outputColumnNames:["_col0","_col1"]
-            |        Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
-            |        Filter Operator [FIL_17]
-            |           predicate:(value is not null and key is not null) (type: boolean)
-            |           Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+            |        Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+            |        Filter Operator [FIL_19]
+            |           predicate:((value > 'val_9') and key is not null) (type: boolean)
+            |           Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
             |           TableScan [TS_0]
             |              alias:b
             |              Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
             |<-Map 3 [SIMPLE_EDGE]
-               Reduce Output Operator [RS_11]
+               Reduce Output Operator [RS_13]
                   key expressions:_col0 (type: string), _col1 (type: string)
                   Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
                   sort order:++
                   Statistics:Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE
-                  Group By Operator [GBY_7]
+                  Group By Operator [GBY_9]
                      keys:_col0 (type: string), _col1 (type: string)
                      outputColumnNames:["_col0","_col1"]
                      Statistics:Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE
-                     Select Operator [SEL_5]
+                     Select Operator [SEL_7]
                         outputColumnNames:["_col0","_col1"]
                         Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
-                        Filter Operator [FIL_18]
+                        Filter Operator [FIL_20]
                            predicate:((value > 'val_9') and key is not null) (type: boolean)
                            Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
-                           TableScan [TS_3]
+                           TableScan [TS_5]
                               alias:b
                               Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
 
@@ -3253,47 +3253,47 @@ Stage-0
       limit:-1
       Stage-1
          Reducer 2
-         File Output Operator [FS_14]
+         File Output Operator [FS_16]
             compressed:false
-            Statistics:Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics:Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE
             table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
-            Merge Join Operator [MERGEJOIN_19]
+            Merge Join Operator [MERGEJOIN_21]
             |  condition map:[{"":"Left Semi Join 0 to 1"}]
             |  keys:{"1":"_col0 (type: string), _col1 (type: string)","0":"_col1 (type: string), _col0 (type: string)"}
             |  outputColumnNames:["_col0","_col1"]
-            |  Statistics:Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
+            |  Statistics:Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE
             |<-Map 1 [SIMPLE_EDGE]
-            |  Reduce Output Operator [RS_9]
+            |  Reduce Output Operator [RS_11]
             |     key expressions:_col1 (type: string), _col0 (type: string)
             |     Map-reduce partition columns:_col1 (type: string), _col0 (type: string)
             |     sort order:++
-            |     Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+            |     Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
             |     Select Operator [SEL_2]
             |        outputColumnNames:["_col0","_col1"]
-            |        Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
-            |        Filter Operator [FIL_17]
-            |           predicate:(value is not null and key is not null) (type: boolean)
-            |           Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+            |        Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+            |        Filter Operator [FIL_19]
+            |           predicate:((value > 'val_9') and key is not null) (type: boolean)
+            |           Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
             |           TableScan [TS_0]
             |              alias:b
             |              Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
             |<-Map 3 [SIMPLE_EDGE]
-               Reduce Output Operator [RS_11]
+               Reduce Output Operator [RS_13]
                   key expressions:_col0 (type: string), _col1 (type: string)
                   Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
                   sort order:++
                   Statistics:Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE
-                  Group By Operator [GBY_7]
+                  Group By Operator [GBY_9]
                      keys:_col0 (type: string), _col1 (type: string)
                      outputColumnNames:["_col0","_col1"]
                      Statistics:Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE
-                     Select Operator [SEL_5]
+                     Select Operator [SEL_7]
                         outputColumnNames:["_col0","_col1"]
                         Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
-                        Filter Operator [FIL_18]
+                        Filter Operator [FIL_20]
                            predicate:((value > 'val_9') and key is not null) (type: boolean)
                            Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
-                           TableScan [TS_3]
+                           TableScan [TS_5]
                               alias:b
                               Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
 
@@ -3315,48 +3315,48 @@ Stage-0
       limit:-1
       Stage-1
          Reducer 2
-         File Output Operator [FS_14]
+         File Output Operator [FS_16]
             compressed:false
-            Statistics:Num rows: 168 Data size: 29904 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
             table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
-            Merge Join Operator [MERGEJOIN_19]
+            Merge Join Operator [MERGEJOIN_21]
             |  condition map:[{"":"Left Semi Join 0 to 1"}]
             |  keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"}
             |  outputColumnNames:["_col0","_col1"]
-            |  Statistics:Num rows: 168 Data size: 29904 Basic stats: COMPLETE Column stats: COMPLETE
+            |  Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
             |<-Map 1 [SIMPLE_EDGE]
-            |  Reduce Output Operator [RS_9]
+            |  Reduce Output Operator [RS_11]
             |     key expressions:_col0 (type: string)
             |     Map-reduce partition columns:_col0 (type: string)
             |     sort order:+
-            |     Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+            |     Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
             |     value expressions:_col1 (type: string)
             |     Select Operator [SEL_2]
             |        outputColumnNames:["_col0","_col1"]
-            |        Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
-            |        Filter Operator [FIL_17]
-            |           predicate:key is not null (type: boolean)
-            |           Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+            |        Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+            |        Filter Operator [FIL_19]
+            |           predicate:(key > '9') (type: boolean)
+            |           Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
             |           TableScan [TS_0]
             |              alias:src_cbo
             |              Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
             |<-Map 3 [SIMPLE_EDGE]
-               Reduce Output Operator [RS_11]
+               Reduce Output Operator [RS_13]
                   key expressions:_col0 (type: string)
                   Map-reduce partition columns:_col0 (type: string)
                   sort order:+
                   Statistics:Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE
-                  Group By Operator [GBY_7]
+                  Group By Operator [GBY_9]
                      keys:_col0 (type: string)
                      outputColumnNames:["_col0"]
                      Statistics:Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE
-                     Select Operator [SEL_5]
+                     Select Operator [SEL_7]
                         outputColumnNames:["_col0"]
                         Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
-                        Filter Operator [FIL_18]
+                        Filter Operator [FIL_20]
                            predicate:(key > '9') (type: boolean)
                            Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
-                           TableScan [TS_3]
+                           TableScan [TS_5]
                               alias:src_cbo
                               Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
 
@@ -3382,92 +3382,92 @@ Stage-0
       limit:-1
       Stage-1
          Reducer 3
-         File Output Operator [FS_26]
+         File Output Operator [FS_28]
             compressed:false
             Statistics:Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
             table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
-            Select Operator [SEL_25]
+            Select Operator [SEL_27]
                outputColumnNames:["_col0","_col1"]
                Statistics:Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
-               Merge Join Operator [MERGEJOIN_36]
+               Merge Join Operator [MERGEJOIN_38]
                |  condition map:[{"":"Inner Join 0 to 1"}]
                |  keys:{"1":"_col0 (type: int)","0":"_col1 (type: int)"}
                |  outputColumnNames:["_col1","_col2"]
                |  Statistics:Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
                |<-Reducer 2 [SIMPLE_EDGE]
-               |  Reduce Output Operator [RS_21]
+               |  Reduce Output Operator [RS_23]
                |     key expressions:_col1 (type: int)
                |     Map-reduce partition columns:_col1 (type: int)
                |     sort order:+
                |     Statistics:Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
                |     value expressions:_col2 (type: int)
-               |     Merge Join Operator [MERGEJOIN_35]
+               |     Merge Join Operator [MERGEJOIN_37]
                |     |  condition map:[{"":"Left Semi Join 0 to 1"}]
-               |     |  keys:{"1":"_col0 (type: int), _col1 (type: int)","0":"_col0 (type: int), _col3 (type: int)"}
+               |     |  keys:{"1":"_col0 (type: int)","0":"_col0 (type: int)"}
                |     |  outputColumnNames:["_col1","_col2"]
                |     |  Statistics:Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
                |     |<-Map 1 [SIMPLE_EDGE]
-               |     |  Reduce Output Operator [RS_16]
-               |     |     key expressions:_col0 (type: int), _col3 (type: int)
-               |     |     Map-reduce partition columns:_col0 (type: int), _col3 (type: int)
-               |     |     sort order:++
+               |     |  Reduce Output Operator [RS_18]
+               |     |     key expressions:_col0 (type: int)
+               |     |     Map-reduce partition columns:_col0 (type: int)
+               |     |     sort order:+
                |     |     Statistics:Num rows: 16 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE
                |     |     value expressions:_col1 (type: int), _col2 (type: int)
                |     |     Select Operator [SEL_2]
-               |     |        outputColumnNames:["_col0","_col1","_col2","_col3"]
+               |     |        outputColumnNames:["_col0","_col1","_col2"]
                |     |        Statistics:Num rows: 16 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE
-               |     |        Filter Operator [FIL_32]
+               |     |        Filter Operator [FIL_34]
                |     |           predicate:(((l_linenumber = 1) and l_orderkey is not null) and l_partkey is not null) (type: boolean)
                |     |           Statistics:Num rows: 16 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE
                |     |           TableScan [TS_0]
                |     |              alias:lineitem
                |     |              Statistics:Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: COMPLETE
                |     |<-Map 4 [SIMPLE_EDGE]
-               |        Reduce Output Operator [RS_18]
-               |           key expressions:_col0 (type: int), _col1 (type: int)
-               |           Map-reduce partition columns:_col0 (type: int), _col1 (type: int)
-               |           sort order:++
-               |           Statistics:Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
-               |           Group By Operator [GBY_14]
-               |              keys:_col0 (type: int), _col1 (type: int)
-               |              outputColumnNames:["_col0","_col1"]
-               |              Statistics:Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+               |        Reduce Output Operator [RS_20]
+               |           key expressions:_col0 (type: int)
+               |           Map-reduce partition columns:_col0 (type: int)
+               |           sort order:+
+               |           Statistics:Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+               |           Group By Operator [GBY_16]
+               |              keys:_col0 (type: int)
+               |              outputColumnNames:["_col0"]
+               |              Statistics:Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
                |              Select Operator [SEL_5]
-               |                 outputColumnNames:["_col0","_col1"]
-               |                 Statistics:Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
-               |                 Filter Operator [FIL_33]
-               |                    predicate:(((l_shipmode = 'AIR') and l_orderkey is not null) and l_linenumber is not null) (type: boolean)
+               |                 outputColumnNames:["_col0"]
+               |                 Statistics:Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
+               |                 Filter Operator [FIL_35]
+               |                    predicate:(((l_shipmode = 'AIR') and (l_linenumber = 1)) and l_orderkey is not null) (type: boolean)
                |                    Statistics:Num rows: 14 Data size: 1344 Basic stats: COMPLETE Column stats: COMPLETE
                |                    TableScan [TS_3]
                |                       alias:lineitem
                |                       Statistics:Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: COMPLETE
                |<-Reducer 6 [SIMPLE_EDGE]
-                  Reduce Output Operator [RS_23]
+                  Reduce Output Operator [RS_25]
                      key expressions:_col0 (type: int)
                      Map-reduce partition columns:_col0 (type: int)
                      sort order:+
                      Statistics:Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
-                     Group By Operator [GBY_11]
+                     Group By Operator [GBY_13]
                      |  keys:KEY._col0 (type: int)
                      |  outputColumnNames:["_col0"]
                      |  Statistics:Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
                      |<-Map 5 [SIMPLE_EDGE]
-                        Reduce Output Operator [RS_10]
+                        Reduce Output Operator [RS_12]
                            key expressions:_col0 (type: int)
                            Map-reduce partition columns:_col0 (type: int)
                            sort order:+
                            Statistics:Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
-                           Group By Operator [GBY_9]
+                           Group By Operator [GBY_11]
                               keys:_col0 (type: int)
                               outputColumnNames:["_col0"]
                               Statistics:Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
-                              Select Operator [SEL_7]
+                              Select Operator [SEL_9]
                                  outputColumnNames:["_col0"]
                                  Statistics:Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
-                                 Filter Operator [FIL_34]
+                                 Filter Operator [FIL_36]
                                     predicate:l_partkey is not null (type: boolean)
                                     Statistics:Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
-                                    TableScan [TS_6]
+                                    TableScan [TS_8]
                                        alias:lineitem
                                        Statistics:Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: COMPLETE
 
@@ -3496,125 +3496,125 @@ Stage-0
       limit:-1
       Stage-1
          Reducer 4
-         File Output Operator [FS_36]
+         File Output Operator [FS_38]
             compressed:false
             Statistics:Num rows: 34 Data size: 6324 Basic stats: COMPLETE Column stats: COMPLETE
             table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
-            Merge Join Operator [MERGEJOIN_49]
+            Merge Join Operator [MERGEJOIN_51]
             |  condition map:[{"":"Left Semi Join 0 to 1"}]
             |  keys:{"1":"_col0 (type: bigint)","0":"_col2 (type: bigint)"}
             |  outputColumnNames:["_col0","_col1","_col2"]
             |  Statistics:Num rows: 34 Data size: 6324 Basic stats: COMPLETE Column stats: COMPLETE
             |<-Reducer 3 [SIMPLE_EDGE]
-            |  Reduce Output Operator [RS_31]
+            |  Reduce Output Operator [RS_33]
             |     key expressions:_col2 (type: bigint)
             |     Map-reduce partition columns:_col2 (type: bigint)
             |     sort order:+
-            |     Statistics:Num rows: 84 Data size: 15624 Basic stats: COMPLETE Column stats: COMPLETE
+            |     Statistics:Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE
             |     value expressions:_col0 (type: string), _col1 (type: string)
-            |     Filter Operator [FIL_42]
+            |     Filter Operator [FIL_44]
             |        predicate:_col2 is not null (type: boolean)
-            |        Statistics:Num rows: 84 Data size: 15624 Basic stats: COMPLETE Column stats: COMPLETE
-            |        Group By Operator [GBY_18]
+            |        Statistics:Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE
+            |        Group By Operator [GBY_20]
             |        |  aggregations:["count(VALUE._col0)"]
             |        |  keys:KEY._col0 (type: string), KEY._col1 (type: string)
             |        |  outputColumnNames:["_col0","_col1","_col2"]
-            |        |  Statistics:Num rows: 84 Data size: 15624 Basic stats: COMPLETE Column stats: COMPLETE
+            |        |  Statistics:Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE
             |        |<-Reducer 2 [SIMPLE_EDGE]
-            |           Reduce Output Operator [RS_17]
+            |           Reduce Output Operator [RS_19]
             |              key expressions:_col0 (type: string), _col1 (type: string)
             |              Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
             |              sort order:++
-            |              Statistics:Num rows: 84 Data size: 15624 Basic stats: COMPLETE Column stats: COMPLETE
+            |              Statistics:Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE
             |              value expressions:_col2 (type: bigint)
-            |              Group By Operator [GBY_16]
+            |              Group By Operator [GBY_18]
             |                 aggregations:["count()"]
             |                 keys:_col0 (type: string), _col1 (type: string)
             |                 outputColumnNames:["_col0","_col1","_col2"]
-            |                 Statistics:Num rows: 84 Data size: 15624 Basic stats: COMPLETE Column stats: COMPLETE
-            |                 Merge Join Operator [MERGEJOIN_48]
+            |                 Statistics:Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE
+            |                 Merge Join Operator [MERGEJOIN_50]
             |                 |  condition map:[{"":"Left Semi Join 0 to 1"}]
             |                 |  keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"}
             |                 |  outputColumnNames:["_col0","_col1"]
-            |                 |  Statistics:Num rows: 168 Data size: 29904 Basic stats: COMPLETE Column stats: COMPLETE
+            |                 |  Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
             |                 |<-Map 1 [SIMPLE_EDGE]
-            |                 |  Reduce Output Operator [RS_11]
+            |                 |  Reduce Output Operator [RS_13]
             |                 |     key expressions:_col0 (type: string)
             |                 |     Map-reduce partition columns:_col0 (type: string)
             |                 |     sort order:+
-            |                 |     Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+            |                 |     Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
             |                 |     value expressions:_col1 (type: string)
             |                 |     Select Operator [SEL_2]
             |                 |        outputColumnNames:["_col0","_col1"]
-            |                 |        Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
-            |                 |        Filter Operator [FIL_43]
-            |                 |           predicate:key is not null (type: boolean)
-            |                 |           Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+            |                 |        Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+            |                 |        Filter Operator [FIL_45]
+            |                 |           predicate:(key > '8') (type: boolean)
+            |                 |           Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
             |                 |           TableScan [TS_0]
             |                 |              alias:b
             |                 |              Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
             |                 |<-Map 5 [SIMPLE_EDGE]
-            |                    Reduce Output Operator [RS_13]
+            |                    Reduce Output Operator [RS_15]
             |                       key expressions:_col0 (type: string)
             |                       Map-reduce partition columns:_col0 (type: string)
             |                       sort order:+
             |                       Statistics:Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE
-            |                       Group By Operator [GBY_9]
+            |                       Group By Operator [GBY_11]
             |                          keys:_col0 (type: string)
             |                          outputColumnNames:["_col0"]
             |                          Statistics:Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE
-            |                          Select Operator [SEL_7]
+            |                          Select Operator [SEL_9]
             |                             outputColumnNames:["_col0"]
             |                             Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
-            |                             Filter Operator [FIL_44]
+            |                             Filter Operator [FIL_46]
             |                                predicate:(key > '8') (type: boolean)
             |                                Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
-            |                                TableScan [TS_5]
+            |                                TableScan [TS_7]
             |                                   alias:b
             |                                   Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
             |<-Reducer 7 [SIMPLE_EDGE]
-               Reduce Output Operator [RS_33]
+               Reduce Output Operator [RS_35]
                   key expressions:_col0 (type: bigint)
                   Map-reduce partition columns:_col0 (type: bigint)
                   sort order:+
                   Statistics:Num rows: 34 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE
-                  Group By Operator [GBY_29]
+                  Group By Operator [GBY_31]
                      keys:_col0 (type: bigint)
                      outputColumnNames:["_col0"]
                      Statistics:Num rows: 34 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE
-                     Select Operator [SEL_27]
+                     Select Operator [SEL_29]
                         outputColumnNames:["_col0"]
                         Statistics:Num rows: 69 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
-                        Filter Operator [FIL_45]
+                        Filter Operator [FIL_47]
                            predicate:_col1 is not null (type: boolean)
                            Statistics:Num rows: 69 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
-                           Select Operator [SEL_47]
+                           Select Operator [SEL_49]
                               outputColumnNames:["_col1"]
                               Statistics:Num rows: 69 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
-                              Group By Operator [GBY_26]
+                              Group By Operator [GBY_28]
                               |  aggregations:["count(VALUE._col0)"]
                               |  keys:KEY._col0 (type: string)
                               |  outputColumnNames:["_col0","_col1"]
                               |  Statistics:Num rows: 69 Data size: 6555 Basic stats: COMPLETE Column stats: COMPLETE
                               |<-Map 6 [SIMPLE_EDGE]
-                                 Reduce Output Operator [RS_25]
+                                 Reduce Output Operator [RS_27]
                                     key expressions:_col0 (type: string)
                                     Map-reduce partition columns:_col0 (type: string)
                                     sort order:+
                                     Statistics:Num rows: 69 Data size: 6555 Basic stats: COMPLETE Column stats: COMPLETE
                                     value expressions:_col1 (type: bigint)
-                                    Group By Operator [GBY_24]
+                                    Group By Operator [GBY_26]
                                        aggregations:["count()"]
                                        keys:_col0 (type: string)
                                        outputColumnNames:["_col0","_col1"]
                                        Statistics:Num rows: 69 Data size: 6555 Basic stats: COMPLETE Column stats: COMPLETE
-                                       Select Operator [SEL_22]
+                                       Select Operator [SEL_24]
                                           outputColumnNames:["_col0"]
                                           Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
-                                          Filter Operator [FIL_46]
+                                          Filter Operator [FIL_48]
                                              predicate:(key > '9') (type: boolean)
                                              Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
-                                             TableScan [TS_20]
+                                             TableScan [TS_22]
                                                 alias:b
                                                 Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
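
Beyond the operator renumbering (FS_50 to FS_53 and so on), the substantive change in
these plans is transitive predicate inference: a filter such as (UDFToDouble(key) > 0.0)
or (value > 'val_9') on one input of an equi-join is now also applied to the other
input's join key, and null checks subsumed by the stronger predicate drop out, which is
why the row estimates shrink. A toy sketch of why the prefilter is safe (invented names,
no Hive or Calcite API involved):

    import java.util.ArrayList;
    import java.util.List;
    import java.util.function.Predicate;

    public class TransitiveInferenceSketch {
      // For an equi-join on left.key = right.key with a filter f on right.key,
      // a left row whose key fails f can never match a surviving right row,
      // so applying f to the left input before the join changes nothing
      // except the amount of data the join has to process.
      static List<Integer> prefilterLeft(List<Integer> leftKeys,
                                         Predicate<Integer> rightKeyFilter) {
        List<Integer> kept = new ArrayList<>();
        for (Integer k : leftKeys) {
          if (rightKeyFilter.test(k)) {  // e.g. key > 9, inferred from the other side
            kept.add(k);
          }
        }
        return kept;
      }
    }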
 

http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/tez/subquery_exists.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/subquery_exists.q.out b/ql/src/test/results/clientpositive/tez/subquery_exists.q.out
index 503a5e9..41aa048 100644
--- a/ql/src/test/results/clientpositive/tez/subquery_exists.q.out
+++ b/ql/src/test/results/clientpositive/tez/subquery_exists.q.out
@@ -41,17 +41,17 @@ STAGE PLANS:
                   alias: b
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (value is not null and key is not null) (type: boolean)
-                    Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+                    predicate: ((value > 'val_9') and key is not null) (type: boolean)
+                    Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: string), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col1 (type: string), _col0 (type: string)
                         sort order: ++
                         Map-reduce partition columns: _col1 (type: string), _col0 (type: string)
-                        Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
         Map 3 
             Map Operator Tree:
                 TableScan
@@ -83,10 +83,10 @@ STAGE PLANS:
                   0 _col1 (type: string), _col0 (type: string)
                   1 _col0 (type: string), _col1 (type: string)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
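
These EXISTS plans follow the usual rewrite: the correlated subquery becomes a LEFT SEMI
JOIN, with a Group By deduplicating the subquery side on the join keys so each outer row
is emitted at most once no matter how many inner matches exist. A stand-alone sketch of
those semantics (plain Java, nothing assumed about Hive internals):

    import java.util.ArrayList;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;

    public class LeftSemiJoinSketch {
      // Keep each left row iff its key appears on the right; duplicates on
      // the left survive, duplicates on the right are collapsed first.
      static List<String> leftSemiJoin(List<String> leftKeys,
                                       List<String> rightKeys) {
        Set<String> probe = new HashSet<>(rightKeys);  // the Group By dedup step
        List<String> out = new ArrayList<>();
        for (String k : leftKeys) {
          if (probe.contains(k)) {
            out.add(k);
          }
        }
        return out;
      }

      public static void main(String[] args) {
        // prints [b, b]: both left "b" rows survive, each emitted once
        System.out.println(leftSemiJoin(
            List.of("a", "b", "b", "c"),
            List.of("b", "b", "d")));
      }
    }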

http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/tez/subquery_in.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/subquery_in.q.out b/ql/src/test/results/clientpositive/tez/subquery_in.q.out
index 38f4bc6..2b1237b 100644
--- a/ql/src/test/results/clientpositive/tez/subquery_in.q.out
+++ b/ql/src/test/results/clientpositive/tez/subquery_in.q.out
@@ -31,17 +31,17 @@ STAGE PLANS:
                   alias: src
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    predicate: (key > '9') (type: boolean)
+                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: string), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: string)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: string)
         Map 3 
             Map Operator Tree:
@@ -74,10 +74,10 @@ STAGE PLANS:
                   0 _col0 (type: string)
                   1 _col0 (type: string)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -149,17 +149,17 @@ STAGE PLANS:
                   alias: b
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (key is not null and value is not null) (type: boolean)
-                    Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+                    predicate: ((key > '9') and value is not null) (type: boolean)
+                    Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: string), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: string), _col1 (type: string)
                         sort order: ++
                         Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-                        Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
         Map 3 
             Map Operator Tree:
                 TableScan
@@ -191,10 +191,10 @@ STAGE PLANS:
                   0 _col0 (type: string), _col1 (type: string)
                   1 _col0 (type: string), _col1 (type: string)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -627,17 +627,17 @@ STAGE PLANS:
                   alias: b
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (key is not null and value is not null) (type: boolean)
-                    Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+                    predicate: ((key > '9') and value is not null) (type: boolean)
+                    Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: string), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: string), _col1 (type: string)
                         sort order: ++
                         Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-                        Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
         Map 3 
             Map Operator Tree:
                 TableScan
@@ -669,10 +669,10 @@ STAGE PLANS:
                   0 _col0 (type: string), _col1 (type: string)
                   1 _col0 (type: string), _col1 (type: string)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
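
The revised row counts in these plans are internally consistent rather than arbitrary.
With Column stats: NONE, the annotations scale the 500-row, 5312-byte scan (about 10.6
bytes per row) by a fixed factor per predicate, and the factors can be read straight off
the numbers above: roughly 1/2 per IS NOT NULL conjunct, 1/3 for a range comparison, and
a semi-join output of the larger input plus about ten percent. Treat these factors as
observed from this output, not as a specification:

    key is not null                : 500 * 1/2       = 250 rows (2656 bytes)
    key > '9'                      : 500 * 1/3       = 166 rows (1763 bytes)
    (key > '9') and value not null : 500 * 1/3 * 1/2 =  83 rows  (881 bytes)
    semi-join output               : 166 * 1.1 ~ 182 rows;  83 * 1.1 ~ 91 rows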

http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/tez/vector_inner_join.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_inner_join.q.out b/ql/src/test/results/clientpositive/tez/vector_inner_join.q.out
index af80260..b9d601a 100644
--- a/ql/src/test/results/clientpositive/tez/vector_inner_join.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_inner_join.q.out
@@ -150,11 +150,11 @@ STAGE PLANS:
                         outputColumnNames: _col0
                         input vertices:
                           1 Map 2
-                        Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
                         HybridGraceHashJoin: true
                         File Output Operator
                           compressed: false
-                          Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
                           table:
                               input format: org.apache.hadoop.mapred.TextInputFormat
                               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -166,22 +166,22 @@ STAGE PLANS:
                   alias: t1
                   Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: a is not null (type: boolean)
-                    Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    predicate: (a > 2) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: a (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         keys: _col0 (type: int)
                         mode: hash
                         outputColumnNames: _col0
-                        Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
                         Reduce Output Operator
                           key expressions: _col0 (type: int)
                           sort order: +
                           Map-reduce partition columns: _col0 (type: int)
-                          Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized
 
   Stage: Stage-0

http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/tez/vector_mapjoin_reduce.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_mapjoin_reduce.q.out b/ql/src/test/results/clientpositive/tez/vector_mapjoin_reduce.q.out
index 29b86bd..e7fec82 100644
--- a/ql/src/test/results/clientpositive/tez/vector_mapjoin_reduce.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_mapjoin_reduce.q.out
@@ -202,15 +202,15 @@ STAGE PLANS:
                     predicate: (((l_linenumber = 1) and l_orderkey is not null) and l_partkey is not null) (type: boolean)
                     Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int), 1 (type: int)
-                      outputColumnNames: _col0, _col1, _col2, _col3
+                      expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int)
+                      outputColumnNames: _col0, _col1, _col2
                       Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
                       Map Join Operator
                         condition map:
                              Left Semi Join 0 to 1
                         keys:
-                          0 _col0 (type: int), _col3 (type: int)
-                          1 _col0 (type: int), _col1 (type: int)
+                          0 _col0 (type: int)
+                          1 _col0 (type: int)
                         outputColumnNames: _col1, _col2
                         input vertices:
                           1 Map 2
@@ -228,21 +228,21 @@ STAGE PLANS:
                   alias: lineitem
                   Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (((l_shipmode = 'AIR') and l_orderkey is not null) and l_linenumber is not null) (type: boolean)
+                    predicate: (((l_shipmode = 'AIR') and (l_linenumber = 1)) and l_orderkey is not null) (type: boolean)
                     Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: l_orderkey (type: int), l_linenumber (type: int)
-                      outputColumnNames: _col0, _col1
+                      expressions: l_orderkey (type: int)
+                      outputColumnNames: _col0
                       Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
-                        keys: _col0 (type: int), _col1 (type: int)
+                        keys: _col0 (type: int)
                         mode: hash
-                        outputColumnNames: _col0, _col1
+                        outputColumnNames: _col0
                         Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
                         Reduce Output Operator
-                          key expressions: _col0 (type: int), _col1 (type: int)
-                          sort order: ++
-                          Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
                           Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
         Map 3 
             Map Operator Tree:

http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/vector_inner_join.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_inner_join.q.out b/ql/src/test/results/clientpositive/vector_inner_join.q.out
index 3e8d2f4..4775ae9 100644
--- a/ql/src/test/results/clientpositive/vector_inner_join.q.out
+++ b/ql/src/test/results/clientpositive/vector_inner_join.q.out
@@ -136,17 +136,17 @@ STAGE PLANS:
             alias: t1
             Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: a is not null (type: boolean)
-              Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              predicate: (a > 2) (type: boolean)
+              Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: a (type: int)
                 outputColumnNames: _col0
-                Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
                   keys: _col0 (type: int)
                   mode: hash
                   outputColumnNames: _col0
-                  Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
                   HashTable Sink Operator
                     keys:
                       0 _col0 (type: int)
@@ -172,10 +172,10 @@ STAGE PLANS:
                     0 _col0 (type: int)
                     1 _col0 (type: int)
                   outputColumnNames: _col0
-                  Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.TextInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out b/ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out
index 16916cb..fdd7ea8 100644
--- a/ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out
+++ b/ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out
@@ -465,30 +465,30 @@ STAGE PLANS:
   Stage: Stage-11
     Map Reduce Local Work
       Alias -> Map Local Tables:
-        $hdt$_1:lineitem 
+        $hdt$_1:$hdt$_1:lineitem 
           Fetch Operator
             limit: -1
       Alias -> Map Local Operator Tree:
-        $hdt$_1:lineitem 
+        $hdt$_1:$hdt$_1:lineitem 
           TableScan
             alias: lineitem
             Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: (((l_shipmode = 'AIR') and l_orderkey is not null) and l_linenumber is not null) (type: boolean)
+              predicate: (((l_shipmode = 'AIR') and (l_linenumber = 1)) and l_orderkey is not null) (type: boolean)
               Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
               Select Operator
-                expressions: l_orderkey (type: int), l_linenumber (type: int)
-                outputColumnNames: _col0, _col1
+                expressions: l_orderkey (type: int)
+                outputColumnNames: _col0
                 Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
-                  keys: _col0 (type: int), _col1 (type: int)
+                  keys: _col0 (type: int)
                   mode: hash
-                  outputColumnNames: _col0, _col1
+                  outputColumnNames: _col0
                   Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
                   HashTable Sink Operator
                     keys:
-                      0 _col0 (type: int), _col3 (type: int)
-                      1 _col0 (type: int), _col1 (type: int)
+                      0 _col0 (type: int)
+                      1 _col0 (type: int)
 
   Stage: Stage-8
     Map Reduce
@@ -500,15 +500,15 @@ STAGE PLANS:
               predicate: (((l_linenumber = 1) and l_orderkey is not null) and l_partkey is not null) (type: boolean)
               Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
               Select Operator
-                expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int), 1 (type: int)
-                outputColumnNames: _col0, _col1, _col2, _col3
+                expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int)
+                outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
                 Map Join Operator
                   condition map:
                        Left Semi Join 0 to 1
                   keys:
-                    0 _col0 (type: int), _col3 (type: int)
-                    1 _col0 (type: int), _col1 (type: int)
+                    0 _col0 (type: int)
+                    1 _col0 (type: int)
                   outputColumnNames: _col1, _col2
                   Statistics: Num rows: 14 Data size: 1714 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
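
The hunks above all show the same rewrite: once a filter pins a column to a constant (l_linenumber = 1), the constant key pair {1, l_linenumber} adds nothing to the join match and is dropped from the semijoin keys, and predicates implied across the join (a > 2) are propagated to both inputs, which is why the key lists shrink and the row estimates fall. A rough, hypothetical sketch of the key-pruning step -- not Hive's actual optimizer code, which works on ExprNodeDesc trees -- might look like:

import java.util.List;

final class ConstantKeyPruner {

  /** Minimal stand-in for an expression node; Hive's real type is ExprNodeDesc. */
  interface Expr {
    boolean isConstant();
  }

  /**
   * Drops key positions where both sides are known constants, e.g. the
   * pair {1, l_linenumber} after l_linenumber = 1 has been folded in.
   * Both lists are edited in lock-step so positions stay aligned.
   */
  static void pruneConstantPairs(List<Expr> leftKeys, List<Expr> rightKeys) {
    for (int i = leftKeys.size() - 1; i >= 0; i--) {
      if (leftKeys.get(i).isConstant() && rightKeys.get(i).isConstant()) {
        leftKeys.remove(i);
        rightKeys.remove(i);
      }
    }
  }
}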


[09/50] [abbrv] hive git commit: HIVE-11664: Make tez container logs work with new log4j2 changes (Prasanth Jayachandran reviewed by Gopal V)

Posted by xu...@apache.org.
HIVE-11664: Make tez container logs work with new log4j2 changes (Prasanth Jayachandran reviewed by Gopal V)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/607b0e8a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/607b0e8a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/607b0e8a

Branch: refs/heads/beeline-cli
Commit: 607b0e8a6b4da164606b87c4d012059276b3a994
Parents: 9e85bbf
Author: Prasanth Jayachandran <j....@gmail.com>
Authored: Thu Aug 27 17:14:17 2015 -0500
Committer: Prasanth Jayachandran <j....@gmail.com>
Committed: Thu Aug 27 17:14:17 2015 -0500

----------------------------------------------------------------------
 data/conf/tez/hive-site.xml                    |  4 ++
 ql/src/main/resources/tez-container-log4j2.xml | 49 +++++++++++++++++++++
 2 files changed, 53 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/607b0e8a/data/conf/tez/hive-site.xml
----------------------------------------------------------------------
diff --git a/data/conf/tez/hive-site.xml b/data/conf/tez/hive-site.xml
index e0238aa..2f9415a 100644
--- a/data/conf/tez/hive-site.xml
+++ b/data/conf/tez/hive-site.xml
@@ -253,5 +253,9 @@
   </description>
 </property>
 
+<property>
+  <name>hive.tez.java.opts</name>
+  <value> -Dlog4j.configurationFile=tez-container-log4j2.xml -Dtez.container.log.level=INFO -Dtez.container.root.logger=CLA </value>
+</property>
 
 </configuration>

http://git-wip-us.apache.org/repos/asf/hive/blob/607b0e8a/ql/src/main/resources/tez-container-log4j2.xml
----------------------------------------------------------------------
diff --git a/ql/src/main/resources/tez-container-log4j2.xml b/ql/src/main/resources/tez-container-log4j2.xml
new file mode 100644
index 0000000..be949dc
--- /dev/null
+++ b/ql/src/main/resources/tez-container-log4j2.xml
@@ -0,0 +1,49 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<Configuration status="trace" strict="true" name="TezContainerLog4j2"
+ packages="org.apache.hadoop.hive.ql.log">
+
+  <Properties>
+    <Property name="tez.container.log.threshold">ALL</Property>
+    <Property name="tez.container.log.level">INFO</Property>
+    <Property name="tez.container.root.logger">CLA</Property>
+    <Property name="tez.container.log.dir">${sys:yarn.app.container.log.dir}</Property>
+    <Property name="tez.container.log.file">syslog</Property>
+  </Properties>
+
+  <Appenders>
+    <RollingFile name="CLA" fileName="${sys:tez.container.log.dir}/${sys:tez.container.log.file}"
+     filePattern="${sys:tez.container.log.dir}/${sys:tez.container.log.file}.%d{yyyy-MM-dd}">
+      <PatternLayout pattern="%d{ISO8601} %p [%t] %c{2}: %m%n" />
+      <Policies>
+        <!-- Rollover at midnight (interval = 1 means daily) -->
+        <TimeBasedTriggeringPolicy interval="1" modulate="true"/>
+      </Policies>
+      <!-- 30-day backup -->
+      <!-- <DefaultRolloverStrategy max="30"/> -->
+    </RollingFile>
+  </Appenders>
+
+  <Loggers>
+    <Root level="${sys:tez.container.log.threshold}">
+      <AppenderRef ref="${sys:tez.container.root.logger}" level="${sys:tez.container.log.level}"/>
+    </Root>
+  </Loggers>
+
+</Configuration>
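
Every ${sys:...} reference in the config above is a Log4j2 system-property lookup, so the file only works if those properties are set on the container JVM -- which is what the new hive.tez.java.opts value arranges, while yarn.app.container.log.dir is supplied by the YARN NodeManager. A small standalone sketch of how those lookups resolve (illustrative values only, not part of the commit):

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

public class ContainerLogDemo {
  public static void main(String[] args) {
    // Stand-ins for what YARN and hive.tez.java.opts provide in a real container:
    System.setProperty("yarn.app.container.log.dir", "/tmp/container-logs");
    System.setProperty("tez.container.log.level", "INFO");
    System.setProperty("tez.container.root.logger", "CLA");
    // Mirrors -Dlog4j.configurationFile from the hive-site.xml change; must
    // be set before the first logger is created:
    System.setProperty("log4j.configurationFile", "tez-container-log4j2.xml");

    Logger log = LogManager.getLogger(ContainerLogDemo.class);
    log.info("this line lands in /tmp/container-logs/syslog via the CLA appender");
  }
}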


[21/50] [abbrv] hive git commit: HIVE-11670 : Strip out password information from TezSessionState configuration (Hari Subramaniyan, reviewed by Vikram Dixit K)

Posted by xu...@apache.org.
HIVE-11670 : Strip out password information from TezSessionState configuration (Hari Subramaniyan, reviewed by Vikram Dixit K)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/78e70159
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/78e70159
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/78e70159

Branch: refs/heads/beeline-cli
Commit: 78e701590f1cb8b8b07a4871280a31f7c3d35034
Parents: 5aa16ec
Author: Hari Subramaniyan <ha...@apache.org>
Authored: Mon Aug 31 11:55:07 2015 -0700
Committer: Hari Subramaniyan <ha...@apache.org>
Committed: Mon Aug 31 11:55:07 2015 -0700

----------------------------------------------------------------------
 .../java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java   | 2 ++
 1 file changed, 2 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/78e70159/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java
index 8555c6a..568ebbe 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java
@@ -44,6 +44,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.shims.ShimLoader;
 import org.apache.hadoop.hive.shims.Utils;
@@ -165,6 +166,7 @@ public class TezSessionState {
     // generate basic tez config
     TezConfiguration tezConfig = new TezConfiguration(conf);
     tezConfig.set(TezConfiguration.TEZ_AM_STAGING_DIR, tezScratchDir.toUri().toString());
+    Utilities.stripHivePasswordDetails(tezConfig);
 
     if (HiveConf.getBoolVar(conf, ConfVars.HIVE_PREWARM_ENABLED)) {
       int n = HiveConf.getIntVar(conf, ConfVars.HIVE_PREWARM_NUM_CONTAINERS);
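
Utilities.stripHivePasswordDetails is Hive's own helper in ql's Utilities class; the call keeps credential-bearing properties out of the configuration handed to the Tez AM. Its exact behavior is defined there, but the general shape of such a scrubber -- a conceptual sketch, not the committed implementation -- is:

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;

public final class PasswordStripDemo {
  static void stripPasswords(Configuration conf) {
    List<String> toUnset = new ArrayList<String>();
    // Configuration is Iterable over its key/value entries:
    for (Map.Entry<String, String> e : conf) {
      // The matching rule here is illustrative; the real helper targets
      // specific Hive properties such as the metastore JDBC password.
      if (e.getKey().toLowerCase().contains("password")) {
        toUnset.add(e.getKey());
      }
    }
    for (String key : toUnset) {
      conf.unset(key);  // Configuration.unset removes the key entirely
    }
  }
}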