Posted to commits@hive.apache.org by se...@apache.org on 2015/09/04 04:54:50 UTC
[01/28] hive git commit: Patch HIVE-11536
Repository: hive
Updated Branches:
refs/heads/llap 0a2036980 -> 772c4b90f
Patch HIVE-11536
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/da95f633
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/da95f633
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/da95f633
Branch: refs/heads/llap
Commit: da95f6336acf9ab6a660f17c2a7555587f903038
Parents: ab03dc9
Author: Dmitry Tolpeko <dm...@gmail.com>
Authored: Thu Aug 20 09:30:19 2015 -0700
Committer: Dmitry Tolpeko <dm...@gmail.com>
Committed: Thu Aug 20 09:30:19 2015 -0700
----------------------------------------------------------------------
.../antlr4/org/apache/hive/hplsql/Hplsql.g4 | 85 +++++++++--
.../java/org/apache/hive/hplsql/Column.java | 65 +++++++++
.../main/java/org/apache/hive/hplsql/Exec.java | 142 +++++++++++++++----
.../java/org/apache/hive/hplsql/Expression.java | 6 +
.../main/java/org/apache/hive/hplsql/Meta.java | 118 +++++++++++++++
.../main/java/org/apache/hive/hplsql/Row.java | 97 +++++++++++++
.../java/org/apache/hive/hplsql/Select.java | 16 ++-
.../main/java/org/apache/hive/hplsql/Stmt.java | 73 ++++++----
.../main/java/org/apache/hive/hplsql/Var.java | 37 ++++-
.../apache/hive/hplsql/functions/Function.java | 13 ++
.../org/apache/hive/hplsql/TestHplsqlLocal.java | 7 +-
.../apache/hive/hplsql/TestHplsqlOffline.java | 2 +-
.../src/test/queries/db/rowtype_attribute.sql | 22 +++
hplsql/src/test/queries/db/type_attribute.sql | 8 ++
.../local/create_procedure_no_params.sql | 19 +++
.../test/queries/offline/create_table_ora.sql | 55 ++++++-
.../test/results/db/rowtype_attribute.out.txt | 42 ++++++
.../src/test/results/db/type_attribute.out.txt | 15 ++
.../local/create_procedure_no_params.out.txt | 26 ++++
.../results/offline/create_table_ora.out.txt | 38 +++++
20 files changed, 802 insertions(+), 84 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/da95f633/hplsql/src/main/antlr4/org/apache/hive/hplsql/Hplsql.g4
----------------------------------------------------------------------
diff --git a/hplsql/src/main/antlr4/org/apache/hive/hplsql/Hplsql.g4 b/hplsql/src/main/antlr4/org/apache/hive/hplsql/Hplsql.g4
index 6027206..ff772fe 100644
--- a/hplsql/src/main/antlr4/org/apache/hive/hplsql/Hplsql.g4
+++ b/hplsql/src/main/antlr4/org/apache/hive/hplsql/Hplsql.g4
@@ -138,7 +138,7 @@ break_stmt :
;
call_stmt :
- T_CALL ident (T_OPEN_P expr_func_params T_CLOSE_P | expr_func_params)?
+ T_CALL ident (T_OPEN_P expr_func_params? T_CLOSE_P | expr_func_params)?
;
declare_stmt : // Declaration statement
@@ -211,11 +211,18 @@ create_table_column_inline_cons :
dtype_default
| T_NOT? T_NULL
| T_PRIMARY T_KEY
+ | T_UNIQUE
+ | T_REFERENCES table_name T_OPEN_P ident T_CLOSE_P create_table_fk_action*
| T_IDENTITY T_OPEN_P L_INT (T_COMMA L_INT)* T_CLOSE_P
;
create_table_column_cons :
- T_PRIMARY T_KEY T_CLUSTERED? T_OPEN_P ident (T_ASC | T_DESC)? (T_COMMA ident (T_ASC | T_DESC)?)* T_CLOSE_P index_storage_clause?
+ T_PRIMARY T_KEY T_CLUSTERED? T_OPEN_P ident (T_ASC | T_DESC)? (T_COMMA ident (T_ASC | T_DESC)?)* T_CLOSE_P index_storage_clause?
+ | T_FOREIGN T_KEY T_OPEN_P ident (T_COMMA ident)* T_CLOSE_P T_REFERENCES table_name T_OPEN_P ident (T_COMMA ident)* T_CLOSE_P create_table_fk_action*
+ ;
+
+create_table_fk_action :
+ T_ON (T_UPDATE | T_DELETE) (T_NO T_ACTION | T_RESTRICT | T_SET T_NULL | T_SET T_DEFAULT | T_CASCADE)
;
create_table_options :
@@ -224,11 +231,21 @@ create_table_options :
create_table_options_item :
T_ON T_COMMIT (T_DELETE | T_PRESERVE) T_ROWS
+ | create_table_options_ora_item
| create_table_options_db2_item
| create_table_options_hive_item
| create_table_options_mssql_item
;
+create_table_options_ora_item :
+ T_SEGMENT T_CREATION (T_IMMEDIATE | T_DEFERRED)
+ | (T_PCTFREE | T_PCTUSED | T_INITRANS | T_MAXTRANS) L_INT
+ | T_NOCOMPRESS
+ | (T_LOGGING | T_NOLOGGING)
+ | T_STORAGE T_OPEN_P (ident | L_INT)+ T_CLOSE_P
+ | T_TABLESPACE ident
+ ;
+
create_table_options_db2_item :
T_IN ident
| T_WITH T_REPLACE
@@ -282,11 +299,11 @@ dtype : // Data types
| T_TINYINT
| T_VARCHAR
| T_VARCHAR2
- | L_ID // User-defined data type
+ | L_ID ('%' (T_TYPE | T_ROWTYPE))? // User-defined or derived data type
;
dtype_len : // Data type length or size specification
- T_OPEN_P (L_INT | T_MAX) (T_COMMA L_INT)? T_CLOSE_P
+ T_OPEN_P (L_INT | T_MAX) (T_CHAR | T_BYTE)? (T_COMMA L_INT)? T_CLOSE_P
;
dtype_attr :
@@ -300,7 +317,7 @@ dtype_default : // Default clause in variable declaration
;
create_function_stmt :
- (T_ALTER | T_CREATE (T_OR T_REPLACE)? | T_REPLACE) T_FUNCTION ident create_routine_params? create_function_return (T_AS | T_IS)? single_block_stmt
+ (T_ALTER | T_CREATE (T_OR T_REPLACE)? | T_REPLACE)? T_FUNCTION ident create_routine_params? create_function_return (T_AS | T_IS)? single_block_stmt
;
create_function_return :
@@ -308,7 +325,7 @@ create_function_return :
;
create_procedure_stmt :
- (T_ALTER | T_CREATE (T_OR T_REPLACE)? | T_REPLACE) (T_PROCEDURE | T_PROC) ident create_routine_params? create_routine_options? (T_AS | T_IS)? label? proc_block (ident T_SEMICOLON)?
+ (T_ALTER | T_CREATE (T_OR T_REPLACE)? | T_REPLACE)? (T_PROCEDURE | T_PROC) ident create_routine_params? create_routine_options? (T_AS | T_IS)? label? proc_block (ident T_SEMICOLON)?
;
create_routine_params :
@@ -598,7 +615,7 @@ select_list_item :
;
select_list_alias :
- T_AS? L_ID
+ {!_input.LT(1).getText().equalsIgnoreCase("FROM")}? T_AS? ident
| T_OPEN_P T_TITLE L_S_STRING T_CLOSE_P
;
@@ -899,7 +916,7 @@ expr_func_params :
;
func_param :
- (ident T_EQUAL T_GREATER?)? expr
+ {!_input.LT(1).getText().equalsIgnoreCase("INTO")}? (ident T_EQUAL T_GREATER?)? expr
;
hive :
@@ -963,7 +980,8 @@ null_const : // NULL constant
;
non_reserved_words : // Tokens that are not reserved words and can be used as identifiers
- T_ACTIVITY_COUNT
+ T_ACTION
+ | T_ACTIVITY_COUNT
| T_ALL
| T_ALLOCATE
| T_ALTER
@@ -982,8 +1000,10 @@ non_reserved_words : // Tokens that are not reserved words
| T_BIT
| T_BREAK
| T_BY
+ | T_BYTE
| T_CALL
- | T_CALLER
+ | T_CALLER
+ | T_CASCADE
| T_CASE
| T_CASESPECIFIC
| T_CAST
@@ -1002,6 +1022,7 @@ non_reserved_words : // Tokens that are not reserved words
| T_COUNT
| T_COUNT_BIG
| T_CREATE
+ | T_CREATION
| T_CREATOR
| T_CS
| T_CUME_DIST
@@ -1019,6 +1040,7 @@ non_reserved_words : // Tokens that are not reserved words
| T_DECIMAL
| T_DECLARE
| T_DEFAULT
+ | T_DEFERRED
| T_DEFINED
| T_DEFINER
| T_DELETE
@@ -1049,6 +1071,7 @@ non_reserved_words : // Tokens that are not reserved words
| T_FIRST_VALUE
| T_FLOAT
| T_FOR
+ | T_FOREIGN
| T_FORMAT
| T_FOUND
| T_FROM
@@ -1071,6 +1094,7 @@ non_reserved_words : // Tokens that are not reserved words
| T_IN
| T_INCLUDE
| T_INDEX
+ | T_INITRANS
| T_INNER
| T_INOUT
| T_INSERT
@@ -1097,11 +1121,13 @@ non_reserved_words : // Tokens that are not reserved words
| T_LOCAL
| T_LOCATOR
| T_LOCATORS
- | T_LOGGED
+ | T_LOGGED
+ | T_LOGGING
| T_LOOP
| T_MAP
| T_MATCHED
- | T_MAX
+ | T_MAX
+ | T_MAXTRANS
| T_MERGE
| T_MESSAGE_TEXT
| T_MICROSECOND
@@ -1110,7 +1136,10 @@ non_reserved_words : // Tokens that are not reserved words
| T_MULTISET
| T_NCHAR
| T_NVARCHAR
+ | T_NO
+ | T_NOCOMPRESS
| T_NOCOUNT
+ | T_NOLOGGING
| T_NOT
| T_NOTFOUND
// T_NULL reserved word
@@ -1129,18 +1158,22 @@ non_reserved_words : // Tokens that are not reserved words
| T_OVERWRITE
| T_OWNER
| T_PART_LOC
- | T_PARTITION
+ | T_PARTITION
+ | T_PCTFREE
+ | T_PCTUSED
| T_PRESERVE
| T_PRIMARY
| T_PRINT
| T_PROC
| T_PROCEDURE
| T_QUOTED_IDENTIFIER
- | T_RANK
+ | T_RANK
+ | T_REFERENCES
| T_REGEXP
| T_RR
| T_REPLACE
| T_RESIGNAL
+ | T_RESTRICT
| T_RESULT
| T_RESULT_SET_LOCATOR
| T_RETURN
@@ -1156,6 +1189,7 @@ non_reserved_words : // Tokens that are not reserved words
| T_ROW_NUMBER
| T_SCHEMA
| T_SECURITY
+ | T_SEGMENT
| T_SEL
| T_SELECT
| T_SET
@@ -1170,12 +1204,14 @@ non_reserved_words : // Tokens that are not reserved words
| T_SQLWARNING
| T_STEP
| T_STDEV
+ | T_STORAGE
| T_STRING
| T_SUBSTRING
| T_SUM
| T_SYSDATE
| T_SYS_REFCURSOR
| T_TABLE
+ | T_TABLESPACE
| T_TEMPORARY
| T_TERMINATED
| T_TEXTIMAGE_ON
@@ -1210,6 +1246,7 @@ non_reserved_words : // Tokens that are not reserved words
;
// Lexer rules
+T_ACTION : A C T I O N ;
T_ALL : A L L ;
T_ALLOCATE : A L L O C A T E ;
T_ALTER : A L T E R ;
@@ -1228,8 +1265,10 @@ T_BIGINT : B I G I N T ;
T_BIT : B I T ;
T_BREAK : B R E A K ;
T_BY : B Y ;
+T_BYTE : B Y T E ;
T_CALL : C A L L ;
T_CALLER : C A L L E R ;
+T_CASCADE : C A S C A D E ;
T_CASE : C A S E ;
T_CASESPECIFIC : C A S E S P E C I F I C ;
T_CAST : C A S T ;
@@ -1248,6 +1287,7 @@ T_CONTINUE : C O N T I N U E ;
T_COUNT : C O U N T ;
T_COUNT_BIG : C O U N T '_' B I G;
T_CREATE : C R E A T E ;
+T_CREATION : C R E A T I O N ;
T_CREATOR : C R E A T O R ;
T_CS : C S;
T_CURRENT : C U R R E N T ;
@@ -1261,6 +1301,7 @@ T_DEC : D E C ;
T_DECIMAL : D E C I M A L ;
T_DECLARE : D E C L A R E ;
T_DEFAULT : D E F A U L T ;
+T_DEFERRED : D E F E R R E D ;
T_DEFINED : D E F I N E D ;
T_DEFINER : D E F I N E R ;
T_DELETE : D E L E T E ;
@@ -1289,6 +1330,7 @@ T_FIELDS : F I E L D S ;
T_FILE : F I L E ;
T_FLOAT : F L O A T ;
T_FOR : F O R ;
+T_FOREIGN : F O R E I G N ;
T_FORMAT : F O R M A T ;
T_FOUND : F O U N D ;
T_FROM : F R O M ;
@@ -1311,6 +1353,7 @@ T_IMMEDIATE : I M M E D I A T E ;
T_IN : I N ;
T_INCLUDE : I N C L U D E ;
T_INDEX : I N D E X ;
+T_INITRANS : I N I T R A N S ;
T_INNER : I N N E R ;
T_INOUT : I N O U T;
T_INSERT : I N S E R T ;
@@ -1335,10 +1378,12 @@ T_LOCAL : L O C A L ;
T_LOCATOR : L O C A T O R ;
T_LOCATORS : L O C A T O R S ;
T_LOGGED : L O G G E D ;
+T_LOGGING : L O G G I N G ;
T_LOOP : L O O P ;
T_MAP : M A P ;
T_MATCHED : M A T C H E D ;
T_MAX : M A X ;
+T_MAXTRANS : M A X T R A N S ;
T_MERGE : M E R G E ;
T_MESSAGE_TEXT : M E S S A G E '_' T E X T ;
T_MICROSECOND : M I C R O S E C O N D ;
@@ -1347,7 +1392,10 @@ T_MIN : M I N ;
T_MULTISET : M U L T I S E T ;
T_NCHAR : N C H A R ;
T_NVARCHAR : N V A R C H A R ;
+T_NO : N O ;
T_NOCOUNT : N O C O U N T ;
+T_NOCOMPRESS : N O C O M P R E S S ;
+T_NOLOGGING : N O L O G G I N G ;
T_NOT : N O T ;
T_NOTFOUND : N O T F O U N D ;
T_NULL : N U L L ;
@@ -1366,15 +1414,19 @@ T_OVER : O V E R ;
T_OVERWRITE : O V E R W R I T E ;
T_OWNER : O W N E R ;
T_PARTITION : P A R T I T I O N ;
+T_PCTFREE : P C T F R E E ;
+T_PCTUSED : P C T U S E D ;
T_PRESERVE : P R E S E R V E ;
T_PRIMARY : P R I M A R Y ;
T_PRINT : P R I N T ;
T_PROC : P R O C ;
T_PROCEDURE : P R O C E D U R E;
T_QUOTED_IDENTIFIER : Q U O T E D '_' I D E N T I F I E R ;
+T_REFERENCES : R E F E R E N C E S ;
T_REGEXP : R E G E X P ;
T_REPLACE : R E P L A C E ;
T_RESIGNAL : R E S I G N A L ;
+T_RESTRICT : R E S T R I C T ;
T_RESULT : R E S U L T ;
T_RESULT_SET_LOCATOR : R E S U L T '_' S E T '_' L O C A T O R ;
T_RETURN : R E T U R N ;
@@ -1385,12 +1437,14 @@ T_RLIKE : R L I K E ;
T_ROLLBACK : R O L L B A C K ;
T_ROW : R O W ;
T_ROWS : R O W S ;
+T_ROWTYPE : R O W T Y P E ;
T_ROW_COUNT : R O W '_' C O U N T ;
T_RR : R R;
T_RS : R S ;
T_TRIM : T R I M ;
T_SCHEMA : S C H E M A ;
T_SECURITY : S E C U R I T Y ;
+T_SEGMENT : S E G M E N T ;
T_SEL : S E L ;
T_SELECT : S E L E C T ;
T_SET : S E T ;
@@ -1404,11 +1458,13 @@ T_SQLINSERT : S Q L I N S E R T ;
T_SQLSTATE : S Q L S T A T E ;
T_SQLWARNING : S Q L W A R N I N G ;
T_STEP : S T E P ;
+T_STORAGE : S T O R A G E ;
T_STRING : S T R I N G ;
T_SUBSTRING : S U B S T R I N G ;
T_SUM : S U M ;
T_SYS_REFCURSOR : S Y S '_' R E F C U R S O R ;
T_TABLE : T A B L E ;
+T_TABLESPACE : T A B L E S P A C E ;
T_TEMPORARY : T E M P O R A R Y ;
T_TERMINATED : T E R M I N A T E D ;
T_TEXTIMAGE_ON : T E X T I M A G E '_' O N ;
@@ -1418,6 +1474,7 @@ T_TINYINT : T I N Y I N T ;
T_TITLE : T I T L E ;
T_TO : T O ;
T_TOP : T O P ;
+T_TYPE : T Y P E ;
T_UNION : U N I O N ;
T_UNIQUE : U N I Q U E ;
T_UPDATE : U P D A T E ;
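For illustration (not part of this commit), a minimal sketch of parsing the newly accepted syntax through the generated ANTLR classes. HplsqlLexer and HplsqlParser are the classes ANTLR 4 generates from this grammar, and program() is assumed to be the start rule, as Exec.java uses it.

  import org.antlr.v4.runtime.ANTLRInputStream;
  import org.antlr.v4.runtime.CommonTokenStream;
  import org.apache.hive.hplsql.HplsqlLexer;
  import org.apache.hive.hplsql.HplsqlParser;

  public class GrammarSmoke {
    public static void main(String[] args) {
      // Statements this patch makes parseable: CALL with empty parentheses,
      // %TYPE/%ROWTYPE derived declarations, and foreign key clauses.
      String[] samples = {
        "CALL sp1();",
        "DECLARE v1 src.key%TYPE;",
        "DECLARE v2 src%ROWTYPE;",
        "CREATE TABLE t (c1 INT, FOREIGN KEY (c1) REFERENCES p (id) ON DELETE CASCADE);"
      };
      for (String s : samples) {
        HplsqlLexer lexer = new HplsqlLexer(new ANTLRInputStream(s));
        HplsqlParser parser = new HplsqlParser(new CommonTokenStream(lexer));
        parser.program();  // reports any syntax error on stderr
      }
    }
  }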
http://git-wip-us.apache.org/repos/asf/hive/blob/da95f633/hplsql/src/main/java/org/apache/hive/hplsql/Column.java
----------------------------------------------------------------------
diff --git a/hplsql/src/main/java/org/apache/hive/hplsql/Column.java b/hplsql/src/main/java/org/apache/hive/hplsql/Column.java
new file mode 100644
index 0000000..252a870
--- /dev/null
+++ b/hplsql/src/main/java/org/apache/hive/hplsql/Column.java
@@ -0,0 +1,65 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.hplsql;
+
+/**
+ * Table column
+ */
+public class Column {
+
+ String name;
+ String type;
+ Var value;
+
+ Column(String name, String type) {
+ this.name = name;
+ this.type = type;
+ }
+
+ /**
+ * Set the column value
+ */
+ void setValue(Var value) {
+ this.value = value;
+ }
+
+ /**
+ * Get the column name
+ */
+ String getName() {
+ return name;
+ }
+
+ /**
+ * Get the column type
+ */
+ String getType() {
+ return type;
+ }
+
+ /**
+ * Get the column value
+ */
+ Var getValue() {
+ return value;
+ }
+}
+
+
+
http://git-wip-us.apache.org/repos/asf/hive/blob/da95f633/hplsql/src/main/java/org/apache/hive/hplsql/Exec.java
----------------------------------------------------------------------
diff --git a/hplsql/src/main/java/org/apache/hive/hplsql/Exec.java b/hplsql/src/main/java/org/apache/hive/hplsql/Exec.java
index 268c218..f5592e1 100644
--- a/hplsql/src/main/java/org/apache/hive/hplsql/Exec.java
+++ b/hplsql/src/main/java/org/apache/hive/hplsql/Exec.java
@@ -30,6 +30,8 @@ import java.util.Map.Entry;
import java.util.Stack;
import java.util.Iterator;
import java.sql.Connection;
+import java.sql.ResultSet;
+import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import org.antlr.v4.runtime.ANTLRInputStream;
@@ -48,7 +50,7 @@ import org.apache.hive.hplsql.functions.*;
*/
public class Exec extends HplsqlBaseVisitor<Integer> {
- public static final String VERSION = "HPL/SQL 0.3.11";
+ public static final String VERSION = "HPL/SQL 0.3.13";
public static final String SQLCODE = "SQLCODE";
public static final String SQLSTATE = "SQLSTATE";
public static final String HOSTCODE = "HOSTCODE";
@@ -96,6 +98,7 @@ public class Exec extends HplsqlBaseVisitor<Integer> {
StringBuilder localUdf = new StringBuilder();
boolean initRoutines = false;
public boolean buildSql = false;
+ public boolean inCallStmt = false;
boolean udfRegistered = false;
boolean udfRun = false;
@@ -285,16 +288,29 @@ public class Exec extends HplsqlBaseVisitor<Integer> {
* Find an existing variable by name
*/
public Var findVariable(String name) {
- Scope cur = exec.currentScope;
+ String name1 = name;
+ Scope cur = exec.currentScope;
+ ArrayList<String> qualified = exec.meta.splitIdentifier(name);
+ if (qualified != null) {
+ name1 = qualified.get(0);
+ }
String name2 = null;
if (name.startsWith(":")) {
name2 = name.substring(1);
- }
+ }
while (cur != null) {
for (Var v : cur.vars) {
- if (name.equalsIgnoreCase(v.getName()) ||
+ if (name1.equalsIgnoreCase(v.getName()) ||
(name2 != null && name2.equalsIgnoreCase(v.getName()))) {
- return v;
+ if (qualified != null) {
+ if (v.type == Var.Type.ROW && v.value != null) {
+ Row row = (Row)v.value;
+ return row.getValue(qualified.get(1));
+ }
+ }
+ else {
+ return v;
+ }
}
}
cur = cur.parent;
@@ -675,7 +691,7 @@ public class Exec extends HplsqlBaseVisitor<Integer> {
conf = new Conf();
conf.init();
conn = new Conn(this);
- meta = new Meta();
+ meta = new Meta(this);
initOptions();
expr = new Expression(this);
@@ -1024,37 +1040,72 @@ public class Exec extends HplsqlBaseVisitor<Integer> {
*/
@Override
public Integer visitDeclare_var_item(HplsqlParser.Declare_var_itemContext ctx) {
- String type = getFormattedText(ctx.dtype());
+ String type = null;
+ Row row = null;
String len = null;
String scale = null;
Var default_ = null;
- if (ctx.dtype_len() != null) {
- len = ctx.dtype_len().L_INT(0).getText();
- if (ctx.dtype_len().L_INT(1) != null) {
- scale = ctx.dtype_len().L_INT(1).getText();
+ if (ctx.dtype().T_ROWTYPE() != null) {
+ row = meta.getRowDataType(ctx, exec.conf.defaultConnection, ctx.dtype().L_ID().getText());
+ if (row == null) {
+ type = Var.DERIVED_ROWTYPE;
+ }
+ }
+ else {
+ type = getDataType(ctx);
+ if (ctx.dtype_len() != null) {
+ len = ctx.dtype_len().L_INT(0).getText();
+ if (ctx.dtype_len().L_INT(1) != null) {
+ scale = ctx.dtype_len().L_INT(1).getText();
+ }
+ }
+ if (ctx.dtype_default() != null) {
+ default_ = evalPop(ctx.dtype_default());
}
- }
- if (ctx.dtype_default() != null) {
- default_ = evalPop(ctx.dtype_default());
}
int cnt = ctx.ident().size(); // Number of variables declared with the same data type and default
for (int i = 0; i < cnt; i++) {
String name = ctx.ident(i).getText();
- Var var = new Var(name, type, len, scale, default_);
- addVariable(var);
- if (trace) {
- if (default_ != null) {
- trace(ctx, "DECLARE " + name + " " + type + " = " + var.toSqlString());
- }
- else {
- trace(ctx, "DECLARE " + name + " " + type);
+ if (row == null) {
+ Var var = new Var(name, type, len, scale, default_);
+ addVariable(var);
+ if (trace) {
+ if (default_ != null) {
+ trace(ctx, "DECLARE " + name + " " + type + " = " + var.toSqlString());
+ }
+ else {
+ trace(ctx, "DECLARE " + name + " " + type);
+ }
}
}
+ else {
+ addVariable(new Var(name, row));
+ if (trace) {
+ trace(ctx, "DECLARE " + name + " " + ctx.dtype().getText());
+ }
+ }
}
return 0;
}
/**
+ * Get the variable data type
+ */
+ String getDataType(HplsqlParser.Declare_var_itemContext ctx) {
+ String type = null;
+ if (ctx.dtype().T_TYPE() != null) {
+ type = meta.getDataType(ctx, exec.conf.defaultConnection, ctx.dtype().L_ID().getText());
+ if (type == null) {
+ type = Var.DERIVED_TYPE;
+ }
+ }
+ else {
+ type = getFormattedText(ctx.dtype());
+ }
+ return type;
+ }
+
+ /**
* ALLOCATE CURSOR statement
*/
@Override
@@ -1179,6 +1230,11 @@ public class Exec extends HplsqlBaseVisitor<Integer> {
}
@Override
+ public Integer visitCreate_table_options_ora_item(HplsqlParser.Create_table_options_ora_itemContext ctx) {
+ return 0;
+ }
+
+ @Override
public Integer visitCreate_table_options_mssql_item(HplsqlParser.Create_table_options_mssql_itemContext ctx) {
return 0;
}
@@ -1457,16 +1513,24 @@ public class Exec extends HplsqlBaseVisitor<Integer> {
*/
@Override
public Integer visitExec_stmt(HplsqlParser.Exec_stmtContext ctx) {
- return exec.stmt.exec(ctx);
+ exec.inCallStmt = true;
+ Integer rc = exec.stmt.exec(ctx);
+ exec.inCallStmt = false;
+ return rc;
}
/**
* CALL statement
*/
@Override
- public Integer visitCall_stmt(HplsqlParser.Call_stmtContext ctx) {
- if (exec.function.execProc(ctx.expr_func_params(), ctx.ident().getText())) {
- return 0;
+ public Integer visitCall_stmt(HplsqlParser.Call_stmtContext ctx) {
+ try {
+ exec.inCallStmt = true;
+ if (exec.function.execProc(ctx.expr_func_params(), ctx.ident().getText())) {
+ return 0;
+ }
+ } finally {
+ exec.inCallStmt = false;
}
return -1;
}
@@ -1795,7 +1859,12 @@ public class Exec extends HplsqlBaseVisitor<Integer> {
}
}
else {
- exec.stackPush(new Var(Var.Type.IDENT, ident));
+ if (!exec.buildSql && !exec.inCallStmt && exec.function.isProc(ident) && exec.function.execProc(null, ident)) {
+ return 0;
+ }
+ else {
+ exec.stackPush(new Var(Var.Type.IDENT, ident));
+ }
}
return 0;
}
@@ -2041,6 +2110,25 @@ public class Exec extends HplsqlBaseVisitor<Integer> {
}
/**
+ * Trace values retrieved from the database
+ */
+ public void trace(ParserRuleContext ctx, Var var, ResultSet rs, ResultSetMetaData rm, int idx) throws SQLException {
+ if (var.type != Var.Type.ROW) {
+ trace(ctx, "COLUMN: " + rm.getColumnName(idx) + ", " + rm.getColumnTypeName(idx));
+ trace(ctx, "SET " + var.getName() + " = " + var.toString());
+ }
+ else {
+ Row row = (Row)var.value;
+ int cnt = row.size();
+ for (int j = 1; j <= cnt; j++) {
+ Var v = row.getValue(j - 1);
+ trace(ctx, "COLUMN: " + rm.getColumnName(j) + ", " + rm.getColumnTypeName(j));
+ trace(ctx, "SET " + v.getName() + " = " + v.toString());
+ }
+ }
+ }
+
+ /**
* Informational messages
*/
public void info(ParserRuleContext ctx, String message) {
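To make the new qualified lookup concrete, a sketch (hypothetical, placed in the same package so the package-private members resolve) of resolving a row field through findVariable:

  package org.apache.hive.hplsql;

  class RowLookupSketch {
    static void demo(Exec exec) {
      // Roughly what DECLARE v1 src%ROWTYPE sets up, minus the DESCRIBE call:
      Row row = new Row();
      row.addColumn("KEY", "string");    // Meta stores column names uppercased
      row.addColumn("VALUE", "string");
      exec.addVariable(new Var("v1", row));
      // findVariable splits "v1.key" via Meta.splitIdentifier, matches the
      // ROW variable "v1", and returns the KEY column's value Var; that
      // value stays null until SELECT INTO or FETCH calls Var.setValues.
      Var field = exec.findVariable("v1.key");
    }
  }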
http://git-wip-us.apache.org/repos/asf/hive/blob/da95f633/hplsql/src/main/java/org/apache/hive/hplsql/Expression.java
----------------------------------------------------------------------
diff --git a/hplsql/src/main/java/org/apache/hive/hplsql/Expression.java b/hplsql/src/main/java/org/apache/hive/hplsql/Expression.java
index f811626..7269798 100644
--- a/hplsql/src/main/java/org/apache/hive/hplsql/Expression.java
+++ b/hplsql/src/main/java/org/apache/hive/hplsql/Expression.java
@@ -327,6 +327,9 @@ public class Expression {
else if (v1.type == Type.TIMESTAMP && v2.type == Type.INTERVAL) {
exec.stackPush(new Var(((Interval)v2.value).timestampChange((Timestamp)v1.value, true /*add*/), v1.scale));
}
+ else {
+ evalNull();
+ }
}
/**
@@ -350,6 +353,9 @@ public class Expression {
else if (v1.type == Type.TIMESTAMP && v2.type == Type.INTERVAL) {
exec.stackPush(new Var(((Interval)v2.value).timestampChange((Timestamp)v1.value, false /*subtract*/), v1.scale));
}
+ else {
+ evalNull();
+ }
}
/**
http://git-wip-us.apache.org/repos/asf/hive/blob/da95f633/hplsql/src/main/java/org/apache/hive/hplsql/Meta.java
----------------------------------------------------------------------
diff --git a/hplsql/src/main/java/org/apache/hive/hplsql/Meta.java b/hplsql/src/main/java/org/apache/hive/hplsql/Meta.java
index 97d0fd2..485bcdf 100644
--- a/hplsql/src/main/java/org/apache/hive/hplsql/Meta.java
+++ b/hplsql/src/main/java/org/apache/hive/hplsql/Meta.java
@@ -18,12 +18,106 @@
package org.apache.hive.hplsql;
+import java.sql.ResultSet;
import java.util.ArrayList;
+import java.util.HashMap;
+
+import org.antlr.v4.runtime.ParserRuleContext;
/**
* Metadata
*/
public class Meta {
+
+ HashMap<String, HashMap<String, Row>> dataTypes = new HashMap<String, HashMap<String, Row>>();
+
+ Exec exec;
+ boolean trace = false;
+ boolean info = false;
+
+ Meta(Exec e) {
+ exec = e;
+ trace = exec.getTrace();
+ info = exec.getInfo();
+ }
+
+ /**
+ * Get the data type of column (column name is qualified i.e. schema.table.column)
+ */
+ String getDataType(ParserRuleContext ctx, String conn, String column) {
+ String type = null;
+ HashMap<String, Row> map = dataTypes.get(conn);
+ if (map == null) {
+ map = new HashMap<String, Row>();
+ dataTypes.put(conn, map);
+ }
+ ArrayList<String> twoparts = splitIdentifierToTwoParts(column);
+ if (twoparts != null) {
+ String tab = twoparts.get(0);
+ String col = twoparts.get(1).toUpperCase();
+ Row row = map.get(tab);
+ if (row != null) {
+ type = row.getType(col);
+ }
+ else {
+ row = readColumns(ctx, conn, tab, map);
+ if (row != null) {
+ type = row.getType(col);
+ }
+ }
+ }
+ return type;
+ }
+
+ /**
+ * Get data types for all columns of the table
+ */
+ Row getRowDataType(ParserRuleContext ctx, String conn, String table) {
+ HashMap<String, Row> map = dataTypes.get(conn);
+ if (map == null) {
+ map = new HashMap<String, Row>();
+ dataTypes.put(conn, map);
+ }
+ Row row = map.get(table);
+ if (row == null) {
+ row = readColumns(ctx, conn, table, map);
+ }
+ return row;
+ }
+
+ /**
+ * Read the column data from the database and cache it
+ */
+ Row readColumns(ParserRuleContext ctx, String conn, String table, HashMap<String, Row> map) {
+ Row row = null;
+ String sql = null;
+ Conn.Type connType = exec.getConnectionType(conn);
+ if (connType == Conn.Type.HIVE) {
+ sql = "DESCRIBE " + table;
+ }
+ if (sql != null) {
+ Query query = new Query(sql);
+ exec.executeQuery(ctx, query, conn);
+ if (!query.error()) {
+ ResultSet rs = query.getResultSet();
+ try {
+ while (rs.next()) {
+ String col = rs.getString(1);
+ String typ = rs.getString(2);
+ if (row == null) {
+ row = new Row();
+ }
+ row.addColumn(col.toUpperCase(), typ);
+ }
+ map.put(table, row);
+ }
+ catch (Exception e) {}
+ }
+ exec.closeQuery(query, conn);
+ }
+ return row;
+ }
+
/**
* Normalize identifier name (convert "" [] to `` i.e.)
*/
@@ -55,6 +149,30 @@ public class Meta {
}
/**
+ * Split qualified object to 2 parts: schema.tab.col -> schema.tab|col; tab.col -> tab|col
+ */
+ public ArrayList<String> splitIdentifierToTwoParts(String name) {
+ ArrayList<String> parts = splitIdentifier(name);
+ ArrayList<String> twoparts = null;
+ if (parts != null) {
+ StringBuilder id = new StringBuilder();
+ int i = 0;
+ for (; i < parts.size() - 1; i++) {
+ id.append(parts.get(i));
+ if (i + 1 < parts.size() - 1) {
+ id.append(".");
+ }
+ }
+ twoparts = new ArrayList<String>();
+ twoparts.add(id.toString());
+ id.setLength(0);
+ id.append(parts.get(i));
+ twoparts.add(id.toString());
+ }
+ return twoparts;
+ }
+
+ /**
* Split identifier to parts (schema, table, column name etc.)
* @return null if identifier contains single part
*/
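A quick sketch (not in the patch) of what splitIdentifierToTwoParts returns; the Exec argument is only needed for the Meta constructor:

  package org.apache.hive.hplsql;

  class MetaSplitSketch {
    static void demo(Exec exec) {
      Meta meta = new Meta(exec);
      meta.splitIdentifierToTwoParts("schema.tab.col");  // ["schema.tab", "col"]
      meta.splitIdentifierToTwoParts("tab.col");         // ["tab", "col"]
      meta.splitIdentifierToTwoParts("col");             // null: single part
    }
  }

This is what lets getDataType treat everything before the last dot as the table name when it looks up a column's type.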
http://git-wip-us.apache.org/repos/asf/hive/blob/da95f633/hplsql/src/main/java/org/apache/hive/hplsql/Row.java
----------------------------------------------------------------------
diff --git a/hplsql/src/main/java/org/apache/hive/hplsql/Row.java b/hplsql/src/main/java/org/apache/hive/hplsql/Row.java
new file mode 100644
index 0000000..deeacaf
--- /dev/null
+++ b/hplsql/src/main/java/org/apache/hive/hplsql/Row.java
@@ -0,0 +1,97 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.hplsql;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+
+/**
+ * Table row (all columns)
+ */
+public class Row {
+
+ ArrayList<Column> columns = new ArrayList<Column>();
+ HashMap<String, Column> columnMap = new HashMap<String, Column>();
+
+ /**
+ * Constructors
+ */
+ Row() {}
+
+ Row(Row row) {
+ for (Column c : row.columns) {
+ addColumn(c.name, c.type);
+ }
+ }
+
+ /**
+ * Add a column with specified data type
+ */
+ void addColumn(String name, String type) {
+ Column column = new Column(name, type);
+ columns.add(column);
+ columnMap.put(name, column);
+ }
+
+ /**
+ * Get the data type by column name
+ */
+ String getType(String name) {
+ Column column = columnMap.get(name);
+ if (column != null) {
+ return column.getType();
+ }
+ return null;
+ }
+
+ /**
+ * Get value by index
+ */
+ Var getValue(int i) {
+ return columns.get(i).getValue();
+ }
+
+ /**
+ * Get value by column name
+ */
+ Var getValue(String name) {
+ Column column = columnMap.get(name.toUpperCase());
+ if (column != null) {
+ return column.getValue();
+ }
+ return null;
+ }
+
+ /**
+ * Get columns
+ */
+ ArrayList<Column> getColumns() {
+ return columns;
+ }
+
+ /**
+ * Get the number of columns
+ */
+ int size() {
+ return columns.size();
+ }
+}
+
+
+
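For reference, a sketch (same package, since the members are package-private) of the new Row/Column container on its own:

  package org.apache.hive.hplsql;

  class RowSketch {
    static void demo() {
      Row row = new Row();
      row.addColumn("KEY", "string");    // names are stored as given, so
      row.addColumn("VALUE", "string");  // callers add them uppercased
      row.getColumns().get(0)
         .setValue(new Var("KEY", "string", null, null, null));
      int n = row.size();                // 2
      String t = row.getType("KEY");     // "string"
      Var v = row.getValue("key");       // lookup uppercases the name
      Row copy = new Row(row);           // copies columns, not their values
    }
  }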
http://git-wip-us.apache.org/repos/asf/hive/blob/da95f633/hplsql/src/main/java/org/apache/hive/hplsql/Select.java
----------------------------------------------------------------------
diff --git a/hplsql/src/main/java/org/apache/hive/hplsql/Select.java b/hplsql/src/main/java/org/apache/hive/hplsql/Select.java
index 32ca74a..71ca848 100644
--- a/hplsql/src/main/java/org/apache/hive/hplsql/Select.java
+++ b/hplsql/src/main/java/org/apache/hive/hplsql/Select.java
@@ -92,11 +92,15 @@ public class Select {
String into_name = getIntoVariable(ctx, i - 1);
Var var = exec.findVariable(into_name);
if (var != null) {
- var.setValue(rs, rm, i);
+ if (var.type != Var.Type.ROW) {
+ var.setValue(rs, rm, i);
+ }
+ else {
+ var.setValues(rs, rm);
+ }
if (trace) {
- trace(ctx, "COLUMN: " + rm.getColumnName(i) + ", " + rm.getColumnTypeName(i));
- trace(ctx, "SET " + var.getName() + " = " + var.toString());
- }
+ trace(ctx, var, rs, rm, i);
+ }
}
else {
trace(ctx, "Variable not found: " + into_name);
@@ -439,4 +443,8 @@ public class Select {
void trace(ParserRuleContext ctx, String message) {
exec.trace(ctx, message);
}
+
+ void trace(ParserRuleContext ctx, Var var, ResultSet rs, ResultSetMetaData rm, int idx) throws SQLException {
+ exec.trace(ctx, var, rs, rm, idx);
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/da95f633/hplsql/src/main/java/org/apache/hive/hplsql/Stmt.java
----------------------------------------------------------------------
diff --git a/hplsql/src/main/java/org/apache/hive/hplsql/Stmt.java b/hplsql/src/main/java/org/apache/hive/hplsql/Stmt.java
index 77c3c4f..6193f49 100644
--- a/hplsql/src/main/java/org/apache/hive/hplsql/Stmt.java
+++ b/hplsql/src/main/java/org/apache/hive/hplsql/Stmt.java
@@ -381,11 +381,15 @@ public class Stmt {
for(int i=1; i <= cols; i++) {
Var var = exec.findVariable(ctx.L_ID(i).getText());
if(var != null) {
- var.setValue(rs, rsm, i);
- if(trace) {
- trace(ctx, "COLUMN: " + rsm.getColumnName(i) + ", " + rsm.getColumnTypeName(i));
- trace(ctx, "SET " + var.getName() + " = " + var.toString());
- }
+ if (var.type != Var.Type.ROW) {
+ var.setValue(rs, rsm, i);
+ }
+ else {
+ var.setValues(rs, rsm);
+ }
+ if (trace) {
+ trace(ctx, var, rs, rsm, i);
+ }
}
else if(trace) {
trace(ctx, "Variable not found: " + ctx.L_ID(i).getText());
@@ -738,19 +742,16 @@ public class Stmt {
if (rs != null) {
ResultSetMetaData rm = rs.getMetaData();
int cols = rm.getColumnCount();
- Var[] vars = new Var[cols];
- for (int i = 0; i < cols; i++) {
- vars[i] = new Var();
- vars[i].setName(cursor + "." + rm.getColumnName(i + 1));
- vars[i].setType(rm.getColumnType(i + 1));
- exec.addVariable(vars[i]);
- if (trace) {
- trace(ctx, "Column: " + vars[i].getName() + " " + rm.getColumnTypeName(i + 1));
- }
- }
+ Row row = new Row();
+ for (int i = 1; i <= cols; i++) {
+ row.addColumn(rm.getColumnName(i), rm.getColumnTypeName(i));
+ }
+ Var var = new Var(cursor, row);
+ exec.addVariable(var);
while (rs.next()) {
- for (int i = 0; i < cols; i++) {
- vars[i].setValue(rs, rm, i + 1);
+ var.setValues(rs, rm);
+ if (trace) {
+ trace(ctx, var, rs, rm, 0);
}
visit(ctx.block());
exec.incRowCount();
@@ -817,21 +818,24 @@ public class Stmt {
ResultSet rs = query.getResultSet();
if (rs != null) {
try {
- ResultSetMetaData rsm = rs.getMetaData();
- // Assign to variables
- if(ctx.T_INTO() != null) {
+ ResultSetMetaData rm = rs.getMetaData();
+ if (ctx.T_INTO() != null) {
int cols = ctx.L_ID().size();
- if(rs.next()) {
- for(int i=0; i < cols; i++) {
+ if (rs.next()) {
+ for (int i = 0; i < cols; i++) {
Var var = exec.findVariable(ctx.L_ID(i).getText());
- if(var != null) {
- var.setValue(rs, rsm, i+1);
- if(trace) {
- trace(ctx, "COLUMN: " + rsm.getColumnName(i+1) + ", " + rsm.getColumnTypeName(i+1));
- trace(ctx, "SET " + var.getName() + " = " + var.toString());
+ if (var != null) {
+ if (var.type != Var.Type.ROW) {
+ var.setValue(rs, rm, i + 1);
+ }
+ else {
+ var.setValues(rs, rm);
+ }
+ if (trace) {
+ trace(ctx, var, rs, rm, i + 1);
}
}
- else if(trace) {
+ else if (trace) {
trace(ctx, "Variable not found: " + ctx.L_ID(i).getText());
}
}
@@ -840,7 +844,7 @@ public class Stmt {
}
// Print the results
else {
- int cols = rsm.getColumnCount();
+ int cols = rm.getColumnCount();
while(rs.next()) {
for(int i = 1; i <= cols; i++) {
if(i > 1) {
@@ -864,8 +868,11 @@ public class Stmt {
* EXEC to execute a stored procedure
*/
public Boolean execProc(HplsqlParser.Exec_stmtContext ctx) {
- if (exec.function.execProc(ctx.expr_func_params(), evalPop(ctx.expr()).toString())) {
- return true;
+ String name = evalPop(ctx.expr()).toString();
+ if (exec.function.isProc(name)) {
+ if (exec.function.execProc(ctx.expr_func_params(), name)) {
+ return true;
+ }
}
return false;
}
@@ -1118,4 +1125,8 @@ public class Stmt {
void trace(ParserRuleContext ctx, String message) {
exec.trace(ctx, message);
}
+
+ void trace(ParserRuleContext ctx, Var var, ResultSet rs, ResultSetMetaData rm, int idx) throws SQLException {
+ exec.trace(ctx, var, rs, rm, idx);
+ }
}
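The FOR-cursor rewrite above boils down to this sketch (hypothetical helper, same package): a single ROW variable named after the cursor replaces the per-column Vars that were registered before:

  package org.apache.hive.hplsql;

  import java.sql.ResultSetMetaData;
  import java.sql.SQLException;

  class CursorRowSketch {
    static Var bind(String cursor, ResultSetMetaData rm) throws SQLException {
      Row row = new Row();
      for (int i = 1; i <= rm.getColumnCount(); i++) {
        row.addColumn(rm.getColumnName(i), rm.getColumnTypeName(i));
      }
      // rec.colname references then resolve through Exec.findVariable, and
      // Var.setValues refreshes every column on each fetched row.
      return new Var(cursor, row);
    }
  }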
http://git-wip-us.apache.org/repos/asf/hive/blob/da95f633/hplsql/src/main/java/org/apache/hive/hplsql/Var.java
----------------------------------------------------------------------
diff --git a/hplsql/src/main/java/org/apache/hive/hplsql/Var.java b/hplsql/src/main/java/org/apache/hive/hplsql/Var.java
index 95eb526..b31a14d 100644
--- a/hplsql/src/main/java/org/apache/hive/hplsql/Var.java
+++ b/hplsql/src/main/java/org/apache/hive/hplsql/Var.java
@@ -32,7 +32,10 @@ import java.sql.Timestamp;
public class Var {
// Data types
- public enum Type {BOOL, CURSOR, DATE, DECIMAL, FILE, IDENT, BIGINT, INTERVAL, RS_LOCATOR, STRING, STRINGLIST, TIMESTAMP, NULL};
+ public enum Type {BOOL, CURSOR, DATE, DECIMAL, DERIVED_TYPE, DERIVED_ROWTYPE, FILE, IDENT, BIGINT, INTERVAL, ROW,
+ RS_LOCATOR, STRING, STRINGLIST, TIMESTAMP, NULL};
+ public static final String DERIVED_TYPE = "DERIVED%TYPE";
+ public static final String DERIVED_ROWTYPE = "DERIVED%ROWTYPE";
public static Var Empty = new Var();
public static Var Null = new Var(Type.NULL);
@@ -102,6 +105,12 @@ public class Var {
value = b;
}
+ public Var(String name, Row row) {
+ this.name = name;
+ this.type = Type.ROW;
+ this.value = new Row(row);
+ }
+
public Var(Type type, String name) {
this.type = type;
this.name = name;
@@ -140,9 +149,13 @@ public class Var {
* Cast a new value to the variable
*/
public Var cast(Var val) {
- if (val == null || val.value == null) {
+ if (val == null || val.value == null) {
value = null;
}
+ else if (type == Type.DERIVED_TYPE) {
+ type = val.type;
+ value = val.value;
+ }
else if (type == val.type && type == Type.STRING) {
cast((String)val.value);
}
@@ -211,7 +224,7 @@ public class Var {
}
/**
- * Set the new value from a result set
+ * Set the new value from the result set
*/
public Var setValue(ResultSet rs, ResultSetMetaData rsm, int idx) throws SQLException {
int type = rsm.getColumnType(idx);
@@ -227,6 +240,21 @@ public class Var {
}
return this;
}
+
+ /**
+ * Set ROW values from the result set
+ */
+ public Var setValues(ResultSet rs, ResultSetMetaData rsm) throws SQLException {
+ Row row = (Row)this.value;
+ int idx = 1;
+ for (Column column : row.getColumns()) {
+ Var var = new Var(column.getName(), column.getType(), null, null, null);
+ var.setValue(rs, rsm, idx);
+ column.setValue(var);
+ idx++;
+ }
+ return this;
+ }
/**
* Set the data type from string representation
@@ -274,6 +302,9 @@ public class Var {
else if (type.toUpperCase().startsWith("RESULT_SET_LOCATOR")) {
return Type.RS_LOCATOR;
}
+ else if (type.equalsIgnoreCase(Var.DERIVED_TYPE)) {
+ return Type.DERIVED_TYPE;
+ }
return Type.NULL;
}
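A sketch (same package) of the deferred typing this enables: a DERIVED%TYPE variable takes its type from the first value assigned to it, which is the fallback when the column's type cannot be read from the database:

  package org.apache.hive.hplsql;

  class DerivedTypeSketch {
    static void demo() {
      // DECLARE v3 tab.col%TYPE when tab.col's type is not yet known:
      Var v3 = new Var("v3", Var.DERIVED_TYPE, null, null, null);
      // v3.type is Type.DERIVED_TYPE until the first assignment;
      // Var(String) is the existing STRING-value constructor:
      v3.cast(new Var("A"));  // now v3.type == Type.STRING, value "A"
    }
  }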
http://git-wip-us.apache.org/repos/asf/hive/blob/da95f633/hplsql/src/main/java/org/apache/hive/hplsql/functions/Function.java
----------------------------------------------------------------------
diff --git a/hplsql/src/main/java/org/apache/hive/hplsql/functions/Function.java b/hplsql/src/main/java/org/apache/hive/hplsql/functions/Function.java
index ab0814d..ae7acae 100644
--- a/hplsql/src/main/java/org/apache/hive/hplsql/functions/Function.java
+++ b/hplsql/src/main/java/org/apache/hive/hplsql/functions/Function.java
@@ -71,6 +71,9 @@ public class Function {
if (execUser(ctx, name)) {
return;
}
+ else if (isProc(name) && execProc(ctx, name)) {
+ return;
+ }
if (name.indexOf(".") != -1) { // Name can be qualified and spaces are allowed between parts
String[] parts = name.split("\\.");
StringBuilder str = new StringBuilder();
@@ -201,6 +204,16 @@ public class Function {
}
/**
+ * Check if the stored procedure with the specified name is defined
+ */
+ public boolean isProc(String name) {
+ if (procMap.get(name.toUpperCase()) != null) {
+ return true;
+ }
+ return false;
+ }
+
+ /**
* Execute a stored procedure using CALL or EXEC statement passing parameters
*/
public boolean execProc(HplsqlParser.Expr_func_paramsContext ctx, String name) {
http://git-wip-us.apache.org/repos/asf/hive/blob/da95f633/hplsql/src/test/java/org/apache/hive/hplsql/TestHplsqlLocal.java
----------------------------------------------------------------------
diff --git a/hplsql/src/test/java/org/apache/hive/hplsql/TestHplsqlLocal.java b/hplsql/src/test/java/org/apache/hive/hplsql/TestHplsqlLocal.java
index 4a08a18..6a67cd0 100644
--- a/hplsql/src/test/java/org/apache/hive/hplsql/TestHplsqlLocal.java
+++ b/hplsql/src/test/java/org/apache/hive/hplsql/TestHplsqlLocal.java
@@ -92,6 +92,11 @@ public class TestHplsqlLocal {
public void testCreateProcedure() throws Exception {
run("create_procedure");
}
+
+ @Test
+ public void testCreateProcedureNoParams() throws Exception {
+ run("create_procedure_no_params");
+ }
@Test
public void testDate() throws Exception {
@@ -301,7 +306,7 @@ public class TestHplsqlLocal {
BufferedReader reader = new BufferedReader(new StringReader(s));
String line = null;
while ((line = reader.readLine()) != null) {
- if (!line.startsWith("log4j:")) {
+ if (!line.startsWith("log4j:") && !line.contains("INFO Log4j")) {
sb.append(line);
sb.append("\n");
}
http://git-wip-us.apache.org/repos/asf/hive/blob/da95f633/hplsql/src/test/java/org/apache/hive/hplsql/TestHplsqlOffline.java
----------------------------------------------------------------------
diff --git a/hplsql/src/test/java/org/apache/hive/hplsql/TestHplsqlOffline.java b/hplsql/src/test/java/org/apache/hive/hplsql/TestHplsqlOffline.java
index 0063cac..eeaa395 100644
--- a/hplsql/src/test/java/org/apache/hive/hplsql/TestHplsqlOffline.java
+++ b/hplsql/src/test/java/org/apache/hive/hplsql/TestHplsqlOffline.java
@@ -66,7 +66,7 @@ public class TestHplsqlOffline {
BufferedReader reader = new BufferedReader(new StringReader(s));
String line = null;
while ((line = reader.readLine()) != null) {
- if (!line.startsWith("log4j:")) {
+ if (!line.startsWith("log4j:") && !line.contains("INFO Log4j")) {
sb.append(line);
sb.append("\n");
}
http://git-wip-us.apache.org/repos/asf/hive/blob/da95f633/hplsql/src/test/queries/db/rowtype_attribute.sql
----------------------------------------------------------------------
diff --git a/hplsql/src/test/queries/db/rowtype_attribute.sql b/hplsql/src/test/queries/db/rowtype_attribute.sql
new file mode 100644
index 0000000..6a84b57
--- /dev/null
+++ b/hplsql/src/test/queries/db/rowtype_attribute.sql
@@ -0,0 +1,22 @@
+DECLARE
+ v1 default.src%ROWTYPE;
+ v2 src %ROWTYPE;
+ v3 src % ROWTYPE;
+ CURSOR c1 IS SELECT 'A' AS key, 'B' AS value FROM src LIMIT 1;
+BEGIN
+ SELECT 'A' AS key, 'B' AS value INTO v1 FROM src LIMIT 1;
+ PRINT v1.key || v1.value;
+
+ OPEN c1;
+ FETCH c1 INTO v2;
+ PRINT v2.key || v2.value;
+ CLOSE c1;
+
+ FOR rec IN (SELECT 'A' AS key, 'B' AS value FROM src LIMIT 1)
+ LOOP
+ PRINT rec.key || rec.value;
+ END LOOP;
+
+ EXECUTE IMMEDIATE 'SELECT ''A'' AS key, ''B'' AS value FROM src LIMIT 1' INTO v3;
+ PRINT v3.key || v3.value;
+END
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/da95f633/hplsql/src/test/queries/db/type_attribute.sql
----------------------------------------------------------------------
diff --git a/hplsql/src/test/queries/db/type_attribute.sql b/hplsql/src/test/queries/db/type_attribute.sql
new file mode 100644
index 0000000..2d93bfd
--- /dev/null
+++ b/hplsql/src/test/queries/db/type_attribute.sql
@@ -0,0 +1,8 @@
+DECLARE
+ v1 default.src.key%TYPE;
+ v2 src.Key %TYPE;
+ v3 src.key3 % TYPE;
+BEGIN
+ SELECT 'A', 'B', 1 INTO v1, v2, v3 FROM src LIMIT 1;
+ PRINT v1 || v2 || v3;
+END
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/da95f633/hplsql/src/test/queries/local/create_procedure_no_params.sql
----------------------------------------------------------------------
diff --git a/hplsql/src/test/queries/local/create_procedure_no_params.sql b/hplsql/src/test/queries/local/create_procedure_no_params.sql
new file mode 100644
index 0000000..535ba98
--- /dev/null
+++ b/hplsql/src/test/queries/local/create_procedure_no_params.sql
@@ -0,0 +1,19 @@
+create procedure sp1
+begin
+ print 'a';
+end;
+
+create procedure sp2()
+begin
+ print 'b';
+end;
+
+call sp1;
+call sp1();
+sp1;
+sp1();
+
+call sp2;
+call sp2();
+sp2;
+sp2();
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/da95f633/hplsql/src/test/queries/offline/create_table_ora.sql
----------------------------------------------------------------------
diff --git a/hplsql/src/test/queries/offline/create_table_ora.sql b/hplsql/src/test/queries/offline/create_table_ora.sql
index c47c75c..bb1d9c7 100644
--- a/hplsql/src/test/queries/offline/create_table_ora.sql
+++ b/hplsql/src/test/queries/offline/create_table_ora.sql
@@ -1,4 +1,53 @@
-CREATE TABLE ora_t1 (
- n1 NUMBER(3,0),
- v1 VARCHAR2(10)
+CREATE TABLE ora_t1 (
+ n1 NUMBER(3,0),
+ v1 VARCHAR2(10)
+);
+
+CREATE TABLE "USER"."EMP"
+ ( "EMPNO" NUMBER(4,0),
+ "ENAME" VARCHAR2(10 BYTE),
+ "JOB" VARCHAR2(9 BYTE),
+ "MGR" NUMBER(4,0),
+ "HIREDATE" DATE,
+ "SAL" NUMBER(7,2),
+ "COMM" NUMBER(7,2),
+ "DEPTNO" NUMBER(2,0)
+ ) SEGMENT CREATION IMMEDIATE
+ PCTFREE 10 PCTUSED 40 INITRANS 1 MAXTRANS 255 NOCOMPRESS LOGGING
+ STORAGE(INITIAL 65536 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645
+ PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL DEFAULT FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT)
+ TABLESPACE "USERS" ;
+
+CREATE TABLE language (
+ id NUMBER(7) NOT NULL PRIMARY KEY,
+ cd CHAR(2) NOT NULL,
+ description VARCHAR2(50)
+);
+CREATE TABLE author (
+ id NUMBER(7) NOT NULL PRIMARY KEY,
+ first_name VARCHAR2(50),
+ last_name VARCHAR2(50) NOT NULL,
+ date_of_birth DATE,
+ year_of_birth NUMBER(7),
+ distinguished NUMBER(1)
+);
+CREATE TABLE book (
+ id NUMBER(7) NOT NULL PRIMARY KEY,
+ author_id NUMBER(7) NOT NULL,
+ title VARCHAR2(400) NOT NULL,
+ published_in NUMBER(7) NOT NULL,
+ language_id NUMBER(7) NOT NULL,
+ CONSTRAINT fk_book_author FOREIGN KEY (author_id) REFERENCES author(id),
+ CONSTRAINT fk_book_language FOREIGN KEY (language_id) REFERENCES language(id)
+);
+CREATE TABLE book_store (
+ name VARCHAR2(400) NOT NULL UNIQUE
+);
+CREATE TABLE book_to_book_store (
+ name VARCHAR2(400) NOT NULL,
+ book_id INTEGER NOT NULL,
+ stock INTEGER,
+ PRIMARY KEY(name, book_id),
+ CONSTRAINT fk_b2bs_book_store FOREIGN KEY (name) REFERENCES book_store (name) ON DELETE CASCADE,
+ CONSTRAINT fk_b2bs_book FOREIGN KEY (book_id) REFERENCES book (id) ON DELETE CASCADE
);
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/da95f633/hplsql/src/test/results/db/rowtype_attribute.out.txt
----------------------------------------------------------------------
diff --git a/hplsql/src/test/results/db/rowtype_attribute.out.txt b/hplsql/src/test/results/db/rowtype_attribute.out.txt
new file mode 100644
index 0000000..fc22370
--- /dev/null
+++ b/hplsql/src/test/results/db/rowtype_attribute.out.txt
@@ -0,0 +1,42 @@
+Ln:2 DECLARE v1 default.src%ROWTYPE
+Ln:3 DECLARE v2 src%ROWTYPE
+Ln:4 DECLARE v3 src%ROWTYPE
+Ln:5 DECLARE CURSOR c1
+Ln:7 SELECT
+Ln:7 SELECT 'A' AS key, 'B' AS value FROM src LIMIT 1
+Ln:7 SELECT completed successfully
+Ln:7 SELECT INTO statement executed
+Ln:7 COLUMN: key, string
+Ln:7 SET KEY = A
+Ln:7 COLUMN: value, string
+Ln:7 SET VALUE = B
+Ln:8 PRINT
+AB
+Ln:10 OPEN
+Ln:10 c1: SELECT 'A' AS key, 'B' AS value FROM src LIMIT 1
+Ln:11 FETCH
+Ln:11 COLUMN: key, string
+Ln:11 SET KEY = A
+Ln:11 COLUMN: value, string
+Ln:11 SET VALUE = B
+Ln:12 PRINT
+AB
+Ln:13 CLOSE
+Ln:15 FOR CURSOR - ENTERED
+Ln:15 SELECT 'A' AS key, 'B' AS value FROM src LIMIT 1
+Ln:15 SELECT completed successfully
+Ln:15 COLUMN: key, string
+Ln:15 SET key = A
+Ln:15 COLUMN: value, string
+Ln:15 SET value = B
+Ln:17 PRINT
+null
+Ln:15 FOR CURSOR - LEFT
+Ln:20 EXECUTE
+Ln:20 SQL statement: SELECT 'A' AS key, 'B' AS value FROM src LIMIT 1
+Ln:20 COLUMN: key, string
+Ln:20 SET KEY = A
+Ln:20 COLUMN: value, string
+Ln:20 SET VALUE = B
+Ln:21 PRINT
+AB
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/da95f633/hplsql/src/test/results/db/type_attribute.out.txt
----------------------------------------------------------------------
diff --git a/hplsql/src/test/results/db/type_attribute.out.txt b/hplsql/src/test/results/db/type_attribute.out.txt
new file mode 100644
index 0000000..e236f5b
--- /dev/null
+++ b/hplsql/src/test/results/db/type_attribute.out.txt
@@ -0,0 +1,15 @@
+Ln:2 DECLARE v1 string
+Ln:3 DECLARE v2 string
+Ln:4 DECLARE v3 DERIVED%TYPE
+Ln:6 SELECT
+Ln:6 SELECT 'A', 'B', 1 FROM src LIMIT 1
+Ln:6 SELECT completed successfully
+Ln:6 SELECT INTO statement executed
+Ln:6 COLUMN: _c0, string
+Ln:6 SET v1 = A
+Ln:6 COLUMN: _c1, string
+Ln:6 SET v2 = B
+Ln:6 COLUMN: _c2, int
+Ln:6 SET v3 = 1
+Ln:7 PRINT
+AB1
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/da95f633/hplsql/src/test/results/local/create_procedure_no_params.out.txt
----------------------------------------------------------------------
diff --git a/hplsql/src/test/results/local/create_procedure_no_params.out.txt b/hplsql/src/test/results/local/create_procedure_no_params.out.txt
new file mode 100644
index 0000000..9e3bde9
--- /dev/null
+++ b/hplsql/src/test/results/local/create_procedure_no_params.out.txt
@@ -0,0 +1,26 @@
+Ln:1 CREATE PROCEDURE sp1
+Ln:6 CREATE PROCEDURE sp2
+EXEC PROCEDURE sp1
+Ln:3 PRINT
+a
+EXEC PROCEDURE sp1
+Ln:3 PRINT
+a
+EXEC PROCEDURE sp1
+Ln:3 PRINT
+a
+EXEC PROCEDURE sp1
+Ln:3 PRINT
+a
+EXEC PROCEDURE sp2
+Ln:8 PRINT
+b
+EXEC PROCEDURE sp2
+Ln:8 PRINT
+b
+EXEC PROCEDURE sp2
+Ln:8 PRINT
+b
+EXEC PROCEDURE sp2
+Ln:8 PRINT
+b
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/da95f633/hplsql/src/test/results/offline/create_table_ora.out.txt
----------------------------------------------------------------------
diff --git a/hplsql/src/test/results/offline/create_table_ora.out.txt b/hplsql/src/test/results/offline/create_table_ora.out.txt
index 33af104..cf30c0f 100644
--- a/hplsql/src/test/results/offline/create_table_ora.out.txt
+++ b/hplsql/src/test/results/offline/create_table_ora.out.txt
@@ -1,4 +1,42 @@
Ln:1 CREATE TABLE
Ln:1 CREATE TABLE ora_t1 (n1 DECIMAL(3,0),
v1 STRING
+)
+Ln:6 CREATE TABLE
+Ln:6 CREATE TABLE `USER`.`EMP` (`EMPNO` DECIMAL(4,0),
+`ENAME` STRING,
+`JOB` STRING,
+`MGR` DECIMAL(4,0),
+`HIREDATE` DATE,
+`SAL` DECIMAL(7,2),
+`COMM` DECIMAL(7,2),
+`DEPTNO` DECIMAL(2,0)
+)
+Ln:21 CREATE TABLE
+Ln:21 CREATE TABLE language (id DECIMAL(7),
+cd CHAR(2),
+description STRING
+)
+Ln:26 CREATE TABLE
+Ln:26 CREATE TABLE author (id DECIMAL(7),
+first_name STRING,
+last_name STRING,
+date_of_birth DATE,
+year_of_birth DECIMAL(7),
+distinguished DECIMAL(1)
+)
+Ln:34 CREATE TABLE
+Ln:34 CREATE TABLE book (id DECIMAL(7),
+author_id DECIMAL(7),
+title STRING,
+published_in DECIMAL(7),
+language_id DECIMAL(7)
+)
+Ln:43 CREATE TABLE
+Ln:43 CREATE TABLE book_store (name STRING
+)
+Ln:46 CREATE TABLE
+Ln:46 CREATE TABLE book_to_book_store (name STRING,
+book_id INTEGER,
+stock INTEGER
)
\ No newline at end of file
[27/28] hive git commit: HIVE-11712: Duplicate groupby keys cause ClassCastException (Jimmy, reviewed by Xuefu)
Posted by se...@apache.org.
HIVE-11712: Duplicate groupby keys cause ClassCastException (Jimmy, reviewed by Xuefu)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/bb4f5e70
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/bb4f5e70
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/bb4f5e70
Branch: refs/heads/llap
Commit: bb4f5e702b11720ca54e43ba4a6c3aff099b0f4c
Parents: c40382d
Author: Jimmy Xiang <jx...@cloudera.com>
Authored: Tue Sep 1 11:48:36 2015 -0700
Committer: Jimmy Xiang <jx...@cloudera.com>
Committed: Thu Sep 3 09:57:23 2015 -0700
----------------------------------------------------------------------
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 4 +
.../queries/clientpositive/groupby1_map_nomap.q | 2 +
ql/src/test/queries/clientpositive/groupby6.q | 2 +
.../clientpositive/groupby_grouping_id2.q | 2 +
.../clientpositive/groupby_ppr_multi_distinct.q | 2 +
ql/src/test/queries/clientpositive/having2.q | 27 +
.../clientpositive/groupby1_map_nomap.q.out | 8 +-
.../test/results/clientpositive/groupby6.q.out | 8 +-
.../clientpositive/groupby_duplicate_key.q.out | 16 +-
.../clientpositive/groupby_grouping_id2.q.out | 28 +-
.../groupby_ppr_multi_distinct.q.out | 8 +-
.../test/results/clientpositive/having2.q.out | 353 ++++++++++++
.../spark/groupby1_map_nomap.q.out | 564 ++++++++++---------
.../results/clientpositive/spark/groupby6.q.out | 20 +-
.../spark/groupby_grouping_id2.q.out | 38 +-
.../spark/groupby_ppr_multi_distinct.q.out | 16 +-
16 files changed, 761 insertions(+), 337 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/bb4f5e70/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index b809a23..778c7b2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -4620,6 +4620,10 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
ExprNodeDesc grpByExprNode = genExprNodeDesc(grpbyExpr,
groupByInputRowResolver);
+ if (ExprNodeDescUtils.indexOf(grpByExprNode, groupByKeys) >= 0) {
+ // Skip duplicated grouping keys
+ continue;
+ }
groupByKeys.add(grpByExprNode);
String field = getColumnInternalName(i);
outputColumnNames.add(field);
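The guard above drops a grouping key that is already in groupByKeys, which is what a query like SELECT key, count(*) FROM src GROUP BY key, key used to trip over. A minimal sketch of the same dedup in isolation (plan types from ql.plan; the surrounding analyzer state is omitted):

  import java.util.ArrayList;
  import java.util.List;
  import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
  import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;

  class DedupKeysSketch {
    static List<ExprNodeDesc> dedup(List<ExprNodeDesc> candidates) {
      List<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
      for (ExprNodeDesc e : candidates) {
        if (ExprNodeDescUtils.indexOf(e, groupByKeys) >= 0) {
          continue;  // duplicated grouping key is skipped, as in the patch
        }
        groupByKeys.add(e);
      }
      return groupByKeys;
    }
  }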
http://git-wip-us.apache.org/repos/asf/hive/blob/bb4f5e70/ql/src/test/queries/clientpositive/groupby1_map_nomap.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby1_map_nomap.q b/ql/src/test/queries/clientpositive/groupby1_map_nomap.q
index eb09a9c..b22a61e 100644
--- a/ql/src/test/queries/clientpositive/groupby1_map_nomap.q
+++ b/ql/src/test/queries/clientpositive/groupby1_map_nomap.q
@@ -2,6 +2,8 @@ set hive.map.aggr=true;
set hive.groupby.skewindata=false;
set hive.groupby.mapaggr.checkinterval=20;
+-- SORT_QUERY_RESULTS
+
CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE;
EXPLAIN
http://git-wip-us.apache.org/repos/asf/hive/blob/bb4f5e70/ql/src/test/queries/clientpositive/groupby6.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby6.q b/ql/src/test/queries/clientpositive/groupby6.q
index 3a3cc58..17597cb 100755
--- a/ql/src/test/queries/clientpositive/groupby6.q
+++ b/ql/src/test/queries/clientpositive/groupby6.q
@@ -1,6 +1,8 @@
set hive.map.aggr=false;
set hive.groupby.skewindata=true;
+-- SORT_QUERY_RESULTS
+
CREATE TABLE dest1(c1 STRING) STORED AS TEXTFILE;
EXPLAIN
http://git-wip-us.apache.org/repos/asf/hive/blob/bb4f5e70/ql/src/test/queries/clientpositive/groupby_grouping_id2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_id2.q b/ql/src/test/queries/clientpositive/groupby_grouping_id2.q
index f451f17..5c05aad 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_id2.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_id2.q
@@ -4,6 +4,8 @@ LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1;
set hive.groupby.skewindata = true;
+-- SORT_QUERY_RESULTS
+
SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP;
SELECT GROUPING__ID, count(*)
http://git-wip-us.apache.org/repos/asf/hive/blob/bb4f5e70/ql/src/test/queries/clientpositive/groupby_ppr_multi_distinct.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_ppr_multi_distinct.q b/ql/src/test/queries/clientpositive/groupby_ppr_multi_distinct.q
index 20c73bd..1249853 100644
--- a/ql/src/test/queries/clientpositive/groupby_ppr_multi_distinct.q
+++ b/ql/src/test/queries/clientpositive/groupby_ppr_multi_distinct.q
@@ -1,6 +1,8 @@
set hive.map.aggr=false;
set hive.groupby.skewindata=false;
+-- SORT_QUERY_RESULTS
+
CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE;
EXPLAIN EXTENDED
http://git-wip-us.apache.org/repos/asf/hive/blob/bb4f5e70/ql/src/test/queries/clientpositive/having2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/having2.q b/ql/src/test/queries/clientpositive/having2.q
index 282b2c0..83ae1e1 100644
--- a/ql/src/test/queries/clientpositive/having2.q
+++ b/ql/src/test/queries/clientpositive/having2.q
@@ -63,3 +63,30 @@ SELECT customer_name, SUM(customer_balance), SUM(order_quantity) FROM default.te
(SUM(customer_balance) <= 4074689.000000041)
AND (COUNT(s1.discount) <= 822)
);
+
+explain
+SELECT s1.customer_name FROM default.testv1_staples s1 join default.src s2 on s1.customer_name = s2.key
+GROUP BY s1.customer_name
+HAVING (
+(SUM(s1.customer_balance) <= 4074689.000000041)
+AND (AVG(s1.discount) <= 822)
+AND (COUNT(s2.value) > 4)
+);
+
+explain
+SELECT s1.customer_name FROM default.testv1_staples s1 join default.src s2 on s1.customer_name = s2.key
+GROUP BY s1.customer_name, s1.customer_name
+HAVING (
+(SUM(s1.customer_balance) <= 4074689.000000041)
+AND (AVG(s1.discount) <= 822)
+AND (COUNT(s2.value) > 4)
+);
+
+explain
+SELECT distinct s1.customer_name as x, s1.customer_name as y
+FROM default.testv1_staples s1 join default.src s2 on s1.customer_name = s2.key
+HAVING (
+(SUM(s1.customer_balance) <= 4074689.000000041)
+AND (AVG(s1.discount) <= 822)
+AND (COUNT(s2.value) > 4)
+);
http://git-wip-us.apache.org/repos/asf/hive/blob/bb4f5e70/ql/src/test/results/clientpositive/groupby1_map_nomap.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby1_map_nomap.q.out b/ql/src/test/results/clientpositive/groupby1_map_nomap.q.out
index cc985a5..7cdf240 100644
--- a/ql/src/test/results/clientpositive/groupby1_map_nomap.q.out
+++ b/ql/src/test/results/clientpositive/groupby1_map_nomap.q.out
@@ -1,8 +1,12 @@
-PREHOOK: query: CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@dest1
-POSTHOOK: query: CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@dest1
http://git-wip-us.apache.org/repos/asf/hive/blob/bb4f5e70/ql/src/test/results/clientpositive/groupby6.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby6.q.out b/ql/src/test/results/clientpositive/groupby6.q.out
index b790224..d8cb2ac 100644
--- a/ql/src/test/results/clientpositive/groupby6.q.out
+++ b/ql/src/test/results/clientpositive/groupby6.q.out
@@ -1,8 +1,12 @@
-PREHOOK: query: CREATE TABLE dest1(c1 STRING) STORED AS TEXTFILE
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(c1 STRING) STORED AS TEXTFILE
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@dest1
-POSTHOOK: query: CREATE TABLE dest1(c1 STRING) STORED AS TEXTFILE
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(c1 STRING) STORED AS TEXTFILE
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@dest1
http://git-wip-us.apache.org/repos/asf/hive/blob/bb4f5e70/ql/src/test/results/clientpositive/groupby_duplicate_key.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby_duplicate_key.q.out b/ql/src/test/results/clientpositive/groupby_duplicate_key.q.out
index 2f2a6e6..fc95f41 100644
--- a/ql/src/test/results/clientpositive/groupby_duplicate_key.q.out
+++ b/ql/src/test/results/clientpositive/groupby_duplicate_key.q.out
@@ -21,14 +21,14 @@ STAGE PLANS:
outputColumnNames: key
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- keys: key (type: string), '' (type: string), '' (type: string)
+ keys: key (type: string), '' (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: string), _col2 (type: string)
+ key expressions: _col0 (type: string), _col1 (type: string)
sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col2 (type: string)
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Group By Operator
@@ -99,14 +99,14 @@ STAGE PLANS:
outputColumnNames: key
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- keys: key (type: string), 'X' (type: string), 'X' (type: string)
+ keys: key (type: string), 'X' (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: string), _col2 (type: string)
+ key expressions: _col0 (type: string), _col1 (type: string)
sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col2 (type: string)
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Group By Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/bb4f5e70/ql/src/test/results/clientpositive/groupby_grouping_id2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby_grouping_id2.q.out b/ql/src/test/results/clientpositive/groupby_grouping_id2.q.out
index 4a0a9d2..544a7ae 100644
--- a/ql/src/test/results/clientpositive/groupby_grouping_id2.q.out
+++ b/ql/src/test/results/clientpositive/groupby_grouping_id2.q.out
@@ -14,25 +14,29 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@t1
-PREHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
#### A masked pattern was here ####
-POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
#### A masked pattern was here ####
-NULL NULL 0 6
+1 1 3 1
1 NULL 1 2
1 NULL 3 1
-1 1 3 1
-2 NULL 1 1
2 2 3 1
+2 NULL 1 1
+3 3 3 1
3 NULL 1 2
3 NULL 3 1
-3 3 3 1
-4 NULL 1 1
4 5 3 1
+4 NULL 1 1
+NULL NULL 0 6
PREHOOK: query: SELECT GROUPING__ID, count(*)
FROM
(
@@ -129,17 +133,17 @@ POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key,
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
#### A masked pattern was here ####
-NULL NULL 0 6
+1 1 3 1
1 NULL 1 2
1 NULL 3 1
-1 1 3 1
-2 NULL 1 1
2 2 3 1
+2 NULL 1 1
+3 3 3 1
3 NULL 1 2
3 NULL 3 1
-3 3 3 1
-4 NULL 1 1
4 5 3 1
+4 NULL 1 1
+NULL NULL 0 6
PREHOOK: query: SELECT GROUPING__ID, count(*)
FROM
(
http://git-wip-us.apache.org/repos/asf/hive/blob/bb4f5e70/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out b/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out
index c50abde..6eb3f66 100644
--- a/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out
+++ b/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out
@@ -1,8 +1,12 @@
-PREHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@dest1
-POSTHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@dest1
http://git-wip-us.apache.org/repos/asf/hive/blob/bb4f5e70/ql/src/test/results/clientpositive/having2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/having2.q.out b/ql/src/test/results/clientpositive/having2.q.out
index aafd3b6..ba601f9 100644
--- a/ql/src/test/results/clientpositive/having2.q.out
+++ b/ql/src/test/results/clientpositive/having2.q.out
@@ -242,3 +242,356 @@ STAGE PLANS:
Processor Tree:
ListSink
+PREHOOK: query: explain
+SELECT s1.customer_name FROM default.testv1_staples s1 join default.src s2 on s1.customer_name = s2.key
+GROUP BY s1.customer_name
+HAVING (
+(SUM(s1.customer_balance) <= 4074689.000000041)
+AND (AVG(s1.discount) <= 822)
+AND (COUNT(s2.value) > 4)
+)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+SELECT s1.customer_name FROM default.testv1_staples s1 join default.src s2 on s1.customer_name = s2.key
+GROUP BY s1.customer_name
+HAVING (
+(SUM(s1.customer_balance) <= 4074689.000000041)
+AND (AVG(s1.discount) <= 822)
+AND (COUNT(s2.value) > 4)
+)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: s1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: customer_name is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: customer_name (type: string)
+ sort order: +
+ Map-reduce partition columns: customer_name (type: string)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ value expressions: discount (type: double), customer_balance (type: double)
+ TableScan
+ alias: s2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: value (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 customer_name (type: string)
+ 1 key (type: string)
+ outputColumnNames: _col6, _col18, _col21, _col54
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col18 (type: string), _col21 (type: double), _col6 (type: double), _col54 (type: string)
+ outputColumnNames: _col18, _col21, _col6, _col54
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col21), avg(_col6), count(_col54)
+ keys: _col18 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: double), _col2 (type: struct<count:bigint,sum:double,input:double>), _col3 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), avg(VALUE._col1), count(VALUE._col2)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((_col1 <= 4074689.000000041) and (_col2 <= 822.0)) and (_col3 > 4)) (type: boolean)
+ Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+SELECT s1.customer_name FROM default.testv1_staples s1 join default.src s2 on s1.customer_name = s2.key
+GROUP BY s1.customer_name, s1.customer_name
+HAVING (
+(SUM(s1.customer_balance) <= 4074689.000000041)
+AND (AVG(s1.discount) <= 822)
+AND (COUNT(s2.value) > 4)
+)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+SELECT s1.customer_name FROM default.testv1_staples s1 join default.src s2 on s1.customer_name = s2.key
+GROUP BY s1.customer_name, s1.customer_name
+HAVING (
+(SUM(s1.customer_balance) <= 4074689.000000041)
+AND (AVG(s1.discount) <= 822)
+AND (COUNT(s2.value) > 4)
+)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: s1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: customer_name is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: customer_name (type: string)
+ sort order: +
+ Map-reduce partition columns: customer_name (type: string)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ value expressions: discount (type: double), customer_balance (type: double)
+ TableScan
+ alias: s2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: value (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 customer_name (type: string)
+ 1 key (type: string)
+ outputColumnNames: _col6, _col18, _col21, _col54
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col18 (type: string), _col21 (type: double), _col6 (type: double), _col54 (type: string)
+ outputColumnNames: _col18, _col21, _col6, _col54
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col21), avg(_col6), count(_col54)
+ keys: _col18 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: double), _col2 (type: struct<count:bigint,sum:double,input:double>), _col3 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), avg(VALUE._col1), count(VALUE._col2)
+ keys: KEY._col0 (type: string), KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string), _col2 (type: double), _col3 (type: double), _col4 (type: bigint)
+ outputColumnNames: _col1, _col2, _col3, _col4
+ Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((_col2 <= 4074689.000000041) and (_col3 <= 822.0)) and (_col4 > 4)) (type: boolean)
+ Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+SELECT distinct s1.customer_name as x, s1.customer_name as y
+FROM default.testv1_staples s1 join default.src s2 on s1.customer_name = s2.key
+HAVING (
+(SUM(s1.customer_balance) <= 4074689.000000041)
+AND (AVG(s1.discount) <= 822)
+AND (COUNT(s2.value) > 4)
+)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+SELECT distinct s1.customer_name as x, s1.customer_name as y
+FROM default.testv1_staples s1 join default.src s2 on s1.customer_name = s2.key
+HAVING (
+(SUM(s1.customer_balance) <= 4074689.000000041)
+AND (AVG(s1.discount) <= 822)
+AND (COUNT(s2.value) > 4)
+)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: s1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: customer_name is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: customer_name (type: string)
+ sort order: +
+ Map-reduce partition columns: customer_name (type: string)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ value expressions: discount (type: double), customer_balance (type: double)
+ TableScan
+ alias: s2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: value (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 customer_name (type: string)
+ 1 key (type: string)
+ outputColumnNames: _col6, _col18, _col21, _col54
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col18 (type: string), _col21 (type: double), _col6 (type: double), _col54 (type: string)
+ outputColumnNames: _col18, _col21, _col6, _col54
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col21), avg(_col6), count(_col54)
+ keys: _col18 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: double), _col2 (type: struct<count:bigint,sum:double,input:double>), _col3 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), avg(VALUE._col1), count(VALUE._col2)
+ keys: KEY._col0 (type: string), KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string), _col2 (type: double), _col3 (type: double), _col4 (type: bigint)
+ outputColumnNames: _col1, _col2, _col3, _col4
+ Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((_col2 <= 4074689.000000041) and (_col3 <= 822.0)) and (_col4 > 4)) (type: boolean)
+ Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
http://git-wip-us.apache.org/repos/asf/hive/blob/bb4f5e70/ql/src/test/results/clientpositive/spark/groupby1_map_nomap.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/groupby1_map_nomap.q.out b/ql/src/test/results/clientpositive/spark/groupby1_map_nomap.q.out
index 8fd9661..0799ff5 100644
--- a/ql/src/test/results/clientpositive/spark/groupby1_map_nomap.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby1_map_nomap.q.out
@@ -1,8 +1,12 @@
-PREHOOK: query: CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@dest1
-POSTHOOK: query: CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@dest1
@@ -97,312 +101,312 @@ POSTHOOK: query: SELECT dest1.* FROM dest1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@dest1
#### A masked pattern was here ####
-273 819.0
-275 275.0
-419 419.0
-118 236.0
-202 202.0
-282 564.0
-82 82.0
+0 0.0
+10 10.0
+100 200.0
+103 206.0
+104 208.0
+105 105.0
+11 11.0
+111 111.0
+113 226.0
+114 114.0
116 116.0
-345 345.0
-332 332.0
-19 19.0
-42 84.0
-459 918.0
-190 190.0
-257 257.0
+118 236.0
+119 357.0
+12 24.0
+120 240.0
+125 250.0
+126 126.0
+128 384.0
+129 258.0
+131 131.0
+133 133.0
134 268.0
-165 330.0
+136 136.0
+137 274.0
138 552.0
-222 222.0
+143 143.0
+145 145.0
+146 292.0
+149 298.0
+15 30.0
+150 150.0
+152 304.0
+153 153.0
+155 155.0
+156 156.0
+157 157.0
+158 158.0
+160 160.0
+162 162.0
163 163.0
-219 438.0
-411 411.0
-305 305.0
-479 479.0
-28 28.0
-318 954.0
-244 244.0
+164 328.0
+165 330.0
+166 166.0
+167 501.0
+168 168.0
+169 676.0
+17 17.0
+170 170.0
+172 344.0
+174 348.0
+175 350.0
+176 352.0
+177 177.0
+178 178.0
+179 358.0
+18 36.0
+180 180.0
+181 181.0
+183 183.0
+186 186.0
+187 561.0
+189 189.0
+19 19.0
+190 190.0
+191 382.0
+192 192.0
+193 579.0
+194 194.0
+195 390.0
+196 196.0
+197 394.0
+199 597.0
+2 2.0
+20 20.0
+200 400.0
+201 201.0
+202 202.0
+203 406.0
+205 410.0
+207 414.0
208 624.0
-136 136.0
-24 48.0
-239 478.0
-84 168.0
-11 11.0
-367 734.0
-288 576.0
-150 150.0
-402 402.0
-466 1398.0
+209 418.0
+213 426.0
+214 214.0
+216 432.0
+217 434.0
+218 218.0
+219 438.0
+221 442.0
+222 222.0
+223 446.0
224 448.0
+226 226.0
+228 228.0
+229 458.0
+230 1150.0
+233 466.0
+235 235.0
237 474.0
-105 105.0
-484 484.0
-20 20.0
-400 400.0
-97 194.0
-280 560.0
-255 510.0
-103 206.0
+238 476.0
+239 478.0
+24 48.0
+241 241.0
242 484.0
-323 323.0
-309 618.0
-365 365.0
-178 178.0
+244 244.0
+247 247.0
+248 248.0
+249 249.0
+252 252.0
+255 510.0
+256 512.0
+257 257.0
+258 258.0
26 52.0
-404 808.0
-196 196.0
-448 448.0
-462 924.0
-389 389.0
-338 338.0
-167 501.0
-493 493.0
-33 33.0
-152 304.0
-477 477.0
-431 1293.0
-316 948.0
-125 250.0
-444 444.0
-457 457.0
-446 446.0
-310 310.0
-129 258.0
-183 183.0
-392 392.0
+260 260.0
+262 262.0
+263 263.0
+265 530.0
+266 266.0
+27 27.0
+272 544.0
+273 819.0
+274 274.0
+275 275.0
277 1108.0
-4 4.0
-80 80.0
-228 228.0
-145 145.0
-356 356.0
+278 556.0
+28 28.0
+280 560.0
+281 562.0
+282 564.0
+283 283.0
284 284.0
-455 455.0
-53 53.0
-149 298.0
-424 848.0
-37 74.0
+285 285.0
286 286.0
-327 981.0
-170 170.0
-187 561.0
-86 86.0
+287 287.0
+288 576.0
+289 289.0
291 291.0
-233 466.0
-439 878.0
-266 266.0
-2 2.0
-396 1188.0
+292 292.0
+296 296.0
+298 894.0
+30 30.0
+302 302.0
+305 305.0
+306 306.0
+307 614.0
+308 308.0
+309 618.0
+310 310.0
+311 933.0
+315 315.0
+316 948.0
+317 634.0
+318 954.0
+321 642.0
+322 644.0
+323 323.0
+325 650.0
+327 981.0
+33 33.0
+331 662.0
+332 332.0
+333 666.0
+335 335.0
336 336.0
-226 226.0
-176 352.0
-66 66.0
-497 497.0
-172 344.0
-491 491.0
-44 44.0
-200 400.0
-235 235.0
-77 77.0
-260 260.0
-406 1624.0
-460 460.0
-495 495.0
-143 143.0
-189 189.0
-453 453.0
-64 64.0
-158 158.0
+338 338.0
+339 339.0
+34 34.0
341 341.0
-475 475.0
-8 8.0
-394 394.0
-57 57.0
-169 676.0
-15 30.0
+342 684.0
+344 688.0
+345 345.0
+348 1740.0
35 105.0
-174 348.0
-325 650.0
-0 0.0
-248 248.0
-468 1872.0
-435 435.0
-51 102.0
-321 642.0
-413 826.0
+351 351.0
+353 706.0
+356 356.0
+360 360.0
+362 362.0
+364 364.0
+365 365.0
+366 366.0
+367 734.0
+368 368.0
369 1107.0
-480 1440.0
-156 156.0
-192 192.0
-213 426.0
+37 74.0
+373 373.0
374 374.0
-437 437.0
-17 17.0
-181 181.0
-482 482.0
-307 614.0
-194 194.0
-217 434.0
-95 190.0
-114 114.0
-262 262.0
+375 375.0
+377 377.0
378 378.0
-417 1251.0
-281 562.0
-180 180.0
-467 467.0
-201 201.0
-432 432.0
-238 476.0
-96 96.0
+379 379.0
+382 764.0
+384 1152.0
386 386.0
-283 283.0
-168 168.0
-209 418.0
-463 926.0
-377 377.0
-317 634.0
-252 252.0
-104 208.0
-373 373.0
-131 131.0
-494 494.0
-230 1150.0
-83 166.0
-191 382.0
-41 41.0
-193 579.0
-436 436.0
-496 496.0
-166 166.0
-229 458.0
-298 894.0
-133 133.0
-333 666.0
-65 65.0
-292 292.0
-364 364.0
-472 472.0
-274 274.0
-47 47.0
-401 2005.0
-67 134.0
-5 15.0
-18 36.0
-27 27.0
-344 688.0
-409 1227.0
-256 512.0
-85 85.0
-72 144.0
-54 54.0
+389 389.0
+392 392.0
393 393.0
-160 160.0
-438 1314.0
-263 263.0
-351 351.0
-207 414.0
-449 449.0
-111 111.0
-128 384.0
-289 289.0
-399 798.0
-489 1956.0
-205 410.0
-177 177.0
-119 357.0
-331 662.0
-348 1740.0
-478 956.0
-76 152.0
-458 916.0
-382 764.0
-157 157.0
-315 315.0
-469 2345.0
-302 302.0
+394 394.0
395 790.0
-384 1152.0
-162 162.0
-113 226.0
-98 196.0
-221 442.0
-203 406.0
-199 597.0
-454 1362.0
-218 218.0
-241 241.0
-272 544.0
-120 240.0
+396 1188.0
+397 794.0
+399 798.0
+4 4.0
+400 400.0
+401 2005.0
+402 402.0
403 1209.0
-366 366.0
-249 249.0
+404 808.0
+406 1624.0
+407 407.0
+409 1227.0
+41 41.0
+411 411.0
+413 826.0
+414 828.0
+417 1251.0
+418 418.0
+419 419.0
+42 84.0
421 421.0
-214 214.0
-92 92.0
-487 487.0
-258 258.0
+424 848.0
+427 427.0
429 858.0
-265 530.0
-175 350.0
-34 34.0
-368 368.0
-69 69.0
-414 828.0
-30 30.0
-492 984.0
-9 9.0
-296 296.0
-311 933.0
-247 247.0
-164 328.0
-306 306.0
-153 153.0
-339 339.0
-322 644.0
-10 10.0
+43 43.0
430 1290.0
-155 155.0
+431 1293.0
+432 432.0
+435 435.0
+436 436.0
+437 437.0
+438 1314.0
+439 878.0
+44 44.0
+443 443.0
+444 444.0
+446 446.0
+448 448.0
+449 449.0
452 452.0
-179 358.0
+453 453.0
+454 1362.0
+455 455.0
+457 457.0
+458 916.0
+459 918.0
+460 460.0
+462 924.0
+463 926.0
+466 1398.0
+467 467.0
+468 1872.0
+469 2345.0
+47 47.0
+470 470.0
+472 472.0
+475 475.0
+477 477.0
+478 956.0
+479 479.0
+480 1440.0
+481 481.0
+482 482.0
+483 483.0
+484 484.0
485 485.0
+487 487.0
+489 1956.0
490 490.0
-443 443.0
-379 379.0
-186 186.0
-100 200.0
-137 274.0
-483 483.0
-90 270.0
-481 481.0
-287 287.0
-146 292.0
-216 432.0
-342 684.0
-470 470.0
-362 362.0
-375 375.0
-407 407.0
-397 794.0
-58 116.0
+491 491.0
+492 984.0
+493 493.0
+494 494.0
+495 495.0
+496 496.0
+497 497.0
498 1494.0
-87 87.0
-195 390.0
-197 394.0
-78 78.0
-278 556.0
-12 24.0
-335 335.0
-360 360.0
-308 308.0
-223 446.0
-418 418.0
-43 43.0
-353 706.0
-74 74.0
-427 427.0
+5 15.0
+51 102.0
+53 53.0
+54 54.0
+57 57.0
+58 116.0
+64 64.0
+65 65.0
+66 66.0
+67 134.0
+69 69.0
70 210.0
-285 285.0
-126 126.0
+72 144.0
+74 74.0
+76 152.0
+77 77.0
+78 78.0
+8 8.0
+80 80.0
+82 82.0
+83 166.0
+84 168.0
+85 85.0
+86 86.0
+87 87.0
+9 9.0
+90 270.0
+92 92.0
+95 190.0
+96 96.0
+97 194.0
+98 196.0
http://git-wip-us.apache.org/repos/asf/hive/blob/bb4f5e70/ql/src/test/results/clientpositive/spark/groupby6.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/groupby6.q.out b/ql/src/test/results/clientpositive/spark/groupby6.q.out
index c3caccd..bb9b315 100644
--- a/ql/src/test/results/clientpositive/spark/groupby6.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby6.q.out
@@ -1,8 +1,12 @@
-PREHOOK: query: CREATE TABLE dest1(c1 STRING) STORED AS TEXTFILE
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(c1 STRING) STORED AS TEXTFILE
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@dest1
-POSTHOOK: query: CREATE TABLE dest1(c1 STRING) STORED AS TEXTFILE
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(c1 STRING) STORED AS TEXTFILE
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@dest1
@@ -101,13 +105,13 @@ POSTHOOK: query: SELECT dest1.* FROM dest1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@dest1
#### A masked pattern was here ####
-4
-8
-6
0
+1
2
-7
+3
+4
5
+6
+7
+8
9
-3
-1
http://git-wip-us.apache.org/repos/asf/hive/blob/bb4f5e70/ql/src/test/results/clientpositive/spark/groupby_grouping_id2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/groupby_grouping_id2.q.out b/ql/src/test/results/clientpositive/spark/groupby_grouping_id2.q.out
index 9a5c832..544a7ae 100644
--- a/ql/src/test/results/clientpositive/spark/groupby_grouping_id2.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby_grouping_id2.q.out
@@ -14,25 +14,29 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@t1
-PREHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
#### A masked pattern was here ####
-POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
#### A masked pattern was here ####
-NULL NULL 0 6
1 1 3 1
-2 NULL 1 1
-2 2 3 1
-3 3 3 1
-4 NULL 1 1
1 NULL 1 2
1 NULL 3 1
+2 2 3 1
+2 NULL 1 1
+3 3 3 1
3 NULL 1 2
3 NULL 3 1
4 5 3 1
+4 NULL 1 1
+NULL NULL 0 6
PREHOOK: query: SELECT GROUPING__ID, count(*)
FROM
(
@@ -52,8 +56,8 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
#### A masked pattern was here ####
0 1
-3 6
1 4
+3 6
PREHOOK: query: SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY key,value WITH ROLLUP) t1
JOIN
(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2
@@ -129,17 +133,17 @@ POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key,
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
#### A masked pattern was here ####
-1 NULL 3 1
+1 1 3 1
1 NULL 1 2
-NULL NULL 0 6
-4 5 3 1
-3 NULL 3 1
-3 NULL 1 2
-4 NULL 1 1
-2 NULL 1 1
+1 NULL 3 1
2 2 3 1
-1 1 3 1
+2 NULL 1 1
3 3 3 1
+3 NULL 1 2
+3 NULL 3 1
+4 5 3 1
+4 NULL 1 1
+NULL NULL 0 6
PREHOOK: query: SELECT GROUPING__ID, count(*)
FROM
(
@@ -159,8 +163,8 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
#### A masked pattern was here ####
0 1
-3 6
1 4
+3 6
PREHOOK: query: SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY key,value WITH ROLLUP) t1
JOIN
(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2
http://git-wip-us.apache.org/repos/asf/hive/blob/bb4f5e70/ql/src/test/results/clientpositive/spark/groupby_ppr_multi_distinct.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/groupby_ppr_multi_distinct.q.out b/ql/src/test/results/clientpositive/spark/groupby_ppr_multi_distinct.q.out
index 01ea4ea..ef1cba2 100644
--- a/ql/src/test/results/clientpositive/spark/groupby_ppr_multi_distinct.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby_ppr_multi_distinct.q.out
@@ -1,8 +1,12 @@
-PREHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@dest1
-POSTHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@dest1
@@ -335,12 +339,12 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@dest1
#### A masked pattern was here ####
0 1 00.0 0 1
-2 69 251142.0 15780 69
-4 74 4105526.0 30965 74
-6 5 6796.0 331 5
-8 8 81524.0 595 8
1 71 132828.0 10044 71
+2 69 251142.0 15780 69
3 62 364008.0 20119 62
+4 74 4105526.0 30965 74
5 6 5794.0 278 6
+6 5 6796.0 331 5
7 6 71470.0 447 6
+8 8 81524.0 595 8
9 7 92094.0 577 7
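The -- SORT_QUERY_RESULTS diffs above make these golden files order-insensitive: Hive guarantees no row order without ORDER BY, so the harness sorts query output before comparing, which is why the expected rows in these diffs are re-ordered. A hypothetical Java sketch of what such a comparison amounts to (illustrative only, not the actual QTestUtil code):

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.Collections;
    import java.util.List;

    public class SortedCompareSketch {
      // Compare two result sets as multisets of lines: sort copies of both
      // sides first, so row order cannot fail the test.
      static boolean sameResults(List<String> expected, List<String> actual) {
        List<String> e = new ArrayList<>(expected);
        List<String> a = new ArrayList<>(actual);
        Collections.sort(e);
        Collections.sort(a);
        return e.equals(a);
      }

      public static void main(String[] args) {
        List<String> golden = Arrays.asList("1 2.0", "2 4.0");
        List<String> actual = Arrays.asList("2 4.0", "1 2.0"); // same rows, different order
        System.out.println(sameResults(golden, actual)); // true
      }
    }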
[24/28] hive git commit: HIVE-11671 : Optimize RuleRegExp in DPP codepath (Rajesh Balamohan, reviewed by Hari Subramaniyan)
Posted by se...@apache.org.
HIVE-11671 : Optimize RuleRegExp in DPP codepath (Rajesh Balamohan, reviewed by Hari Subramaniyan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/492c8b1d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/492c8b1d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/492c8b1d
Branch: refs/heads/llap
Commit: 492c8b1d88ffcb68ba4f77a3a49ae8fc768cdd7c
Parents: 1fc9320
Author: Hari Subramaniyan <ha...@apache.org>
Authored: Wed Sep 2 15:54:23 2015 -0700
Committer: Hari Subramaniyan <ha...@apache.org>
Committed: Wed Sep 2 15:54:23 2015 -0700
----------------------------------------------------------------------
.../apache/hadoop/hive/ql/lib/RuleRegExp.java | 22 +++++++++++---------
1 file changed, 12 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/492c8b1d/ql/src/java/org/apache/hadoop/hive/ql/lib/RuleRegExp.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lib/RuleRegExp.java b/ql/src/java/org/apache/hadoop/hive/ql/lib/RuleRegExp.java
index c88ed68..fd5f133 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/lib/RuleRegExp.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/lib/RuleRegExp.java
@@ -125,13 +125,13 @@ public class RuleRegExp implements Rule {
*/
private int costPatternWithoutWildCardChar(Stack<Node> stack) throws SemanticException {
int numElems = (stack != null ? stack.size() : 0);
- String name = new String("");
int patLen = patternWithoutWildCardChar.length();
-
+ StringBuilder name = new StringBuilder(patLen + numElems);
for (int pos = numElems - 1; pos >= 0; pos--) {
- name = stack.get(pos).getName() + "%" + name;
+ String nodeName = stack.get(pos).getName() + "%";
+ name.insert(0, nodeName);
if (name.length() >= patLen) {
- if (patternWithoutWildCardChar.equals(name)) {
+ if (patternWithoutWildCardChar.contentEquals(name)) {
return patLen;
} else {
return -1;
@@ -153,13 +153,14 @@ public class RuleRegExp implements Rule {
private int costPatternWithORWildCardChar(Stack<Node> stack) throws SemanticException {
int numElems = (stack != null ? stack.size() : 0);
for (String pattern : patternORWildChar) {
- String name = new String("");
int patLen = pattern.length();
+ StringBuilder name = new StringBuilder(patLen + numElems);
for (int pos = numElems - 1; pos >= 0; pos--) {
- name = stack.get(pos).getName() + "%" + name;
+ String nodeName = stack.get(pos).getName() + "%";
+ name.insert(0, nodeName);
if (name.length() >= patLen) {
- if (pattern.equals(name)) {
+ if (pattern.contentEquals(name)) {
return patLen;
} else {
break;
@@ -181,11 +182,12 @@ public class RuleRegExp implements Rule {
* @throws SemanticException
*/
private int costPatternWithWildCardChar(Stack<Node> stack) throws SemanticException {
- int numElems = (stack != null ? stack.size() : 0);
- String name = "";
+ int numElems = (stack != null ? stack.size() : 0);
+ StringBuilder name = new StringBuilder();
Matcher m = patternWithWildCardChar.matcher("");
for (int pos = numElems - 1; pos >= 0; pos--) {
- name = stack.get(pos).getName() + "%" + name;
+ String nodeName = stack.get(pos).getName() + "%";
+ name.insert(0, nodeName);
m.reset(name);
if (m.matches()) {
return name.length();
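The HIVE-11671 change above swaps per-iteration String concatenation (each `name = stack.get(pos).getName() + "%" + name` allocates a fresh String) for a single StringBuilder with insert(0, ...) plus a contentEquals comparison that avoids materializing name.toString(). A self-contained sketch of the same pattern, with illustrative names (PrefixMatchSketch is not a Hive class):

    import java.util.Stack;

    public class PrefixMatchSketch {
      // Build the candidate name by prepending into one StringBuilder and
      // compare with String.contentEquals, instead of re-allocating a String
      // on every loop iteration.
      static int costWithoutWildcard(Stack<String> stack, String pattern) {
        int numElems = (stack != null) ? stack.size() : 0;
        int patLen = pattern.length();
        StringBuilder name = new StringBuilder(patLen + numElems);
        for (int pos = numElems - 1; pos >= 0; pos--) {
          name.insert(0, stack.get(pos) + "%"); // prepend without a new String
          if (name.length() >= patLen) {
            // contentEquals compares against the builder directly
            return pattern.contentEquals(name) ? patLen : -1;
          }
        }
        return -1;
      }

      public static void main(String[] args) {
        Stack<String> stack = new Stack<>();
        stack.push("TS");
        stack.push("FIL");
        System.out.println(costWithoutWildcard(stack, "TS%FIL%")); // prints 7
      }
    }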
[06/28] hive git commit: HIVE-11652: Avoid expensive call to removeAll in DefaultGraphWalker (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan/Hari Sankar Sivarama Subramaniyan)
Posted by se...@apache.org.
HIVE-11652: Avoid expensive call to removeAll in DefaultGraphWalker (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan/Hari Sankar Sivarama Subramaniyan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/af91308e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/af91308e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/af91308e
Branch: refs/heads/llap
Commit: af91308e5b6573ea6dc793912bcc628a5a40c000
Parents: 22fa921
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Sat Aug 29 11:40:03 2015 +0200
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Sat Aug 29 11:42:59 2015 +0200
----------------------------------------------------------------------
.../hadoop/hive/ql/lib/DefaultGraphWalker.java | 80 ++++++++++++++------
.../hadoop/hive/ql/lib/ForwardWalker.java | 33 ++++----
.../hadoop/hive/ql/optimizer/ColumnPruner.java | 6 +-
.../hive/ql/optimizer/ConstantPropagate.java | 10 +--
4 files changed, 79 insertions(+), 50 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/af91308e/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java b/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java
index 583c113..07d2734 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java
@@ -22,7 +22,9 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.IdentityHashMap;
+import java.util.LinkedList;
import java.util.List;
+import java.util.Queue;
import java.util.Set;
import java.util.Stack;
@@ -36,7 +38,21 @@ import org.apache.hadoop.hive.ql.parse.SemanticException;
*/
public class DefaultGraphWalker implements GraphWalker {
- protected Stack<Node> opStack;
+ /**
+ * opStack keeps the nodes that have been visited, but have not been
+ * dispatched yet
+ */
+ protected final Stack<Node> opStack;
+ /**
+ * opQueue keeps the nodes in the order that they were dispatched.
+ * Then it is used to go through the processed nodes and store
+ * the results that the dispatcher has produced (if any)
+ */
+ protected final Queue<Node> opQueue;
+ /**
+ * toWalk stores the starting nodes for the graph that needs to be
+ * traversed
+ */
protected final List<Node> toWalk = new ArrayList<Node>();
protected final IdentityHashMap<Node, Object> retMap = new IdentityHashMap<Node, Object>();
protected final Dispatcher dispatcher;
@@ -50,13 +66,7 @@ public class DefaultGraphWalker implements GraphWalker {
public DefaultGraphWalker(Dispatcher disp) {
dispatcher = disp;
opStack = new Stack<Node>();
- }
-
- /**
- * @return the toWalk
- */
- public List<Node> getToWalk() {
- return toWalk;
+ opQueue = new LinkedList<Node>();
}
/**
@@ -108,10 +118,22 @@ public class DefaultGraphWalker implements GraphWalker {
while (toWalk.size() > 0) {
Node nd = toWalk.remove(0);
walk(nd);
+ // Some walkers extending DefaultGraphWalker e.g. ForwardWalker
+ // do not use opQueue and rely solely on the toWalk structure,
+ // thus we store the results produced by the dispatcher here
+ // TODO: rewriting the logic of those walkers to use opQueue
if (nodeOutput != null && getDispatchedList().contains(nd)) {
nodeOutput.put(nd, retMap.get(nd));
}
}
+
+ // Store the results produced by the dispatcher
+ while (!opQueue.isEmpty()) {
+ Node node = opQueue.poll();
+ if (nodeOutput != null && getDispatchedList().contains(node)) {
+ nodeOutput.put(node, retMap.get(node));
+ }
+ }
}
/**
@@ -121,23 +143,33 @@ public class DefaultGraphWalker implements GraphWalker {
* current operator in the graph
* @throws SemanticException
*/
- public void walk(Node nd) throws SemanticException {
- if (opStack.empty() || nd != opStack.peek()) {
- opStack.push(nd);
- }
+ public void walk(Node nd) throws SemanticException {
+ // Push the node in the stack
+ opStack.push(nd);
+
+ // While there are still nodes to dispatch...
+ while (!opStack.empty()) {
+ Node node = opStack.peek();
- if ((nd.getChildren() == null)
- || getDispatchedList().containsAll(nd.getChildren())) {
- // all children are done or no need to walk the children
- if (!getDispatchedList().contains(nd)) {
- dispatch(nd, opStack);
+ if (node.getChildren() == null ||
+ getDispatchedList().containsAll(node.getChildren())) {
+ // Dispatch current node
+ if (!getDispatchedList().contains(node)) {
+ dispatch(node, opStack);
+ opQueue.add(node);
+ }
+ opStack.pop();
+ continue;
}
- opStack.pop();
- return;
- }
- // add children, self to the front of the queue in that order
- getToWalk().add(0, nd);
- getToWalk().removeAll(nd.getChildren());
- getToWalk().addAll(0, nd.getChildren());
+
+ // Add a single child and restart the loop
+ for (Node childNode : node.getChildren()) {
+ if (!getDispatchedList().contains(childNode)) {
+ opStack.push(childNode);
+ break;
+ }
+ }
+ } // end while
}
+
}
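The reworked walk() above is an iterative depth-first traversal: it keeps an explicit stack, dispatches a node only once all of its children have been dispatched, and pushes one pending child at a time, which avoids the old toWalk bookkeeping and its repeated List.removeAll calls (O(n*m) on an ArrayList). A self-contained sketch of that shape, assuming an acyclic graph (Node and the class name are illustrative, not Hive code):

    import java.util.ArrayDeque;
    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.Deque;
    import java.util.IdentityHashMap;
    import java.util.List;
    import java.util.Set;

    public class IterativeWalkSketch {
      static class Node {
        final String name;
        final List<Node> children = new ArrayList<>();
        Node(String name) { this.name = name; }
      }

      // Post-order walk without removeAll: an explicit stack plus an
      // identity-based "dispatched" set, pushing one unvisited child at a time.
      static List<String> walk(Node root) {
        List<String> dispatched = new ArrayList<>();
        Set<Node> done = Collections.newSetFromMap(new IdentityHashMap<>());
        Deque<Node> stack = new ArrayDeque<>();
        stack.push(root);
        while (!stack.isEmpty()) {
          Node node = stack.peek();
          if (done.containsAll(node.children)) {
            if (done.add(node)) {
              dispatched.add(node.name); // dispatch exactly once
            }
            stack.pop();
            continue;
          }
          for (Node child : node.children) { // push a single pending child
            if (!done.contains(child)) {
              stack.push(child);
              break;
            }
          }
        }
        return dispatched;
      }

      public static void main(String[] args) {
        Node a = new Node("a"), b = new Node("b"), c = new Node("c");
        a.children.add(b);
        a.children.add(c);
        System.out.println(walk(a)); // [b, c, a] - children before parent
      }
    }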
http://git-wip-us.apache.org/repos/asf/hive/blob/af91308e/ql/src/java/org/apache/hadoop/hive/ql/lib/ForwardWalker.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lib/ForwardWalker.java b/ql/src/java/org/apache/hadoop/hive/ql/lib/ForwardWalker.java
index a2db3b5..67b4700 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/lib/ForwardWalker.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/lib/ForwardWalker.java
@@ -19,20 +19,17 @@
package org.apache.hadoop.hive.ql.lib;
import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
-import org.apache.hadoop.hive.ql.lib.Dispatcher;
-import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
public class ForwardWalker extends DefaultGraphWalker {
/**
-* Constructor.
-*
-* @param disp
-* dispatcher to call for each op encountered
-*/
+ * Constructor.
+ *
+ * @param disp
+ * dispatcher to call for each op encountered
+ */
public ForwardWalker(Dispatcher disp) {
super(disp);
}
@@ -54,17 +51,17 @@ public class ForwardWalker extends DefaultGraphWalker {
@SuppressWarnings("unchecked")
protected void addAllParents(Node nd) {
Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) nd;
- getToWalk().removeAll(op.getParentOperators());
- getToWalk().addAll(0, op.getParentOperators());
+ toWalk.removeAll(op.getParentOperators());
+ toWalk.addAll(0, op.getParentOperators());
}
/**
-* walk the current operator and its descendants.
-*
-* @param nd
-* current operator in the graph
-* @throws SemanticException
-*/
+ * walk the current operator and its descendants.
+ *
+ * @param nd
+ * current operator in the graph
+ * @throws SemanticException
+ */
@Override
public void walk(Node nd) throws SemanticException {
if (opStack.empty() || nd != opStack.peek()) {
@@ -73,14 +70,14 @@ public class ForwardWalker extends DefaultGraphWalker {
if (allParentsDispatched(nd)) {
// all children are done or no need to walk the children
if (!getDispatchedList().contains(nd)) {
- getToWalk().addAll(nd.getChildren());
+ toWalk.addAll(nd.getChildren());
dispatch(nd, opStack);
}
opStack.pop();
return;
}
// add children, self to the front of the queue in that order
- getToWalk().add(0, nd);
+ toWalk.add(0, nd);
addAllParents(nd);
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/af91308e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java
index 9a45458..735b448 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java
@@ -174,10 +174,10 @@ public class ColumnPruner implements Transform {
return;
}
// move all the children to the front of queue
- getToWalk().removeAll(nd.getChildren());
- getToWalk().addAll(0, nd.getChildren());
+ toWalk.removeAll(nd.getChildren());
+ toWalk.addAll(0, nd.getChildren());
// add self to the end of the queue
- getToWalk().add(nd);
+ toWalk.add(nd);
opStack.pop();
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/af91308e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java
index dd53ced..b6f1f27 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java
@@ -151,17 +151,17 @@ public class ConstantPropagate implements Transform {
dispatch(nd, opStack);
opStack.pop();
} else {
- getToWalk().removeAll(parents);
- getToWalk().add(0, nd);
- getToWalk().addAll(0, parents);
+ toWalk.removeAll(parents);
+ toWalk.add(0, nd);
+ toWalk.addAll(0, parents);
return;
}
// move all the children to the front of queue
List<? extends Node> children = nd.getChildren();
if (children != null) {
- getToWalk().removeAll(children);
- getToWalk().addAll(children);
+ toWalk.removeAll(children);
+ toWalk.addAll(children);
}
}
}
[16/28] hive git commit: HIVE-11689 : minor flow changes to ORC split generation (Sergey Shelukhin, reviewed by Prasanth Jayachandran and Swarnim Kulkarni)
Posted by se...@apache.org.
HIVE-11689 : minor flow changes to ORC split generation (Sergey Shelukhin, reviewed by Prasanth Jayachandran and Swarnim Kulkarni)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f530f44d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f530f44d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f530f44d
Branch: refs/heads/llap
Commit: f530f44d1d95c2da2485d53f0855f8f8e0646005
Parents: c0690a6
Author: Sergey Shelukhin <se...@apache.org>
Authored: Tue Sep 1 11:23:14 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Tue Sep 1 11:23:14 2015 -0700
----------------------------------------------------------------------
.../hadoop/hive/ql/io/orc/OrcInputFormat.java | 169 +++++++++++--------
.../hive/ql/io/orc/TestInputOutputFormat.java | 13 +-
2 files changed, 107 insertions(+), 75 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/f530f44d/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 8c138b9..05efc5f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -28,6 +28,7 @@ import java.util.Map;
import java.util.NavigableMap;
import java.util.TreeMap;
import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
@@ -51,6 +52,7 @@ import org.apache.hadoop.hive.ql.io.AcidInputFormat;
import org.apache.hadoop.hive.ql.io.AcidInputFormat.DeltaMetaData;
import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
import org.apache.hadoop.hive.ql.io.AcidUtils;
+import org.apache.hadoop.hive.ql.io.AcidUtils.Directory;
import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
import org.apache.hadoop.hive.ql.io.InputFormatChecker;
import org.apache.hadoop.hive.ql.io.RecordIdentifier;
@@ -371,6 +373,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
private final Configuration conf;
private static Cache<Path, FileInfo> footerCache;
private static ExecutorService threadPool = null;
+ private static ExecutorCompletionService<AcidDirInfo> ecs = null;
private final int numBuckets;
private final long maxSize;
private final long minSize;
@@ -416,6 +419,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
threadPool = Executors.newFixedThreadPool(numThreads,
new ThreadFactoryBuilder().setDaemon(true)
.setNameFormat("ORC_GET_SPLITS #%d").build());
+ ecs = new ExecutorCompletionService<AcidDirInfo>(threadPool);
}
if (footerCache == null && cacheStripeDetails) {
@@ -433,10 +437,34 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
}
}
+ /**
+ * The full ACID directory information needed for splits; no more calls to HDFS needed.
+ * We could just live with AcidUtils.Directory but...
+ * 1) That doesn't contain base files.
+ * 2) We save fs for convenience to avoid getting it twice.
+ */
+ @VisibleForTesting
+ static final class AcidDirInfo {
+ public AcidDirInfo(FileSystem fs, Path splitPath, Directory acidInfo,
+ List<HdfsFileStatusWithId> baseOrOriginalFiles) {
+ this.splitPath = splitPath;
+ this.acidInfo = acidInfo;
+ this.baseOrOriginalFiles = baseOrOriginalFiles;
+ this.fs = fs;
+ }
+
+ final FileSystem fs;
+ final Path splitPath;
+ final AcidUtils.Directory acidInfo;
+ final List<HdfsFileStatusWithId> baseOrOriginalFiles;
+ }
+
+ @VisibleForTesting
interface SplitStrategy<T> {
List<T> getSplits() throws IOException;
}
+ @VisibleForTesting
static final class SplitInfo extends ACIDSplitStrategy {
private final Context context;
private final FileSystem fs;
@@ -638,7 +666,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
* Given a directory, get the list of files and blocks in those files.
* To parallelize file generator use "mapreduce.input.fileinputformat.list-status.num-threads"
*/
- static final class FileGenerator implements Callable<SplitStrategy> {
+ static final class FileGenerator implements Callable<AcidDirInfo> {
private final Context context;
private final FileSystem fs;
private final Path dir;
@@ -652,69 +680,14 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
}
@Override
- public SplitStrategy call() throws IOException {
- final SplitStrategy splitStrategy;
+ public AcidDirInfo call() throws IOException {
AcidUtils.Directory dirInfo = AcidUtils.getAcidState(dir,
context.conf, context.transactionList, useFileIds);
- List<DeltaMetaData> deltas = AcidUtils.serializeDeltas(dirInfo.getCurrentDirectories());
Path base = dirInfo.getBaseDirectory();
- List<HdfsFileStatusWithId> original = dirInfo.getOriginalFiles();
- boolean[] covered = new boolean[context.numBuckets];
- boolean isOriginal = base == null;
-
- // if we have a base to work from
- if (base != null || !original.isEmpty()) {
-
- // find the base files (original or new style)
- List<HdfsFileStatusWithId> children = original;
- if (base != null) {
- children = findBaseFiles(base, useFileIds);
- }
-
- long totalFileSize = 0;
- for (HdfsFileStatusWithId child : children) {
- totalFileSize += child.getFileStatus().getLen();
- AcidOutputFormat.Options opts = AcidUtils.parseBaseBucketFilename
- (child.getFileStatus().getPath(), context.conf);
- int b = opts.getBucket();
- // If the bucket is in the valid range, mark it as covered.
- // I wish Hive actually enforced bucketing all of the time.
- if (b >= 0 && b < covered.length) {
- covered[b] = true;
- }
- }
-
- int numFiles = children.size();
- long avgFileSize = totalFileSize / numFiles;
- int totalFiles = context.numFilesCounter.addAndGet(numFiles);
- switch(context.splitStrategyKind) {
- case BI:
- // BI strategy requested through config
- splitStrategy = new BISplitStrategy(context, fs, dir, children, isOriginal,
- deltas, covered);
- break;
- case ETL:
- // ETL strategy requested through config
- splitStrategy = new ETLSplitStrategy(context, fs, dir, children, isOriginal,
- deltas, covered);
- break;
- default:
- // HYBRID strategy
- if (avgFileSize > context.maxSize || totalFiles <= context.minSplits) {
- splitStrategy = new ETLSplitStrategy(context, fs, dir, children, isOriginal, deltas,
- covered);
- } else {
- splitStrategy = new BISplitStrategy(context, fs, dir, children, isOriginal, deltas,
- covered);
- }
- break;
- }
- } else {
- // no base, only deltas
- splitStrategy = new ACIDSplitStrategy(dir, context.numBuckets, deltas, covered);
- }
-
- return splitStrategy;
+ // find the base files (original or new style)
+ List<HdfsFileStatusWithId> children = (base == null)
+ ? dirInfo.getOriginalFiles() : findBaseFiles(base, useFileIds);
+ return new AcidDirInfo(fs, dir, dirInfo, children);
}
private List<HdfsFileStatusWithId> findBaseFiles(
@@ -1052,21 +1025,24 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
// use threads to resolve directories into splits
Context context = new Context(conf, numSplits);
List<OrcSplit> splits = Lists.newArrayList();
- List<Future<?>> pathFutures = Lists.newArrayList();
- List<Future<?>> splitFutures = Lists.newArrayList();
+ List<Future<AcidDirInfo>> pathFutures = Lists.newArrayList();
+ List<Future<List<OrcSplit>>> splitFutures = Lists.newArrayList();
// multi-threaded file statuses and split strategy
boolean useFileIds = HiveConf.getBoolVar(conf, ConfVars.HIVE_ORC_INCLUDE_FILE_ID_IN_SPLITS);
- for (Path dir : getInputPaths(conf)) {
+ Path[] paths = getInputPaths(conf);
+ for (Path dir : paths) {
FileSystem fs = dir.getFileSystem(conf);
FileGenerator fileGenerator = new FileGenerator(context, fs, dir, useFileIds);
- pathFutures.add(context.threadPool.submit(fileGenerator));
+ pathFutures.add(Context.ecs.submit(fileGenerator));
}
// complete path futures and schedule split generation
try {
- for (Future<?> pathFuture : pathFutures) {
- SplitStrategy splitStrategy = (SplitStrategy) pathFuture.get();
+ for (int notIndex = 0; notIndex < paths.length; ++notIndex) {
+ AcidDirInfo adi = Context.ecs.take().get();
+ SplitStrategy splitStrategy = determineSplitStrategy(
+ context, adi.fs, adi.splitPath, adi.acidInfo, adi.baseOrOriginalFiles);
if (isDebugEnabled) {
LOG.debug(splitStrategy);
@@ -1075,7 +1051,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
if (splitStrategy instanceof ETLSplitStrategy) {
List<SplitInfo> splitInfos = splitStrategy.getSplits();
for (SplitInfo splitInfo : splitInfos) {
- splitFutures.add(context.threadPool.submit(new SplitGenerator(splitInfo)));
+ splitFutures.add(Context.threadPool.submit(new SplitGenerator(splitInfo)));
}
} else {
splits.addAll(splitStrategy.getSplits());
@@ -1083,8 +1059,8 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
}
// complete split futures
- for (Future<?> splitFuture : splitFutures) {
- splits.addAll((Collection<? extends OrcSplit>) splitFuture.get());
+ for (Future<List<OrcSplit>> splitFuture : splitFutures) {
+ splits.addAll(splitFuture.get());
}
} catch (Exception e) {
cancelFutures(pathFutures);
@@ -1106,8 +1082,8 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
return splits;
}
- private static void cancelFutures(List<Future<?>> futures) {
- for (Future future : futures) {
+ private static <T> void cancelFutures(List<Future<T>> futures) {
+ for (Future<T> future : futures) {
future.cancel(true);
}
}
@@ -1375,6 +1351,55 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
directory);
}
+
+ @VisibleForTesting
+ static SplitStrategy determineSplitStrategy(Context context, FileSystem fs, Path dir,
+ AcidUtils.Directory dirInfo, List<HdfsFileStatusWithId> baseOrOriginalFiles) {
+ Path base = dirInfo.getBaseDirectory();
+ List<HdfsFileStatusWithId> original = dirInfo.getOriginalFiles();
+ List<DeltaMetaData> deltas = AcidUtils.serializeDeltas(dirInfo.getCurrentDirectories());
+ boolean[] covered = new boolean[context.numBuckets];
+ boolean isOriginal = base == null;
+
+ // if we have a base to work from
+ if (base != null || !original.isEmpty()) {
+ long totalFileSize = 0;
+ for (HdfsFileStatusWithId child : baseOrOriginalFiles) {
+ totalFileSize += child.getFileStatus().getLen();
+ AcidOutputFormat.Options opts = AcidUtils.parseBaseBucketFilename
+ (child.getFileStatus().getPath(), context.conf);
+ int b = opts.getBucket();
+ // If the bucket is in the valid range, mark it as covered.
+ // I wish Hive actually enforced bucketing all of the time.
+ if (b >= 0 && b < covered.length) {
+ covered[b] = true;
+ }
+ }
+
+ int numFiles = baseOrOriginalFiles.size();
+ long avgFileSize = totalFileSize / numFiles;
+ int totalFiles = context.numFilesCounter.addAndGet(numFiles);
+ switch(context.splitStrategyKind) {
+ case BI:
+ // BI strategy requested through config
+ return new BISplitStrategy(context, fs, dir, baseOrOriginalFiles, isOriginal, deltas, covered);
+ case ETL:
+ // ETL strategy requested through config
+ return new ETLSplitStrategy(context, fs, dir, baseOrOriginalFiles, isOriginal, deltas, covered);
+ default:
+ // HYBRID strategy
+ if (avgFileSize > context.maxSize || totalFiles <= context.minSplits) {
+ return new ETLSplitStrategy(context, fs, dir, baseOrOriginalFiles, isOriginal, deltas, covered);
+ } else {
+ return new BISplitStrategy(context, fs, dir, baseOrOriginalFiles, isOriginal, deltas, covered);
+ }
+ }
+ } else {
+ // no base, only deltas
+ return new ACIDSplitStrategy(dir, context.numBuckets, deltas, covered);
+ }
+ }
+
@Override
public RawReader<OrcStruct> getRawReader(Configuration conf,
boolean collapseEvents,
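
The HYBRID default in determineSplitStrategy above boils down to a single size/count heuristic. A hedged sketch of just that rule (note the real code compares a running totalFiles counter accumulated across directories, collapsed to numFiles here):

// Sketch of the HYBRID rule only; not part of OrcInputFormat.
public class HybridRuleSketch {
  // ETL reads ORC footers to produce fine-grained splits; BI splits on
  // block boundaries without reading footers.
  static String choose(long totalFileSize, int numFiles,
                       long maxSize, int minSplits) {
    long avgFileSize = totalFileSize / numFiles;
    return (avgFileSize > maxSize || numFiles <= minSplits) ? "ETL" : "BI";
  }

  public static void main(String[] args) {
    // 10 files averaging 1 GB against a 256 MB max split size -> ETL
    System.out.println(choose(10L << 30, 10, 256L << 20, 1));
  }
}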
http://git-wip-us.apache.org/repos/asf/hive/blob/f530f44d/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
index ce86cd8..8ba4d2e 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
@@ -484,7 +484,7 @@ public class TestInputOutputFormat {
conf, n);
OrcInputFormat.FileGenerator gen = new OrcInputFormat.FileGenerator(
context, fs, new MockPath(fs, "mock:/a/b"), false);
- final SplitStrategy splitStrategy = gen.call();
+ final SplitStrategy splitStrategy = createSplitStrategy(context, gen);
assertTrue(
String.format(
"Split strategy for %d files x %d size for %d splits", c, s,
@@ -508,7 +508,7 @@ public class TestInputOutputFormat {
OrcInputFormat.FileGenerator gen =
new OrcInputFormat.FileGenerator(context, fs,
new MockPath(fs, "mock:/a/b"), false);
- SplitStrategy splitStrategy = gen.call();
+ OrcInputFormat.SplitStrategy splitStrategy = createSplitStrategy(context, gen);
assertEquals(true, splitStrategy instanceof OrcInputFormat.BISplitStrategy);
conf.set("mapreduce.input.fileinputformat.split.maxsize", "500");
@@ -521,11 +521,18 @@ public class TestInputOutputFormat {
new MockFile("mock:/a/b/part-04", 1000, new byte[1000]));
gen = new OrcInputFormat.FileGenerator(context, fs,
new MockPath(fs, "mock:/a/b"), false);
- splitStrategy = gen.call();
+ splitStrategy = createSplitStrategy(context, gen);
assertEquals(true, splitStrategy instanceof OrcInputFormat.ETLSplitStrategy);
}
+ private OrcInputFormat.SplitStrategy createSplitStrategy(
+ OrcInputFormat.Context context, OrcInputFormat.FileGenerator gen) throws IOException {
+ OrcInputFormat.AcidDirInfo adi = gen.call();
+ return OrcInputFormat.determineSplitStrategy(
+ context, adi.fs, adi.splitPath, adi.acidInfo, adi.baseOrOriginalFiles);
+ }
+
public static class MockBlock {
int offset;
int length;
[04/28] hive git commit: HIVE-11618: Correct the SARG api to reunify the PredicateLeaf.Type INTEGER and LONG (Owen O'Malley, reviewed by Sergio Pena)
Posted by se...@apache.org.
HIVE-11618: Correct the SARG api to reunify the PredicateLeaf.Type INTEGER and LONG (Owen O'Malley, reviewed by Sergio Pena)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/97bf32a1
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/97bf32a1
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/97bf32a1
Branch: refs/heads/llap
Commit: 97bf32a12f754d83a362aaa4048a6612d299a386
Parents: ed4517c
Author: Sergio Pena <se...@cloudera.com>
Authored: Fri Aug 28 17:59:15 2015 -0500
Committer: Sergio Pena <se...@cloudera.com>
Committed: Fri Aug 28 17:59:15 2015 -0500
----------------------------------------------------------------------
.../hadoop/hive/ql/io/orc/RecordReaderImpl.java | 2 -
.../hive/ql/io/parquet/LeafFilterFactory.java | 14 +-
.../read/ParquetFilterPredicateConverter.java | 35 +++--
.../hive/ql/io/sarg/ConvertAstToSearchArg.java | 3 -
.../hive/ql/io/orc/TestInputOutputFormat.java | 4 +-
.../hadoop/hive/ql/io/orc/TestOrcFile.java | 10 +-
.../hive/ql/io/orc/TestRecordReaderImpl.java | 42 +++---
.../parquet/TestParquetRecordReaderWrapper.java | 50 +++++---
.../read/TestParquetFilterPredicate.java | 6 +-
.../ql/io/sarg/TestConvertAstToSearchArg.java | 128 +++++++++++--------
.../hive/ql/io/sarg/TestSearchArgumentImpl.java | 22 ++--
.../hadoop/hive/ql/io/sarg/PredicateLeaf.java | 3 +-
12 files changed, 181 insertions(+), 138 deletions(-)
----------------------------------------------------------------------
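
The visible API effect of this change: predicates over any integer-family column (tinyint through bigint) are now built and read back with Type.LONG and long literals. A minimal sketch using the SearchArgumentFactory calls exercised in the test diffs below:

SearchArgument sarg = SearchArgumentFactory.newBuilder()
    .startAnd()
    .lessThan("id", PredicateLeaf.Type.LONG, 15L)  // was (Type.INTEGER, 15)
    .end()
    .build();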
http://git-wip-us.apache.org/repos/asf/hive/blob/97bf32a1/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
index 0d765b1..fcb3746 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
@@ -637,8 +637,6 @@ class RecordReaderImpl implements RecordReader {
return ((BigDecimal) obj).doubleValue();
}
break;
- case INTEGER:
- // fall through
case LONG:
if (obj instanceof Number) {
// widening conversion
http://git-wip-us.apache.org/repos/asf/hive/blob/97bf32a1/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java
index a1dbc1a..1ceea6e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java
@@ -22,6 +22,8 @@ import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf.Operator;
import org.apache.parquet.filter2.predicate.FilterApi;
import org.apache.parquet.filter2.predicate.FilterPredicate;
import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.schema.PrimitiveType;
+import org.apache.parquet.schema.Type;
import static org.apache.parquet.filter2.predicate.FilterApi.eq;
import static org.apache.parquet.filter2.predicate.FilterApi.lt;
@@ -146,12 +148,16 @@ public class LeafFilterFactory {
* @param type FilterPredicateType
* @return
*/
- public FilterPredicateLeafBuilder getLeafFilterBuilderByType(PredicateLeaf.Type type){
+ public FilterPredicateLeafBuilder getLeafFilterBuilderByType(PredicateLeaf.Type type,
+ Type parquetType){
switch (type){
- case INTEGER:
- return new IntFilterPredicateLeafBuilder();
case LONG:
- return new LongFilterPredicateLeafBuilder();
+ if (parquetType.asPrimitiveType().getPrimitiveTypeName() ==
+ PrimitiveType.PrimitiveTypeName.INT32) {
+ return new IntFilterPredicateLeafBuilder();
+ } else {
+ return new LongFilterPredicateLeafBuilder();
+ }
case FLOAT: // float and double
return new DoubleFilterPredicateLeafBuilder();
case STRING: // string, char, varchar
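
The practical upshot: a SARG leaf typed LONG can still produce an int-typed Parquet filter when the underlying column is INT32. A hedged sketch of the new dispatch, assuming the parquet-mr schema classes used in the test diffs below:

MessageType schema = MessageTypeParser.parseMessageType(
    "message test { required int32 id; required int64 ts; }");
LeafFilterFactory factory = new LeafFilterFactory();
// INT32 column -> IntFilterPredicateLeafBuilder
factory.getLeafFilterBuilderByType(PredicateLeaf.Type.LONG, schema.getType("id"));
// INT64 column -> LongFilterPredicateLeafBuilder
factory.getLeafFilterBuilderByType(PredicateLeaf.Type.LONG, schema.getType("ts"));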
http://git-wip-us.apache.org/repos/asf/hive/blob/97bf32a1/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetFilterPredicateConverter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetFilterPredicateConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetFilterPredicateConverter.java
index f170026..d1864ae 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetFilterPredicateConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetFilterPredicateConverter.java
@@ -37,14 +37,6 @@ public class ParquetFilterPredicateConverter {
private static final Log LOG = LogFactory.getLog(ParquetFilterPredicateConverter.class);
/**
- * Translate the search argument to the filter predicate parquet uses
- * @return translate the sarg into a filter predicate
- */
- public static FilterPredicate toFilterPredicate(SearchArgument sarg) {
- return toFilterPredicate(sarg, null);
- }
-
- /**
* Translate the search argument to the filter predicate parquet uses. It includes
* only the columns from the passed schema.
* @return translate the sarg into a filter predicate
@@ -58,18 +50,21 @@ public class ParquetFilterPredicateConverter {
}
}
- return translate(sarg.getExpression(), sarg.getLeaves(), columns);
+ return translate(sarg.getExpression(), sarg.getLeaves(), columns, schema);
}
- private static FilterPredicate translate(ExpressionTree root, List<PredicateLeaf> leaves, Set<String> columns) {
+ private static FilterPredicate translate(ExpressionTree root,
+ List<PredicateLeaf> leaves,
+ Set<String> columns,
+ MessageType schema) {
FilterPredicate p = null;
switch (root.getOperator()) {
case OR:
for(ExpressionTree child: root.getChildren()) {
if (p == null) {
- p = translate(child, leaves, columns);
+ p = translate(child, leaves, columns, schema);
} else {
- FilterPredicate right = translate(child, leaves, columns);
+ FilterPredicate right = translate(child, leaves, columns, schema);
// constant means no filter, ignore it when it is null
if(right != null){
p = FilterApi.or(p, right);
@@ -80,9 +75,9 @@ public class ParquetFilterPredicateConverter {
case AND:
for(ExpressionTree child: root.getChildren()) {
if (p == null) {
- p = translate(child, leaves, columns);
+ p = translate(child, leaves, columns, schema);
} else {
- FilterPredicate right = translate(child, leaves, columns);
+ FilterPredicate right = translate(child, leaves, columns, schema);
// constant means no filter, ignore it when it is null
if(right != null){
p = FilterApi.and(p, right);
@@ -91,7 +86,8 @@ public class ParquetFilterPredicateConverter {
}
return p;
case NOT:
- FilterPredicate op = translate(root.getChildren().get(0), leaves, columns);
+ FilterPredicate op = translate(root.getChildren().get(0), leaves,
+ columns, schema);
if (op != null) {
return FilterApi.not(op);
} else {
@@ -101,8 +97,9 @@ public class ParquetFilterPredicateConverter {
PredicateLeaf leaf = leaves.get(root.getLeaf());
// If columns is null, then we need to create the leaf
- if (columns == null || columns.contains(leaf.getColumnName())) {
- return buildFilterPredicateFromPredicateLeaf(leaf);
+ if (columns.contains(leaf.getColumnName())) {
+ Type parquetType = schema.getType(leaf.getColumnName());
+ return buildFilterPredicateFromPredicateLeaf(leaf, parquetType);
} else {
// Do not create predicate if the leaf is not on the passed schema.
return null;
@@ -116,12 +113,12 @@ public class ParquetFilterPredicateConverter {
}
private static FilterPredicate buildFilterPredicateFromPredicateLeaf
- (PredicateLeaf leaf) {
+ (PredicateLeaf leaf, Type parquetType) {
LeafFilterFactory leafFilterFactory = new LeafFilterFactory();
FilterPredicateLeafBuilder builder;
try {
builder = leafFilterFactory
- .getLeafFilterBuilderByType(leaf.getType());
+ .getLeafFilterBuilderByType(leaf.getType(), parquetType);
if (builder == null) {
return null;
}
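
One behavioral note: translate() now requires a schema, and a leaf whose column is absent from it returns null, which the AND/OR branches silently skip. A condensed sketch of the effect, mirroring TestParquetFilterPredicate below:

SearchArgument sarg = SearchArgumentFactory.newBuilder()
    .startAnd()
    .nullSafeEquals("a", PredicateLeaf.Type.STRING, "stinger")
    .in("z", PredicateLeaf.Type.LONG, 1L, 2L, 3L)  // z absent from schema
    .end()
    .build();
MessageType schema = MessageTypeParser.parseMessageType(
    "message test { required binary a; }");
FilterPredicate p =
    ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
// p constrains only column a; the z leaf translated to null and dropped out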
http://git-wip-us.apache.org/repos/asf/hive/blob/97bf32a1/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java
index 5c4b7ea..e034650 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java
@@ -88,7 +88,6 @@ public class ConvertAstToSearchArg {
case BYTE:
case SHORT:
case INT:
- return PredicateLeaf.Type.INTEGER;
case LONG:
return PredicateLeaf.Type.LONG;
case CHAR:
@@ -139,8 +138,6 @@ public class ConvertAstToSearchArg {
return null;
}
switch (type) {
- case INTEGER:
- return ((Number) lit).intValue();
case LONG:
return ((Number) lit).longValue();
case STRING:
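
Reader-side, this is why every assertion in the tests below moves from Integer to Long: a leaf built over any integer-family column now reports Type.LONG and boxes its literal as a Long. A sketch, given some sarg whose first leaf compares an int column against 12:

PredicateLeaf leaf = sarg.getLeaves().get(0);
leaf.getType();     // PredicateLeaf.Type.LONG, formerly INTEGER
leaf.getLiteral();  // boxed as 12L, formerly Integer 12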
http://git-wip-us.apache.org/repos/asf/hive/blob/97bf32a1/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
index 547e799..ce86cd8 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
@@ -1844,7 +1844,7 @@ public class TestInputOutputFormat {
types.add(builder.build());
types.add(builder.build());
SearchArgument isNull = SearchArgumentFactory.newBuilder()
- .startAnd().isNull("cost", PredicateLeaf.Type.INTEGER).end().build();
+ .startAnd().isNull("cost", PredicateLeaf.Type.LONG).end().build();
conf.set(ConvertAstToSearchArg.SARG_PUSHDOWN, toKryo(isNull));
conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR,
"url,cost");
@@ -1889,7 +1889,7 @@ public class TestInputOutputFormat {
SearchArgument sarg =
SearchArgumentFactory.newBuilder()
.startAnd()
- .lessThan("z", PredicateLeaf.Type.INTEGER, new Integer(0))
+ .lessThan("z", PredicateLeaf.Type.LONG, new Long(0))
.end()
.build();
conf.set("sarg.pushdown", toKryo(sarg));
http://git-wip-us.apache.org/repos/asf/hive/blob/97bf32a1/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
index 4480d22..0bb8401 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
@@ -1923,9 +1923,9 @@ public class TestOrcFile {
SearchArgument sarg = SearchArgumentFactory.newBuilder()
.startAnd()
.startNot()
- .lessThan("int1", PredicateLeaf.Type.INTEGER, 300000)
+ .lessThan("int1", PredicateLeaf.Type.LONG, 300000L)
.end()
- .lessThan("int1", PredicateLeaf.Type.INTEGER, 600000)
+ .lessThan("int1", PredicateLeaf.Type.LONG, 600000L)
.end()
.build();
RecordReader rows = reader.rowsOptions(new Reader.Options()
@@ -1946,7 +1946,7 @@ public class TestOrcFile {
// look through the file with no rows selected
sarg = SearchArgumentFactory.newBuilder()
.startAnd()
- .lessThan("int1", PredicateLeaf.Type.INTEGER, 0)
+ .lessThan("int1", PredicateLeaf.Type.LONG, 0L)
.end()
.build();
rows = reader.rowsOptions(new Reader.Options()
@@ -1959,9 +1959,9 @@ public class TestOrcFile {
// select first 100 and last 100 rows
sarg = SearchArgumentFactory.newBuilder()
.startOr()
- .lessThan("int1", PredicateLeaf.Type.INTEGER, 300 * 100)
+ .lessThan("int1", PredicateLeaf.Type.LONG, 300L * 100)
.startNot()
- .lessThan("int1", PredicateLeaf.Type.INTEGER, 300 * 3400)
+ .lessThan("int1", PredicateLeaf.Type.LONG, 300L * 3400)
.end()
.end()
.build();
http://git-wip-us.apache.org/repos/asf/hive/blob/97bf32a1/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java
index 7957cb4..839bbc6 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java
@@ -445,7 +445,7 @@ public class TestRecordReaderImpl {
@Test
public void testPredEvalWithStringStats() throws Exception {
PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf(
- PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER, "x", 100, null);
+ PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 100L, null);
assertEquals(TruthValue.YES_NO,
RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null));
@@ -550,7 +550,7 @@ public class TestRecordReaderImpl {
@Test
public void testPredEvalWithDecimalStats() throws Exception {
PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf(
- PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER, "x", 15, null);
+ PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null);
assertEquals(TruthValue.YES_NO,
RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null));
@@ -590,7 +590,7 @@ public class TestRecordReaderImpl {
@Test
public void testPredEvalWithTimestampStats() throws Exception {
PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf(
- PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER, "x", 15, null);
+ PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null);
assertEquals(TruthValue.YES_NO,
RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null));
@@ -637,8 +637,8 @@ public class TestRecordReaderImpl {
@Test
public void testEquals() throws Exception {
PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
- (PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.INTEGER,
- "x", 15, null);
+ (PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.LONG,
+ "x", 15L, null);
assertEquals(TruthValue.NO_NULL,
RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null));
assertEquals(TruthValue.YES_NO_NULL,
@@ -656,8 +656,8 @@ public class TestRecordReaderImpl {
@Test
public void testNullSafeEquals() throws Exception {
PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
- (PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER,
- "x", 15, null);
+ (PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG,
+ "x", 15L, null);
assertEquals(TruthValue.NO,
RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null));
assertEquals(TruthValue.YES_NO,
@@ -675,8 +675,8 @@ public class TestRecordReaderImpl {
@Test
public void testLessThan() throws Exception {
PredicateLeaf lessThan = TestSearchArgumentImpl.createPredicateLeaf
- (PredicateLeaf.Operator.LESS_THAN, PredicateLeaf.Type.INTEGER,
- "x", 15, null);
+ (PredicateLeaf.Operator.LESS_THAN, PredicateLeaf.Type.LONG,
+ "x", 15L, null);
assertEquals(TruthValue.NO_NULL,
RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), lessThan, null));
assertEquals(TruthValue.NO_NULL,
@@ -692,8 +692,8 @@ public class TestRecordReaderImpl {
@Test
public void testLessThanEquals() throws Exception {
PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
- (PredicateLeaf.Operator.LESS_THAN_EQUALS, PredicateLeaf.Type.INTEGER,
- "x", 15, null);
+ (PredicateLeaf.Operator.LESS_THAN_EQUALS, PredicateLeaf.Type.LONG,
+ "x", 15L, null);
assertEquals(TruthValue.NO_NULL,
RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null));
assertEquals(TruthValue.YES_NO_NULL,
@@ -709,10 +709,10 @@ public class TestRecordReaderImpl {
@Test
public void testIn() throws Exception {
List<Object> args = new ArrayList<Object>();
- args.add(10);
- args.add(20);
+ args.add(10L);
+ args.add(20L);
PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
- (PredicateLeaf.Operator.IN, PredicateLeaf.Type.INTEGER,
+ (PredicateLeaf.Operator.IN, PredicateLeaf.Type.LONG,
"x", null, args);
assertEquals(TruthValue.YES_NULL,
RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 20L), pred, null));
@@ -727,10 +727,10 @@ public class TestRecordReaderImpl {
@Test
public void testBetween() throws Exception {
List<Object> args = new ArrayList<Object>();
- args.add(10);
- args.add(20);
+ args.add(10L);
+ args.add(20L);
PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
- (PredicateLeaf.Operator.BETWEEN, PredicateLeaf.Type.INTEGER,
+ (PredicateLeaf.Operator.BETWEEN, PredicateLeaf.Type.LONG,
"x", null, args);
assertEquals(TruthValue.NO_NULL,
RecordReaderImpl.evaluatePredicateProto(createIntStats(0L, 5L), pred, null));
@@ -751,7 +751,7 @@ public class TestRecordReaderImpl {
@Test
public void testIsNull() throws Exception {
PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
- (PredicateLeaf.Operator.IS_NULL, PredicateLeaf.Type.INTEGER,
+ (PredicateLeaf.Operator.IS_NULL, PredicateLeaf.Type.LONG,
"x", null, null);
assertEquals(TruthValue.YES_NO,
RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null));
@@ -1306,10 +1306,10 @@ public class TestRecordReaderImpl {
@Test
public void testIntInBloomFilter() throws Exception {
List<Object> args = new ArrayList<Object>();
- args.add(15);
- args.add(19);
+ args.add(15L);
+ args.add(19L);
PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
- (PredicateLeaf.Operator.IN, PredicateLeaf.Type.INTEGER,
+ (PredicateLeaf.Operator.IN, PredicateLeaf.Type.LONG,
"x", null, args);
BloomFilterIO bf = new BloomFilterIO(10000);
for (int i = 20; i < 1000; i++) {
http://git-wip-us.apache.org/repos/asf/hive/blob/97bf32a1/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetRecordReaderWrapper.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetRecordReaderWrapper.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetRecordReaderWrapper.java
index f9ca528..e92b696 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetRecordReaderWrapper.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetRecordReaderWrapper.java
@@ -28,6 +28,8 @@ import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.MessageTypeParser;
import org.junit.Test;
import java.sql.Date;
@@ -48,15 +50,19 @@ public class TestParquetRecordReaderWrapper {
SearchArgument sarg = SearchArgumentFactory.newBuilder()
.startNot()
.startOr()
- .isNull("x", PredicateLeaf.Type.INTEGER)
- .between("y", PredicateLeaf.Type.INTEGER, 10, 20)
- .in("z", PredicateLeaf.Type.INTEGER, 1, 2, 3)
+ .isNull("x", PredicateLeaf.Type.LONG)
+ .between("y", PredicateLeaf.Type.LONG, 10L, 20L)
+ .in("z", PredicateLeaf.Type.LONG, 1L, 2L, 3L)
.nullSafeEquals("a", PredicateLeaf.Type.STRING, "stinger")
.end()
.end()
.build();
- FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
+ MessageType schema = MessageTypeParser.parseMessageType("message test {" +
+ " optional int32 x; required int32 y; required int32 z;" +
+ " optional binary a;}");
+ FilterPredicate p =
+ ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
String expected =
"and(and(and(not(eq(x, null)), not(and(lt(y, 20), not(lteq(y, 10))))), not(or(or(eq(z, 1), " +
"eq(z, 2)), eq(z, 3)))), not(eq(a, Binary{\"stinger\"})))";
@@ -75,23 +81,27 @@ public class TestParquetRecordReaderWrapper {
.equals("z", PredicateLeaf.Type.DECIMAL, new HiveDecimalWritable("1.0"))
.end()
.build();
+ MessageType schema = MessageTypeParser.parseMessageType("message test {" +
+ " required int32 x; required binary y; required binary z;}");
assertEquals("lteq(y, Binary{\"hi \"})",
- ParquetFilterPredicateConverter.toFilterPredicate(sarg).toString());
+ ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema).toString());
sarg = SearchArgumentFactory.newBuilder()
.startNot()
.startOr()
- .isNull("x", PredicateLeaf.Type.INTEGER)
+ .isNull("x", PredicateLeaf.Type.LONG)
.between("y", PredicateLeaf.Type.DECIMAL,
new HiveDecimalWritable("10"), new HiveDecimalWritable("20.0"))
- .in("z", PredicateLeaf.Type.INTEGER, 1, 2, 3)
+ .in("z", PredicateLeaf.Type.LONG, 1L, 2L, 3L)
.nullSafeEquals("a", PredicateLeaf.Type.STRING,
new HiveVarchar("stinger", 100).toString())
.end()
.end()
.build();
-
- FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
+ schema = MessageTypeParser.parseMessageType("message test {" +
+ " optional int32 x; required binary y; required int32 z;" +
+ " optional binary a;}");
+ FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
String expected =
"and(and(not(eq(x, null)), not(or(or(eq(z, 1), eq(z, 2)), eq(z, 3)))), " +
"not(eq(a, Binary{\"stinger\"})))";
@@ -110,23 +120,28 @@ public class TestParquetRecordReaderWrapper {
new HiveDecimalWritable("1.0"))
.end()
.build();
+ MessageType schema = MessageTypeParser.parseMessageType("message test {" +
+ " required int32 x; required binary y; required binary z;}");
assertEquals("lteq(y, Binary{\"hi \"})",
- ParquetFilterPredicateConverter.toFilterPredicate(sarg).toString());
+ ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema).toString());
sarg = SearchArgumentFactory.newBuilder()
.startNot()
.startOr()
- .isNull("x", PredicateLeaf.Type.INTEGER)
+ .isNull("x", PredicateLeaf.Type.LONG)
.between("y", PredicateLeaf.Type.DECIMAL, new HiveDecimalWritable("10"),
new HiveDecimalWritable("20.0"))
- .in("z", PredicateLeaf.Type.INTEGER, 1, 2, 3)
+ .in("z", PredicateLeaf.Type.LONG, 1L, 2L, 3L)
.nullSafeEquals("a", PredicateLeaf.Type.STRING,
new HiveVarchar("stinger", 100).toString())
.end()
.end()
.build();
+ schema = MessageTypeParser.parseMessageType("message test {" +
+ " optional int32 x; required binary y; required int32 z;" +
+ " optional binary a;}");
- FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
+ FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
String expected = "and(and(not(eq(x, null)), not(or(or(eq(z, 1), eq(z, 2)), eq(z, 3)))), " +
"not(eq(a, Binary{\"stinger\"})))";
assertEquals(expected, p.toString());
@@ -137,16 +152,19 @@ public class TestParquetRecordReaderWrapper {
SearchArgument sarg =
SearchArgumentFactory.newBuilder()
.startAnd()
- .lessThan("x", PredicateLeaf.Type.INTEGER, new Integer((short) 22))
- .lessThan("x1", PredicateLeaf.Type.INTEGER, new Integer(22))
+ .lessThan("x", PredicateLeaf.Type.LONG, 22L)
+ .lessThan("x1", PredicateLeaf.Type.LONG, 22L)
.lessThanEquals("y", PredicateLeaf.Type.STRING,
new HiveChar("hi", 10).toString())
.equals("z", PredicateLeaf.Type.FLOAT, new Double(0.22))
.equals("z1", PredicateLeaf.Type.FLOAT, new Double(0.22))
.end()
.build();
+ MessageType schema = MessageTypeParser.parseMessageType("message test {" +
+ " required int32 x; required int32 x1;" +
+ " required binary y; required float z; required float z1;}");
- FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
+ FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
String expected = "and(and(and(and(lt(x, 22), lt(x1, 22))," +
" lteq(y, Binary{\"hi \"})), eq(z, " +
"0.22)), eq(z1, 0.22))";
http://git-wip-us.apache.org/repos/asf/hive/blob/97bf32a1/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java
index 847a02b..ac5c1a0 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java
@@ -35,9 +35,9 @@ public class TestParquetFilterPredicate {
SearchArgument sarg = SearchArgumentFactory.newBuilder()
.startNot()
.startOr()
- .isNull("a", PredicateLeaf.Type.INTEGER)
- .between("y", PredicateLeaf.Type.INTEGER, 10, 20) // Column will be removed from filter
- .in("z", PredicateLeaf.Type.INTEGER, 1, 2, 3) // Column will be removed from filter
+ .isNull("a", PredicateLeaf.Type.LONG)
+ .between("y", PredicateLeaf.Type.LONG, 10L, 20L) // Column will be removed from filter
+ .in("z", PredicateLeaf.Type.LONG, 1L, 2L, 3L) // Column will be removed from filter
.nullSafeEquals("a", PredicateLeaf.Type.STRING, "stinger")
.end()
.end()
http://git-wip-us.apache.org/repos/asf/hive/blob/97bf32a1/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestConvertAstToSearchArg.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestConvertAstToSearchArg.java b/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestConvertAstToSearchArg.java
index 9e8425a..e72789d 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestConvertAstToSearchArg.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestConvertAstToSearchArg.java
@@ -28,6 +28,8 @@ import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.io.parquet.read.ParquetFilterPredicateConverter;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.MessageTypeParser;
import org.junit.Test;
import java.beans.XMLDecoder;
@@ -550,7 +552,11 @@ public class TestConvertAstToSearchArg {
List<PredicateLeaf> leaves = sarg.getLeaves();
assertEquals(9, leaves.size());
- FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
+ MessageType schema =
+ MessageTypeParser.parseMessageType("message test { required int32 id;" +
+ " required binary first_name; }");
+
+ FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
String[] conditions = new String[]{
"eq(first_name, Binary{\"john\"})", /* first_name = 'john' */
"not(lteq(first_name, Binary{\"greg\"}))", /* 'greg' < first_name */
@@ -586,34 +592,34 @@ public class TestConvertAstToSearchArg {
assertEquals("alan", leaf.getLiteral());
leaf = leaves.get(3);
- assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+ assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(12, leaf.getLiteral());
+ assertEquals(12L, leaf.getLiteral());
leaf = leaves.get(4);
- assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+ assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(13, leaf.getLiteral());
+ assertEquals(13L, leaf.getLiteral());
leaf = leaves.get(5);
- assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+ assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(15, leaf.getLiteral());
+ assertEquals(15L, leaf.getLiteral());
leaf = leaves.get(6);
- assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+ assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(16, leaf.getLiteral());
+ assertEquals(16L, leaf.getLiteral());
leaf = leaves.get(7);
- assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+ assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
assertEquals(PredicateLeaf.Operator.NULL_SAFE_EQUALS, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(30, leaf.getLiteral());
+ assertEquals(30L, leaf.getLiteral());
leaf = leaves.get(8);
assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
@@ -842,7 +848,10 @@ public class TestConvertAstToSearchArg {
"lteq(id, 4)" /* id <= 4 */
};
- FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
+ MessageType schema =
+ MessageTypeParser.parseMessageType("message test { required int32 id;" +
+ " required binary first_name; }");
+ FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
String expected = String.format("or(or(or(%1$s, %2$s), %3$s), %4$s)", conditions);
assertEquals(expected, p.toString());
@@ -860,16 +869,16 @@ public class TestConvertAstToSearchArg {
assertEquals("sue", leaf.getLiteral());
leaf = leaves.get(2);
- assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+ assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(12, leaf.getLiteral());
+ assertEquals(12L, leaf.getLiteral());
leaf = leaves.get(3);
- assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+ assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(4, leaf.getLiteral());
+ assertEquals(4L, leaf.getLiteral());
assertEquals("(or leaf-0 (not leaf-1) (not leaf-2) leaf-3)",
sarg.getExpression().toString());
@@ -1271,18 +1280,21 @@ public class TestConvertAstToSearchArg {
"eq(first_name, Binary{\"alan\"})", /* first_name = 'alan' */
"eq(last_name, Binary{\"smith\"})" /* 'smith' = last_name */
};
+ MessageType schema =
+ MessageTypeParser.parseMessageType("message test { required int32 id;" +
+ " required binary first_name; required binary last_name;}");
- FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
+ FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
String expected = String.format("and(and(and(%1$s, %2$s), %3$s), %4$s)", conditions);
assertEquals(expected, p.toString());
PredicateLeaf leaf = leaves.get(0);
- assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+ assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
assertEquals(PredicateLeaf.Operator.BETWEEN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
assertEquals(null, leaf.getLiteral());
- assertEquals(23, leaf.getLiteralList().get(0));
- assertEquals(45, leaf.getLiteralList().get(1));
+ assertEquals(23L, leaf.getLiteralList().get(0));
+ assertEquals(45L, leaf.getLiteralList().get(1));
leaf = leaves.get(1);
assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
@@ -1493,15 +1505,19 @@ public class TestConvertAstToSearchArg {
"or(eq(id, 34), eq(id, 50))" /* id in (34,50) */
};
- FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
+ MessageType schema =
+ MessageTypeParser.parseMessageType("message test { required int32 id;" +
+ " required binary first_name; }");
+ FilterPredicate p =
+ ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
String expected = String.format("and(and(%1$s, %2$s), %3$s)", conditions);
assertEquals(expected, p.toString());
PredicateLeaf leaf = leaves.get(0);
- assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+ assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(12, leaf.getLiteral());
+ assertEquals(12L, leaf.getLiteral());
leaf = leaves.get(1);
assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
@@ -1511,11 +1527,11 @@ public class TestConvertAstToSearchArg {
assertEquals("sue", leaf.getLiteralList().get(1));
leaf = leaves.get(2);
- assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+ assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
assertEquals(PredicateLeaf.Operator.IN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(34, leaf.getLiteralList().get(0));
- assertEquals(50, leaf.getLiteralList().get(1));
+ assertEquals(34L, leaf.getLiteralList().get(0));
+ assertEquals(50L, leaf.getLiteralList().get(1));
assertEquals("(and (not leaf-0) leaf-1 leaf-2)",
sarg.getExpression().toString());
@@ -1752,7 +1768,10 @@ public class TestConvertAstToSearchArg {
List<PredicateLeaf> leaves = sarg.getLeaves();
assertEquals(1, leaves.size());
- FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
+ MessageType schema =
+ MessageTypeParser.parseMessageType("message test { required int32 id;" +
+ " required binary first_name; }");
+ FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
String expected =
"and(lt(first_name, Binary{\"greg\"}), not(lteq(first_name, Binary{\"david\"})))";
assertEquals(p.toString(), expected);
@@ -2232,7 +2251,10 @@ public class TestConvertAstToSearchArg {
List<PredicateLeaf> leaves = sarg.getLeaves();
assertEquals(9, leaves.size());
- FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
+ MessageType schema =
+ MessageTypeParser.parseMessageType("message test { required int32 id;" +
+ " required binary first_name; }");
+ FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
String expected = "and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(" +
"or(or(or(lt(id, 18), lt(id, 10)), lt(id, 13)), lt(id, 16)), " +
"or(or(or(lt(id, 18), lt(id, 11)), lt(id, 13)), lt(id, 16))), " +
@@ -2255,58 +2277,58 @@ public class TestConvertAstToSearchArg {
assertEquals(p.toString(), expected);
PredicateLeaf leaf = leaves.get(0);
- assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+ assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(18, leaf.getLiteral());
+ assertEquals(18L, leaf.getLiteral());
leaf = leaves.get(1);
- assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+ assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(10, leaf.getLiteral());
+ assertEquals(10L, leaf.getLiteral());
leaf = leaves.get(2);
- assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+ assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(13, leaf.getLiteral());
+ assertEquals(13L, leaf.getLiteral());
leaf = leaves.get(3);
- assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+ assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(16, leaf.getLiteral());
+ assertEquals(16L, leaf.getLiteral());
leaf = leaves.get(4);
- assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+ assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(11, leaf.getLiteral());
+ assertEquals(11L, leaf.getLiteral());
leaf = leaves.get(5);
- assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+ assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(12, leaf.getLiteral());
+ assertEquals(12L, leaf.getLiteral());
leaf = leaves.get(6);
- assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+ assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(14, leaf.getLiteral());
+ assertEquals(14L, leaf.getLiteral());
leaf = leaves.get(7);
- assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+ assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(15, leaf.getLiteral());
+ assertEquals(15L, leaf.getLiteral());
leaf = leaves.get(8);
- assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+ assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(17, leaf.getLiteral());
+ assertEquals(17L, leaf.getLiteral());
assertEquals("(and" +
" (or leaf-0 leaf-1 leaf-2 leaf-3)" +
@@ -2388,7 +2410,10 @@ public class TestConvertAstToSearchArg {
List<PredicateLeaf> leaves = sarg.getLeaves();
assertEquals(0, leaves.size());
- FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
+ MessageType schema =
+ MessageTypeParser.parseMessageType("message test { required int32 id;" +
+ " required binary first_name; }");
+ FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
assertNull(p);
assertEquals("YES_NO_NULL",
@@ -2643,15 +2668,18 @@ public class TestConvertAstToSearchArg {
List<PredicateLeaf> leaves = sarg.getLeaves();
assertEquals(1, leaves.size());
- FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
+ MessageType schema =
+ MessageTypeParser.parseMessageType("message test { required int32 id;" +
+ " required binary first_name; }");
+ FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
String expected = "and(not(lt(id, 10)), not(lt(id, 10)))";
assertEquals(expected, p.toString());
- assertEquals(PredicateLeaf.Type.INTEGER, leaves.get(0).getType());
+ assertEquals(PredicateLeaf.Type.LONG, leaves.get(0).getType());
assertEquals(PredicateLeaf.Operator.LESS_THAN,
leaves.get(0).getOperator());
assertEquals("id", leaves.get(0).getColumnName());
- assertEquals(10, leaves.get(0).getLiteral());
+ assertEquals(10L, leaves.get(0).getLiteral());
assertEquals("(and (not leaf-0) (not leaf-0))",
sarg.getExpression().toString());
http://git-wip-us.apache.org/repos/asf/hive/blob/97bf32a1/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java b/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java
index 20de846..573d5c6 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java
@@ -330,7 +330,7 @@ public class TestSearchArgumentImpl {
SearchArgument sarg =
SearchArgumentFactory.newBuilder()
.startAnd()
- .lessThan("x", PredicateLeaf.Type.INTEGER, 10)
+ .lessThan("x", PredicateLeaf.Type.LONG, 10L)
.lessThanEquals("y", PredicateLeaf.Type.STRING, "hi")
.equals("z", PredicateLeaf.Type.FLOAT, 1.0)
.end()
@@ -342,9 +342,9 @@ public class TestSearchArgumentImpl {
sarg = SearchArgumentFactory.newBuilder()
.startNot()
.startOr()
- .isNull("x", PredicateLeaf.Type.INTEGER)
- .between("y", PredicateLeaf.Type.INTEGER, 10, 20)
- .in("z", PredicateLeaf.Type.INTEGER, 1, 2, 3)
+ .isNull("x", PredicateLeaf.Type.LONG)
+ .between("y", PredicateLeaf.Type.LONG, 10L, 20L)
+ .in("z", PredicateLeaf.Type.LONG, 1L, 2L, 3L)
.nullSafeEquals("a", PredicateLeaf.Type.STRING, "stinger")
.end()
.end()
@@ -376,10 +376,10 @@ public class TestSearchArgumentImpl {
sarg = SearchArgumentFactory.newBuilder()
.startNot()
.startOr()
- .isNull("x", PredicateLeaf.Type.INTEGER)
+ .isNull("x", PredicateLeaf.Type.LONG)
.between("y", PredicateLeaf.Type.DECIMAL,
new HiveDecimalWritable("10"), new HiveDecimalWritable("20.0"))
- .in("z", PredicateLeaf.Type.INTEGER, 1, 2, 3)
+ .in("z", PredicateLeaf.Type.LONG, 1L, 2L, 3L)
.nullSafeEquals("a", PredicateLeaf.Type.STRING,
new HiveVarchar("stinger", 100).toString())
.end()
@@ -413,10 +413,10 @@ public class TestSearchArgumentImpl {
sarg = SearchArgumentFactory.newBuilder()
.startNot()
.startOr()
- .isNull("x", PredicateLeaf.Type.INTEGER)
+ .isNull("x", PredicateLeaf.Type.LONG)
.between("y", PredicateLeaf.Type.DECIMAL, new HiveDecimalWritable("10"),
new HiveDecimalWritable("20.0"))
- .in("z", PredicateLeaf.Type.INTEGER, 1, 2, 3)
+ .in("z", PredicateLeaf.Type.LONG, 1L, 2L, 3L)
.nullSafeEquals("a", PredicateLeaf.Type.STRING,
new HiveVarchar("stinger", 100).toString())
.end()
@@ -435,8 +435,8 @@ public class TestSearchArgumentImpl {
SearchArgument sarg =
SearchArgumentFactory.newBuilder()
.startAnd()
- .lessThan("x", PredicateLeaf.Type.INTEGER, new Integer((short) 22))
- .lessThan("x1", PredicateLeaf.Type.INTEGER, new Integer(22))
+ .lessThan("x", PredicateLeaf.Type.LONG, 22L)
+ .lessThan("x1", PredicateLeaf.Type.LONG, 22L)
.lessThanEquals("y", PredicateLeaf.Type.STRING,
new HiveChar("hi", 10).toString())
.equals("z", PredicateLeaf.Type.FLOAT, new Double(0.22))
@@ -480,7 +480,7 @@ public class TestSearchArgumentImpl {
SearchArgument sarg =
SearchArgumentFactory.newBuilder()
.startAnd()
- .lessThan("x", PredicateLeaf.Type.INTEGER, "hi")
+ .lessThan("x", PredicateLeaf.Type.LONG, "hi")
.end()
.build();
}
http://git-wip-us.apache.org/repos/asf/hive/blob/97bf32a1/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java
index 3a92565..dc71db4 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java
@@ -47,8 +47,7 @@ public interface PredicateLeaf {
* The possible types for sargs.
*/
public static enum Type {
- INTEGER(Integer.class), // all of the integer types except long
- LONG(Long.class),
+ LONG(Long.class), // all of the integer types
FLOAT(Double.class), // float and double
STRING(String.class), // string, char, varchar
DATE(Date.class),
[13/28] hive git commit: HIVE-11704. Create errata file.
Posted by se...@apache.org.
HIVE-11704. Create errata file.
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d5977659
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d5977659
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d5977659
Branch: refs/heads/llap
Commit: d597765937ccf57e32ff4a79abb60ec69dbe84f6
Parents: 9763c9d
Author: Owen O'Malley <om...@apache.org>
Authored: Mon Aug 31 16:33:20 2015 -0700
Committer: Owen O'Malley <om...@apache.org>
Committed: Tue Sep 1 09:18:26 2015 -0700
----------------------------------------------------------------------
errata.txt | 9 +++++++++
1 file changed, 9 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/d5977659/errata.txt
----------------------------------------------------------------------
diff --git a/errata.txt b/errata.txt
new file mode 100644
index 0000000..70992ad
--- /dev/null
+++ b/errata.txt
@@ -0,0 +1,9 @@
+Commits with the wrong JIRA referenced:
+
+git commit branch jira url
+5a576b6fbf1680ab4dd8f275cad484a2614ef2c1 master HIVE-10391 https://issues.apache.org/jira/browse/HIVE-10391
+582f4e1bc39b9605d11f762480b29561a44688ae llap HIVE-10217 https://issues.apache.org/jira/browse/HIVE-10217
+8981f365bf0cf921bc0ac2ff8914df44ca2f7de7 master HIVE-10500 https://issues.apache.org/jira/browse/HIVE-10500
+09100831adff7589ee48e735a4beac6ebb25cb3e master HIVE-10885 https://issues.apache.org/jira/browse/HIVE-10885
+f3ab5fda6af57afff31c29ad048d906fd095d5fb branch-1.2 HIVE-10885 https://issues.apache.org/jira/browse/HIVE-10885
+dcf21cd6fa98fb5db01ef661bb3b9f94d9ca2d15 master HIVE-10021 https://issues.apache.org/jira/browse/HIVE-10021
[11/28] hive git commit: HIVE-11504: Predicate pushing down doesn't work for float type for Parquet (Ferdinand Xu, reviewed by Sergio Pena and Owen O'Malley)
Posted by se...@apache.org.
HIVE-11504: Predicate pushing down doesn't work for float type for Parquet (Ferdinand Xu, reviewed by Sergio Pena and Owen O'Malley)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8f930e58
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8f930e58
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8f930e58
Branch: refs/heads/llap
Commit: 8f930e588efd6ec937b9ad20fcf09030ae210ec3
Parents: a338f33
Author: Ferdinand Xu <ch...@intel.com>
Authored: Mon Aug 31 21:07:10 2015 -0400
Committer: Ferdinand Xu <ch...@intel.com>
Committed: Mon Aug 31 21:07:10 2015 -0400
----------------------------------------------------------------------
.../hive/ql/io/parquet/LeafFilterFactory.java | 29 +-
.../read/TestParquetFilterPredicate.java | 21 +
.../clientpositive/parquet_ppd_partition.q | 9 +
.../clientpositive/parquet_predicate_pushdown.q | 297 +++-
.../clientpositive/parquet_ppd_partition.q.out | 47 +
.../parquet_predicate_pushdown.q.out | 1309 +++++++++++++++++-
6 files changed, 1660 insertions(+), 52 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/8f930e58/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java
index 1ceea6e..3e00612 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java
@@ -31,6 +31,7 @@ import static org.apache.parquet.filter2.predicate.FilterApi.ltEq;
import static org.apache.parquet.filter2.predicate.FilterApi.binaryColumn;
import static org.apache.parquet.filter2.predicate.FilterApi.booleanColumn;
import static org.apache.parquet.filter2.predicate.FilterApi.doubleColumn;
+import static org.apache.parquet.filter2.predicate.FilterApi.floatColumn;
import static org.apache.parquet.filter2.predicate.FilterApi.intColumn;
public class LeafFilterFactory {
@@ -83,6 +84,25 @@ public class LeafFilterFactory {
}
}
+ class FloatFilterPredicateLeafBuilder extends FilterPredicateLeafBuilder {
+ @Override
+ public FilterPredicate buildPredict(Operator op, Object constant, String columnName) {
+ switch (op) {
+ case LESS_THAN:
+ return lt(floatColumn(columnName), ((Number) constant).floatValue());
+ case IS_NULL:
+ case EQUALS:
+ case NULL_SAFE_EQUALS:
+ return eq(floatColumn(columnName),
+ (constant == null) ? null : ((Number) constant).floatValue());
+ case LESS_THAN_EQUALS:
+ return ltEq(FilterApi.floatColumn(columnName), ((Number) constant).floatValue());
+ default:
+ throw new RuntimeException("Unknown PredicateLeaf Operator type: " + op);
+ }
+ }
+ }
+
class DoubleFilterPredicateLeafBuilder extends FilterPredicateLeafBuilder {
@Override
@@ -158,8 +178,13 @@ public class LeafFilterFactory {
} else {
return new LongFilterPredicateLeafBuilder();
}
- case FLOAT: // float and double
- return new DoubleFilterPredicateLeafBuilder();
+ case FLOAT:
+ if (parquetType.asPrimitiveType().getPrimitiveTypeName() ==
+ PrimitiveType.PrimitiveTypeName.FLOAT) {
+ return new FloatFilterPredicateLeafBuilder();
+ } else {
+ return new DoubleFilterPredicateLeafBuilder();
+ }
case STRING: // string, char, varchar
return new BinaryFilterPredicateLeafBuilder();
case BOOLEAN:
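
Analogous to the INT32/INT64 dispatch added for LONG above, FLOAT leaves now pick their builder from the Parquet primitive type. A hedged sketch using the schema classes from the new test below:

MessageType schema = MessageTypeParser.parseMessageType(
    "message test { required float f; required double d; }");
LeafFilterFactory factory = new LeafFilterFactory();
// FLOAT column -> FloatFilterPredicateLeafBuilder
factory.getLeafFilterBuilderByType(PredicateLeaf.Type.FLOAT, schema.getType("f"));
// DOUBLE column -> DoubleFilterPredicateLeafBuilder
factory.getLeafFilterBuilderByType(PredicateLeaf.Type.FLOAT, schema.getType("d"));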
http://git-wip-us.apache.org/repos/asf/hive/blob/8f930e58/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java
index ac5c1a0..2be2596 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java
@@ -48,4 +48,25 @@ public class TestParquetFilterPredicate {
String expected = "and(not(eq(a, null)), not(eq(a, Binary{\"stinger\"})))";
assertEquals(expected, p.toString());
}
+
+ @Test
+ public void testFilterFloatColumns() {
+ MessageType schema =
+ MessageTypeParser.parseMessageType("message test { required float a; required int32 b; }");
+ SearchArgument sarg = SearchArgumentFactory.newBuilder()
+ .startNot()
+ .startOr()
+ .isNull("a", PredicateLeaf.Type.FLOAT)
+ .between("a", PredicateLeaf.Type.FLOAT, 10.2, 20.3)
+ .in("b", PredicateLeaf.Type.LONG, 1L, 2L, 3L)
+ .end()
+ .end()
+ .build();
+
+ FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
+
+ String expected =
+ "and(and(not(eq(a, null)), not(and(lt(a, 20.3), not(lteq(a, 10.2))))), not(or(or(eq(b, 1), eq(b, 2)), eq(b, 3))))";
+ assertEquals(expected, p.toString());
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/8f930e58/ql/src/test/queries/clientpositive/parquet_ppd_partition.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/parquet_ppd_partition.q b/ql/src/test/queries/clientpositive/parquet_ppd_partition.q
new file mode 100644
index 0000000..08af84f
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/parquet_ppd_partition.q
@@ -0,0 +1,9 @@
+SET hive.optimize.index.filter=true;
+SET hive.optimize.ppd=true;
+
+-- Test predicate with partitioned columns
+CREATE TABLE part1 (id int, content string) PARTITIONED BY (p string) STORED AS PARQUET;
+ALTER TABLE part1 ADD PARTITION (p='p1');
+INSERT INTO TABLE part1 PARTITION (p='p1') VALUES (1, 'a'), (2, 'b');
+SELECT * FROM part1 WHERE p='p1';
+DROP TABLE part1 PURGE;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/8f930e58/ql/src/test/queries/clientpositive/parquet_predicate_pushdown.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/parquet_predicate_pushdown.q b/ql/src/test/queries/clientpositive/parquet_predicate_pushdown.q
index 08af84f..32767e8 100644
--- a/ql/src/test/queries/clientpositive/parquet_predicate_pushdown.q
+++ b/ql/src/test/queries/clientpositive/parquet_predicate_pushdown.q
@@ -1,9 +1,292 @@
-SET hive.optimize.index.filter=true;
SET hive.optimize.ppd=true;
--- Test predicate with partitioned columns
-CREATE TABLE part1 (id int, content string) PARTITIONED BY (p string) STORED AS PARQUET;
-ALTER TABLE part1 ADD PARTITION (p='p1');
-INSERT INTO TABLE part1 PARTITION (p='p1') VALUES (1, 'a'), (2, 'b');
-SELECT * FROM part1 WHERE p='p1';
-DROP TABLE part1 PURGE;
\ No newline at end of file
+-- SORT_QUERY_RESULTS
+CREATE TABLE tbl_pred(t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+STORED AS PARQUET;
+
+CREATE TABLE staging(t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE staging;
+
+INSERT INTO TABLE tbl_pred select * from staging;
+
+-- no predicate case. the explain plan should not have filter expression in table scan operator
+
+SELECT SUM(HASH(t)) FROM tbl_pred;
+
+SET hive.optimize.index.filter=true;
+SELECT SUM(HASH(t)) FROM tbl_pred;
+SET hive.optimize.index.filter=false;
+
+EXPLAIN SELECT SUM(HASH(t)) FROM tbl_pred;
+
+SET hive.optimize.index.filter=true;
+EXPLAIN SELECT SUM(HASH(t)) FROM tbl_pred;
+SET hive.optimize.index.filter=false;
+
+-- all the following queries have predicates which are pushed down to table scan operator if
+-- hive.optimize.index.filter is set to true. the explain plan should show filter expression
+-- in table scan operator.
+
+SELECT * FROM tbl_pred WHERE t<2 limit 1;
+SET hive.optimize.index.filter=true;
+SELECT * FROM tbl_pred WHERE t<2 limit 1;
+SET hive.optimize.index.filter=false;
+
+SELECT * FROM tbl_pred WHERE t>2 limit 1;
+SET hive.optimize.index.filter=true;
+SELECT * FROM tbl_pred WHERE t>2 limit 1;
+SET hive.optimize.index.filter=false;
+
+SELECT SUM(HASH(t)) FROM tbl_pred
+ WHERE t IS NOT NULL
+ AND t < 0
+ AND t > -2;
+
+SET hive.optimize.index.filter=true;
+SELECT SUM(HASH(t)) FROM tbl_pred
+ WHERE t IS NOT NULL
+ AND t < 0
+ AND t > -2;
+SET hive.optimize.index.filter=false;
+
+EXPLAIN SELECT SUM(HASH(t)) FROM tbl_pred
+ WHERE t IS NOT NULL
+ AND t < 0
+ AND t > -2;
+
+SET hive.optimize.index.filter=true;
+EXPLAIN SELECT SUM(HASH(t)) FROM tbl_pred
+ WHERE t IS NOT NULL
+ AND t < 0
+ AND t > -2;
+SET hive.optimize.index.filter=false;
+
+SELECT t, s FROM tbl_pred
+ WHERE t <=> -1
+ AND s IS NOT NULL
+ AND s LIKE 'bob%'
+ ;
+
+SET hive.optimize.index.filter=true;
+SELECT t, s FROM tbl_pred
+ WHERE t <=> -1
+ AND s IS NOT NULL
+ AND s LIKE 'bob%'
+ ;
+SET hive.optimize.index.filter=false;
+
+EXPLAIN SELECT t, s FROM tbl_pred
+ WHERE t <=> -1
+ AND s IS NOT NULL
+ AND s LIKE 'bob%'
+ ;
+
+SET hive.optimize.index.filter=true;
+EXPLAIN SELECT t, s FROM tbl_pred
+ WHERE t <=> -1
+ AND s IS NOT NULL
+ AND s LIKE 'bob%'
+ ;
+SET hive.optimize.index.filter=false;
+
+SELECT t, s FROM tbl_pred
+ WHERE s IS NOT NULL
+ AND s LIKE 'bob%'
+ AND t NOT IN (-1,-2,-3)
+ AND t BETWEEN 25 AND 30
+ SORT BY t,s;
+
+SET hive.optimize.index.filter=true;
+SELECT t, s FROM tbl_pred
+ WHERE s IS NOT NULL
+ AND s LIKE 'bob%'
+ AND t NOT IN (-1,-2,-3)
+ AND t BETWEEN 25 AND 30
+ SORT BY t,s;
+SET hive.optimize.index.filter=false;
+
+EXPLAIN SELECT t, s FROM tbl_pred
+ WHERE s IS NOT NULL
+ AND s LIKE 'bob%'
+ AND t NOT IN (-1,-2,-3)
+ AND t BETWEEN 25 AND 30
+ SORT BY t,s;
+
+SET hive.optimize.index.filter=true;
+EXPLAIN SELECT t, s FROM tbl_pred
+ WHERE s IS NOT NULL
+ AND s LIKE 'bob%'
+ AND t NOT IN (-1,-2,-3)
+ AND t BETWEEN 25 AND 30
+ SORT BY t,s;
+SET hive.optimize.index.filter=false;
+
+SELECT t, si, d, s FROM tbl_pred
+ WHERE d >= ROUND(9.99)
+ AND d < 12
+ AND t IS NOT NULL
+ AND s LIKE '%son'
+ AND s NOT LIKE '%car%'
+ AND t > 0
+ AND si BETWEEN 300 AND 400
+ ORDER BY s DESC
+ LIMIT 3;
+
+SET hive.optimize.index.filter=true;
+SELECT t, si, d, s FROM tbl_pred
+ WHERE d >= ROUND(9.99)
+ AND d < 12
+ AND t IS NOT NULL
+ AND s LIKE '%son'
+ AND s NOT LIKE '%car%'
+ AND t > 0
+ AND si BETWEEN 300 AND 400
+ ORDER BY s DESC
+ LIMIT 3;
+SET hive.optimize.index.filter=false;
+
+EXPLAIN SELECT t, si, d, s FROM tbl_pred
+ WHERE d >= ROUND(9.99)
+ AND d < 12
+ AND t IS NOT NULL
+ AND s LIKE '%son'
+ AND s NOT LIKE '%car%'
+ AND t > 0
+ AND si BETWEEN 300 AND 400
+ ORDER BY s DESC
+ LIMIT 3;
+
+SET hive.optimize.index.filter=true;
+EXPLAIN SELECT t, si, d, s FROM tbl_pred
+ WHERE d >= ROUND(9.99)
+ AND d < 12
+ AND t IS NOT NULL
+ AND s LIKE '%son'
+ AND s NOT LIKE '%car%'
+ AND t > 0
+ AND si BETWEEN 300 AND 400
+ ORDER BY s DESC
+ LIMIT 3;
+SET hive.optimize.index.filter=false;
+
+SELECT t, si, d, s FROM tbl_pred
+ WHERE t > 10
+ AND t <> 101
+ AND d >= ROUND(9.99)
+ AND d < 12
+ AND t IS NOT NULL
+ AND s LIKE '%son'
+ AND s NOT LIKE '%car%'
+ AND t > 0
+ AND si BETWEEN 300 AND 400
+ SORT BY s DESC
+ LIMIT 3;
+
+SET hive.optimize.index.filter=true;
+SELECT t, si, d, s FROM tbl_pred
+ WHERE t > 10
+ AND t <> 101
+ AND d >= ROUND(9.99)
+ AND d < 12
+ AND t IS NOT NULL
+ AND s LIKE '%son'
+ AND s NOT LIKE '%car%'
+ AND t > 0
+ AND si BETWEEN 300 AND 400
+ SORT BY s DESC
+ LIMIT 3;
+SET hive.optimize.index.filter=false;
+
+SET hive.optimize.index.filter=true;
+SELECT f, i, b FROM tbl_pred
+ WHERE f IS NOT NULL
+ AND f < 123.2
+ AND f > 1.92
+ AND f >= 9.99
+ AND f BETWEEN 1.92 AND 123.2
+ AND i IS NOT NULL
+ AND i < 67627
+ AND i > 60627
+ AND i >= 60626
+ AND i BETWEEN 60626 AND 67627
+ AND b IS NOT NULL
+ AND b < 4294967861
+ AND b > 4294967261
+ AND b >= 4294967260
+ AND b BETWEEN 4294967261 AND 4294967861
+ SORT BY f DESC
+ LIMIT 3;
+SET hive.optimize.index.filter=false;
+
+EXPLAIN SELECT t, si, d, s FROM tbl_pred
+ WHERE t > 10
+ AND t <> 101
+ AND d >= ROUND(9.99)
+ AND d < 12
+ AND t IS NOT NULL
+ AND s LIKE '%son'
+ AND s NOT LIKE '%car%'
+ AND t > 0
+ AND si BETWEEN 300 AND 400
+ SORT BY s DESC
+ LIMIT 3;
+
+SET hive.optimize.index.filter=true;
+EXPLAIN SELECT t, si, d, s FROM tbl_pred
+ WHERE t > 10
+ AND t <> 101
+ AND d >= ROUND(9.99)
+ AND d < 12
+ AND t IS NOT NULL
+ AND s LIKE '%son'
+ AND s NOT LIKE '%car%'
+ AND t > 0
+ AND si BETWEEN 300 AND 400
+ SORT BY s DESC
+ LIMIT 3;
+SET hive.optimize.index.filter=false;
+
+
+SET hive.optimize.index.filter=true;
+EXPLAIN SELECT f, i, b FROM tbl_pred
+ WHERE f IS NOT NULL
+ AND f < 123.2
+ AND f > 1.92
+ AND f >= 9.99
+ AND f BETWEEN 1.92 AND 123.2
+ AND i IS NOT NULL
+ AND i < 67627
+ AND i > 60627
+ AND i >= 60626
+ AND i BETWEEN 60626 AND 67627
+ AND b IS NOT NULL
+ AND b < 4294967861
+ AND b > 4294967261
+ AND b >= 4294967260
+ AND b BETWEEN 4294967261 AND 4294967861
+ SORT BY f DESC
+ LIMIT 3;
+SET hive.optimize.index.filter=false;
\ No newline at end of file
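Each predicate in the rewritten test is exercised twice, first with
hive.optimize.index.filter off and then on, and each variant is also EXPLAINed, so the
expected output below can show that results are unchanged while the plan gains a
pushed-down filter. A condensed sketch of the pattern, using the first tinyint predicate
from the test:

  SELECT SUM(HASH(t)) FROM tbl_pred WHERE t < 0 AND t > -2;          -- baseline, no pushdown
  SET hive.optimize.index.filter=true;
  SELECT SUM(HASH(t)) FROM tbl_pred WHERE t < 0 AND t > -2;          -- same result expected
  EXPLAIN SELECT SUM(HASH(t)) FROM tbl_pred WHERE t < 0 AND t > -2;  -- TableScan gains filterExpr
  SET hive.optimize.index.filter=false;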
http://git-wip-us.apache.org/repos/asf/hive/blob/8f930e58/ql/src/test/results/clientpositive/parquet_ppd_partition.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/parquet_ppd_partition.q.out b/ql/src/test/results/clientpositive/parquet_ppd_partition.q.out
new file mode 100644
index 0000000..4186618
--- /dev/null
+++ b/ql/src/test/results/clientpositive/parquet_ppd_partition.q.out
@@ -0,0 +1,47 @@
+PREHOOK: query: -- Test predicate with partitioned columns
+CREATE TABLE part1 (id int, content string) PARTITIONED BY (p string) STORED AS PARQUET
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@part1
+POSTHOOK: query: -- Test predicate with partitioned columns
+CREATE TABLE part1 (id int, content string) PARTITIONED BY (p string) STORED AS PARQUET
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@part1
+PREHOOK: query: ALTER TABLE part1 ADD PARTITION (p='p1')
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Output: default@part1
+POSTHOOK: query: ALTER TABLE part1 ADD PARTITION (p='p1')
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Output: default@part1
+POSTHOOK: Output: default@part1@p=p1
+PREHOOK: query: INSERT INTO TABLE part1 PARTITION (p='p1') VALUES (1, 'a'), (2, 'b')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@part1@p=p1
+POSTHOOK: query: INSERT INTO TABLE part1 PARTITION (p='p1') VALUES (1, 'a'), (2, 'b')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@part1@p=p1
+POSTHOOK: Lineage: part1 PARTITION(p=p1).content SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: part1 PARTITION(p=p1).id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: SELECT * FROM part1 WHERE p='p1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part1
+PREHOOK: Input: default@part1@p=p1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM part1 WHERE p='p1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part1
+POSTHOOK: Input: default@part1@p=p1
+#### A masked pattern was here ####
+1 a p1
+2 b p1
+PREHOOK: query: DROP TABLE part1 PURGE
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@part1
+PREHOOK: Output: default@part1
+POSTHOOK: query: DROP TABLE part1 PURGE
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@part1
+POSTHOOK: Output: default@part1
http://git-wip-us.apache.org/repos/asf/hive/blob/8f930e58/ql/src/test/results/clientpositive/parquet_predicate_pushdown.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/parquet_predicate_pushdown.q.out b/ql/src/test/results/clientpositive/parquet_predicate_pushdown.q.out
index 4186618..1dc2937 100644
--- a/ql/src/test/results/clientpositive/parquet_predicate_pushdown.q.out
+++ b/ql/src/test/results/clientpositive/parquet_predicate_pushdown.q.out
@@ -1,47 +1,1270 @@
-PREHOOK: query: -- Test predicate with partitioned columns
-CREATE TABLE part1 (id int, content string) PARTITIONED BY (p string) STORED AS PARQUET
+PREHOOK: query: -- SORT_QUERY_RESULTS
+CREATE TABLE tbl_pred(t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+STORED AS PARQUET
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
-PREHOOK: Output: default@part1
-POSTHOOK: query: -- Test predicate with partitioned columns
-CREATE TABLE part1 (id int, content string) PARTITIONED BY (p string) STORED AS PARQUET
+PREHOOK: Output: default@tbl_pred
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+CREATE TABLE tbl_pred(t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+STORED AS PARQUET
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
-POSTHOOK: Output: default@part1
-PREHOOK: query: ALTER TABLE part1 ADD PARTITION (p='p1')
-PREHOOK: type: ALTERTABLE_ADDPARTS
-PREHOOK: Output: default@part1
-POSTHOOK: query: ALTER TABLE part1 ADD PARTITION (p='p1')
-POSTHOOK: type: ALTERTABLE_ADDPARTS
-POSTHOOK: Output: default@part1
-POSTHOOK: Output: default@part1@p=p1
-PREHOOK: query: INSERT INTO TABLE part1 PARTITION (p='p1') VALUES (1, 'a'), (2, 'b')
-PREHOOK: type: QUERY
-PREHOOK: Input: default@values__tmp__table__1
-PREHOOK: Output: default@part1@p=p1
-POSTHOOK: query: INSERT INTO TABLE part1 PARTITION (p='p1') VALUES (1, 'a'), (2, 'b')
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@values__tmp__table__1
-POSTHOOK: Output: default@part1@p=p1
-POSTHOOK: Lineage: part1 PARTITION(p=p1).content SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
-POSTHOOK: Lineage: part1 PARTITION(p=p1).id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
-PREHOOK: query: SELECT * FROM part1 WHERE p='p1'
-PREHOOK: type: QUERY
-PREHOOK: Input: default@part1
-PREHOOK: Input: default@part1@p=p1
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT * FROM part1 WHERE p='p1'
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@part1
-POSTHOOK: Input: default@part1@p=p1
-#### A masked pattern was here ####
-1 a p1
-2 b p1
-PREHOOK: query: DROP TABLE part1 PURGE
-PREHOOK: type: DROPTABLE
-PREHOOK: Input: default@part1
-PREHOOK: Output: default@part1
-POSTHOOK: query: DROP TABLE part1 PURGE
-POSTHOOK: type: DROPTABLE
-POSTHOOK: Input: default@part1
-POSTHOOK: Output: default@part1
+POSTHOOK: Output: default@tbl_pred
+PREHOOK: query: CREATE TABLE staging(t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@staging
+POSTHOOK: query: CREATE TABLE staging(t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@staging
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE staging
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@staging
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE staging
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@staging
+PREHOOK: query: INSERT INTO TABLE tbl_pred select * from staging
+PREHOOK: type: QUERY
+PREHOOK: Input: default@staging
+PREHOOK: Output: default@tbl_pred
+POSTHOOK: query: INSERT INTO TABLE tbl_pred select * from staging
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@staging
+POSTHOOK: Output: default@tbl_pred
+POSTHOOK: Lineage: tbl_pred.b SIMPLE [(staging)staging.FieldSchema(name:b, type:bigint, comment:null), ]
+POSTHOOK: Lineage: tbl_pred.bin SIMPLE [(staging)staging.FieldSchema(name:bin, type:binary, comment:null), ]
+POSTHOOK: Lineage: tbl_pred.bo SIMPLE [(staging)staging.FieldSchema(name:bo, type:boolean, comment:null), ]
+POSTHOOK: Lineage: tbl_pred.d SIMPLE [(staging)staging.FieldSchema(name:d, type:double, comment:null), ]
+POSTHOOK: Lineage: tbl_pred.dec SIMPLE [(staging)staging.FieldSchema(name:dec, type:decimal(4,2), comment:null), ]
+POSTHOOK: Lineage: tbl_pred.f SIMPLE [(staging)staging.FieldSchema(name:f, type:float, comment:null), ]
+POSTHOOK: Lineage: tbl_pred.i SIMPLE [(staging)staging.FieldSchema(name:i, type:int, comment:null), ]
+POSTHOOK: Lineage: tbl_pred.s SIMPLE [(staging)staging.FieldSchema(name:s, type:string, comment:null), ]
+POSTHOOK: Lineage: tbl_pred.si SIMPLE [(staging)staging.FieldSchema(name:si, type:smallint, comment:null), ]
+POSTHOOK: Lineage: tbl_pred.t SIMPLE [(staging)staging.FieldSchema(name:t, type:tinyint, comment:null), ]
+POSTHOOK: Lineage: tbl_pred.ts SIMPLE [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ]
+PREHOOK: query: -- No predicate case: the explain plan should not have a filter expression in the table scan operator
+
+SELECT SUM(HASH(t)) FROM tbl_pred
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+POSTHOOK: query: -- No predicate case: the explain plan should not have a filter expression in the table scan operator
+
+SELECT SUM(HASH(t)) FROM tbl_pred
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+62430
+PREHOOK: query: SELECT SUM(HASH(t)) FROM tbl_pred
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT SUM(HASH(t)) FROM tbl_pred
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+62430
+PREHOOK: query: EXPLAIN SELECT SUM(HASH(t)) FROM tbl_pred
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT SUM(HASH(t)) FROM tbl_pred
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: tbl_pred
+ Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: hash(t) (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col0)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: EXPLAIN SELECT SUM(HASH(t)) FROM tbl_pred
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT SUM(HASH(t)) FROM tbl_pred
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: tbl_pred
+ Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: hash(t) (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col0)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: -- All the following queries have predicates that are pushed down to the table scan operator
+-- when hive.optimize.index.filter is set to true. The explain plan should show the filter
+-- expression in the table scan operator.
+
+SELECT * FROM tbl_pred WHERE t<2 limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+POSTHOOK: query: -- All the following queries have predicates that are pushed down to the table scan operator
+-- when hive.optimize.index.filter is set to true. The explain plan should show the filter
+-- expression in the table scan operator.
+
+SELECT * FROM tbl_pred WHERE t<2 limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+-3 467 65575 4294967437 81.64 23.53 true tom hernandez 2013-03-01 09:11:58.703188 32.85 study skills
+PREHOOK: query: SELECT * FROM tbl_pred WHERE t<2 limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM tbl_pred WHERE t<2 limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+-3 467 65575 4294967437 81.64 23.53 true tom hernandez 2013-03-01 09:11:58.703188 32.85 study skills
+PREHOOK: query: SELECT * FROM tbl_pred WHERE t>2 limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM tbl_pred WHERE t>2 limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+124 336 65664 4294967435 74.72 42.47 true bob davidson 2013-03-01 09:11:58.703302 45.4 yard duty
+PREHOOK: query: SELECT * FROM tbl_pred WHERE t>2 limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM tbl_pred WHERE t>2 limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+124 336 65664 4294967435 74.72 42.47 true bob davidson 2013-03-01 09:11:58.703302 45.4 yard duty
+PREHOOK: query: SELECT SUM(HASH(t)) FROM tbl_pred
+ WHERE t IS NOT NULL
+ AND t < 0
+ AND t > -2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT SUM(HASH(t)) FROM tbl_pred
+ WHERE t IS NOT NULL
+ AND t < 0
+ AND t > -2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+-8
+PREHOOK: query: SELECT SUM(HASH(t)) FROM tbl_pred
+ WHERE t IS NOT NULL
+ AND t < 0
+ AND t > -2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT SUM(HASH(t)) FROM tbl_pred
+ WHERE t IS NOT NULL
+ AND t < 0
+ AND t > -2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+-8
+PREHOOK: query: EXPLAIN SELECT SUM(HASH(t)) FROM tbl_pred
+ WHERE t IS NOT NULL
+ AND t < 0
+ AND t > -2
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT SUM(HASH(t)) FROM tbl_pred
+ WHERE t IS NOT NULL
+ AND t < 0
+ AND t > -2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: tbl_pred
+ Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((t < 0) and (UDFToInteger(t) > -2)) (type: boolean)
+ Statistics: Num rows: 116 Data size: 1276 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: hash(t) (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 116 Data size: 1276 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col0)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: EXPLAIN SELECT SUM(HASH(t)) FROM tbl_pred
+ WHERE t IS NOT NULL
+ AND t < 0
+ AND t > -2
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT SUM(HASH(t)) FROM tbl_pred
+ WHERE t IS NOT NULL
+ AND t < 0
+ AND t > -2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: tbl_pred
+ filterExpr: ((t < 0) and (UDFToInteger(t) > -2)) (type: boolean)
+ Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((t < 0) and (UDFToInteger(t) > -2)) (type: boolean)
+ Statistics: Num rows: 116 Data size: 1276 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: hash(t) (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 116 Data size: 1276 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col0)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT t, s FROM tbl_pred
+ WHERE t <=> -1
+ AND s IS NOT NULL
+ AND s LIKE 'bob%'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT t, s FROM tbl_pred
+ WHERE t <=> -1
+ AND s IS NOT NULL
+ AND s LIKE 'bob%'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+-1 bob laertes
+-1 bob young
+PREHOOK: query: SELECT t, s FROM tbl_pred
+ WHERE t <=> -1
+ AND s IS NOT NULL
+ AND s LIKE 'bob%'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT t, s FROM tbl_pred
+ WHERE t <=> -1
+ AND s IS NOT NULL
+ AND s LIKE 'bob%'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+-1 bob laertes
+-1 bob young
+PREHOOK: query: EXPLAIN SELECT t, s FROM tbl_pred
+ WHERE t <=> -1
+ AND s IS NOT NULL
+ AND s LIKE 'bob%'
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT t, s FROM tbl_pred
+ WHERE t <=> -1
+ AND s IS NOT NULL
+ AND s LIKE 'bob%'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: tbl_pred
+ Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((t = -1) and s is not null) and (s like 'bob%')) (type: boolean)
+ Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: -1 (type: tinyint), s (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: EXPLAIN SELECT t, s FROM tbl_pred
+ WHERE t <=> -1
+ AND s IS NOT NULL
+ AND s LIKE 'bob%'
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT t, s FROM tbl_pred
+ WHERE t <=> -1
+ AND s IS NOT NULL
+ AND s LIKE 'bob%'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: tbl_pred
+ filterExpr: (((t = -1) and s is not null) and (s like 'bob%')) (type: boolean)
+ Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((t = -1) and s is not null) and (s like 'bob%')) (type: boolean)
+ Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: -1 (type: tinyint), s (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT t, s FROM tbl_pred
+ WHERE s IS NOT NULL
+ AND s LIKE 'bob%'
+ AND t NOT IN (-1,-2,-3)
+ AND t BETWEEN 25 AND 30
+ SORT BY t,s
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT t, s FROM tbl_pred
+ WHERE s IS NOT NULL
+ AND s LIKE 'bob%'
+ AND t NOT IN (-1,-2,-3)
+ AND t BETWEEN 25 AND 30
+ SORT BY t,s
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+26 bob ovid
+26 bob quirinius
+27 bob ovid
+PREHOOK: query: SELECT t, s FROM tbl_pred
+ WHERE s IS NOT NULL
+ AND s LIKE 'bob%'
+ AND t NOT IN (-1,-2,-3)
+ AND t BETWEEN 25 AND 30
+ SORT BY t,s
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT t, s FROM tbl_pred
+ WHERE s IS NOT NULL
+ AND s LIKE 'bob%'
+ AND t NOT IN (-1,-2,-3)
+ AND t BETWEEN 25 AND 30
+ SORT BY t,s
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+26 bob ovid
+26 bob quirinius
+27 bob ovid
+PREHOOK: query: EXPLAIN SELECT t, s FROM tbl_pred
+ WHERE s IS NOT NULL
+ AND s LIKE 'bob%'
+ AND t NOT IN (-1,-2,-3)
+ AND t BETWEEN 25 AND 30
+ SORT BY t,s
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT t, s FROM tbl_pred
+ WHERE s IS NOT NULL
+ AND s LIKE 'bob%'
+ AND t NOT IN (-1,-2,-3)
+ AND t BETWEEN 25 AND 30
+ SORT BY t,s
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: tbl_pred
+ Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((s is not null and (s like 'bob%')) and (not (t) IN (-1, -2, -3))) and t BETWEEN 25 AND 30) (type: boolean)
+ Statistics: Num rows: 65 Data size: 715 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: t (type: tinyint), s (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 65 Data size: 715 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint), _col1 (type: string)
+ sort order: ++
+ Statistics: Num rows: 65 Data size: 715 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 65 Data size: 715 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 65 Data size: 715 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: EXPLAIN SELECT t, s FROM tbl_pred
+ WHERE s IS NOT NULL
+ AND s LIKE 'bob%'
+ AND t NOT IN (-1,-2,-3)
+ AND t BETWEEN 25 AND 30
+ SORT BY t,s
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT t, s FROM tbl_pred
+ WHERE s IS NOT NULL
+ AND s LIKE 'bob%'
+ AND t NOT IN (-1,-2,-3)
+ AND t BETWEEN 25 AND 30
+ SORT BY t,s
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: tbl_pred
+ filterExpr: (((s is not null and (s like 'bob%')) and (not (t) IN (-1, -2, -3))) and t BETWEEN 25 AND 30) (type: boolean)
+ Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((s is not null and (s like 'bob%')) and (not (t) IN (-1, -2, -3))) and t BETWEEN 25 AND 30) (type: boolean)
+ Statistics: Num rows: 65 Data size: 715 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: t (type: tinyint), s (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 65 Data size: 715 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint), _col1 (type: string)
+ sort order: ++
+ Statistics: Num rows: 65 Data size: 715 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 65 Data size: 715 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 65 Data size: 715 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT t, si, d, s FROM tbl_pred
+ WHERE d >= ROUND(9.99)
+ AND d < 12
+ AND t IS NOT NULL
+ AND s LIKE '%son'
+ AND s NOT LIKE '%car%'
+ AND t > 0
+ AND si BETWEEN 300 AND 400
+ ORDER BY s DESC
+ LIMIT 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT t, si, d, s FROM tbl_pred
+ WHERE d >= ROUND(9.99)
+ AND d < 12
+ AND t IS NOT NULL
+ AND s LIKE '%son'
+ AND s NOT LIKE '%car%'
+ AND t > 0
+ AND si BETWEEN 300 AND 400
+ ORDER BY s DESC
+ LIMIT 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+101 327 11.48 gabriella ellison
+15 334 11.12 jessica robinson
+7 320 11.54 bob ellison
+PREHOOK: query: SELECT t, si, d, s FROM tbl_pred
+ WHERE d >= ROUND(9.99)
+ AND d < 12
+ AND t IS NOT NULL
+ AND s LIKE '%son'
+ AND s NOT LIKE '%car%'
+ AND t > 0
+ AND si BETWEEN 300 AND 400
+ ORDER BY s DESC
+ LIMIT 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT t, si, d, s FROM tbl_pred
+ WHERE d >= ROUND(9.99)
+ AND d < 12
+ AND t IS NOT NULL
+ AND s LIKE '%son'
+ AND s NOT LIKE '%car%'
+ AND t > 0
+ AND si BETWEEN 300 AND 400
+ ORDER BY s DESC
+ LIMIT 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+101 327 11.48 gabriella ellison
+15 334 11.12 jessica robinson
+7 320 11.54 bob ellison
+PREHOOK: query: EXPLAIN SELECT t, si, d, s FROM tbl_pred
+ WHERE d >= ROUND(9.99)
+ AND d < 12
+ AND t IS NOT NULL
+ AND s LIKE '%son'
+ AND s NOT LIKE '%car%'
+ AND t > 0
+ AND si BETWEEN 300 AND 400
+ ORDER BY s DESC
+ LIMIT 3
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT t, si, d, s FROM tbl_pred
+ WHERE d >= ROUND(9.99)
+ AND d < 12
+ AND t IS NOT NULL
+ AND s LIKE '%son'
+ AND s NOT LIKE '%car%'
+ AND t > 0
+ AND si BETWEEN 300 AND 400
+ ORDER BY s DESC
+ LIMIT 3
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: tbl_pred
+ Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((((((d >= 10.0) and (d < 12.0)) and (s like '%son')) and (not (s like '%car%'))) and (t > 0)) and si BETWEEN 300 AND 400) (type: boolean)
+ Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: string)
+ sort order: -
+ Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 3
+ Statistics: Num rows: 3 Data size: 33 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 33 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 3
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: EXPLAIN SELECT t, si, d, s FROM tbl_pred
+ WHERE d >= ROUND(9.99)
+ AND d < 12
+ AND t IS NOT NULL
+ AND s LIKE '%son'
+ AND s NOT LIKE '%car%'
+ AND t > 0
+ AND si BETWEEN 300 AND 400
+ ORDER BY s DESC
+ LIMIT 3
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT t, si, d, s FROM tbl_pred
+ WHERE d >= ROUND(9.99)
+ AND d < 12
+ AND t IS NOT NULL
+ AND s LIKE '%son'
+ AND s NOT LIKE '%car%'
+ AND t > 0
+ AND si BETWEEN 300 AND 400
+ ORDER BY s DESC
+ LIMIT 3
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: tbl_pred
+ filterExpr: ((((((d >= 10.0) and (d < 12.0)) and (s like '%son')) and (not (s like '%car%'))) and (t > 0)) and si BETWEEN 300 AND 400) (type: boolean)
+ Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((((((d >= 10.0) and (d < 12.0)) and (s like '%son')) and (not (s like '%car%'))) and (t > 0)) and si BETWEEN 300 AND 400) (type: boolean)
+ Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: string)
+ sort order: -
+ Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 3
+ Statistics: Num rows: 3 Data size: 33 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 33 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 3
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT t, si, d, s FROM tbl_pred
+ WHERE t > 10
+ AND t <> 101
+ AND d >= ROUND(9.99)
+ AND d < 12
+ AND t IS NOT NULL
+ AND s LIKE '%son'
+ AND s NOT LIKE '%car%'
+ AND t > 0
+ AND si BETWEEN 300 AND 400
+ SORT BY s DESC
+ LIMIT 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT t, si, d, s FROM tbl_pred
+ WHERE t > 10
+ AND t <> 101
+ AND d >= ROUND(9.99)
+ AND d < 12
+ AND t IS NOT NULL
+ AND s LIKE '%son'
+ AND s NOT LIKE '%car%'
+ AND t > 0
+ AND si BETWEEN 300 AND 400
+ SORT BY s DESC
+ LIMIT 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+15 334 11.12 jessica robinson
+PREHOOK: query: SELECT t, si, d, s FROM tbl_pred
+ WHERE t > 10
+ AND t <> 101
+ AND d >= ROUND(9.99)
+ AND d < 12
+ AND t IS NOT NULL
+ AND s LIKE '%son'
+ AND s NOT LIKE '%car%'
+ AND t > 0
+ AND si BETWEEN 300 AND 400
+ SORT BY s DESC
+ LIMIT 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT t, si, d, s FROM tbl_pred
+ WHERE t > 10
+ AND t <> 101
+ AND d >= ROUND(9.99)
+ AND d < 12
+ AND t IS NOT NULL
+ AND s LIKE '%son'
+ AND s NOT LIKE '%car%'
+ AND t > 0
+ AND si BETWEEN 300 AND 400
+ SORT BY s DESC
+ LIMIT 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+15 334 11.12 jessica robinson
+PREHOOK: query: SELECT f, i, b FROM tbl_pred
+ WHERE f IS NOT NULL
+ AND f < 123.2
+ AND f > 1.92
+ AND f >= 9.99
+ AND f BETWEEN 1.92 AND 123.2
+ AND i IS NOT NULL
+ AND i < 67627
+ AND i > 60627
+ AND i >= 60626
+ AND i BETWEEN 60626 AND 67627
+ AND b IS NOT NULL
+ AND b < 4294967861
+ AND b > 4294967261
+ AND b >= 4294967260
+ AND b BETWEEN 4294967261 AND 4294967861
+ SORT BY f DESC
+ LIMIT 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT f, i, b FROM tbl_pred
+ WHERE f IS NOT NULL
+ AND f < 123.2
+ AND f > 1.92
+ AND f >= 9.99
+ AND f BETWEEN 1.92 AND 123.2
+ AND i IS NOT NULL
+ AND i < 67627
+ AND i > 60627
+ AND i >= 60626
+ AND i BETWEEN 60626 AND 67627
+ AND b IS NOT NULL
+ AND b < 4294967861
+ AND b > 4294967261
+ AND b >= 4294967260
+ AND b BETWEEN 4294967261 AND 4294967861
+ SORT BY f DESC
+ LIMIT 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_pred
+#### A masked pattern was here ####
+99.68 65658 4294967503
+99.91 65763 4294967324
+99.92 65661 4294967404
+PREHOOK: query: EXPLAIN SELECT t, si, d, s FROM tbl_pred
+ WHERE t > 10
+ AND t <> 101
+ AND d >= ROUND(9.99)
+ AND d < 12
+ AND t IS NOT NULL
+ AND s LIKE '%son'
+ AND s NOT LIKE '%car%'
+ AND t > 0
+ AND si BETWEEN 300 AND 400
+ SORT BY s DESC
+ LIMIT 3
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT t, si, d, s FROM tbl_pred
+ WHERE t > 10
+ AND t <> 101
+ AND d >= ROUND(9.99)
+ AND d < 12
+ AND t IS NOT NULL
+ AND s LIKE '%son'
+ AND s NOT LIKE '%car%'
+ AND t > 0
+ AND si BETWEEN 300 AND 400
+ SORT BY s DESC
+ LIMIT 3
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: tbl_pred
+ Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((((((((t > 10) and (t <> 101)) and (d >= 10.0)) and (d < 12.0)) and (s like '%son')) and (not (s like '%car%'))) and (t > 0)) and si BETWEEN 300 AND 400) (type: boolean)
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: string)
+ sort order: -
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 3
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col3 (type: string)
+ sort order: -
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 3
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 3
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: EXPLAIN SELECT t, si, d, s FROM tbl_pred
+ WHERE t > 10
+ AND t <> 101
+ AND d >= ROUND(9.99)
+ AND d < 12
+ AND t IS NOT NULL
+ AND s LIKE '%son'
+ AND s NOT LIKE '%car%'
+ AND t > 0
+ AND si BETWEEN 300 AND 400
+ SORT BY s DESC
+ LIMIT 3
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT t, si, d, s FROM tbl_pred
+ WHERE t > 10
+ AND t <> 101
+ AND d >= ROUND(9.99)
+ AND d < 12
+ AND t IS NOT NULL
+ AND s LIKE '%son'
+ AND s NOT LIKE '%car%'
+ AND t > 0
+ AND si BETWEEN 300 AND 400
+ SORT BY s DESC
+ LIMIT 3
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: tbl_pred
+ filterExpr: ((((((((t > 10) and (t <> 101)) and (d >= 10.0)) and (d < 12.0)) and (s like '%son')) and (not (s like '%car%'))) and (t > 0)) and si BETWEEN 300 AND 400) (type: boolean)
+ Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((((((((t > 10) and (t <> 101)) and (d >= 10.0)) and (d < 12.0)) and (s like '%son')) and (not (s like '%car%'))) and (t > 0)) and si BETWEEN 300 AND 400) (type: boolean)
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: string)
+ sort order: -
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 3
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col3 (type: string)
+ sort order: -
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 3
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 3
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: EXPLAIN SELECT f, i, b FROM tbl_pred
+ WHERE f IS NOT NULL
+ AND f < 123.2
+ AND f > 1.92
+ AND f >= 9.99
+ AND f BETWEEN 1.92 AND 123.2
+ AND i IS NOT NULL
+ AND i < 67627
+ AND i > 60627
+ AND i >= 60626
+ AND i BETWEEN 60626 AND 67627
+ AND b IS NOT NULL
+ AND b < 4294967861
+ AND b > 4294967261
+ AND b >= 4294967260
+ AND b BETWEEN 4294967261 AND 4294967861
+ SORT BY f DESC
+ LIMIT 3
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT f, i, b FROM tbl_pred
+ WHERE f IS NOT NULL
+ AND f < 123.2
+ AND f > 1.92
+ AND f >= 9.99
+ AND f BETWEEN 1.92 AND 123.2
+ AND i IS NOT NULL
+ AND i < 67627
+ AND i > 60627
+ AND i >= 60626
+ AND i BETWEEN 60626 AND 67627
+ AND b IS NOT NULL
+ AND b < 4294967861
+ AND b > 4294967261
+ AND b >= 4294967260
+ AND b BETWEEN 4294967261 AND 4294967861
+ SORT BY f DESC
+ LIMIT 3
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: tbl_pred
+ filterExpr: ((((((((((((f < 123.2) and (f > 1.92)) and (f >= 9.99)) and f BETWEEN 1.92 AND 123.2) and (i < 67627)) and (i > 60627)) and (i >= 60626)) and i BETWEEN 60626 AND 67627) and (b < 4294967861)) and (b > 4294967261)) and (b >= 4294967260)) and b BETWEEN 4294967261 AND 4294967861) (type: boolean)
+ Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((((((((((((f < 123.2) and (f > 1.92)) and (f >= 9.99)) and f BETWEEN 1.92 AND 123.2) and (i < 67627)) and (i > 60627)) and (i >= 60626)) and i BETWEEN 60626 AND 67627) and (b < 4294967861)) and (b > 4294967261)) and (b >= 4294967260)) and b BETWEEN 4294967261 AND 4294967861) (type: boolean)
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: f (type: float), i (type: int), b (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: float)
+ sort order: -
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: bigint)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: float), VALUE._col0 (type: int), VALUE._col1 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 3
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: float)
+ sort order: -
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: bigint)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: float), VALUE._col0 (type: int), VALUE._col1 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 3
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 3
+ Processor Tree:
+ ListSink
+
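In each pair of plans above, the Filter Operator and its predicate are identical; the
only effect of hive.optimize.index.filter=true is the extra filterExpr attribute on the
TableScan, which indicates the predicate has been made available to the Parquet reader.
From the first pair, for example:

  TableScan
    alias: tbl_pred
    filterExpr: ((t < 0) and (UDFToInteger(t) > -2)) (type: boolean)   <- only with pushdown
  Filter Operator
    predicate: ((t < 0) and (UDFToInteger(t) > -2)) (type: boolean)    <- in both plans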
[19/28] hive git commit: HIVE-11698: Add additional test for PointLookupOptimizer (Jesus Camacho Rodriguez, reviewed by Hari Sankar Sivarama Subramaniyan)
Posted by se...@apache.org.
HIVE-11698: Add additional test for PointLookupOptimizer (Jesus Camacho Rodriguez, reviewed by Hari Sankar Sivarama Subramaniyan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/dbdd6116
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/dbdd6116
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/dbdd6116
Branch: refs/heads/llap
Commit: dbdd6116bd9e25bdb5112d21fd40ec09d7f39adc
Parents: f2056a1
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Wed Sep 2 08:35:27 2015 +0200
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Wed Sep 2 08:35:37 2015 +0200
----------------------------------------------------------------------
.../test/queries/clientpositive/pointlookup3.q | 41 +
.../results/clientpositive/pointlookup3.q.out | 1394 ++++++++++++++++++
2 files changed, 1435 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/dbdd6116/ql/src/test/queries/clientpositive/pointlookup3.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/pointlookup3.q b/ql/src/test/queries/clientpositive/pointlookup3.q
new file mode 100644
index 0000000..3daa94b
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/pointlookup3.q
@@ -0,0 +1,41 @@
+drop table pcr_t1;
+
+create table pcr_t1 (key int, value string) partitioned by (ds1 string, ds2 string);
+insert overwrite table pcr_t1 partition (ds1='2000-04-08', ds2='2001-04-08') select * from src where key < 20 order by key;
+insert overwrite table pcr_t1 partition (ds1='2000-04-09', ds2='2001-04-09') select * from src where key < 20 order by key;
+insert overwrite table pcr_t1 partition (ds1='2000-04-10', ds2='2001-04-10') select * from src where key < 20 order by key;
+
+set hive.optimize.point.lookup.min=2;
+set hive.optimize.point.lookup.extract=true;
+
+explain extended
+select key, value, ds1, ds2
+from pcr_t1
+where (ds1='2000-04-08' and key=1) or (ds1='2000-04-09' and key=2)
+order by key, value, ds1, ds2;
+
+explain extended
+select key, value, ds1, ds2
+from pcr_t1
+where (ds1='2000-04-08' and ds2='2001-04-08' and key=1) or (ds1='2000-04-09' and ds2='2001-04-08' and key=2)
+order by key, value, ds1, ds2;
+
+explain extended
+select *
+from pcr_t1 t1 join pcr_t1 t2
+on t1.key=t2.key and t1.ds1='2000-04-08' and t2.ds2='2001-04-08'
+order by t1.key;
+
+explain extended
+select *
+from pcr_t1 t1 join pcr_t1 t2
+on t1.key=t2.key and t1.ds1='2000-04-08' and t2.ds1='2000-04-09'
+order by t1.key;
+
+explain extended
+select *
+from pcr_t1 t1 join pcr_t1 t2
+where (t1.ds1='2000-04-08' and t2.key=1) or (t1.ds1='2000-04-09' and t2.key=2)
+order by t2.key, t2.value, t1.ds1;
+
+drop table pcr_t1;
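The queries above are the point-lookup cases: with the hive.optimize.point.lookup.min=2
threshold met, PointLookupOptimizer folds an OR of equality conjunctions into a single IN
over a struct of the compared columns. Sketched on the first query (the rewritten form is
the one that appears in the expected plan below):

  -- predicate as written:
  --   (ds1='2000-04-08' and key=1) or (ds1='2000-04-09' and key=2)
  -- predicate after PointLookupOptimizer:
  --   (struct(ds1,key)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2))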
http://git-wip-us.apache.org/repos/asf/hive/blob/dbdd6116/ql/src/test/results/clientpositive/pointlookup3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/pointlookup3.q.out b/ql/src/test/results/clientpositive/pointlookup3.q.out
new file mode 100644
index 0000000..4cfb97e
--- /dev/null
+++ b/ql/src/test/results/clientpositive/pointlookup3.q.out
@@ -0,0 +1,1394 @@
+PREHOOK: query: drop table pcr_t1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table pcr_t1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table pcr_t1 (key int, value string) partitioned by (ds1 string, ds2 string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@pcr_t1
+POSTHOOK: query: create table pcr_t1 (key int, value string) partitioned by (ds1 string, ds2 string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@pcr_t1
+PREHOOK: query: insert overwrite table pcr_t1 partition (ds1='2000-04-08', ds2='2001-04-08') select * from src where key < 20 order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@pcr_t1@ds1=2000-04-08/ds2=2001-04-08
+POSTHOOK: query: insert overwrite table pcr_t1 partition (ds1='2000-04-08', ds2='2001-04-08') select * from src where key < 20 order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@pcr_t1@ds1=2000-04-08/ds2=2001-04-08
+POSTHOOK: Lineage: pcr_t1 PARTITION(ds1=2000-04-08,ds2=2001-04-08).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: pcr_t1 PARTITION(ds1=2000-04-08,ds2=2001-04-08).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table pcr_t1 partition (ds1='2000-04-09', ds2='2001-04-09') select * from src where key < 20 order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@pcr_t1@ds1=2000-04-09/ds2=2001-04-09
+POSTHOOK: query: insert overwrite table pcr_t1 partition (ds1='2000-04-09', ds2='2001-04-09') select * from src where key < 20 order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@pcr_t1@ds1=2000-04-09/ds2=2001-04-09
+POSTHOOK: Lineage: pcr_t1 PARTITION(ds1=2000-04-09,ds2=2001-04-09).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: pcr_t1 PARTITION(ds1=2000-04-09,ds2=2001-04-09).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table pcr_t1 partition (ds1='2000-04-10', ds2='2001-04-10') select * from src where key < 20 order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@pcr_t1@ds1=2000-04-10/ds2=2001-04-10
+POSTHOOK: query: insert overwrite table pcr_t1 partition (ds1='2000-04-10', ds2='2001-04-10') select * from src where key < 20 order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@pcr_t1@ds1=2000-04-10/ds2=2001-04-10
+POSTHOOK: Lineage: pcr_t1 PARTITION(ds1=2000-04-10,ds2=2001-04-10).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: pcr_t1 PARTITION(ds1=2000-04-10,ds2=2001-04-10).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: explain extended
+select key, value, ds1, ds2
+from pcr_t1
+where (ds1='2000-04-08' and key=1) or (ds1='2000-04-09' and key=2)
+order by key, value, ds1, ds2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select key, value, ds1, ds2
+from pcr_t1
+where (ds1='2000-04-08' and key=1) or (ds1='2000-04-09' and key=2)
+order by key, value, ds1, ds2
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+ TOK_FROM
+ TOK_TABREF
+ TOK_TABNAME
+ pcr_t1
+ TOK_INSERT
+ TOK_DESTINATION
+ TOK_DIR
+ TOK_TMP_FILE
+ TOK_SELECT
+ TOK_SELEXPR
+ TOK_TABLE_OR_COL
+ key
+ TOK_SELEXPR
+ TOK_TABLE_OR_COL
+ value
+ TOK_SELEXPR
+ TOK_TABLE_OR_COL
+ ds1
+ TOK_SELEXPR
+ TOK_TABLE_OR_COL
+ ds2
+ TOK_WHERE
+ or
+ and
+ =
+ TOK_TABLE_OR_COL
+ ds1
+ '2000-04-08'
+ =
+ TOK_TABLE_OR_COL
+ key
+ 1
+ and
+ =
+ TOK_TABLE_OR_COL
+ ds1
+ '2000-04-09'
+ =
+ TOK_TABLE_OR_COL
+ key
+ 2
+ TOK_ORDERBY
+ TOK_TABSORTCOLNAMEASC
+ TOK_TABLE_OR_COL
+ key
+ TOK_TABSORTCOLNAMEASC
+ TOK_TABLE_OR_COL
+ value
+ TOK_TABSORTCOLNAMEASC
+ TOK_TABLE_OR_COL
+ ds1
+ TOK_TABSORTCOLNAMEASC
+ TOK_TABLE_OR_COL
+ ds2
+
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: pcr_t1
+ Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: (struct(ds1,key)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) (type: boolean)
+ Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: string), ds1 (type: string), ds2 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+ sort order: ++++
+ Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ auto parallelism: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: ds2=2001-04-08
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds1 2000-04-08
+ ds2 2001-04-08
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.pcr_t1
+ numFiles 1
+ numRows 20
+ partition_columns ds1/ds2
+ partition_columns.types string:string
+ rawDataSize 160
+ serialization.ddl struct pcr_t1 { i32 key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 180
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.comments
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.pcr_t1
+ partition_columns ds1/ds2
+ partition_columns.types string:string
+ serialization.ddl struct pcr_t1 { i32 key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.pcr_t1
+ name: default.pcr_t1
+#### A masked pattern was here ####
+ Partition
+ base file name: ds2=2001-04-09
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds1 2000-04-09
+ ds2 2001-04-09
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.pcr_t1
+ numFiles 1
+ numRows 20
+ partition_columns ds1/ds2
+ partition_columns.types string:string
+ rawDataSize 160
+ serialization.ddl struct pcr_t1 { i32 key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 180
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.comments
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.pcr_t1
+ partition_columns ds1/ds2
+ partition_columns.types string:string
+ serialization.ddl struct pcr_t1 { i32 key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.pcr_t1
+ name: default.pcr_t1
+ Truncated Path -> Alias:
+ /pcr_t1/ds1=2000-04-08/ds2=2001-04-08 [pcr_t1]
+ /pcr_t1/ds1=2000-04-09/ds2=2001-04-09 [pcr_t1]
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types int:string:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain extended
+select key, value, ds1, ds2
+from pcr_t1
+where (ds1='2000-04-08' and ds2='2001-04-08' and key=1) or (ds1='2000-04-09' and ds2='2001-04-08' and key=2)
+order by key, value, ds1, ds2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select key, value, ds1, ds2
+from pcr_t1
+where (ds1='2000-04-08' and ds2='2001-04-08' and key=1) or (ds1='2000-04-09' and ds2='2001-04-08' and key=2)
+order by key, value, ds1, ds2
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+ TOK_FROM
+ TOK_TABREF
+ TOK_TABNAME
+ pcr_t1
+ TOK_INSERT
+ TOK_DESTINATION
+ TOK_DIR
+ TOK_TMP_FILE
+ TOK_SELECT
+ TOK_SELEXPR
+ TOK_TABLE_OR_COL
+ key
+ TOK_SELEXPR
+ TOK_TABLE_OR_COL
+ value
+ TOK_SELEXPR
+ TOK_TABLE_OR_COL
+ ds1
+ TOK_SELEXPR
+ TOK_TABLE_OR_COL
+ ds2
+ TOK_WHERE
+ or
+ and
+ and
+ =
+ TOK_TABLE_OR_COL
+ ds1
+ '2000-04-08'
+ =
+ TOK_TABLE_OR_COL
+ ds2
+ '2001-04-08'
+ =
+ TOK_TABLE_OR_COL
+ key
+ 1
+ and
+ and
+ =
+ TOK_TABLE_OR_COL
+ ds1
+ '2000-04-09'
+ =
+ TOK_TABLE_OR_COL
+ ds2
+ '2001-04-08'
+ =
+ TOK_TABLE_OR_COL
+ key
+ 2
+ TOK_ORDERBY
+ TOK_TABSORTCOLNAMEASC
+ TOK_TABLE_OR_COL
+ key
+ TOK_TABSORTCOLNAMEASC
+ TOK_TABLE_OR_COL
+ value
+ TOK_TABSORTCOLNAMEASC
+ TOK_TABLE_OR_COL
+ ds1
+ TOK_TABSORTCOLNAMEASC
+ TOK_TABLE_OR_COL
+ ds2
+
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: pcr_t1
+ Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: (key = 1) (type: boolean)
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: value (type: string), ds1 (type: string)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: 1 (type: int), _col1 (type: string), _col2 (type: string), '2001-04-08' (type: string)
+ sort order: ++++
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ auto parallelism: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: ds2=2001-04-08
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds1 2000-04-08
+ ds2 2001-04-08
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.pcr_t1
+ numFiles 1
+ numRows 20
+ partition_columns ds1/ds2
+ partition_columns.types string:string
+ rawDataSize 160
+ serialization.ddl struct pcr_t1 { i32 key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 180
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.comments
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.pcr_t1
+ partition_columns ds1/ds2
+ partition_columns.types string:string
+ serialization.ddl struct pcr_t1 { i32 key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.pcr_t1
+ name: default.pcr_t1
+ Truncated Path -> Alias:
+ /pcr_t1/ds1=2000-04-08/ds2=2001-04-08 [pcr_t1]
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: 1 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), '2001-04-08' (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types int:string:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain extended
+select *
+from pcr_t1 t1 join pcr_t1 t2
+on t1.key=t2.key and t1.ds1='2000-04-08' and t2.ds2='2001-04-08'
+order by t1.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select *
+from pcr_t1 t1 join pcr_t1 t2
+on t1.key=t2.key and t1.ds1='2000-04-08' and t2.ds2='2001-04-08'
+order by t1.key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+ TOK_FROM
+ TOK_JOIN
+ TOK_TABREF
+ TOK_TABNAME
+ pcr_t1
+ t1
+ TOK_TABREF
+ TOK_TABNAME
+ pcr_t1
+ t2
+ and
+ and
+ =
+ .
+ TOK_TABLE_OR_COL
+ t1
+ key
+ .
+ TOK_TABLE_OR_COL
+ t2
+ key
+ =
+ .
+ TOK_TABLE_OR_COL
+ t1
+ ds1
+ '2000-04-08'
+ =
+ .
+ TOK_TABLE_OR_COL
+ t2
+ ds2
+ '2001-04-08'
+ TOK_INSERT
+ TOK_DESTINATION
+ TOK_DIR
+ TOK_TMP_FILE
+ TOK_SELECT
+ TOK_SELEXPR
+ TOK_ALLCOLREF
+ TOK_ORDERBY
+ TOK_TABSORTCOLNAMEASC
+ .
+ TOK_TABLE_OR_COL
+ t1
+ key
+
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: int)
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ value expressions: value (type: string), ds2 (type: string)
+ auto parallelism: false
+ TableScan
+ alias: t2
+ Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: int)
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ value expressions: value (type: string), ds1 (type: string)
+ auto parallelism: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: ds2=2001-04-08
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds1 2000-04-08
+ ds2 2001-04-08
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.pcr_t1
+ numFiles 1
+ numRows 20
+ partition_columns ds1/ds2
+ partition_columns.types string:string
+ rawDataSize 160
+ serialization.ddl struct pcr_t1 { i32 key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 180
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.comments
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.pcr_t1
+ partition_columns ds1/ds2
+ partition_columns.types string:string
+ serialization.ddl struct pcr_t1 { i32 key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.pcr_t1
+ name: default.pcr_t1
+ Truncated Path -> Alias:
+ /pcr_t1/ds1=2000-04-08/ds2=2001-04-08 [t1, t2]
+ Needs Tagging: true
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ outputColumnNames: _col0, _col1, _col3, _col7, _col8, _col9
+ Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string), _col7 (type: int), _col8 (type: string), _col9 (type: string)
+ outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col3,_col4,_col5,_col6
+ columns.types int,string,string,int,string,string
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ GatherStats: false
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col1 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: string), _col6 (type: string)
+ auto parallelism: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: -mr-10003
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col3,_col4,_col5,_col6
+ columns.types int,string,string,int,string,string
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col3,_col4,_col5,_col6
+ columns.types int,string,string,int,string,string
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Truncated Path -> Alias:
+#### A masked pattern was here ####
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), '2000-04-08' (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: string), '2001-04-08' (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7
+ columns.types int:string:string:string:int:string:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain extended
+select *
+from pcr_t1 t1 join pcr_t1 t2
+on t1.key=t2.key and t1.ds1='2000-04-08' and t2.ds1='2000-04-09'
+order by t1.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select *
+from pcr_t1 t1 join pcr_t1 t2
+on t1.key=t2.key and t1.ds1='2000-04-08' and t2.ds1='2000-04-09'
+order by t1.key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+ TOK_FROM
+ TOK_JOIN
+ TOK_TABREF
+ TOK_TABNAME
+ pcr_t1
+ t1
+ TOK_TABREF
+ TOK_TABNAME
+ pcr_t1
+ t2
+ and
+ and
+ =
+ .
+ TOK_TABLE_OR_COL
+ t1
+ key
+ .
+ TOK_TABLE_OR_COL
+ t2
+ key
+ =
+ .
+ TOK_TABLE_OR_COL
+ t1
+ ds1
+ '2000-04-08'
+ =
+ .
+ TOK_TABLE_OR_COL
+ t2
+ ds1
+ '2000-04-09'
+ TOK_INSERT
+ TOK_DESTINATION
+ TOK_DIR
+ TOK_TMP_FILE
+ TOK_SELECT
+ TOK_SELEXPR
+ TOK_ALLCOLREF
+ TOK_ORDERBY
+ TOK_TABSORTCOLNAMEASC
+ .
+ TOK_TABLE_OR_COL
+ t1
+ key
+
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: int)
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ value expressions: value (type: string), ds2 (type: string)
+ auto parallelism: false
+ TableScan
+ alias: t2
+ Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: int)
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ value expressions: value (type: string), ds2 (type: string)
+ auto parallelism: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: ds2=2001-04-08
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds1 2000-04-08
+ ds2 2001-04-08
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.pcr_t1
+ numFiles 1
+ numRows 20
+ partition_columns ds1/ds2
+ partition_columns.types string:string
+ rawDataSize 160
+ serialization.ddl struct pcr_t1 { i32 key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 180
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.comments
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.pcr_t1
+ partition_columns ds1/ds2
+ partition_columns.types string:string
+ serialization.ddl struct pcr_t1 { i32 key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.pcr_t1
+ name: default.pcr_t1
+#### A masked pattern was here ####
+ Partition
+ base file name: ds2=2001-04-09
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds1 2000-04-09
+ ds2 2001-04-09
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.pcr_t1
+ numFiles 1
+ numRows 20
+ partition_columns ds1/ds2
+ partition_columns.types string:string
+ rawDataSize 160
+ serialization.ddl struct pcr_t1 { i32 key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 180
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.comments
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.pcr_t1
+ partition_columns ds1/ds2
+ partition_columns.types string:string
+ serialization.ddl struct pcr_t1 { i32 key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.pcr_t1
+ name: default.pcr_t1
+ Truncated Path -> Alias:
+ /pcr_t1/ds1=2000-04-08/ds2=2001-04-08 [t1]
+ /pcr_t1/ds1=2000-04-09/ds2=2001-04-09 [t2]
+ Needs Tagging: true
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ outputColumnNames: _col0, _col1, _col3, _col7, _col8, _col10
+ Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string), _col7 (type: int), _col8 (type: string), _col10 (type: string)
+ outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col7
+ Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col3,_col4,_col5,_col7
+ columns.types int,string,string,int,string,string
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ GatherStats: false
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col1 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: string), _col7 (type: string)
+ auto parallelism: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: -mr-10003
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col3,_col4,_col5,_col7
+ columns.types int,string,string,int,string,string
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col3,_col4,_col5,_col7
+ columns.types int,string,string,int,string,string
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Truncated Path -> Alias:
+#### A masked pattern was here ####
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), '2000-04-08' (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), '2000-04-09' (type: string), VALUE._col6 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7
+ columns.types int:string:string:string:int:string:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join JOIN[4][tables = [t1, t2]] in Stage 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: explain extended
+select *
+from pcr_t1 t1 join pcr_t1 t2
+where (t1.ds1='2000-04-08' and t2.key=1) or (t1.ds1='2000-04-09' and t2.key=2)
+order by t2.key, t2.value, t1.ds1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select *
+from pcr_t1 t1 join pcr_t1 t2
+where (t1.ds1='2000-04-08' and t2.key=1) or (t1.ds1='2000-04-09' and t2.key=2)
+order by t2.key, t2.value, t1.ds1
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+ TOK_FROM
+ TOK_JOIN
+ TOK_TABREF
+ TOK_TABNAME
+ pcr_t1
+ t1
+ TOK_TABREF
+ TOK_TABNAME
+ pcr_t1
+ t2
+ TOK_INSERT
+ TOK_DESTINATION
+ TOK_DIR
+ TOK_TMP_FILE
+ TOK_SELECT
+ TOK_SELEXPR
+ TOK_ALLCOLREF
+ TOK_WHERE
+ or
+ and
+ =
+ .
+ TOK_TABLE_OR_COL
+ t1
+ ds1
+ '2000-04-08'
+ =
+ .
+ TOK_TABLE_OR_COL
+ t2
+ key
+ 1
+ and
+ =
+ .
+ TOK_TABLE_OR_COL
+ t1
+ ds1
+ '2000-04-09'
+ =
+ .
+ TOK_TABLE_OR_COL
+ t2
+ key
+ 2
+ TOK_ORDERBY
+ TOK_TABSORTCOLNAMEASC
+ .
+ TOK_TABLE_OR_COL
+ t2
+ key
+ TOK_TABSORTCOLNAMEASC
+ .
+ TOK_TABLE_OR_COL
+ t2
+ value
+ TOK_TABSORTCOLNAMEASC
+ .
+ TOK_TABLE_OR_COL
+ t1
+ ds1
+
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ value expressions: key (type: int), value (type: string), ds1 (type: string), ds2 (type: string)
+ auto parallelism: false
+ TableScan
+ alias: t2
+ Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ value expressions: key (type: int), value (type: string), ds1 (type: string), ds2 (type: string)
+ auto parallelism: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: ds2=2001-04-08
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds1 2000-04-08
+ ds2 2001-04-08
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.pcr_t1
+ numFiles 1
+ numRows 20
+ partition_columns ds1/ds2
+ partition_columns.types string:string
+ rawDataSize 160
+ serialization.ddl struct pcr_t1 { i32 key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 180
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.comments
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.pcr_t1
+ partition_columns ds1/ds2
+ partition_columns.types string:string
+ serialization.ddl struct pcr_t1 { i32 key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.pcr_t1
+ name: default.pcr_t1
+#### A masked pattern was here ####
+ Partition
+ base file name: ds2=2001-04-09
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds1 2000-04-09
+ ds2 2001-04-09
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.pcr_t1
+ numFiles 1
+ numRows 20
+ partition_columns ds1/ds2
+ partition_columns.types string:string
+ rawDataSize 160
+ serialization.ddl struct pcr_t1 { i32 key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 180
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.comments
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.pcr_t1
+ partition_columns ds1/ds2
+ partition_columns.types string:string
+ serialization.ddl struct pcr_t1 { i32 key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.pcr_t1
+ name: default.pcr_t1
+#### A masked pattern was here ####
+ Partition
+ base file name: ds2=2001-04-10
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds1 2000-04-10
+ ds2 2001-04-10
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.pcr_t1
+ numFiles 1
+ numRows 20
+ partition_columns ds1/ds2
+ partition_columns.types string:string
+ rawDataSize 160
+ serialization.ddl struct pcr_t1 { i32 key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 180
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.comments
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.pcr_t1
+ partition_columns ds1/ds2
+ partition_columns.types string:string
+ serialization.ddl struct pcr_t1 { i32 key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.pcr_t1
+ name: default.pcr_t1
+ Truncated Path -> Alias:
+ /pcr_t1/ds1=2000-04-08/ds2=2001-04-08 [t1, t2]
+ /pcr_t1/ds1=2000-04-09/ds2=2001-04-09 [t1, t2]
+ /pcr_t1/ds1=2000-04-10/ds2=2001-04-10 [t2]
+ Needs Tagging: true
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2, _col3, _col7, _col8, _col9, _col10
+ Statistics: Num rows: 66 Data size: 528 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((_col2) IN ('2000-04-08', '2000-04-09') and (struct(_col7,_col2)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09'))) (type: boolean)
+ Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col7 (type: int), _col8 (type: string), _col9 (type: string), _col10 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7
+ columns.types int,string,string,string,int,string,string,string
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ GatherStats: false
+ Reduce Output Operator
+ key expressions: _col4 (type: int), _col5 (type: string), _col2 (type: string)
+ sort order: +++
+ Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string), _col6 (type: string), _col7 (type: string)
+ auto parallelism: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: -mr-10003
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7
+ columns.types int,string,string,string,int,string,string,string
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7
+ columns.types int,string,string,string,int,string,string,string
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Truncated Path -> Alias:
+#### A masked pattern was here ####
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col3 (type: string), VALUE._col4 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7
+ columns.types int:string:string:string:int:string:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: drop table pcr_t1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@pcr_t1
+PREHOOK: Output: default@pcr_t1
+POSTHOOK: query: drop table pcr_t1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@pcr_t1
+POSTHOOK: Output: default@pcr_t1
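In short, the plans above exercise the point-lookup rewrite on partition
columns: a disjunction of equality conjuncts is folded into a single IN over a
struct of the compared columns, which is what surfaces in the Filter Operator
predicates. A minimal sketch of the transformation, taken from the first query
in this golden file:

  -- predicate as written
  where (ds1='2000-04-08' and key=1) or (ds1='2000-04-09' and key=2)
  -- form visible in the plan after the rewrite
  where struct(ds1, key) in (struct('2000-04-08', 1), struct('2000-04-09', 2))

Because each disjunct pins the partition column ds1 to a constant, the pruner
also narrows "Truncated Path -> Alias" to the two matching partitions instead
of scanning all three that were loaded.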
[12/28] hive git commit: Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/hive
Posted by se...@apache.org.
Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/hive
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9763c9dd
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9763c9dd
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9763c9dd
Branch: refs/heads/llap
Commit: 9763c9dd31bd5939db3ca50e75bb97955b411f6d
Parents: da95f63 8f930e5
Author: Dmitry Tolpeko <dm...@gmail.com>
Authored: Tue Sep 1 05:00:33 2015 -0700
Committer: Dmitry Tolpeko <dm...@gmail.com>
Committed: Tue Sep 1 05:00:33 2015 -0700
----------------------------------------------------------------------
.../java/org/apache/hive/beeline/BeeLine.java | 13 +-
.../org/apache/hive/beeline/BeeLineOpts.java | 12 +-
.../org/apache/hive/beeline/HiveSchemaTool.java | 14 +-
.../org/apache/hadoop/hive/conf/HiveConf.java | 76 +-
data/conf/tez/hive-site.xml | 9 +
.../hive/hbase/HiveHBaseInputFormatUtil.java | 50 +-
.../queries/positive/hbase_null_first_col.q | 22 +
.../results/positive/hbase_null_first_col.q.out | 109 ++
.../vectorization/VectorizationBench.java | 93 +
.../org/apache/hive/jdbc/HiveConnection.java | 30 +-
jdbc/src/java/org/apache/hive/jdbc/Utils.java | 117 +-
.../hive/jdbc/ZooKeeperHiveClientHelper.java | 104 +-
.../hadoop/hive/metastore/HiveAlterHandler.java | 2 +-
.../hadoop/hive/metastore/HiveMetaStore.java | 6 +-
.../hive/metastore/MetaStoreDirectSql.java | 40 +-
.../hadoop/hive/metastore/MetaStoreUtils.java | 17 +-
.../hadoop/hive/ql/exec/FunctionRegistry.java | 61 +-
.../hadoop/hive/ql/exec/ScriptOperator.java | 85 +-
.../hive/ql/exec/tez/TezSessionState.java | 2 +
.../ql/exec/vector/VectorHashKeyWrapper.java | 2 +-
.../ql/exec/vector/VectorizationContext.java | 7 +-
.../expressions/CastStringGroupToString.java | 40 +
.../ql/exec/vector/expressions/ColAndCol.java | 34 +-
.../ql/exec/vector/expressions/ColOrCol.java | 42 +-
.../exec/vector/expressions/CuckooSetBytes.java | 2 +-
.../hive/ql/exec/vector/expressions/NotCol.java | 14 +-
...VectorMapJoinInnerBigOnlyStringOperator.java | 4 +-
.../VectorMapJoinInnerStringOperator.java | 4 +-
.../VectorMapJoinLeftSemiStringOperator.java | 4 +-
.../VectorMapJoinOuterStringOperator.java | 4 +-
.../apache/hadoop/hive/ql/io/orc/FileDump.java | 48 +-
.../hadoop/hive/ql/io/orc/OrcInputFormat.java | 18 +-
.../apache/hadoop/hive/ql/io/orc/Reader.java | 6 +
.../hadoop/hive/ql/io/orc/ReaderImpl.java | 281 ++-
.../hadoop/hive/ql/io/orc/RecordReaderImpl.java | 2 -
.../hive/ql/io/orc/TreeReaderFactory.java | 18 +-
.../hive/ql/io/parquet/LeafFilterFactory.java | 43 +-
.../read/ParquetFilterPredicateConverter.java | 35 +-
.../hive/ql/io/sarg/ConvertAstToSearchArg.java | 3 -
.../hadoop/hive/ql/lib/DefaultGraphWalker.java | 80 +-
.../hadoop/hive/ql/lib/ForwardWalker.java | 33 +-
.../ql/metadata/SessionHiveMetaStoreClient.java | 2 +-
.../hadoop/hive/ql/optimizer/ColumnPruner.java | 6 +-
.../hive/ql/optimizer/ConstantPropagate.java | 10 +-
.../optimizer/ConstantPropagateProcFactory.java | 100 +-
.../hadoop/hive/ql/optimizer/IndexUtils.java | 13 +-
.../hadoop/hive/ql/optimizer/Optimizer.java | 17 +-
.../hive/ql/optimizer/PointLookupOptimizer.java | 102 +-
.../calcite/translator/HiveOpConverter.java | 53 +-
.../ql/parse/ColumnStatsSemanticAnalyzer.java | 6 +-
.../hadoop/hive/ql/parse/LeadLagInfo.java | 4 +-
.../hive/ql/parse/LoadSemanticAnalyzer.java | 38 +-
.../hadoop/hive/ql/plan/ExprNodeDesc.java | 23 +-
.../hadoop/hive/ql/ppd/ExprWalkerInfo.java | 136 +-
.../hive/ql/ppd/ExprWalkerProcFactory.java | 92 +-
.../hadoop/hive/ql/ppd/OpProcFactory.java | 11 +-
.../hadoop/hive/ql/udf/generic/GenericUDF.java | 14 +-
.../ql/udf/generic/GenericUDFBaseNumeric.java | 4 +-
.../hive/ql/udf/generic/GenericUDFBasePad.java | 8 +-
.../hive/ql/udf/generic/GenericUDFNvl.java | 2 +-
.../hive/ql/udf/generic/GenericUDFOPAnd.java | 4 +
.../hive/ql/udf/generic/GenericUDFOPEqual.java | 4 +
.../generic/GenericUDFOPEqualOrGreaterThan.java | 4 +
.../generic/GenericUDFOPEqualOrLessThan.java | 4 +
.../ql/udf/generic/GenericUDFOPGreaterThan.java | 4 +
.../ql/udf/generic/GenericUDFOPLessThan.java | 4 +
.../ql/udf/generic/GenericUDFOPNotEqual.java | 5 +
.../ql/udf/generic/GenericUDFOPNotNull.java | 4 +
.../hive/ql/udf/generic/GenericUDFOPNull.java | 4 +
.../hive/ql/udf/generic/GenericUDFOPOr.java | 4 +
ql/src/main/resources/tez-container-log4j2.xml | 49 +
.../apache/hadoop/hive/ql/TestTxnCommands2.java | 85 +-
.../hadoop/hive/ql/exec/TestOperators.java | 16 +
.../hive/ql/io/orc/TestInputOutputFormat.java | 4 +-
.../hadoop/hive/ql/io/orc/TestOrcFile.java | 10 +-
.../hive/ql/io/orc/TestRecordReaderImpl.java | 42 +-
.../parquet/TestParquetRecordReaderWrapper.java | 50 +-
.../read/TestParquetFilterPredicate.java | 27 +-
.../ql/io/sarg/TestConvertAstToSearchArg.java | 128 +-
.../hive/ql/io/sarg/TestSearchArgumentImpl.java | 22 +-
.../queries/clientnegative/load_orc_negative3.q | 6 +
.../queries/clientnegative/nvl_mismatch_type.q | 20 +
.../clientpositive/cbo_rp_outer_join_ppr.q | 40 +
.../clientpositive/columnstats_quoting.q | 8 +
.../queries/clientpositive/flatten_and_or.q | 4 +-
.../queries/clientpositive/folder_predicate.q | 32 +
.../test/queries/clientpositive/load_orc_part.q | 4 +
.../clientpositive/parquet_ppd_partition.q | 9 +
.../clientpositive/parquet_predicate_pushdown.q | 297 +++-
.../test/queries/clientpositive/pointlookup.q | 59 +
.../test/queries/clientpositive/pointlookup2.q | 51 +
.../queries/clientpositive/selectDistinctStar.q | 2 +
ql/src/test/queries/clientpositive/structin.q | 6 +
.../clientpositive/unionall_unbalancedppd.q | 3 +
.../clientpositive/vector_cast_constant.q | 4 +-
.../queries/clientpositive/vectorized_casts.q | 6 +
.../clientnegative/char_pad_convert_fail0.q.out | 2 +-
.../clientnegative/char_pad_convert_fail1.q.out | 2 +-
.../clientnegative/char_pad_convert_fail3.q.out | 2 +-
.../clientnegative/load_orc_negative3.q.out | 25 +
.../clientnegative/nvl_mismatch_type.q.out | 43 +
.../alter_partition_coltype.q.out | 12 +-
.../clientpositive/annotate_stats_filter.q.out | 18 +-
.../cbo_rp_outer_join_ppr.q.java1.7.out | 855 +++++++++
.../clientpositive/columnstats_quoting.q.out | 114 ++
.../results/clientpositive/decimal_udf.q.out | 18 +-
.../clientpositive/filter_cond_pushdown.q.out | 32 +-
.../clientpositive/filter_join_breaktask.q.out | 12 +-
.../results/clientpositive/flatten_and_or.q.out | 8 +-
.../test/results/clientpositive/fold_when.q.out | 16 +-
.../clientpositive/folder_predicate.q.out | 368 ++++
.../clientpositive/input_testxpath2.q.out | 2 +-
.../list_bucket_query_oneskew_3.q.out | 6 +-
.../results/clientpositive/load_orc_part.q.out | 18 +
.../clientpositive/parquet_ppd_partition.q.out | 47 +
.../parquet_predicate_pushdown.q.out | 1309 +++++++++++++-
ql/src/test/results/clientpositive/pcr.q.out | 12 +-
.../results/clientpositive/pointlookup.q.out | 198 +++
.../results/clientpositive/pointlookup2.q.out | 1647 ++++++++++++++++++
.../results/clientpositive/ppd_transform.q.out | 12 +-
.../clientpositive/rand_partitionpruner3.q.out | 12 +-
.../clientpositive/selectDistinctStar.q.out | 8 +-
.../clientpositive/select_unquote_not.q.out | 8 +-
.../spark/filter_join_breaktask.q.out | 12 +-
.../test/results/clientpositive/spark/pcr.q.out | 12 +-
.../clientpositive/spark/ppd_transform.q.out | 12 +-
.../spark/vector_cast_constant.q.java1.7.out | 41 +-
.../clientpositive/spark/vectorized_case.q.out | 2 +-
.../test/results/clientpositive/structin.q.out | 44 +
.../clientpositive/tez/explainuser_1.q.out | 2 +-
.../tez/filter_join_breaktask.q.out | 12 +-
.../clientpositive/tez/selectDistinctStar.q.out | 8 +-
.../tez/vector_cast_constant.q.java1.7.out | 21 +-
.../tez/vector_char_mapjoin1.q.out | 1 +
.../clientpositive/tez/vector_decimal_udf.q.out | 24 +-
.../tez/vector_varchar_mapjoin1.q.out | 1 +
.../clientpositive/tez/vectorized_case.q.out | 2 +-
.../clientpositive/tez/vectorized_casts.q.out | 99 +-
.../clientpositive/udf_isnull_isnotnull.q.out | 2 +-
.../test/results/clientpositive/udf_size.q.out | 2 +-
.../clientpositive/unionall_unbalancedppd.q.out | 14 +-
.../vector_cast_constant.q.java1.7.out | 32 +-
.../clientpositive/vector_char_mapjoin1.q.out | 1 +
.../clientpositive/vector_decimal_udf.q.out | 24 +-
.../vector_varchar_mapjoin1.q.out | 1 +
.../clientpositive/vectorized_case.q.out | 9 +-
.../clientpositive/vectorized_casts.q.out | 66 +-
.../serde2/avro/AvroLazyObjectInspector.java | 19 +-
.../hadoop/hive/serde2/avro/InstanceCache.java | 17 +-
.../objectinspector/ObjectInspectorFactory.java | 61 +-
.../ReflectionStructObjectInspector.java | 60 +-
.../StandardStructObjectInspector.java | 7 +-
.../ThriftUnionObjectInspector.java | 28 +-
.../avro/TestAvroLazyObjectInspector.java | 59 +
.../TestReflectionObjectInspectors.java | 71 +-
.../apache/hive/service/server/HiveServer2.java | 74 +-
shims/0.20S/pom.xml | 8 +-
.../hadoop/hive/shims/Hadoop20SShims.java | 35 +-
shims/0.23/pom.xml | 21 +-
.../apache/hadoop/hive/shims/Hadoop23Shims.java | 79 +-
.../apache/hadoop/hive/shims/HadoopShims.java | 22 +
.../hadoop/hive/shims/HadoopShimsSecure.java | 32 +
.../hadoop/hive/ql/io/sarg/PredicateLeaf.java | 3 +-
163 files changed, 7857 insertions(+), 1241 deletions(-)
----------------------------------------------------------------------
[28/28] hive git commit: HIVE-11730 : LLAP: merge master into branch (Sergey Shelukhin)
Posted by se...@apache.org.
HIVE-11730 : LLAP: merge master into branch (Sergey Shelukhin)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/772c4b90
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/772c4b90
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/772c4b90
Branch: refs/heads/llap
Commit: 772c4b90f99dd6adafbc10e1bad5c5a40c803b7a
Parents: 0a20369 bb4f5e7
Author: Sergey Shelukhin <se...@apache.org>
Authored: Thu Sep 3 19:53:51 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Thu Sep 3 19:53:51 2015 -0700
----------------------------------------------------------------------
data/conf/hive-log4j2.xml | 3 -
errata.txt | 10 +
.../antlr4/org/apache/hive/hplsql/Hplsql.g4 | 85 +-
.../java/org/apache/hive/hplsql/Column.java | 65 +
.../main/java/org/apache/hive/hplsql/Exec.java | 142 +-
.../java/org/apache/hive/hplsql/Expression.java | 6 +
.../main/java/org/apache/hive/hplsql/Meta.java | 118 ++
.../main/java/org/apache/hive/hplsql/Row.java | 97 ++
.../java/org/apache/hive/hplsql/Select.java | 16 +-
.../main/java/org/apache/hive/hplsql/Stmt.java | 73 +-
.../main/java/org/apache/hive/hplsql/Var.java | 37 +-
.../apache/hive/hplsql/functions/Function.java | 13 +
.../org/apache/hive/hplsql/TestHplsqlLocal.java | 7 +-
.../apache/hive/hplsql/TestHplsqlOffline.java | 2 +-
.../src/test/queries/db/rowtype_attribute.sql | 22 +
hplsql/src/test/queries/db/type_attribute.sql | 8 +
.../local/create_procedure_no_params.sql | 19 +
.../test/queries/offline/create_table_ora.sql | 49 +
.../test/results/db/rowtype_attribute.out.txt | 42 +
.../src/test/results/db/type_attribute.out.txt | 15 +
.../local/create_procedure_no_params.out.txt | 26 +
.../results/offline/create_table_ora.out.txt | 38 +
.../hive/metastore/MetaStoreDirectSql.java | 29 +-
pom.xml | 2 +-
.../hive/ql/exec/tez/TezSessionState.java | 1 +
.../apache/hadoop/hive/ql/io/orc/FileDump.java | 48 +-
.../hadoop/hive/ql/io/orc/FileMetaInfo.java | 13 +-
.../hadoop/hive/ql/io/orc/OrcInputFormat.java | 193 ++-
.../apache/hadoop/hive/ql/io/orc/Reader.java | 4 +
.../hadoop/hive/ql/io/orc/ReaderImpl.java | 270 +++-
.../hadoop/hive/ql/io/orc/RecordReaderImpl.java | 2 -
.../hive/ql/io/parquet/LeafFilterFactory.java | 43 +-
.../read/ParquetFilterPredicateConverter.java | 35 +-
.../hive/ql/io/sarg/ConvertAstToSearchArg.java | 3 -
.../hadoop/hive/ql/lib/DefaultGraphWalker.java | 80 +-
.../hadoop/hive/ql/lib/ForwardWalker.java | 33 +-
.../apache/hadoop/hive/ql/lib/RuleRegExp.java | 22 +-
.../hadoop/hive/ql/optimizer/ColumnPruner.java | 6 +-
.../hive/ql/optimizer/ConstantPropagate.java | 10 +-
.../ql/optimizer/IdentityProjectRemover.java | 15 +
.../hadoop/hive/ql/optimizer/IndexUtils.java | 13 +-
.../ql/optimizer/calcite/HiveRelOptUtil.java | 23 -
.../calcite/reloperators/HiveSort.java | 29 +-
.../rules/HiveJoinProjectTransposeRule.java | 238 +--
.../calcite/translator/HiveOpConverter.java | 22 +-
.../hadoop/hive/ql/parse/CalcitePlanner.java | 5 +-
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 4 +
.../apache/hadoop/hive/ql/TestTxnCommands2.java | 85 +-
.../hive/ql/io/orc/TestInputOutputFormat.java | 17 +-
.../hadoop/hive/ql/io/orc/TestOrcFile.java | 10 +-
.../hive/ql/io/orc/TestRecordReaderImpl.java | 42 +-
.../parquet/TestParquetRecordReaderWrapper.java | 50 +-
.../read/TestParquetFilterPredicate.java | 27 +-
.../ql/io/sarg/TestConvertAstToSearchArg.java | 128 +-
.../hive/ql/io/sarg/TestSearchArgumentImpl.java | 22 +-
.../clientpositive/cbo_rp_outer_join_ppr.q | 40 +
.../queries/clientpositive/groupby1_map_nomap.q | 2 +
ql/src/test/queries/clientpositive/groupby6.q | 2 +
.../clientpositive/groupby_grouping_id2.q | 2 +
.../clientpositive/groupby_ppr_multi_distinct.q | 2 +
ql/src/test/queries/clientpositive/having2.q | 27 +
.../clientpositive/parquet_ppd_boolean.q | 35 +
.../queries/clientpositive/parquet_ppd_char.q | 76 +
.../queries/clientpositive/parquet_ppd_date.q | 101 ++
.../clientpositive/parquet_ppd_decimal.q | 163 ++
.../clientpositive/parquet_ppd_partition.q | 9 +
.../clientpositive/parquet_ppd_timestamp.q | 98 ++
.../clientpositive/parquet_ppd_varchar.q | 76 +
.../clientpositive/parquet_predicate_pushdown.q | 297 +++-
.../test/queries/clientpositive/pointlookup3.q | 41 +
.../queries/clientpositive/ptfgroupbyjoin.q | 61 +
.../cbo_rp_outer_join_ppr.q.java1.7.out | 855 +++++++++++
.../clientpositive/constprog_partitioner.q.out | 30 +-
.../clientpositive/correlationoptimizer10.q.out | 48 +-
.../clientpositive/groupby1_map_nomap.q.out | 8 +-
.../test/results/clientpositive/groupby6.q.out | 8 +-
.../clientpositive/groupby_duplicate_key.q.out | 16 +-
.../clientpositive/groupby_grouping_id2.q.out | 28 +-
.../groupby_ppr_multi_distinct.q.out | 8 +-
.../test/results/clientpositive/having2.q.out | 353 +++++
.../clientpositive/parquet_ppd_boolean.q.out | 200 +++
.../clientpositive/parquet_ppd_char.q.out | 220 +++
.../clientpositive/parquet_ppd_date.q.out | 301 ++++
.../clientpositive/parquet_ppd_decimal.q.out | 490 ++++++
.../clientpositive/parquet_ppd_partition.q.out | 47 +
.../clientpositive/parquet_ppd_timestamp.q.out | 292 ++++
.../clientpositive/parquet_ppd_varchar.q.out | 220 +++
.../parquet_predicate_pushdown.q.out | 1309 +++++++++++++++-
.../results/clientpositive/pointlookup3.q.out | 1394 ++++++++++++++++++
.../results/clientpositive/ptfgroupbyjoin.q.out | 519 +++++++
.../spark/constprog_partitioner.q.out | 30 +-
.../spark/groupby1_map_nomap.q.out | 564 +++----
.../results/clientpositive/spark/groupby6.q.out | 20 +-
.../spark/groupby_grouping_id2.q.out | 38 +-
.../spark/groupby_ppr_multi_distinct.q.out | 16 +-
.../clientpositive/spark/subquery_exists.q.out | 12 +-
.../clientpositive/spark/subquery_in.q.out | 36 +-
.../spark/vector_mapjoin_reduce.q.out | 22 +-
.../clientpositive/subquery_exists.q.out | 12 +-
.../results/clientpositive/subquery_in.q.out | 36 +-
.../clientpositive/subquery_in_having.q.out | 50 +-
.../subquery_unqualcolumnrefs.q.out | 26 +-
.../results/clientpositive/subquery_views.q.out | 40 +-
.../clientpositive/tez/explainuser_1.q.out | 309 ++--
.../clientpositive/tez/subquery_exists.q.out | 12 +-
.../clientpositive/tez/subquery_in.q.out | 36 +-
.../clientpositive/tez/vector_inner_join.q.out | 14 +-
.../tez/vector_mapjoin_reduce.q.out | 24 +-
.../clientpositive/vector_inner_join.q.out | 12 +-
.../clientpositive/vector_mapjoin_reduce.q.out | 26 +-
.../hadoop/hive/ql/io/sarg/PredicateLeaf.java | 3 +-
111 files changed, 9648 insertions(+), 1485 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/772c4b90/pom.xml
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/772c4b90/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java
----------------------------------------------------------------------
diff --cc ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java
index ac460b3,568ebbe..41a742c
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java
@@@ -202,30 -165,9 +202,31 @@@ public class TezSessionState
// and finally we're ready to create and start the session
// generate basic tez config
TezConfiguration tezConfig = new TezConfiguration(conf);
+
+ // set up the staging directory to use
tezConfig.set(TezConfiguration.TEZ_AM_STAGING_DIR, tezScratchDir.toUri().toString());
+ Utilities.stripHivePasswordDetails(tezConfig);
+ ServicePluginsDescriptor servicePluginsDescriptor;
+ UserPayload servicePluginPayload = TezUtils.createUserPayloadFromConf(tezConfig);
+
+ if (llapMode) {
+ // we need plugins to handle llap and uber mode
+ servicePluginsDescriptor = ServicePluginsDescriptor.create(true,
+ new TaskSchedulerDescriptor[]{
+ TaskSchedulerDescriptor.create(LLAP_SERVICE, LLAP_SCHEDULER)
+ .setUserPayload(servicePluginPayload)},
+ new ContainerLauncherDescriptor[]{
+ ContainerLauncherDescriptor.create(LLAP_SERVICE, LLAP_LAUNCHER)},
+ new TaskCommunicatorDescriptor[]{
+ TaskCommunicatorDescriptor.create(LLAP_SERVICE, LLAP_TASK_COMMUNICATOR)
+ .setUserPayload(servicePluginPayload)});
+ } else {
+ // non-llap (container) mode: the default plugins descriptor is enough
+ servicePluginsDescriptor = ServicePluginsDescriptor.create(true);
+ }
+
+ // container prewarming: tell the AM how many containers we need
if (HiveConf.getBoolVar(conf, ConfVars.HIVE_PREWARM_ENABLED)) {
int n = HiveConf.getIntVar(conf, ConfVars.HIVE_PREWARM_NUM_CONTAINERS);
n = Math.max(tezConfig.getInt(
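For context, the plugin wiring in the hunk above uses Tez's service-plugin API. A minimal sketch of the same branch, assuming the Tez 0.8-era descriptor classes (package paths and the LLAP implementation class names below are placeholders, not the constants Hive actually uses):

    import java.io.IOException;
    import org.apache.tez.common.TezUtils;
    import org.apache.tez.dag.api.TezConfiguration;
    import org.apache.tez.dag.api.UserPayload;
    import org.apache.tez.serviceplugins.api.ContainerLauncherDescriptor;
    import org.apache.tez.serviceplugins.api.ServicePluginsDescriptor;
    import org.apache.tez.serviceplugins.api.TaskCommunicatorDescriptor;
    import org.apache.tez.serviceplugins.api.TaskSchedulerDescriptor;

    class ServicePluginsSketch {
      static ServicePluginsDescriptor build(TezConfiguration tezConfig, boolean llapMode)
          throws IOException {
        if (!llapMode) {
          // Container mode: default plugins, same boolean flag as the call above.
          return ServicePluginsDescriptor.create(true);
        }
        // LLAP mode: register a custom scheduler, launcher and task communicator,
        // handing each the serialized configuration as its payload.
        UserPayload payload = TezUtils.createUserPayloadFromConf(tezConfig);
        return ServicePluginsDescriptor.create(true,
            new TaskSchedulerDescriptor[] {
                TaskSchedulerDescriptor.create("LLAP", "org.example.LlapScheduler")
                    .setUserPayload(payload) },
            new ContainerLauncherDescriptor[] {
                ContainerLauncherDescriptor.create("LLAP", "org.example.LlapLauncher") },
            new TaskCommunicatorDescriptor[] {
                TaskCommunicatorDescriptor.create("LLAP", "org.example.LlapTaskCommunicator")
                    .setUserPayload(payload) });
      }
    }

Only the LLAP branch needs custom plugins; the container branch falls back to the defaults.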
http://git-wip-us.apache.org/repos/asf/hive/blob/772c4b90/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/772c4b90/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileMetaInfo.java
----------------------------------------------------------------------
diff --cc ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileMetaInfo.java
index aced77b,0000000..2853119
mode 100644,000000..100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileMetaInfo.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileMetaInfo.java
@@@ -1,53 -1,0 +1,60 @@@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.orc;
+
+import java.nio.ByteBuffer;
+import java.util.List;
+
++import org.apache.hadoop.hive.ql.io.orc.OrcFile.WriterVersion;
++
+/**
+ * FileMetaInfo - represents file metadata stored in footer and postscript sections of the file
+ * that is useful to the Reader implementation
+ *
+ */
+class FileMetaInfo {
++ ByteBuffer footerMetaAndPsBuffer;
+ final String compressionType;
+ final int bufferSize;
+ final int metadataSize;
+ final ByteBuffer footerBuffer;
+ final List<Integer> versionList;
+ final OrcFile.WriterVersion writerVersion;
+
++
++ /** Ctor used when reading splits - no version list or full footer buffer. */
+ FileMetaInfo(String compressionType, int bufferSize, int metadataSize,
+ ByteBuffer footerBuffer, OrcFile.WriterVersion writerVersion) {
+ this(compressionType, bufferSize, metadataSize, footerBuffer, null,
- writerVersion);
++ writerVersion, null);
+ }
+
++ /** Ctor used when creating file info during init and when getting a new one. */
+ public FileMetaInfo(String compressionType, int bufferSize, int metadataSize,
- ByteBuffer footerBuffer, List<Integer> versionList,
- OrcFile.WriterVersion writerVersion){
++ ByteBuffer footerBuffer, List<Integer> versionList, WriterVersion writerVersion,
++ ByteBuffer fullFooterBuffer) {
+ this.compressionType = compressionType;
+ this.bufferSize = bufferSize;
+ this.metadataSize = metadataSize;
+ this.footerBuffer = footerBuffer;
+ this.versionList = versionList;
+ this.writerVersion = writerVersion;
++ this.footerMetaAndPsBuffer = fullFooterBuffer;
+ }
+}
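To make the two constructors concrete, a hedged usage sketch (FileMetaInfo is package-private, so this would have to sit in org.apache.hadoop.hive.ql.io.orc; the buffers, sizes and writer version are dummies):

    import java.nio.ByteBuffer;
    import java.util.Arrays;

    class FileMetaInfoSketch {
      static void demo() {
        ByteBuffer footer = ByteBuffer.wrap(new byte[0]);      // footer section only
        ByteBuffer fullFooter = ByteBuffer.wrap(new byte[0]);  // metadata + footer + postscript

        // Split path: version list and full footer buffer both default to null.
        FileMetaInfo fromSplit = new FileMetaInfo("ZLIB", 262144, 0, footer,
            OrcFile.WriterVersion.ORIGINAL);

        // Init path: keeps the version list and the full buffer for metastore caching.
        FileMetaInfo fromInit = new FileMetaInfo("ZLIB", 262144, 0, footer,
            Arrays.asList(0, 12), OrcFile.WriterVersion.ORIGINAL, fullFooter);
      }
    }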
http://git-wip-us.apache.org/repos/asf/hive/blob/772c4b90/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
diff --cc ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 380fd4e,cf8694e..5770bef
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@@ -51,9 -53,9 +53,10 @@@ import org.apache.hadoop.hive.ql.io.Aci
import org.apache.hadoop.hive.ql.io.AcidInputFormat.DeltaMetaData;
import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
import org.apache.hadoop.hive.ql.io.AcidUtils;
+ import org.apache.hadoop.hive.ql.io.AcidUtils.Directory;
import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
import org.apache.hadoop.hive.ql.io.InputFormatChecker;
+import org.apache.hadoop.hive.ql.io.LlapWrappableInputFormatInterface;
import org.apache.hadoop.hive.ql.io.RecordIdentifier;
import org.apache.hadoop.hive.ql.io.StatsProvidingRecordReader;
import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.Context;
@@@ -1053,16 -1023,18 +1025,18 @@@ public class OrcInputFormat implement
throws IOException {
// use threads to resolve directories into splits
Context context = new Context(conf, numSplits);
+ boolean useFileIds = HiveConf.getBoolVar(conf, ConfVars.HIVE_ORC_INCLUDE_FILE_ID_IN_SPLITS);
List<OrcSplit> splits = Lists.newArrayList();
- List<Future<?>> pathFutures = Lists.newArrayList();
- List<Future<?>> splitFutures = Lists.newArrayList();
+ List<Future<AcidDirInfo>> pathFutures = Lists.newArrayList();
+ List<Future<List<OrcSplit>>> splitFutures = Lists.newArrayList();
// multi-threaded file statuses and split strategy
- for (Path dir : getInputPaths(conf)) {
- boolean useFileIds = HiveConf.getBoolVar(conf, ConfVars.HIVE_ORC_INCLUDE_FILE_ID_IN_SPLITS);
+ Path[] paths = getInputPaths(conf);
+ CompletionService<AcidDirInfo> ecs = new ExecutorCompletionService<>(Context.threadPool);
+ for (Path dir : paths) {
FileSystem fs = dir.getFileSystem(conf);
FileGenerator fileGenerator = new FileGenerator(context, fs, dir, useFileIds);
- pathFutures.add(context.threadPool.submit(fileGenerator));
+ pathFutures.add(ecs.submit(fileGenerator));
}
// complete path futures and schedule split generation
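The switch from plain Future lists to an ExecutorCompletionService means directory results are consumed in completion order rather than submission order, so one slow directory no longer stalls split generation for the fast ones. A self-contained sketch of the pattern (plain JDK; the task bodies are stand-ins for FileGenerator):

    import java.util.concurrent.CompletionService;
    import java.util.concurrent.ExecutorCompletionService;
    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;

    class CompletionOrderSketch {
      public static void main(String[] args) throws Exception {
        ExecutorService pool = Executors.newFixedThreadPool(4);
        CompletionService<String> ecs = new ExecutorCompletionService<>(pool);
        int submitted = 0;
        for (String dir : new String[] { "dirA", "dirB", "dirC" }) {
          ecs.submit(() -> "listed " + dir);  // one listing task per input path
          submitted++;
        }
        for (int i = 0; i < submitted; i++) {
          // take() returns whichever task finished first, not the one submitted first.
          System.out.println(ecs.take().get());
        }
        pool.shutdown();
      }
    }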
http://git-wip-us.apache.org/repos/asf/hive/blob/772c4b90/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java
----------------------------------------------------------------------
diff --cc ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java
index 71baabc,187924d..251e1f8
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java
@@@ -352,15 -360,8 +353,18 @@@ public interface Reader
MetadataReader metadata() throws IOException;
+ List<Integer> getVersionList();
+
+ int getMetadataSize();
+
+ List<OrcProto.StripeStatistics> getOrcProtoStripeStatistics();
+
+ List<StripeStatistics> getStripeStatistics();
+
+ List<OrcProto.ColumnStatistics> getOrcProtoFileStatistics();
+
+ DataReader createDefaultDataReader(boolean useZeroCopy);
++
+ /** Gets serialized file metadata read from disk for the purposes of caching, etc. */
+ ByteBuffer getSerializedFileFooter();
-
- Footer getFooter();
}
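A hedged sketch of how a caller might use the new accessor to fill a footer cache; the cache and method below are hypothetical, and only getSerializedFileFooter() comes from the interface above:

    import java.nio.ByteBuffer;
    import java.util.Map;
    import java.util.concurrent.ConcurrentHashMap;
    import org.apache.hadoop.fs.Path;

    class FooterCacheSketch {
      private final Map<Path, ByteBuffer> cache = new ConcurrentHashMap<>();

      void remember(Path path, Reader reader) {
        ByteBuffer serialized = reader.getSerializedFileFooter();
        if (serialized != null) {  // null when the reader was built from cached metadata
          cache.put(path, serialized.duplicate());  // duplicate: don't share position/limit
        }
      }
    }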
http://git-wip-us.apache.org/repos/asf/hive/blob/772c4b90/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
----------------------------------------------------------------------
diff --cc ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
index 4ff1e28,ab539c4..8661682
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
@@@ -82,8 -76,11 +82,11 @@@ public class ReaderImpl implements Read
// will help avoid cpu cycles spent in deserializing at the cost of increased
// memory footprint.
private final ByteBuffer footerByteBuffer;
+ // Same for metastore cache - maintains the same background buffer, but includes postscript.
+ // This will only be set if the file footer/metadata was read from disk.
+ private final ByteBuffer footerMetaAndPsBuffer;
- static class StripeInformationImpl
+ public static class StripeInformationImpl
implements StripeInformation {
private final OrcProto.StripeInformation stripe;
@@@ -314,61 -310,33 +316,59 @@@
this.path = path;
this.conf = options.getConfiguration();
- FileMetaInfo footerMetaData;
- if (options.getFileMetaInfo() != null) {
- footerMetaData = options.getFileMetaInfo();
+ FileMetadata fileMetadata = options.getFileMetadata();
+ if (fileMetadata != null) {
+ this.compressionKind = fileMetadata.getCompressionKind();
+ this.bufferSize = fileMetadata.getCompressionBufferSize();
+ this.codec = WriterImpl.createCodec(compressionKind);
+ this.metadataSize = fileMetadata.getMetadataSize();
+ this.stripeStats = fileMetadata.getStripeStats();
+ this.versionList = fileMetadata.getVersionList();
+ this.writerVersion = WriterVersion.from(fileMetadata.getWriterVersionNum());
+ this.types = fileMetadata.getTypes();
+ this.rowIndexStride = fileMetadata.getRowIndexStride();
+ this.contentLength = fileMetadata.getContentLength();
+ this.numberOfRows = fileMetadata.getNumberOfRows();
+ this.fileStats = fileMetadata.getFileStats();
+ this.stripes = fileMetadata.getStripes();
-
+ this.inspector = OrcStruct.createObjectInspector(0, fileMetadata.getTypes());
+ this.footerByteBuffer = null; // not cached and not needed here
+ this.userMetadata = null; // not cached and not needed here
+ this.footerMetaAndPsBuffer = null;
} else {
- footerMetaData = extractMetaInfoFromFooter(fs, path,
- options.getMaxLength());
- this.footerMetaAndPsBuffer = footerMetaData.footerMetaAndPsBuffer;
- }
- MetaInfoObjExtractor rInfo =
- new MetaInfoObjExtractor(footerMetaData.compressionType,
- footerMetaData.bufferSize,
- footerMetaData.metadataSize,
- footerMetaData.footerBuffer
- );
- this.footerByteBuffer = footerMetaData.footerBuffer;
- this.compressionKind = rInfo.compressionKind;
- this.codec = rInfo.codec;
- this.bufferSize = rInfo.bufferSize;
- this.metadataSize = rInfo.metadataSize;
- this.metadata = rInfo.metadata;
- this.footer = rInfo.footer;
- this.inspector = rInfo.inspector;
- this.versionList = footerMetaData.versionList;
- this.writerVersion = footerMetaData.writerVersion;
+ FileMetaInfo footerMetaData;
+ if (options.getFileMetaInfo() != null) {
+ footerMetaData = options.getFileMetaInfo();
++ this.footerMetaAndPsBuffer = null;
+ } else {
+ footerMetaData = extractMetaInfoFromFooter(fs, path,
+ options.getMaxLength());
++ this.footerMetaAndPsBuffer = footerMetaData.footerMetaAndPsBuffer;
+ }
+ MetaInfoObjExtractor rInfo =
+ new MetaInfoObjExtractor(footerMetaData.compressionType,
+ footerMetaData.bufferSize,
+ footerMetaData.metadataSize,
+ footerMetaData.footerBuffer
+ );
+ this.footerByteBuffer = footerMetaData.footerBuffer;
+ this.compressionKind = rInfo.compressionKind;
+ this.codec = rInfo.codec;
+ this.bufferSize = rInfo.bufferSize;
+ this.metadataSize = rInfo.metadataSize;
+ this.stripeStats = rInfo.metadata.getStripeStatsList();
+ this.types = rInfo.footer.getTypesList();
+ this.rowIndexStride = rInfo.footer.getRowIndexStride();
+ this.contentLength = rInfo.footer.getContentLength();
+ this.numberOfRows = rInfo.footer.getNumberOfRows();
+ this.userMetadata = rInfo.footer.getMetadataList();
+ this.fileStats = rInfo.footer.getStatisticsList();
+ this.inspector = rInfo.inspector;
+ this.versionList = footerMetaData.versionList;
+ this.writerVersion = footerMetaData.writerVersion;
- this.stripes = new ArrayList<StripeInformation>(rInfo.footer.getStripesCount());
- for(OrcProto.StripeInformation info: rInfo.footer.getStripesList()) {
- this.stripes.add(new StripeInformationImpl(info));
- }
++ this.stripes = convertProtoStripesToStripes(rInfo.footer.getStripesList());
+ }
}
--
/**
* Get the WriterVersion based on the ORC file postscript.
* @param writerVersion the integer writer version
@@@ -383,6 -351,111 +383,115 @@@
return OrcFile.WriterVersion.ORIGINAL;
}
+ /** Extracts the necessary metadata from an externally stored buffer (fullFooterBuffer). */
+ public static FooterInfo extractMetaInfoFromFooter(
+ ByteBuffer bb, Path srcPath) throws IOException {
+ // Read the PostScript. Be very careful as some parts of this historically use bb position
+ // and some use absolute offsets that have to take position into account.
+ int baseOffset = bb.position();
+ int lastByteAbsPos = baseOffset + bb.remaining() - 1;
+ int psLen = bb.get(lastByteAbsPos) & 0xff;
+ int psAbsPos = lastByteAbsPos - psLen;
+ OrcProto.PostScript ps = extractPostScript(bb, srcPath, psLen, psAbsPos);
+ assert baseOffset == bb.position();
+
+ // Extract PS information.
+ int footerSize = (int)ps.getFooterLength(), metadataSize = (int)ps.getMetadataLength(),
+ footerAbsPos = psAbsPos - footerSize, metadataAbsPos = footerAbsPos - metadataSize;
+ String compressionType = ps.getCompression().toString();
+ CompressionCodec codec = WriterImpl.createCodec(CompressionKind.valueOf(compressionType));
+ int bufferSize = (int)ps.getCompressionBlockSize();
+ bb.position(metadataAbsPos);
+ bb.mark();
+
+ // Extract metadata and footer.
- Metadata metadata = new Metadata(extractMetadata(
- bb, metadataAbsPos, metadataSize, codec, bufferSize));
++ OrcProto.Metadata metadata = extractMetadata(
++ bb, metadataAbsPos, metadataSize, codec, bufferSize);
++ List<StripeStatistics> stats = new ArrayList<>(metadata.getStripeStatsCount());
++ for (OrcProto.StripeStatistics ss : metadata.getStripeStatsList()) {
++ stats.add(new StripeStatistics(ss.getColStatsList()));
++ }
+ OrcProto.Footer footer = extractFooter(bb, footerAbsPos, footerSize, codec, bufferSize);
+ bb.position(metadataAbsPos);
+ bb.limit(psAbsPos);
+ // TODO: do we need footer buffer here? FileInfo/FileMetaInfo is a mess...
+ FileMetaInfo fmi = new FileMetaInfo(
+ compressionType, bufferSize, metadataSize, bb, extractWriterVersion(ps));
- return new FooterInfo(metadata, footer, fmi);
++ return new FooterInfo(stats, footer, fmi);
+ }
+
+ private static OrcProto.Footer extractFooter(ByteBuffer bb, int footerAbsPos,
+ int footerSize, CompressionCodec codec, int bufferSize) throws IOException {
+ bb.position(footerAbsPos);
+ bb.limit(footerAbsPos + footerSize);
- InputStream instream = InStream.create("footer", Lists.<DiskRange>newArrayList(
++ InputStream instream = InStream.create(null, "footer", Lists.<DiskRange>newArrayList(
+ new BufferChunk(bb, 0)), footerSize, codec, bufferSize);
+ return OrcProto.Footer.parseFrom(instream);
+ }
+
+ private static OrcProto.Metadata extractMetadata(ByteBuffer bb, int metadataAbsPos,
+ int metadataSize, CompressionCodec codec, int bufferSize) throws IOException {
+ bb.position(metadataAbsPos);
+ bb.limit(metadataAbsPos + metadataSize);
- InputStream instream = InStream.create("metadata", Lists.<DiskRange>newArrayList(
++ InputStream instream = InStream.create(null, "metadata", Lists.<DiskRange>newArrayList(
+ new BufferChunk(bb, 0)), metadataSize, codec, bufferSize);
+ CodedInputStream in = CodedInputStream.newInstance(instream);
+ int msgLimit = DEFAULT_PROTOBUF_MESSAGE_LIMIT;
+ OrcProto.Metadata meta = null;
+ do {
+ try {
+ in.setSizeLimit(msgLimit);
+ meta = OrcProto.Metadata.parseFrom(in);
+ } catch (InvalidProtocolBufferException e) {
+ if (e.getMessage().contains("Protocol message was too large")) {
+ LOG.warn("Metadata section is larger than " + msgLimit + " bytes. Increasing the max" +
+ " size of the coded input stream." );
+
+ msgLimit = msgLimit << 1;
+ if (msgLimit > PROTOBUF_MESSAGE_MAX_LIMIT) {
+ LOG.error("Metadata section exceeds max protobuf message size of " +
+ PROTOBUF_MESSAGE_MAX_LIMIT + " bytes.");
+ throw e;
+ }
+
+ // we must have failed in the middle of reading instream and instream doesn't support
+ // resetting the stream
- instream = InStream.create("metadata", Lists.<DiskRange>newArrayList(
++ instream = InStream.create(null, "metadata", Lists.<DiskRange>newArrayList(
+ new BufferChunk(bb, 0)), metadataSize, codec, bufferSize);
+ in = CodedInputStream.newInstance(instream);
+ } else {
+ throw e;
+ }
+ }
+ } while (meta == null);
+ return meta;
+ }
+
+ private static OrcProto.PostScript extractPostScript(ByteBuffer bb, Path path,
+ int psLen, int psAbsOffset) throws IOException {
+ // TODO: when PB is upgraded to 2.6, newInstance(ByteBuffer) method should be used here.
+ assert bb.hasArray();
+ CodedInputStream in = CodedInputStream.newInstance(
+ bb.array(), bb.arrayOffset() + psAbsOffset, psLen);
+ OrcProto.PostScript ps = OrcProto.PostScript.parseFrom(in);
+ checkOrcVersion(LOG, path, ps.getVersionList());
+
+ // Check compression codec.
+ switch (ps.getCompression()) {
+ case NONE:
+ break;
+ case ZLIB:
+ break;
+ case SNAPPY:
+ break;
+ case LZO:
+ break;
+ default:
+ throw new IllegalArgumentException("Unknown compression");
+ }
+ return ps;
+ }
+
private static FileMetaInfo extractMetaInfoFromFooter(FileSystem fs,
Path path,
long maxFileLength
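The retry loop in extractMetadata above is a general pattern for protobuf messages that may exceed CodedInputStream's size limit: double the limit and re-parse from a fresh stream, since a half-consumed stream cannot be reset. A standalone sketch, with MyMessage standing in for any protobuf-generated type and the limits chosen only for illustration:

    import java.io.IOException;
    import com.google.protobuf.CodedInputStream;
    import com.google.protobuf.InvalidProtocolBufferException;

    class GrowingLimitParseSketch {
      static MyMessage parse(byte[] data) throws IOException {
        int limit = 64 << 20;          // initial cap, e.g. 64MB
        final int maxLimit = 1 << 30;  // hard ceiling before giving up
        while (true) {
          CodedInputStream in = CodedInputStream.newInstance(data);  // fresh stream per attempt
          in.setSizeLimit(limit);
          try {
            return MyMessage.parseFrom(in);
          } catch (InvalidProtocolBufferException e) {
            // Only grow the limit for the "too large" failure; rethrow anything else.
            if (!e.getMessage().contains("Protocol message was too large")) {
              throw e;
            }
            limit <<= 1;
            if (limit > maxLimit) {
              throw e;
            }
          }
        }
      }
    }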
@@@ -544,12 -579,82 +615,48 @@@
}
}
- public FileMetaInfo getFileMetaInfo(){
- /**
- * FileMetaInfo - represents file metadata stored in footer and postscript sections of the file
- * that is useful for Reader implementation
- *
- */
- static class FileMetaInfo {
- private ByteBuffer footerMetaAndPsBuffer;
- final String compressionType;
- final int bufferSize;
- final int metadataSize;
- final ByteBuffer footerBuffer;
- final List<Integer> versionList;
- final OrcFile.WriterVersion writerVersion;
-
- /** Ctor used when reading splits - no version list or full footer buffer. */
- FileMetaInfo(String compressionType, int bufferSize, int metadataSize,
- ByteBuffer footerBuffer, OrcFile.WriterVersion writerVersion) {
- this(compressionType, bufferSize, metadataSize, footerBuffer, null,
- writerVersion, null);
- }
-
- /** Ctor used when creating file info during init and when getting a new one. */
- public FileMetaInfo(String compressionType, int bufferSize, int metadataSize,
- ByteBuffer footerBuffer, List<Integer> versionList, WriterVersion writerVersion,
- ByteBuffer fullFooterBuffer) {
- this.compressionType = compressionType;
- this.bufferSize = bufferSize;
- this.metadataSize = metadataSize;
- this.footerBuffer = footerBuffer;
- this.versionList = versionList;
- this.writerVersion = writerVersion;
- this.footerMetaAndPsBuffer = fullFooterBuffer;
- }
- }
-
+ public FileMetaInfo getFileMetaInfo() {
return new FileMetaInfo(compressionKind.toString(), bufferSize,
- metadataSize, footerByteBuffer, versionList, writerVersion);
+ metadataSize, footerByteBuffer, versionList, writerVersion, footerMetaAndPsBuffer);
}
+ /** Same as FileMetaInfo, but with extra fields. FileMetaInfo is serialized for splits,
+ * so we don't just add fields to it; it's already messy and confusing. */
+ public static final class FooterInfo {
+ private final OrcProto.Footer footer;
- private final Metadata metadata;
++ private final List<StripeStatistics> metadata;
+ private final List<StripeInformation> stripes;
+ private final FileMetaInfo fileMetaInfo;
+
- private FooterInfo(Metadata metadata, OrcProto.Footer footer, FileMetaInfo fileMetaInfo) {
++ private FooterInfo(
++ List<StripeStatistics> metadata, OrcProto.Footer footer, FileMetaInfo fileMetaInfo) {
+ this.metadata = metadata;
+ this.footer = footer;
+ this.fileMetaInfo = fileMetaInfo;
+ this.stripes = convertProtoStripesToStripes(footer.getStripesList());
+ }
+ public OrcProto.Footer getFooter() {
+ return footer;
+ }
+
- public Metadata getMetadata() {
++ public List<StripeStatistics> getMetadata() {
+ return metadata;
+ }
+
+ public FileMetaInfo getFileMetaInfo() {
+ return fileMetaInfo;
+ }
+
+ public List<StripeInformation> getStripes() {
+ return stripes;
+ }
+ }
+
+ @Override
+ public ByteBuffer getSerializedFileFooter() {
+ return footerMetaAndPsBuffer;
+ }
@Override
public RecordReader rows() throws IOException {
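For illustration, how a file-metadata cache might consume the new FooterInfo; obtaining cachedBuffer is left out (it would hold bytes previously returned by getSerializedFileFooter()), while extractMetaInfoFromFooter(ByteBuffer, Path) and the accessors are the ones added above. The sketch assumes the same org.apache.hadoop.hive.ql.io.orc package:

    import java.io.IOException;
    import java.nio.ByteBuffer;
    import java.util.List;
    import org.apache.hadoop.fs.Path;

    class FooterInfoSketch {
      static void inspect(ByteBuffer cachedBuffer, Path path) throws IOException {
        ReaderImpl.FooterInfo info = ReaderImpl.extractMetaInfoFromFooter(cachedBuffer, path);
        List<StripeInformation> stripes = info.getStripes();  // converted from proto once, up front
        List<StripeStatistics> stats = info.getMetadata();    // per-stripe column statistics
        long rows = info.getFooter().getNumberOfRows();       // raw proto footer still available
      }
    }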
@@@ -607,17 -714,22 +714,24 @@@
@Override
public long getRawDataSizeFromColIndices(List<Integer> colIndices) {
- return getRawDataSizeFromColIndices(colIndices, footer);
++ return getRawDataSizeFromColIndices(colIndices, types, fileStats);
+ }
+
+ public static long getRawDataSizeFromColIndices(
- List<Integer> colIndices, OrcProto.Footer footer) {
++ List<Integer> colIndices, List<OrcProto.Type> types,
++ List<OrcProto.ColumnStatistics> stats) {
long result = 0;
for (int colIdx : colIndices) {
- result += getRawDataSizeOfColumn(colIdx);
- result += getRawDataSizeOfColumn(colIdx, footer);
++ result += getRawDataSizeOfColumn(colIdx, types, stats);
}
return result;
}
- private long getRawDataSizeOfColumn(int colIdx) {
- OrcProto.ColumnStatistics colStat = fileStats.get(colIdx);
- private static long getRawDataSizeOfColumn(int colIdx, OrcProto.Footer footer) {
- OrcProto.ColumnStatistics colStat = footer.getStatistics(colIdx);
++ private static long getRawDataSizeOfColumn(int colIdx, List<OrcProto.Type> types,
++ List<OrcProto.ColumnStatistics> stats) {
++ OrcProto.ColumnStatistics colStat = stats.get(colIdx);
long numVals = colStat.getNumberOfValues();
- Type type = footer.getTypes(colIdx);
+ Type type = types.get(colIdx);
switch (type.getKind()) {
case BINARY:
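The refactoring above makes the size estimate a pure function of the footer's type and statistics lists, so it can run against cached metadata without a Footer object. A simplified sketch of that shape (the per-kind byte costs are illustrative, not Hive's exact JavaDataModel accounting):

    import java.util.List;

    class RawSizeSketch {
      static long estimate(List<Integer> colIndices, List<OrcProto.Type> types,
          List<OrcProto.ColumnStatistics> stats) {
        long total = 0;
        for (int colIdx : colIndices) {
          long numVals = stats.get(colIdx).getNumberOfValues();
          switch (types.get(colIdx).getKind()) {
            case INT:
            case LONG:
              total += numVals * 8;   // fixed-width primitives
              break;
            case STRING:
              total += numVals * 16;  // placeholder average width
              break;
            default:
              total += numVals;       // placeholder for the remaining kinds
          }
        }
        return total;
      }
    }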
http://git-wip-us.apache.org/repos/asf/hive/blob/772c4b90/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/772c4b90/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/772c4b90/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/772c4b90/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/772c4b90/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/772c4b90/ql/src/test/results/clientpositive/tez/vector_mapjoin_reduce.q.out
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/772c4b90/ql/src/test/results/clientpositive/vector_inner_join.q.out
----------------------------------------------------------------------
[17/28] hive git commit: HIVE-11440: Create Parquet predicate push down (PPD) unit tests and q-tests (Ferdinand Xu, reviewed by Sergio Pena)
Posted by se...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/f2056a13/ql/src/test/results/clientpositive/parquet_ppd_timestamp.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/parquet_ppd_timestamp.q.out b/ql/src/test/results/clientpositive/parquet_ppd_timestamp.q.out
new file mode 100644
index 0000000..745237d
--- /dev/null
+++ b/ql/src/test/results/clientpositive/parquet_ppd_timestamp.q.out
@@ -0,0 +1,292 @@
+PREHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), ts timestamp) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@newtypestbl
+POSTHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), ts timestamp) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@newtypestbl
+PREHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("2011-01-01 01:01:01" as timestamp) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("2011-01-20 01:01:01" as timestamp) from src src2) uniontbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@newtypestbl
+POSTHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("2011-01-01 01:01:01" as timestamp) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("2011-01-20 01:01:01" as timestamp) from src src2) uniontbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@newtypestbl
+POSTHOOK: Lineage: newtypestbl.c EXPRESSION []
+POSTHOOK: Lineage: newtypestbl.d EXPRESSION []
+POSTHOOK: Lineage: newtypestbl.ts EXPRESSION []
+POSTHOOK: Lineage: newtypestbl.v EXPRESSION []
+PREHOOK: query: -- timestamp data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests)
+select sum(hash(*)) from newtypestbl where cast(ts as string)='2011-01-01 01:01:01'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: -- timestamp data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests)
+select sum(hash(*)) from newtypestbl where cast(ts as string)='2011-01-01 01:01:01'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+445653015500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where cast(ts as string)='2011-01-01 01:01:01'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where cast(ts as string)='2011-01-01 01:01:01'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+445653015500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts=cast('2011-01-01 01:01:01' as timestamp)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts=cast('2011-01-01 01:01:01' as timestamp)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+445653015500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts=cast('2011-01-01 01:01:01' as timestamp)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts=cast('2011-01-01 01:01:01' as timestamp)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+445653015500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts=cast('2011-01-01 01:01:01' as varchar(20))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts=cast('2011-01-01 01:01:01' as varchar(20))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+445653015500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts=cast('2011-01-01 01:01:01' as varchar(20))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts=cast('2011-01-01 01:01:01' as varchar(20))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+445653015500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts!=cast('2011-01-01 01:01:01' as timestamp)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts!=cast('2011-01-01 01:01:01' as timestamp)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+1033237945500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts!=cast('2011-01-01 01:01:01' as timestamp)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts!=cast('2011-01-01 01:01:01' as timestamp)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+1033237945500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts<cast('2011-01-20 01:01:01' as timestamp)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts<cast('2011-01-20 01:01:01' as timestamp)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+445653015500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts<cast('2011-01-20 01:01:01' as timestamp)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts<cast('2011-01-20 01:01:01' as timestamp)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+445653015500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts<cast('2011-01-22 01:01:01' as timestamp)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts<cast('2011-01-22 01:01:01' as timestamp)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+1478890961000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts<cast('2011-01-22 01:01:01' as timestamp)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts<cast('2011-01-22 01:01:01' as timestamp)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+1478890961000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts<cast('2010-10-01 01:01:01' as timestamp)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts<cast('2010-10-01 01:01:01' as timestamp)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts<cast('2010-10-01 01:01:01' as timestamp)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts<cast('2010-10-01 01:01:01' as timestamp)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts<=cast('2011-01-01 01:01:01' as timestamp)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts<=cast('2011-01-01 01:01:01' as timestamp)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+445653015500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts<=cast('2011-01-01 01:01:01' as timestamp)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts<=cast('2011-01-01 01:01:01' as timestamp)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+445653015500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts<=cast('2011-01-20 01:01:01' as timestamp)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts<=cast('2011-01-20 01:01:01' as timestamp)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+1478890961000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts<=cast('2011-01-20 01:01:01' as timestamp)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts<=cast('2011-01-20 01:01:01' as timestamp)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+1478890961000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-02 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-02 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+1033237945500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-02 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-02 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+1033237945500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-01 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-01 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+1478890961000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-01 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-01 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+1478890961000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-02 01:01:01' as timestamp), cast('2011-01-08 01:01:01' as timestamp))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-02 01:01:01' as timestamp), cast('2011-01-08 01:01:01' as timestamp))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-02 01:01:01' as timestamp), cast('2011-01-08 01:01:01' as timestamp))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-02 01:01:01' as timestamp), cast('2011-01-08 01:01:01' as timestamp))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-08 01:01:01' as timestamp)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-08 01:01:01' as timestamp)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+445653015500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-08 01:01:01' as timestamp)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-08 01:01:01' as timestamp)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+445653015500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-25 01:01:01' as timestamp)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-25 01:01:01' as timestamp)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+1478890961000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-25 01:01:01' as timestamp)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-25 01:01:01' as timestamp)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+1478890961000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2010-11-01 01:01:01' as timestamp)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2010-11-01 01:01:01' as timestamp)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2010-11-01 01:01:01' as timestamp)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2010-11-01 01:01:01' as timestamp)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
http://git-wip-us.apache.org/repos/asf/hive/blob/f2056a13/ql/src/test/results/clientpositive/parquet_ppd_varchar.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/parquet_ppd_varchar.q.out b/ql/src/test/results/clientpositive/parquet_ppd_varchar.q.out
new file mode 100644
index 0000000..23e3cd0
--- /dev/null
+++ b/ql/src/test/results/clientpositive/parquet_ppd_varchar.q.out
@@ -0,0 +1,220 @@
+PREHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), da date) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@newtypestbl
+POSTHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), da date) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@newtypestbl
+PREHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2) uniontbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@newtypestbl
+POSTHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2) uniontbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@newtypestbl
+POSTHOOK: Lineage: newtypestbl.c EXPRESSION []
+POSTHOOK: Lineage: newtypestbl.d EXPRESSION []
+POSTHOOK: Lineage: newtypestbl.da EXPRESSION []
+POSTHOOK: Lineage: newtypestbl.v EXPRESSION []
+PREHOOK: query: -- varchar data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests)
+select sum(hash(*)) from newtypestbl where v="bee"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: -- varchar data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests)
+select sum(hash(*)) from newtypestbl where v="bee"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where v="bee"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where v="bee"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where v!="bee"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where v!="bee"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427804500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where v!="bee"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where v!="bee"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427804500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where v<"world"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where v<"world"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where v<"world"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where v<"world"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where v<="world"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where v<="world"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where v<="world"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where v<="world"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where v="bee "
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where v="bee "
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where v="bee "
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where v="bee "
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where v in ("bee", "orange")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where v in ("bee", "orange")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where v in ("bee", "orange")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where v in ("bee", "orange")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where v in ("bee", "world")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where v in ("bee", "world")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where v in ("bee", "world")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where v in ("bee", "world")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where v in ("orange")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where v in ("orange")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where v in ("orange")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where v in ("orange")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where v between "bee" and "orange"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where v between "bee" and "orange"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where v between "bee" and "orange"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where v between "bee" and "orange"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where v between "bee" and "zombie"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where v between "bee" and "zombie"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where v between "bee" and "zombie"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where v between "bee" and "zombie"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where v between "orange" and "pine"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where v between "orange" and "pine"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where v between "orange" and "pine"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where v between "orange" and "pine"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
[25/28] hive git commit: HIVE-11383: Upgrade Hive to Calcite 1.4 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Posted by se...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
index 4d797f2..7595c3e 100644
--- a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
@@ -2713,62 +2713,62 @@ Stage-0
limit:-1
Stage-1
Reducer 6
- File Output Operator [FS_50]
+ File Output Operator [FS_53]
compressed:false
Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE
table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
- Select Operator [SEL_49]
+ Select Operator [SEL_52]
| outputColumnNames:["_col0","_col1","_col2"]
| Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE
|<-Reducer 5 [SIMPLE_EDGE]
- Reduce Output Operator [RS_48]
+ Reduce Output Operator [RS_51]
key expressions:_col1 (type: bigint), _col0 (type: string)
sort order:++
Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE
value expressions:_col2 (type: bigint)
- Group By Operator [GBY_46]
+ Group By Operator [GBY_49]
| aggregations:["count(VALUE._col0)"]
| keys:KEY._col0 (type: string), KEY._col1 (type: bigint)
| outputColumnNames:["_col0","_col1","_col2"]
| Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE
|<-Reducer 4 [SIMPLE_EDGE]
- Reduce Output Operator [RS_45]
+ Reduce Output Operator [RS_48]
key expressions:_col0 (type: string), _col1 (type: bigint)
Map-reduce partition columns:_col0 (type: string), _col1 (type: bigint)
sort order:++
Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE
value expressions:_col2 (type: bigint)
- Group By Operator [GBY_44]
+ Group By Operator [GBY_47]
aggregations:["count()"]
keys:_col0 (type: string), _col1 (type: bigint)
outputColumnNames:["_col0","_col1","_col2"]
Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE
- Merge Join Operator [MERGEJOIN_63]
+ Merge Join Operator [MERGEJOIN_66]
| condition map:[{"":"Left Semi Join 0 to 1"},{"":"Left Semi Join 0 to 2"}]
| keys:{"2":"_col0 (type: string)","1":"_col0 (type: string)","0":"_col0 (type: string)"}
| outputColumnNames:["_col0","_col1"]
| Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE
|<-Map 10 [SIMPLE_EDGE]
- | Reduce Output Operator [RS_41]
+ | Reduce Output Operator [RS_44]
| key expressions:_col0 (type: string)
| Map-reduce partition columns:_col0 (type: string)
| sort order:+
- | Statistics:Num rows: 6 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE
- | Group By Operator [GBY_35]
+ | Statistics:Num rows: 3 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE
+ | Group By Operator [GBY_38]
| keys:_col0 (type: string)
| outputColumnNames:["_col0"]
- | Statistics:Num rows: 6 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE
- | Select Operator [SEL_31]
+ | Statistics:Num rows: 3 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE
+ | Select Operator [SEL_34]
| outputColumnNames:["_col0"]
- | Statistics:Num rows: 18 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE
- | Filter Operator [FIL_61]
- | predicate:key is not null (type: boolean)
- | Statistics:Num rows: 18 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE
- | TableScan [TS_30]
+ | Statistics:Num rows: 6 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE
+ | Filter Operator [FIL_64]
+ | predicate:(UDFToDouble(key) > 0.0) (type: boolean)
+ | Statistics:Num rows: 6 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE
+ | TableScan [TS_32]
| alias:cbo_t3
| Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE
|<-Reducer 3 [SIMPLE_EDGE]
- | Reduce Output Operator [RS_37]
+ | Reduce Output Operator [RS_40]
| key expressions:_col0 (type: string)
| Map-reduce partition columns:_col0 (type: string)
| sort order:+
@@ -2783,16 +2783,16 @@ Stage-0
| sort order:-+
| Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE
| value expressions:_col0 (type: string)
- | Filter Operator [FIL_57]
+ | Filter Operator [FIL_60]
| predicate:(((_col1 + 1) >= 0) and ((_col1 > 0) or (UDFToDouble(_col0) >= 0.0))) (type: boolean)
| Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE
| Select Operator [SEL_9]
| outputColumnNames:["_col0","_col1","_col2","_col3"]
| Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE
- | Filter Operator [FIL_58]
+ | Filter Operator [FIL_61]
| predicate:(((UDFToDouble(_col2) + UDFToDouble(_col3)) >= 0.0) and ((UDFToDouble(_col2) >= 1.0) or (_col3 >= 1))) (type: boolean)
| Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE
- | Select Operator [SEL_62]
+ | Select Operator [SEL_65]
| outputColumnNames:["_col1","_col2","_col3"]
| Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE
| Group By Operator [GBY_8]
@@ -2815,19 +2815,19 @@ Stage-0
| Select Operator [SEL_2]
| outputColumnNames:["_col0","_col1","_col2"]
| Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE
- | Filter Operator [FIL_59]
+ | Filter Operator [FIL_62]
| predicate:((((((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and (c_float > 0.0)) and ((c_int >= 1) or (c_float >= 1.0))) and ((UDFToFloat(c_int) + c_float) >= 0.0)) and (UDFToDouble(key) > 0.0)) and key is not null) (type: boolean)
| Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE
| TableScan [TS_0]
| alias:cbo_t1
| Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE
|<-Reducer 9 [SIMPLE_EDGE]
- Reduce Output Operator [RS_39]
+ Reduce Output Operator [RS_42]
key expressions:_col0 (type: string)
Map-reduce partition columns:_col0 (type: string)
sort order:+
Statistics:Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator [GBY_33]
+ Group By Operator [GBY_36]
keys:_col0 (type: string)
outputColumnNames:["_col0"]
Statistics:Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE
@@ -2862,8 +2862,8 @@ Stage-0
Select Operator [SEL_19]
outputColumnNames:["_col0","_col1","_col2"]
Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator [FIL_60]
- predicate:(((((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and (c_float > 0.0)) and ((c_int >= 1) or (c_float >= 1.0))) and ((UDFToFloat(c_int) + c_float) >= 0.0)) and key is not null) (type: boolean)
+ Filter Operator [FIL_63]
+ predicate:((((((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and (c_float > 0.0)) and ((c_int >= 1) or (c_float >= 1.0))) and ((UDFToFloat(c_int) + c_float) >= 0.0)) and (UDFToDouble(key) > 0.0)) and key is not null) (type: boolean)
Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE
TableScan [TS_17]
alias:cbo_t2
@@ -3181,47 +3181,47 @@ Stage-0
limit:-1
Stage-1
Reducer 2
- File Output Operator [FS_14]
+ File Output Operator [FS_16]
compressed:false
- Statistics:Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics:Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE
table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
- Merge Join Operator [MERGEJOIN_19]
+ Merge Join Operator [MERGEJOIN_21]
| condition map:[{"":"Left Semi Join 0 to 1"}]
| keys:{"1":"_col0 (type: string), _col1 (type: string)","0":"_col1 (type: string), _col0 (type: string)"}
| outputColumnNames:["_col0","_col1"]
- | Statistics:Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
+ | Statistics:Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE
|<-Map 1 [SIMPLE_EDGE]
- | Reduce Output Operator [RS_9]
+ | Reduce Output Operator [RS_11]
| key expressions:_col1 (type: string), _col0 (type: string)
| Map-reduce partition columns:_col1 (type: string), _col0 (type: string)
| sort order:++
- | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ | Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
| Select Operator [SEL_2]
| outputColumnNames:["_col0","_col1"]
- | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- | Filter Operator [FIL_17]
- | predicate:(value is not null and key is not null) (type: boolean)
- | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ | Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+ | Filter Operator [FIL_19]
+ | predicate:((value > 'val_9') and key is not null) (type: boolean)
+ | Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
| TableScan [TS_0]
| alias:b
| Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
|<-Map 3 [SIMPLE_EDGE]
- Reduce Output Operator [RS_11]
+ Reduce Output Operator [RS_13]
key expressions:_col0 (type: string), _col1 (type: string)
Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
sort order:++
Statistics:Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator [GBY_7]
+ Group By Operator [GBY_9]
keys:_col0 (type: string), _col1 (type: string)
outputColumnNames:["_col0","_col1"]
Statistics:Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator [SEL_5]
+ Select Operator [SEL_7]
outputColumnNames:["_col0","_col1"]
Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator [FIL_18]
+ Filter Operator [FIL_20]
predicate:((value > 'val_9') and key is not null) (type: boolean)
Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
- TableScan [TS_3]
+ TableScan [TS_5]
alias:b
Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
@@ -3253,47 +3253,47 @@ Stage-0
limit:-1
Stage-1
Reducer 2
- File Output Operator [FS_14]
+ File Output Operator [FS_16]
compressed:false
- Statistics:Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics:Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE
table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
- Merge Join Operator [MERGEJOIN_19]
+ Merge Join Operator [MERGEJOIN_21]
| condition map:[{"":"Left Semi Join 0 to 1"}]
| keys:{"1":"_col0 (type: string), _col1 (type: string)","0":"_col1 (type: string), _col0 (type: string)"}
| outputColumnNames:["_col0","_col1"]
- | Statistics:Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
+ | Statistics:Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE
|<-Map 1 [SIMPLE_EDGE]
- | Reduce Output Operator [RS_9]
+ | Reduce Output Operator [RS_11]
| key expressions:_col1 (type: string), _col0 (type: string)
| Map-reduce partition columns:_col1 (type: string), _col0 (type: string)
| sort order:++
- | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ | Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
| Select Operator [SEL_2]
| outputColumnNames:["_col0","_col1"]
- | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- | Filter Operator [FIL_17]
- | predicate:(value is not null and key is not null) (type: boolean)
- | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ | Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+ | Filter Operator [FIL_19]
+ | predicate:((value > 'val_9') and key is not null) (type: boolean)
+ | Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
| TableScan [TS_0]
| alias:b
| Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
|<-Map 3 [SIMPLE_EDGE]
- Reduce Output Operator [RS_11]
+ Reduce Output Operator [RS_13]
key expressions:_col0 (type: string), _col1 (type: string)
Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
sort order:++
Statistics:Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator [GBY_7]
+ Group By Operator [GBY_9]
keys:_col0 (type: string), _col1 (type: string)
outputColumnNames:["_col0","_col1"]
Statistics:Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator [SEL_5]
+ Select Operator [SEL_7]
outputColumnNames:["_col0","_col1"]
Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator [FIL_18]
+ Filter Operator [FIL_20]
predicate:((value > 'val_9') and key is not null) (type: boolean)
Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
- TableScan [TS_3]
+ TableScan [TS_5]
alias:b
Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
@@ -3315,48 +3315,48 @@ Stage-0
limit:-1
Stage-1
Reducer 2
- File Output Operator [FS_14]
+ File Output Operator [FS_16]
compressed:false
- Statistics:Num rows: 168 Data size: 29904 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
- Merge Join Operator [MERGEJOIN_19]
+ Merge Join Operator [MERGEJOIN_21]
| condition map:[{"":"Left Semi Join 0 to 1"}]
| keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"}
| outputColumnNames:["_col0","_col1"]
- | Statistics:Num rows: 168 Data size: 29904 Basic stats: COMPLETE Column stats: COMPLETE
+ | Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
|<-Map 1 [SIMPLE_EDGE]
- | Reduce Output Operator [RS_9]
+ | Reduce Output Operator [RS_11]
| key expressions:_col0 (type: string)
| Map-reduce partition columns:_col0 (type: string)
| sort order:+
- | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ | Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
| value expressions:_col1 (type: string)
| Select Operator [SEL_2]
| outputColumnNames:["_col0","_col1"]
- | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- | Filter Operator [FIL_17]
- | predicate:key is not null (type: boolean)
- | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ | Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+ | Filter Operator [FIL_19]
+ | predicate:(key > '9') (type: boolean)
+ | Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
| TableScan [TS_0]
| alias:src_cbo
| Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
|<-Map 3 [SIMPLE_EDGE]
- Reduce Output Operator [RS_11]
+ Reduce Output Operator [RS_13]
key expressions:_col0 (type: string)
Map-reduce partition columns:_col0 (type: string)
sort order:+
Statistics:Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator [GBY_7]
+ Group By Operator [GBY_9]
keys:_col0 (type: string)
outputColumnNames:["_col0"]
Statistics:Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator [SEL_5]
+ Select Operator [SEL_7]
outputColumnNames:["_col0"]
Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator [FIL_18]
+ Filter Operator [FIL_20]
predicate:(key > '9') (type: boolean)
Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
- TableScan [TS_3]
+ TableScan [TS_5]
alias:src_cbo
Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
@@ -3382,92 +3382,92 @@ Stage-0
limit:-1
Stage-1
Reducer 3
- File Output Operator [FS_26]
+ File Output Operator [FS_28]
compressed:false
Statistics:Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
- Select Operator [SEL_25]
+ Select Operator [SEL_27]
outputColumnNames:["_col0","_col1"]
Statistics:Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
- Merge Join Operator [MERGEJOIN_36]
+ Merge Join Operator [MERGEJOIN_38]
| condition map:[{"":"Inner Join 0 to 1"}]
| keys:{"1":"_col0 (type: int)","0":"_col1 (type: int)"}
| outputColumnNames:["_col1","_col2"]
| Statistics:Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
|<-Reducer 2 [SIMPLE_EDGE]
- | Reduce Output Operator [RS_21]
+ | Reduce Output Operator [RS_23]
| key expressions:_col1 (type: int)
| Map-reduce partition columns:_col1 (type: int)
| sort order:+
| Statistics:Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
| value expressions:_col2 (type: int)
- | Merge Join Operator [MERGEJOIN_35]
+ | Merge Join Operator [MERGEJOIN_37]
| | condition map:[{"":"Left Semi Join 0 to 1"}]
- | | keys:{"1":"_col0 (type: int), _col1 (type: int)","0":"_col0 (type: int), _col3 (type: int)"}
+ | | keys:{"1":"_col0 (type: int)","0":"_col0 (type: int)"}
| | outputColumnNames:["_col1","_col2"]
| | Statistics:Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
| |<-Map 1 [SIMPLE_EDGE]
- | | Reduce Output Operator [RS_16]
- | | key expressions:_col0 (type: int), _col3 (type: int)
- | | Map-reduce partition columns:_col0 (type: int), _col3 (type: int)
- | | sort order:++
+ | | Reduce Output Operator [RS_18]
+ | | key expressions:_col0 (type: int)
+ | | Map-reduce partition columns:_col0 (type: int)
+ | | sort order:+
| | Statistics:Num rows: 16 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE
| | value expressions:_col1 (type: int), _col2 (type: int)
| | Select Operator [SEL_2]
- | | outputColumnNames:["_col0","_col1","_col2","_col3"]
+ | | outputColumnNames:["_col0","_col1","_col2"]
| | Statistics:Num rows: 16 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE
- | | Filter Operator [FIL_32]
+ | | Filter Operator [FIL_34]
| | predicate:(((l_linenumber = 1) and l_orderkey is not null) and l_partkey is not null) (type: boolean)
| | Statistics:Num rows: 16 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE
| | TableScan [TS_0]
| | alias:lineitem
| | Statistics:Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: COMPLETE
| |<-Map 4 [SIMPLE_EDGE]
- | Reduce Output Operator [RS_18]
- | key expressions:_col0 (type: int), _col1 (type: int)
- | Map-reduce partition columns:_col0 (type: int), _col1 (type: int)
- | sort order:++
- | Statistics:Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
- | Group By Operator [GBY_14]
- | keys:_col0 (type: int), _col1 (type: int)
- | outputColumnNames:["_col0","_col1"]
- | Statistics:Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+ | Reduce Output Operator [RS_20]
+ | key expressions:_col0 (type: int)
+ | Map-reduce partition columns:_col0 (type: int)
+ | sort order:+
+ | Statistics:Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ | Group By Operator [GBY_16]
+ | keys:_col0 (type: int)
+ | outputColumnNames:["_col0"]
+ | Statistics:Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
| Select Operator [SEL_5]
- | outputColumnNames:["_col0","_col1"]
- | Statistics:Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
- | Filter Operator [FIL_33]
- | predicate:(((l_shipmode = 'AIR') and l_orderkey is not null) and l_linenumber is not null) (type: boolean)
+ | outputColumnNames:["_col0"]
+ | Statistics:Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
+ | Filter Operator [FIL_35]
+ | predicate:(((l_shipmode = 'AIR') and (l_linenumber = 1)) and l_orderkey is not null) (type: boolean)
| Statistics:Num rows: 14 Data size: 1344 Basic stats: COMPLETE Column stats: COMPLETE
| TableScan [TS_3]
| alias:lineitem
| Statistics:Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: COMPLETE
|<-Reducer 6 [SIMPLE_EDGE]
- Reduce Output Operator [RS_23]
+ Reduce Output Operator [RS_25]
key expressions:_col0 (type: int)
Map-reduce partition columns:_col0 (type: int)
sort order:+
Statistics:Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator [GBY_11]
+ Group By Operator [GBY_13]
| keys:KEY._col0 (type: int)
| outputColumnNames:["_col0"]
| Statistics:Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
|<-Map 5 [SIMPLE_EDGE]
- Reduce Output Operator [RS_10]
+ Reduce Output Operator [RS_12]
key expressions:_col0 (type: int)
Map-reduce partition columns:_col0 (type: int)
sort order:+
Statistics:Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator [GBY_9]
+ Group By Operator [GBY_11]
keys:_col0 (type: int)
outputColumnNames:["_col0"]
Statistics:Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator [SEL_7]
+ Select Operator [SEL_9]
outputColumnNames:["_col0"]
Statistics:Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator [FIL_34]
+ Filter Operator [FIL_36]
predicate:l_partkey is not null (type: boolean)
Statistics:Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
- TableScan [TS_6]
+ TableScan [TS_8]
alias:lineitem
Statistics:Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: COMPLETE
@@ -3496,125 +3496,125 @@ Stage-0
limit:-1
Stage-1
Reducer 4
- File Output Operator [FS_36]
+ File Output Operator [FS_38]
compressed:false
Statistics:Num rows: 34 Data size: 6324 Basic stats: COMPLETE Column stats: COMPLETE
table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
- Merge Join Operator [MERGEJOIN_49]
+ Merge Join Operator [MERGEJOIN_51]
| condition map:[{"":"Left Semi Join 0 to 1"}]
| keys:{"1":"_col0 (type: bigint)","0":"_col2 (type: bigint)"}
| outputColumnNames:["_col0","_col1","_col2"]
| Statistics:Num rows: 34 Data size: 6324 Basic stats: COMPLETE Column stats: COMPLETE
|<-Reducer 3 [SIMPLE_EDGE]
- | Reduce Output Operator [RS_31]
+ | Reduce Output Operator [RS_33]
| key expressions:_col2 (type: bigint)
| Map-reduce partition columns:_col2 (type: bigint)
| sort order:+
- | Statistics:Num rows: 84 Data size: 15624 Basic stats: COMPLETE Column stats: COMPLETE
+ | Statistics:Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE
| value expressions:_col0 (type: string), _col1 (type: string)
- | Filter Operator [FIL_42]
+ | Filter Operator [FIL_44]
| predicate:_col2 is not null (type: boolean)
- | Statistics:Num rows: 84 Data size: 15624 Basic stats: COMPLETE Column stats: COMPLETE
- | Group By Operator [GBY_18]
+ | Statistics:Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE
+ | Group By Operator [GBY_20]
| | aggregations:["count(VALUE._col0)"]
| | keys:KEY._col0 (type: string), KEY._col1 (type: string)
| | outputColumnNames:["_col0","_col1","_col2"]
- | | Statistics:Num rows: 84 Data size: 15624 Basic stats: COMPLETE Column stats: COMPLETE
+ | | Statistics:Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE
| |<-Reducer 2 [SIMPLE_EDGE]
- | Reduce Output Operator [RS_17]
+ | Reduce Output Operator [RS_19]
| key expressions:_col0 (type: string), _col1 (type: string)
| Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
| sort order:++
- | Statistics:Num rows: 84 Data size: 15624 Basic stats: COMPLETE Column stats: COMPLETE
+ | Statistics:Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE
| value expressions:_col2 (type: bigint)
- | Group By Operator [GBY_16]
+ | Group By Operator [GBY_18]
| aggregations:["count()"]
| keys:_col0 (type: string), _col1 (type: string)
| outputColumnNames:["_col0","_col1","_col2"]
- | Statistics:Num rows: 84 Data size: 15624 Basic stats: COMPLETE Column stats: COMPLETE
- | Merge Join Operator [MERGEJOIN_48]
+ | Statistics:Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE
+ | Merge Join Operator [MERGEJOIN_50]
| | condition map:[{"":"Left Semi Join 0 to 1"}]
| | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"}
| | outputColumnNames:["_col0","_col1"]
- | | Statistics:Num rows: 168 Data size: 29904 Basic stats: COMPLETE Column stats: COMPLETE
+ | | Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
| |<-Map 1 [SIMPLE_EDGE]
- | | Reduce Output Operator [RS_11]
+ | | Reduce Output Operator [RS_13]
| | key expressions:_col0 (type: string)
| | Map-reduce partition columns:_col0 (type: string)
| | sort order:+
- | | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ | | Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
| | value expressions:_col1 (type: string)
| | Select Operator [SEL_2]
| | outputColumnNames:["_col0","_col1"]
- | | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- | | Filter Operator [FIL_43]
- | | predicate:key is not null (type: boolean)
- | | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ | | Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+ | | Filter Operator [FIL_45]
+ | | predicate:(key > '8') (type: boolean)
+ | | Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
| | TableScan [TS_0]
| | alias:b
| | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
| |<-Map 5 [SIMPLE_EDGE]
- | Reduce Output Operator [RS_13]
+ | Reduce Output Operator [RS_15]
| key expressions:_col0 (type: string)
| Map-reduce partition columns:_col0 (type: string)
| sort order:+
| Statistics:Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE
- | Group By Operator [GBY_9]
+ | Group By Operator [GBY_11]
| keys:_col0 (type: string)
| outputColumnNames:["_col0"]
| Statistics:Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE
- | Select Operator [SEL_7]
+ | Select Operator [SEL_9]
| outputColumnNames:["_col0"]
| Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
- | Filter Operator [FIL_44]
+ | Filter Operator [FIL_46]
| predicate:(key > '8') (type: boolean)
| Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
- | TableScan [TS_5]
+ | TableScan [TS_7]
| alias:b
| Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
|<-Reducer 7 [SIMPLE_EDGE]
- Reduce Output Operator [RS_33]
+ Reduce Output Operator [RS_35]
key expressions:_col0 (type: bigint)
Map-reduce partition columns:_col0 (type: bigint)
sort order:+
Statistics:Num rows: 34 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator [GBY_29]
+ Group By Operator [GBY_31]
keys:_col0 (type: bigint)
outputColumnNames:["_col0"]
Statistics:Num rows: 34 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator [SEL_27]
+ Select Operator [SEL_29]
outputColumnNames:["_col0"]
Statistics:Num rows: 69 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator [FIL_45]
+ Filter Operator [FIL_47]
predicate:_col1 is not null (type: boolean)
Statistics:Num rows: 69 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator [SEL_47]
+ Select Operator [SEL_49]
outputColumnNames:["_col1"]
Statistics:Num rows: 69 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator [GBY_26]
+ Group By Operator [GBY_28]
| aggregations:["count(VALUE._col0)"]
| keys:KEY._col0 (type: string)
| outputColumnNames:["_col0","_col1"]
| Statistics:Num rows: 69 Data size: 6555 Basic stats: COMPLETE Column stats: COMPLETE
|<-Map 6 [SIMPLE_EDGE]
- Reduce Output Operator [RS_25]
+ Reduce Output Operator [RS_27]
key expressions:_col0 (type: string)
Map-reduce partition columns:_col0 (type: string)
sort order:+
Statistics:Num rows: 69 Data size: 6555 Basic stats: COMPLETE Column stats: COMPLETE
value expressions:_col1 (type: bigint)
- Group By Operator [GBY_24]
+ Group By Operator [GBY_26]
aggregations:["count()"]
keys:_col0 (type: string)
outputColumnNames:["_col0","_col1"]
Statistics:Num rows: 69 Data size: 6555 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator [SEL_22]
+ Select Operator [SEL_24]
outputColumnNames:["_col0"]
Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator [FIL_46]
+ Filter Operator [FIL_48]
predicate:(key > '9') (type: boolean)
Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
- TableScan [TS_20]
+ TableScan [TS_22]
alias:b
Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/tez/subquery_exists.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/subquery_exists.q.out b/ql/src/test/results/clientpositive/tez/subquery_exists.q.out
index 503a5e9..41aa048 100644
--- a/ql/src/test/results/clientpositive/tez/subquery_exists.q.out
+++ b/ql/src/test/results/clientpositive/tez/subquery_exists.q.out
@@ -41,17 +41,17 @@ STAGE PLANS:
alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (value is not null and key is not null) (type: boolean)
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((value > 'val_9') and key is not null) (type: boolean)
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col1 (type: string), _col0 (type: string)
sort order: ++
Map-reduce partition columns: _col1 (type: string), _col0 (type: string)
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
Map 3
Map Operator Tree:
TableScan
@@ -83,10 +83,10 @@ STAGE PLANS:
0 _col1 (type: string), _col0 (type: string)
1 _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/tez/subquery_in.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/subquery_in.q.out b/ql/src/test/results/clientpositive/tez/subquery_in.q.out
index 38f4bc6..2b1237b 100644
--- a/ql/src/test/results/clientpositive/tez/subquery_in.q.out
+++ b/ql/src/test/results/clientpositive/tez/subquery_in.q.out
@@ -31,17 +31,17 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ predicate: (key > '9') (type: boolean)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Map 3
Map Operator Tree:
@@ -74,10 +74,10 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -149,17 +149,17 @@ STAGE PLANS:
alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key is not null and value is not null) (type: boolean)
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((key > '9') and value is not null) (type: boolean)
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
Map 3
Map Operator Tree:
TableScan
@@ -191,10 +191,10 @@ STAGE PLANS:
0 _col0 (type: string), _col1 (type: string)
1 _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -627,17 +627,17 @@ STAGE PLANS:
alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key is not null and value is not null) (type: boolean)
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((key > '9') and value is not null) (type: boolean)
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
Map 3
Map Operator Tree:
TableScan
@@ -669,10 +669,10 @@ STAGE PLANS:
0 _col0 (type: string), _col1 (type: string)
1 _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
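Note: the subquery_in.q.out updates above all follow one pattern. With Calcite 1.4, a predicate that the IN-subquery applies to its join key (key > '9') is also inferred on the outer side of the left semi join, replacing the weaker "key is not null" guard and shrinking the row estimates. The following is a minimal stand-alone sketch of why that inference is safe; it is illustrative Java, not Hive code, and the table contents are invented for the demo.

import java.util.*;
import java.util.stream.*;

// Toy left semi join where the subquery side filters its key with
// key > "9" (string comparison, as in the plans above). Because the
// join is an equi-join on that key, the same predicate can be applied
// on the outer side: it only removes rows that could never match.
public class SemiJoinPushdown {
  public static void main(String[] args) {
    List<String[]> outer = Arrays.asList(
        new String[]{"90", "val_90"},
        new String[]{"86", "val_86"},
        new String[]{"98", "val_98"});

    // Subquery side: keys satisfying key > '9'.
    Set<String> subqueryKeys = Stream.of("90", "98", "97")
        .filter(k -> k.compareTo("9") > 0)
        .collect(Collectors.toSet());

    // Old plan shape: only "key is not null" on the outer side.
    List<String[]> before = outer.stream()
        .filter(r -> r[0] != null)
        .filter(r -> subqueryKeys.contains(r[0]))
        .collect(Collectors.toList());

    // New plan shape: the join-key predicate is inferred on the outer
    // side too, so fewer rows reach the join.
    List<String[]> after = outer.stream()
        .filter(r -> r[0] != null && r[0].compareTo("9") > 0)
        .filter(r -> subqueryKeys.contains(r[0]))
        .collect(Collectors.toList());

    // Same result either way; the pushed filter only prunes non-matches.
    System.out.println(before.size() == after.size());  // true
  }
}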
http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/tez/vector_inner_join.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_inner_join.q.out b/ql/src/test/results/clientpositive/tez/vector_inner_join.q.out
index af80260..b9d601a 100644
--- a/ql/src/test/results/clientpositive/tez/vector_inner_join.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_inner_join.q.out
@@ -150,11 +150,11 @@ STAGE PLANS:
outputColumnNames: _col0
input vertices:
1 Map 2
- Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
HybridGraceHashJoin: true
File Output Operator
compressed: false
- Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -166,22 +166,22 @@ STAGE PLANS:
alias: t1
Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: a is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ predicate: (a > 2) (type: boolean)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: a (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
Group By Operator
keys: _col0 (type: int)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Stage: Stage-0
http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/tez/vector_mapjoin_reduce.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_mapjoin_reduce.q.out b/ql/src/test/results/clientpositive/tez/vector_mapjoin_reduce.q.out
index 29b86bd..e7fec82 100644
--- a/ql/src/test/results/clientpositive/tez/vector_mapjoin_reduce.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_mapjoin_reduce.q.out
@@ -202,15 +202,15 @@ STAGE PLANS:
predicate: (((l_linenumber = 1) and l_orderkey is not null) and l_partkey is not null) (type: boolean)
Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int), 1 (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3
+ expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Left Semi Join 0 to 1
keys:
- 0 _col0 (type: int), _col3 (type: int)
- 1 _col0 (type: int), _col1 (type: int)
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
outputColumnNames: _col1, _col2
input vertices:
1 Map 2
@@ -228,21 +228,21 @@ STAGE PLANS:
alias: lineitem
Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((l_shipmode = 'AIR') and l_orderkey is not null) and l_linenumber is not null) (type: boolean)
+ predicate: (((l_shipmode = 'AIR') and (l_linenumber = 1)) and l_orderkey is not null) (type: boolean)
Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: l_orderkey (type: int), l_linenumber (type: int)
- outputColumnNames: _col0, _col1
+ expressions: l_orderkey (type: int)
+ outputColumnNames: _col0
Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- keys: _col0 (type: int), _col1 (type: int)
+ keys: _col0 (type: int)
mode: hash
- outputColumnNames: _col0, _col1
+ outputColumnNames: _col0
Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: int)
- sort order: ++
- Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
Map 3
Map Operator Tree:
http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/vector_inner_join.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_inner_join.q.out b/ql/src/test/results/clientpositive/vector_inner_join.q.out
index 3e8d2f4..4775ae9 100644
--- a/ql/src/test/results/clientpositive/vector_inner_join.q.out
+++ b/ql/src/test/results/clientpositive/vector_inner_join.q.out
@@ -136,17 +136,17 @@ STAGE PLANS:
alias: t1
Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: a is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ predicate: (a > 2) (type: boolean)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: a (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
Group By Operator
keys: _col0 (type: int)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
HashTable Sink Operator
keys:
0 _col0 (type: int)
@@ -172,10 +172,10 @@ STAGE PLANS:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out b/ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out
index 16916cb..fdd7ea8 100644
--- a/ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out
+++ b/ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out
@@ -465,30 +465,30 @@ STAGE PLANS:
Stage: Stage-11
Map Reduce Local Work
Alias -> Map Local Tables:
- $hdt$_1:lineitem
+ $hdt$_1:$hdt$_1:lineitem
Fetch Operator
limit: -1
Alias -> Map Local Operator Tree:
- $hdt$_1:lineitem
+ $hdt$_1:$hdt$_1:lineitem
TableScan
alias: lineitem
Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((l_shipmode = 'AIR') and l_orderkey is not null) and l_linenumber is not null) (type: boolean)
+ predicate: (((l_shipmode = 'AIR') and (l_linenumber = 1)) and l_orderkey is not null) (type: boolean)
Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: l_orderkey (type: int), l_linenumber (type: int)
- outputColumnNames: _col0, _col1
+ expressions: l_orderkey (type: int)
+ outputColumnNames: _col0
Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- keys: _col0 (type: int), _col1 (type: int)
+ keys: _col0 (type: int)
mode: hash
- outputColumnNames: _col0, _col1
+ outputColumnNames: _col0
Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
HashTable Sink Operator
keys:
- 0 _col0 (type: int), _col3 (type: int)
- 1 _col0 (type: int), _col1 (type: int)
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
Stage: Stage-8
Map Reduce
@@ -500,15 +500,15 @@ STAGE PLANS:
predicate: (((l_linenumber = 1) and l_orderkey is not null) and l_partkey is not null) (type: boolean)
Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int), 1 (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3
+ expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Left Semi Join 0 to 1
keys:
- 0 _col0 (type: int), _col3 (type: int)
- 1 _col0 (type: int), _col1 (type: int)
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
outputColumnNames: _col1, _col2
Statistics: Num rows: 14 Data size: 1714 Basic stats: COMPLETE Column stats: NONE
File Output Operator
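Note: the vector_mapjoin_reduce changes drop the constant 1 from the semi-join key. Joining on (l_orderkey, 1) against (l_orderkey, l_linenumber) is equivalent to filtering the subquery side with l_linenumber = 1 and joining on l_orderkey alone, which is exactly what the rewritten filter predicates above do. A small stand-alone Java sketch of that equivalence follows; all row values are invented for the demo.

import java.util.*;

// Toy version of the rewrite: composite key with a constant component
// versus a pre-filtered build side with a single key.
public class ConstantKeyElimination {
  record Line(int orderkey, int linenumber) {}

  public static void main(String[] args) {
    List<Line> probe = List.of(new Line(1, 1), new Line(2, 1));
    List<Line> build = List.of(new Line(1, 1), new Line(1, 2), new Line(3, 1));

    // Old shape: composite key (orderkey, linenumber), with the probe
    // side contributing the constant 1 as its second key component.
    Set<List<Integer>> buildKeys = new HashSet<>();
    for (Line l : build) buildKeys.add(List.of(l.orderkey(), l.linenumber()));
    long oldMatches = probe.stream()
        .filter(p -> buildKeys.contains(List.of(p.orderkey(), 1)))
        .count();

    // New shape: filter linenumber == 1 on the build side, join on a
    // single key.
    Set<Integer> filteredKeys = new HashSet<>();
    for (Line l : build) if (l.linenumber() == 1) filteredKeys.add(l.orderkey());
    long newMatches = probe.stream()
        .filter(p -> filteredKeys.contains(p.orderkey()))
        .count();

    System.out.println(oldMatches == newMatches);  // true
  }
}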
[10/28] hive git commit: HIVE-11701 : Make tez tests AM logs work with new log4j2 changes (Sergey Shelukhin, reviewed by Prasanth Jayachandran)
Posted by se...@apache.org.
HIVE-11701 : Make tez tests AM logs work with new log4j2 changes (Sergey Shelukhin, reviewed by Prasanth Jayachandran)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a338f33c
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a338f33c
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a338f33c
Branch: refs/heads/llap
Commit: a338f33cc2b17d90d391466090839f76bebc1163
Parents: 78e7015
Author: Sergey Shelukhin <se...@apache.org>
Authored: Mon Aug 31 15:54:08 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Mon Aug 31 15:54:08 2015 -0700
----------------------------------------------------------------------
data/conf/tez/hive-site.xml | 5 +++++
1 file changed, 5 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/a338f33c/data/conf/tez/hive-site.xml
----------------------------------------------------------------------
diff --git a/data/conf/tez/hive-site.xml b/data/conf/tez/hive-site.xml
index 2f9415a..b4abe90 100644
--- a/data/conf/tez/hive-site.xml
+++ b/data/conf/tez/hive-site.xml
@@ -258,4 +258,9 @@
<value> -Dlog4j.configurationFile=tez-container-log4j2.xml -Dtez.container.log.level=INFO -Dtez.container.root.logger=CLA </value>
</property>
+<property>
+ <name>tez.am.launch.cmd-opts</name>
+ <value> -Dlog4j.configurationFile=tez-container-log4j2.xml -Dtez.container.log.level=INFO -Dtez.container.root.logger=CLA </value>
+</property>
+
</configuration>
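Note: -Dlog4j.configurationFile is Log4j 2's standard switch for pointing a JVM at a configuration file, and it must be visible before the first logger is created, which is why the test config passes it in both the container and the AM launch options. A minimal sketch of the same mechanism set programmatically; the logger usage is illustrative only.

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

// The property must be set before Log4j 2 initializes; in the Tez test
// setup this is guaranteed by passing it as a -D flag on the launch
// command line rather than setting it in code.
public class Log4j2ConfigDemo {
  public static void main(String[] args) {
    System.setProperty("log4j.configurationFile", "tez-container-log4j2.xml");
    Logger log = LogManager.getLogger(Log4j2ConfigDemo.class);
    log.info("AM/container logging configured via log4j2");
  }
}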
[20/28] hive git commit: HIVE-11689 : minor flow changes to ORC split generation (Sergey Shelukhin, reviewed by Prasanth Jayachandran and Swarnim Kulkarni) ADDENDUM
Posted by se...@apache.org.
HIVE-11689 : minor flow changes to ORC split generation (Sergey Shelukhin, reviewed by Prasanth Jayachandran and Swarnim Kulkarni) ADDENDUM
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3ff3c6f1
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3ff3c6f1
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3ff3c6f1
Branch: refs/heads/llap
Commit: 3ff3c6f19ab82390f44c88cde5afea32a0299986
Parents: dbdd611
Author: Sergey Shelukhin <se...@apache.org>
Authored: Wed Sep 2 11:01:35 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Wed Sep 2 11:01:35 2015 -0700
----------------------------------------------------------------------
.../hadoop/hive/ql/io/orc/OrcInputFormat.java | 18 ++++++++++--------
1 file changed, 10 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/3ff3c6f1/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 05efc5f..cf8694e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -28,6 +28,7 @@ import java.util.Map;
import java.util.NavigableMap;
import java.util.TreeMap;
import java.util.concurrent.Callable;
+import java.util.concurrent.CompletionService;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
@@ -373,7 +374,6 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
private final Configuration conf;
private static Cache<Path, FileInfo> footerCache;
private static ExecutorService threadPool = null;
- private static ExecutorCompletionService<AcidDirInfo> ecs = null;
private final int numBuckets;
private final long maxSize;
private final long minSize;
@@ -419,7 +419,6 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
threadPool = Executors.newFixedThreadPool(numThreads,
new ThreadFactoryBuilder().setDaemon(true)
.setNameFormat("ORC_GET_SPLITS #%d").build());
- ecs = new ExecutorCompletionService<AcidDirInfo>(threadPool);
}
if (footerCache == null && cacheStripeDetails) {
@@ -440,7 +439,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
/**
* The full ACID directory information needed for splits; no more calls to HDFS needed.
* We could just live with AcidUtils.Directory but...
- * 1) That doesn't contain have base files.
+ * 1) That doesn't have base files for the base-directory case.
* 2) We save fs for convenience to avoid getting it twice.
*/
@VisibleForTesting
@@ -1031,17 +1030,18 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
// multi-threaded file statuses and split strategy
boolean useFileIds = HiveConf.getBoolVar(conf, ConfVars.HIVE_ORC_INCLUDE_FILE_ID_IN_SPLITS);
Path[] paths = getInputPaths(conf);
+ CompletionService<AcidDirInfo> ecs = new ExecutorCompletionService<>(Context.threadPool);
for (Path dir : paths) {
FileSystem fs = dir.getFileSystem(conf);
FileGenerator fileGenerator = new FileGenerator(context, fs, dir, useFileIds);
- pathFutures.add(Context.ecs.submit(fileGenerator));
+ pathFutures.add(ecs.submit(fileGenerator));
}
// complete path futures and schedule split generation
try {
for (int notIndex = 0; notIndex < paths.length; ++notIndex) {
- AcidDirInfo adi = Context.ecs.take().get();
- SplitStrategy splitStrategy = determineSplitStrategy(
+ AcidDirInfo adi = ecs.take().get();
+ SplitStrategy<?> splitStrategy = determineSplitStrategy(
context, adi.fs, adi.splitPath, adi.acidInfo, adi.baseOrOriginalFiles);
if (isDebugEnabled) {
@@ -1049,12 +1049,14 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
}
if (splitStrategy instanceof ETLSplitStrategy) {
- List<SplitInfo> splitInfos = splitStrategy.getSplits();
+ List<SplitInfo> splitInfos = ((ETLSplitStrategy)splitStrategy).getSplits();
for (SplitInfo splitInfo : splitInfos) {
splitFutures.add(Context.threadPool.submit(new SplitGenerator(splitInfo)));
}
} else {
- splits.addAll(splitStrategy.getSplits());
+ @SuppressWarnings("unchecked")
+ List<OrcSplit> readySplits = (List<OrcSplit>)splitStrategy.getSplits();
+ splits.addAll(readySplits);
}
}
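Note: the change above replaces a static, shared ExecutorCompletionService with one created per getSplits() call, so concurrent callers no longer consume each other's completed futures from a shared queue, while the thread pool itself stays shared. A minimal stand-alone sketch of that pattern; the task payloads are invented.

import java.util.concurrent.*;

// Per-call CompletionService over a shared static pool: each request
// gets its own completion queue and consumes results in completion
// order, independent of other concurrent requests.
public class PerCallCompletionService {
  private static final ExecutorService POOL = Executors.newFixedThreadPool(4);

  public static void main(String[] args) throws Exception {
    CompletionService<String> ecs = new ExecutorCompletionService<>(POOL);
    String[] dirs = {"dir1", "dir2", "dir3"};   // stand-ins for input paths
    for (String dir : dirs) {
      ecs.submit(() -> "listed " + dir);        // stand-in for FileGenerator
    }
    // take() returns futures as they complete, not in submission order.
    for (int i = 0; i < dirs.length; i++) {
      System.out.println(ecs.take().get());
    }
    POOL.shutdown();
  }
}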
[26/28] hive git commit: HIVE-11383: Upgrade Hive to Calcite 1.4 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Posted by se...@apache.org.
HIVE-11383: Upgrade Hive to Calcite 1.4 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c40382d4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c40382d4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c40382d4
Branch: refs/heads/llap
Commit: c40382d4c3bd3f0b0c0f1b09affb7d03198f47ba
Parents: 492c8b1
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Thu Sep 3 11:48:07 2015 +0200
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Thu Sep 3 11:48:07 2015 +0200
----------------------------------------------------------------------
pom.xml | 2 +-
.../ql/optimizer/calcite/HiveRelOptUtil.java | 23 --
.../calcite/reloperators/HiveSort.java | 29 +-
.../rules/HiveJoinProjectTransposeRule.java | 238 ++--------------
.../hadoop/hive/ql/parse/CalcitePlanner.java | 5 +-
.../clientpositive/constprog_partitioner.q.out | 30 +-
.../clientpositive/correlationoptimizer10.q.out | 48 ++--
.../spark/constprog_partitioner.q.out | 30 +-
.../clientpositive/spark/subquery_exists.q.out | 12 +-
.../clientpositive/spark/subquery_in.q.out | 36 +--
.../spark/vector_mapjoin_reduce.q.out | 22 +-
.../clientpositive/subquery_exists.q.out | 12 +-
.../results/clientpositive/subquery_in.q.out | 36 +--
.../clientpositive/subquery_in_having.q.out | 50 ++--
.../subquery_unqualcolumnrefs.q.out | 26 +-
.../results/clientpositive/subquery_views.q.out | 40 +--
.../clientpositive/tez/explainuser_1.q.out | 278 +++++++++----------
.../clientpositive/tez/subquery_exists.q.out | 12 +-
.../clientpositive/tez/subquery_in.q.out | 36 +--
.../clientpositive/tez/vector_inner_join.q.out | 14 +-
.../tez/vector_mapjoin_reduce.q.out | 24 +-
.../clientpositive/vector_inner_join.q.out | 12 +-
.../clientpositive/vector_mapjoin_reduce.q.out | 26 +-
23 files changed, 431 insertions(+), 610 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 15c2805..b55e86a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -103,7 +103,7 @@
<antlr.version>3.4</antlr.version>
<avro.version>1.7.7</avro.version>
<bonecp.version>0.8.0.RELEASE</bonecp.version>
- <calcite.version>1.3.0-incubating</calcite.version>
+ <calcite.version>1.4.0-incubating</calcite.version>
<datanucleus-api-jdo.version>3.2.6</datanucleus-api-jdo.version>
<datanucleus-core.version>3.2.10</datanucleus-core.version>
<datanucleus-rdbms.version>3.2.9</datanucleus-rdbms.version>
http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
index 5a5954d..0e282b8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
@@ -278,29 +278,6 @@ public class HiveRelOptUtil extends RelOptUtil {
}
}
- private static SqlOperator op(SqlKind kind, SqlOperator operator) {
- switch (kind) {
- case EQUALS:
- return SqlStdOperatorTable.EQUALS;
- case NOT_EQUALS:
- return SqlStdOperatorTable.NOT_EQUALS;
- case GREATER_THAN:
- return SqlStdOperatorTable.GREATER_THAN;
- case GREATER_THAN_OR_EQUAL:
- return SqlStdOperatorTable.GREATER_THAN_OR_EQUAL;
- case LESS_THAN:
- return SqlStdOperatorTable.LESS_THAN;
- case LESS_THAN_OR_EQUAL:
- return SqlStdOperatorTable.LESS_THAN_OR_EQUAL;
- case IS_DISTINCT_FROM:
- return SqlStdOperatorTable.IS_DISTINCT_FROM;
- case IS_NOT_DISTINCT_FROM:
- return SqlStdOperatorTable.IS_NOT_DISTINCT_FROM;
- default:
- return operator;
- }
- }
-
private static void addJoinKey(
List<RexNode> joinKeyList,
RexNode key,
http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSort.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSort.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSort.java
index 18d2838..1df6542 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSort.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSort.java
@@ -22,6 +22,7 @@ import java.util.Map;
import org.apache.calcite.plan.RelOptCluster;
import org.apache.calcite.plan.RelTraitSet;
import org.apache.calcite.rel.RelCollation;
+import org.apache.calcite.rel.RelCollationTraitDef;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.RelFactories;
import org.apache.calcite.rel.core.Sort;
@@ -49,6 +50,24 @@ public class HiveSort extends Sort implements HiveRelNode {
offset, fetch);
}
+ /**
+ * Creates a HiveSort.
+ *
+ * @param input Input relational expression
+ * @param collation array of sort specifications
+ * @param offset Expression for number of rows to discard before returning
+ * first row
+ * @param fetch Expression for number of rows to fetch
+ */
+ public static HiveSort create(RelNode input, RelCollation collation,
+ RexNode offset, RexNode fetch) {
+ RelOptCluster cluster = input.getCluster();
+ collation = RelCollationTraitDef.INSTANCE.canonize(collation);
+ RelTraitSet traitSet =
+ TraitsUtil.getSortTraitSet(cluster, input.getTraitSet(), collation);
+ return new HiveSort(cluster, traitSet, input, collation, offset, fetch);
+ }
+
@Override
public HiveSort copy(RelTraitSet traitSet, RelNode newInput, RelCollation newCollation,
RexNode offset, RexNode fetch) {
@@ -77,9 +96,15 @@ public class HiveSort extends Sort implements HiveRelNode {
private static class HiveSortRelFactory implements RelFactories.SortFactory {
@Override
- public RelNode createSort(RelTraitSet traits, RelNode child, RelCollation collation,
+ public RelNode createSort(RelTraitSet traits, RelNode input, RelCollation collation,
RexNode offset, RexNode fetch) {
- return new HiveSort(child.getCluster(), traits, child, collation, offset, fetch);
+ return createSort(input, collation, offset, fetch);
+ }
+
+ @Override
+ public RelNode createSort(RelNode input, RelCollation collation, RexNode offset,
+ RexNode fetch) {
+ return create(input, collation, offset, fetch);
}
}
}
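Note: the HiveSort change mirrors Calcite 1.4's SortFactory shape: the trait-taking overload no longer trusts caller-supplied traits, and both overloads funnel into a static create() that canonizes the collation and derives the trait set from the input. A generic stand-alone sketch of that shape follows; the names are illustrative, not Calcite's.

import java.util.List;

public class SortFactoryPattern {
  interface Node {}

  static final class Sort implements Node {
    final Node input; final String collation; final List<String> traits;
    private Sort(Node input, String collation, List<String> traits) {
      this.input = input; this.collation = collation; this.traits = traits;
    }
    // Single source of truth, mirroring the new static HiveSort.create():
    // canonicalize the collation and derive traits from it.
    static Sort create(Node input, String collation) {
      String canonical = collation.trim().toLowerCase();
      return new Sort(input, canonical, List.of("collation=" + canonical));
    }
  }

  interface SortFactory {
    // Old-style entry point: traits supplied by the caller...
    default Node createSort(List<String> traits, Node input, String collation) {
      return createSort(input, collation);   // ...but recomputed, not trusted
    }
    // New-style entry point: traits derived internally.
    Node createSort(Node input, String collation);
  }

  public static void main(String[] args) {
    SortFactory factory = Sort::create;
    Sort s = (Sort) factory.createSort(List.of("stale traits"), new Node() {}, "KEY ASC ");
    System.out.println(s.traits);  // [collation=key asc], stale traits ignored
  }
}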
http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java
index fd8f5cb..ac72ee5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java
@@ -49,14 +49,14 @@ public class HiveJoinProjectTransposeRule extends JoinProjectTransposeRule {
operand(HiveProject.class, any()),
operand(HiveProject.class, any())),
"JoinProjectTransposeRule(Project-Project)",
- HiveProject.DEFAULT_PROJECT_FACTORY);
+ false, HiveProject.DEFAULT_PROJECT_FACTORY);
public static final HiveJoinProjectTransposeRule LEFT_PROJECT =
new HiveJoinProjectTransposeRule(
operand(HiveJoin.class,
some(operand(HiveProject.class, any()))),
"JoinProjectTransposeRule(Project-Other)",
- HiveProject.DEFAULT_PROJECT_FACTORY);
+ false, HiveProject.DEFAULT_PROJECT_FACTORY);
public static final HiveJoinProjectTransposeRule RIGHT_PROJECT =
new HiveJoinProjectTransposeRule(
@@ -65,219 +65,37 @@ public class HiveJoinProjectTransposeRule extends JoinProjectTransposeRule {
operand(RelNode.class, any()),
operand(HiveProject.class, any())),
"JoinProjectTransposeRule(Other-Project)",
- HiveProject.DEFAULT_PROJECT_FACTORY);
+ false, HiveProject.DEFAULT_PROJECT_FACTORY);
- private final ProjectFactory projectFactory;
-
-
- private HiveJoinProjectTransposeRule(
- RelOptRuleOperand operand,
- String description, ProjectFactory pFactory) {
- super(operand, description, pFactory);
- this.projectFactory = pFactory;
- }
-
- @Override
- public void onMatch(RelOptRuleCall call) {
- Join joinRel = call.rel(0);
- JoinRelType joinType = joinRel.getJoinType();
-
- Project leftProj;
- Project rightProj;
- RelNode leftJoinChild;
- RelNode rightJoinChild;
-
- // see if at least one input's projection doesn't generate nulls
- if (hasLeftChild(call)) {
- leftProj = call.rel(1);
- leftJoinChild = getProjectChild(call, leftProj, true);
- } else {
- leftProj = null;
- leftJoinChild = call.rel(1);
- }
- if (hasRightChild(call)) {
- rightProj = getRightChild(call);
- rightJoinChild = getProjectChild(call, rightProj, false);
- } else {
- rightProj = null;
- rightJoinChild = joinRel.getRight();
- }
- if ((leftProj == null) && (rightProj == null)) {
- return;
- }
-
- // Construct two RexPrograms and combine them. The bottom program
- // is a join of the projection expressions from the left and/or
- // right projects that feed into the join. The top program contains
- // the join condition.
-
- // Create a row type representing a concatenation of the inputs
- // underneath the projects that feed into the join. This is the input
- // into the bottom RexProgram. Note that the join type is an inner
- // join because the inputs haven't actually been joined yet.
- RelDataType joinChildrenRowType =
- Join.deriveJoinRowType(
- leftJoinChild.getRowType(),
- rightJoinChild.getRowType(),
- JoinRelType.INNER,
- joinRel.getCluster().getTypeFactory(),
- null,
- Collections.<RelDataTypeField>emptyList());
-
- // Create projection expressions, combining the projection expressions
- // from the projects that feed into the join. For the RHS projection
- // expressions, shift them to the right by the number of fields on
- // the LHS. If the join input was not a projection, simply create
- // references to the inputs.
- int nProjExprs = joinRel.getRowType().getFieldCount();
- List<Pair<RexNode, String>> projects =
- new ArrayList<Pair<RexNode, String>>();
- RexBuilder rexBuilder = joinRel.getCluster().getRexBuilder();
-
- createProjectExprs(
- leftProj,
- leftJoinChild,
- 0,
- rexBuilder,
- joinChildrenRowType.getFieldList(),
- projects);
-
- List<RelDataTypeField> leftFields =
- leftJoinChild.getRowType().getFieldList();
- int nFieldsLeft = leftFields.size();
- createProjectExprs(
- rightProj,
- rightJoinChild,
- nFieldsLeft,
- rexBuilder,
- joinChildrenRowType.getFieldList(),
- projects);
-
- List<RelDataType> projTypes = new ArrayList<RelDataType>();
- for (int i = 0; i < nProjExprs; i++) {
- projTypes.add(projects.get(i).left.getType());
- }
- RelDataType projRowType =
- rexBuilder.getTypeFactory().createStructType(
- projTypes,
- Pair.right(projects));
-
- // create the RexPrograms and merge them
- RexProgram bottomProgram =
- RexProgram.create(
- joinChildrenRowType,
- Pair.left(projects),
- null,
- projRowType,
- rexBuilder);
- RexProgramBuilder topProgramBuilder =
- new RexProgramBuilder(
- projRowType,
- rexBuilder);
- topProgramBuilder.addIdentity();
- topProgramBuilder.addCondition(joinRel.getCondition());
- RexProgram topProgram = topProgramBuilder.getProgram();
- RexProgram mergedProgram =
- RexProgramBuilder.mergePrograms(
- topProgram,
- bottomProgram,
- rexBuilder);
+ public static final HiveJoinProjectTransposeRule BOTH_PROJECT_INCLUDE_OUTER =
+ new HiveJoinProjectTransposeRule(
+ operand(HiveJoin.class,
+ operand(HiveProject.class, any()),
+ operand(HiveProject.class, any())),
+ "Join(IncludingOuter)ProjectTransposeRule(Project-Project)",
+ true, HiveProject.DEFAULT_PROJECT_FACTORY);
- // expand out the join condition and construct a new LogicalJoin that
- // directly references the join children without the intervening
- // ProjectRels
- RexNode newCondition =
- mergedProgram.expandLocalRef(
- mergedProgram.getCondition());
- Join newJoinRel =
- joinRel.copy(joinRel.getTraitSet(), newCondition,
- leftJoinChild, rightJoinChild, joinRel.getJoinType(),
- joinRel.isSemiJoinDone());
+ public static final HiveJoinProjectTransposeRule LEFT_PROJECT_INCLUDE_OUTER =
+ new HiveJoinProjectTransposeRule(
+ operand(HiveJoin.class,
+ some(operand(HiveProject.class, any()))),
+ "Join(IncludingOuter)ProjectTransposeRule(Project-Other)",
+ true, HiveProject.DEFAULT_PROJECT_FACTORY);
- // expand out the new projection expressions; if the join is an
- // outer join, modify the expressions to reference the join output
- List<RexNode> newProjExprs = new ArrayList<RexNode>();
- List<RexLocalRef> projList = mergedProgram.getProjectList();
- List<RelDataTypeField> newJoinFields =
- newJoinRel.getRowType().getFieldList();
- int nJoinFields = newJoinFields.size();
- int[] adjustments = new int[nJoinFields];
- for (int i = 0; i < nProjExprs; i++) {
- RexNode newExpr = mergedProgram.expandLocalRef(projList.get(i));
- if (joinType != JoinRelType.INNER) {
- newExpr =
- newExpr.accept(
- new RelOptUtil.RexInputConverter(
- rexBuilder,
- joinChildrenRowType.getFieldList(),
- newJoinFields,
- adjustments));
- }
- newProjExprs.add(newExpr);
- }
+ public static final HiveJoinProjectTransposeRule RIGHT_PROJECT_INCLUDE_OUTER =
+ new HiveJoinProjectTransposeRule(
+ operand(
+ HiveJoin.class,
+ operand(RelNode.class, any()),
+ operand(HiveProject.class, any())),
+ "Join(IncludingOuter)ProjectTransposeRule(Other-Project)",
+ true, HiveProject.DEFAULT_PROJECT_FACTORY);
- // finally, create the projection on top of the join
- RelNode newProjRel = projectFactory.createProject(newJoinRel, newProjExprs,
- joinRel.getRowType().getFieldNames());
- call.transformTo(newProjRel);
+ private HiveJoinProjectTransposeRule(
+ RelOptRuleOperand operand, String description,
+ boolean includeOuter, ProjectFactory projectFactory) {
+ super(operand, description, includeOuter, projectFactory);
}
- /**
- * Creates projection expressions corresponding to one of the inputs into
- * the join
- *
- * @param projRel the projection input into the join (if it exists)
- * @param joinChild the child of the projection input (if there is a
- * projection); otherwise, this is the join input
- * @param adjustmentAmount the amount the expressions need to be shifted by
- * @param rexBuilder rex builder
- * @param joinChildrenFields concatenation of the fields from the left and
- * right join inputs (once the projections have been
- * removed)
- * @param projects Projection expressions & names to be created
- */
- private void createProjectExprs(
- Project projRel,
- RelNode joinChild,
- int adjustmentAmount,
- RexBuilder rexBuilder,
- List<RelDataTypeField> joinChildrenFields,
- List<Pair<RexNode, String>> projects) {
- List<RelDataTypeField> childFields =
- joinChild.getRowType().getFieldList();
- if (projRel != null) {
- List<Pair<RexNode, String>> namedProjects =
- projRel.getNamedProjects();
- int nChildFields = childFields.size();
- int[] adjustments = new int[nChildFields];
- for (int i = 0; i < nChildFields; i++) {
- adjustments[i] = adjustmentAmount;
- }
- for (Pair<RexNode, String> pair : namedProjects) {
- RexNode e = pair.left;
- if (adjustmentAmount != 0) {
- // shift the references by the adjustment amount
- e = e.accept(
- new RelOptUtil.RexInputConverter(
- rexBuilder,
- childFields,
- joinChildrenFields,
- adjustments));
- }
- projects.add(Pair.of(e, pair.right));
- }
- } else {
- // no projection; just create references to the inputs
- for (int i = 0; i < childFields.size(); i++) {
- final RelDataTypeField field = childFields.get(i);
- projects.add(
- Pair.of(
- (RexNode) rexBuilder.makeInputRef(
- field.getType(),
- i + adjustmentAmount),
- field.getName()));
- }
- }
- }
}
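After this rewrite the Hive rule no longer carries its own onMatch: the hand-rolled RexProgram merging is deleted and the Calcite superclass performs the transposition, with each static instance differing only in its operand tree and the new includeOuter flag (true for the *_INCLUDE_OUTER variants, false for the originals). A condensed sketch of the resulting shape, assuming the four-argument JoinProjectTransposeRule constructor available in the Calcite version used here (the super call in the diff relies on it); the class name is invented:

    import org.apache.calcite.plan.RelOptRuleOperand;
    import org.apache.calcite.rel.core.Join;
    import org.apache.calcite.rel.core.Project;
    import org.apache.calcite.rel.core.RelFactories;
    import org.apache.calcite.rel.core.RelFactories.ProjectFactory;
    import org.apache.calcite.rel.rules.JoinProjectTransposeRule;

    public class ThinJoinProjectTransposeRule extends JoinProjectTransposeRule {
      // One instance per operand shape; the boolean decides whether outer
      // joins are matched too (expressions over a null-generating input must
      // be rewritten, which the Calcite implementation takes care of).
      public static final ThinJoinProjectTransposeRule LEFT_PROJECT_INCLUDE_OUTER =
          new ThinJoinProjectTransposeRule(
              operand(Join.class, some(operand(Project.class, any()))),
              "ThinJoinProjectTransposeRule(Project-Other)",
              true, RelFactories.DEFAULT_PROJECT_FACTORY);

      private ThinJoinProjectTransposeRule(RelOptRuleOperand operand,
          String description, boolean includeOuter, ProjectFactory projectFactory) {
        super(operand, description, includeOuter, projectFactory);
      }
    }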
http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index f26d1df..73ae7c4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -914,8 +914,9 @@ public class CalcitePlanner extends SemanticAnalyzer {
if (HiveConf.getBoolVar(conf, ConfVars.HIVE_CBO_RETPATH_HIVEOP)) {
// 6.1. Merge join into multijoin operators (if possible)
calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, true, mdProvider.getMetadataProvider(),
- HepMatchOrder.BOTTOM_UP, HiveJoinProjectTransposeRule.BOTH_PROJECT,
- HiveJoinProjectTransposeRule.LEFT_PROJECT, HiveJoinProjectTransposeRule.RIGHT_PROJECT,
+ HepMatchOrder.BOTTOM_UP, HiveJoinProjectTransposeRule.BOTH_PROJECT_INCLUDE_OUTER,
+ HiveJoinProjectTransposeRule.LEFT_PROJECT_INCLUDE_OUTER,
+ HiveJoinProjectTransposeRule.RIGHT_PROJECT_INCLUDE_OUTER,
HiveJoinToMultiJoinRule.INSTANCE, HiveProjectMergeRule.INSTANCE);
// The previous rules can pull up projections through join operators,
// thus we run the field trimmer again to push them back down
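The planner hunk swaps the inner-join-only transpose rules for the INCLUDE_OUTER variants inside hepPlan, which drives a Calcite HepPlanner bottom-up. A minimal sketch of what such a helper does with stock Calcite APIs (the real hepPlan also threads the metadata provider through, as the call site shows):

    import org.apache.calcite.plan.RelOptRule;
    import org.apache.calcite.plan.hep.HepMatchOrder;
    import org.apache.calcite.plan.hep.HepPlanner;
    import org.apache.calcite.plan.hep.HepProgramBuilder;
    import org.apache.calcite.rel.RelNode;

    final class HepSketch {
      static RelNode rewrite(RelNode root, RelOptRule... rules) {
        HepProgramBuilder builder = new HepProgramBuilder();
        builder.addMatchOrder(HepMatchOrder.BOTTOM_UP);  // match leaves before parents
        for (RelOptRule rule : rules) {
          builder.addRuleInstance(rule);
        }
        HepPlanner planner = new HepPlanner(builder.build());
        planner.setRoot(root);
        return planner.findBestExp();  // fires rules to fixpoint, returns rewritten plan
      }
    }

Bottom-up matching matters here because the transpose rules create new projections at child positions; visiting leaves first lets HiveProjectMergeRule fold those projections on the same pass.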
http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/constprog_partitioner.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/constprog_partitioner.q.out b/ql/src/test/results/clientpositive/constprog_partitioner.q.out
index 6475fa7..e22a4a9 100644
--- a/ql/src/test/results/clientpositive/constprog_partitioner.q.out
+++ b/ql/src/test/results/clientpositive/constprog_partitioner.q.out
@@ -111,42 +111,42 @@ STAGE PLANS:
predicate: ((l_linenumber = 1) and l_orderkey is not null) (type: boolean)
Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int), 1 (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3
+ expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: int), _col3 (type: int)
- sort order: ++
- Map-reduce partition columns: _col0 (type: int), _col3 (type: int)
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type: int)
TableScan
alias: li
Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((l_shipmode = 'AIR') and l_orderkey is not null) and l_linenumber is not null) (type: boolean)
+ predicate: (((l_shipmode = 'AIR') and (l_linenumber = 1)) and l_orderkey is not null) (type: boolean)
Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: l_orderkey (type: int), l_linenumber (type: int)
- outputColumnNames: _col0, _col1
+ expressions: l_orderkey (type: int)
+ outputColumnNames: _col0
Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- keys: _col0 (type: int), _col1 (type: int)
+ keys: _col0 (type: int)
mode: hash
- outputColumnNames: _col0, _col1
+ outputColumnNames: _col0
Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: int)
- sort order: ++
- Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Join Operator
condition map:
Left Semi Join 0 to 1
keys:
- 0 _col0 (type: int), _col3 (type: int)
- 1 _col0 (type: int), _col1 (type: int)
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
outputColumnNames: _col1, _col2
Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE
Select Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/correlationoptimizer10.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/correlationoptimizer10.q.out b/ql/src/test/results/clientpositive/correlationoptimizer10.q.out
index 14a6ade..90fb6f4 100644
--- a/ql/src/test/results/clientpositive/correlationoptimizer10.q.out
+++ b/ql/src/test/results/clientpositive/correlationoptimizer10.q.out
@@ -474,17 +474,17 @@ STAGE PLANS:
alias: xx
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ predicate: (((UDFToDouble(key) < 200.0) and (UDFToDouble(key) > 20.0)) and key is not null) (type: boolean)
+ Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
TableScan
Reduce Output Operator
@@ -581,17 +581,17 @@ STAGE PLANS:
alias: xx
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ predicate: (((UDFToDouble(key) < 200.0) and (UDFToDouble(key) > 20.0)) and key is not null) (type: boolean)
+ Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
TableScan
alias: x
@@ -625,9 +625,9 @@ STAGE PLANS:
Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Demux Operator
- Statistics: Num rows: 69 Data size: 693 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 57 Data size: 601 Basic stats: COMPLETE Column stats: NONE
Mux Operator
- Statistics: Num rows: 69 Data size: 693 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 57 Data size: 601 Basic stats: COMPLETE Column stats: NONE
Join Operator
condition map:
Left Semi Join 0 to 1
@@ -652,7 +652,7 @@ STAGE PLANS:
outputColumnNames: _col0
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Mux Operator
- Statistics: Num rows: 69 Data size: 693 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 57 Data size: 601 Basic stats: COMPLETE Column stats: NONE
Join Operator
condition map:
Left Semi Join 0 to 1
@@ -802,17 +802,17 @@ STAGE PLANS:
alias: xx
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ predicate: (((UDFToDouble(key) < 200.0) and (UDFToDouble(key) > 180.0)) and key is not null) (type: boolean)
+ Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
TableScan
Reduce Output Operator
@@ -828,10 +828,10 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 33 Data size: 358 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 33 Data size: 358 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -921,17 +921,17 @@ STAGE PLANS:
alias: xx
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ predicate: (((UDFToDouble(key) < 200.0) and (UDFToDouble(key) > 180.0)) and key is not null) (type: boolean)
+ Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
TableScan
alias: xx
@@ -965,9 +965,9 @@ STAGE PLANS:
Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Demux Operator
- Statistics: Num rows: 306 Data size: 3250 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 84 Data size: 891 Basic stats: COMPLETE Column stats: NONE
Mux Operator
- Statistics: Num rows: 306 Data size: 3250 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 84 Data size: 891 Basic stats: COMPLETE Column stats: NONE
Join Operator
condition map:
Left Semi Join 0 to 1
@@ -992,7 +992,7 @@ STAGE PLANS:
outputColumnNames: _col0
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Mux Operator
- Statistics: Num rows: 306 Data size: 3250 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 84 Data size: 891 Basic stats: COMPLETE Column stats: NONE
Join Operator
condition map:
Left Semi Join 0 to 1
http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out b/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out
index 665bfce..3a3a751 100644
--- a/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out
+++ b/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out
@@ -124,13 +124,13 @@ STAGE PLANS:
predicate: ((l_linenumber = 1) and l_orderkey is not null) (type: boolean)
Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int), 1 (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3
+ expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: int), _col3 (type: int)
- sort order: ++
- Map-reduce partition columns: _col0 (type: int), _col3 (type: int)
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type: int)
Map 3
@@ -139,21 +139,21 @@ STAGE PLANS:
alias: li
Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((l_shipmode = 'AIR') and l_orderkey is not null) and l_linenumber is not null) (type: boolean)
+ predicate: (((l_shipmode = 'AIR') and (l_linenumber = 1)) and l_orderkey is not null) (type: boolean)
Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: l_orderkey (type: int), l_linenumber (type: int)
- outputColumnNames: _col0, _col1
+ expressions: l_orderkey (type: int)
+ outputColumnNames: _col0
Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- keys: _col0 (type: int), _col1 (type: int)
+ keys: _col0 (type: int)
mode: hash
- outputColumnNames: _col0, _col1
+ outputColumnNames: _col0
Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: int)
- sort order: ++
- Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
@@ -161,8 +161,8 @@ STAGE PLANS:
condition map:
Left Semi Join 0 to 1
keys:
- 0 _col0 (type: int), _col3 (type: int)
- 1 _col0 (type: int), _col1 (type: int)
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
outputColumnNames: _col1, _col2
Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE
Select Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/spark/subquery_exists.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/subquery_exists.q.out b/ql/src/test/results/clientpositive/spark/subquery_exists.q.out
index 28eda26..44f467b 100644
--- a/ql/src/test/results/clientpositive/spark/subquery_exists.q.out
+++ b/ql/src/test/results/clientpositive/spark/subquery_exists.q.out
@@ -41,17 +41,17 @@ STAGE PLANS:
alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (value is not null and key is not null) (type: boolean)
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((value > 'val_9') and key is not null) (type: boolean)
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col1 (type: string), _col0 (type: string)
sort order: ++
Map-reduce partition columns: _col1 (type: string), _col0 (type: string)
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
Map 3
Map Operator Tree:
TableScan
@@ -83,10 +83,10 @@ STAGE PLANS:
0 _col1 (type: string), _col0 (type: string)
1 _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/spark/subquery_in.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/subquery_in.q.out b/ql/src/test/results/clientpositive/spark/subquery_in.q.out
index 00b3399..bfcdaa8 100644
--- a/ql/src/test/results/clientpositive/spark/subquery_in.q.out
+++ b/ql/src/test/results/clientpositive/spark/subquery_in.q.out
@@ -31,17 +31,17 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ predicate: (key > '9') (type: boolean)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Map 3
Map Operator Tree:
@@ -74,10 +74,10 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -149,17 +149,17 @@ STAGE PLANS:
alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key is not null and value is not null) (type: boolean)
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((key > '9') and value is not null) (type: boolean)
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
Map 3
Map Operator Tree:
TableScan
@@ -191,10 +191,10 @@ STAGE PLANS:
0 _col0 (type: string), _col1 (type: string)
1 _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -627,17 +627,17 @@ STAGE PLANS:
alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key is not null and value is not null) (type: boolean)
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((key > '9') and value is not null) (type: boolean)
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
Map 3
Map Operator Tree:
TableScan
@@ -669,10 +669,10 @@ STAGE PLANS:
0 _col0 (type: string), _col1 (type: string)
1 _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out b/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out
index a2dd910..be39d0d 100644
--- a/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out
@@ -205,21 +205,21 @@ STAGE PLANS:
alias: lineitem
Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((l_shipmode = 'AIR') and l_orderkey is not null) and l_linenumber is not null) (type: boolean)
+ predicate: (((l_shipmode = 'AIR') and (l_linenumber = 1)) and l_orderkey is not null) (type: boolean)
Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: l_orderkey (type: int), l_linenumber (type: int)
- outputColumnNames: _col0, _col1
+ expressions: l_orderkey (type: int)
+ outputColumnNames: _col0
Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- keys: _col0 (type: int), _col1 (type: int)
+ keys: _col0 (type: int)
mode: hash
- outputColumnNames: _col0, _col1
+ outputColumnNames: _col0
Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
- 0 _col0 (type: int), _col3 (type: int)
- 1 _col0 (type: int), _col1 (type: int)
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
Local Work:
Map Reduce Local Work
@@ -236,15 +236,15 @@ STAGE PLANS:
predicate: (((l_linenumber = 1) and l_orderkey is not null) and l_partkey is not null) (type: boolean)
Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int), 1 (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3
+ expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Left Semi Join 0 to 1
keys:
- 0 _col0 (type: int), _col3 (type: int)
- 1 _col0 (type: int), _col1 (type: int)
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
outputColumnNames: _col1, _col2
input vertices:
1 Map 2
http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/subquery_exists.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/subquery_exists.q.out b/ql/src/test/results/clientpositive/subquery_exists.q.out
index 776b145..ace5efe 100644
--- a/ql/src/test/results/clientpositive/subquery_exists.q.out
+++ b/ql/src/test/results/clientpositive/subquery_exists.q.out
@@ -36,17 +36,17 @@ STAGE PLANS:
alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (value is not null and key is not null) (type: boolean)
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((value > 'val_9') and key is not null) (type: boolean)
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col1 (type: string), _col0 (type: string)
sort order: ++
Map-reduce partition columns: _col1 (type: string), _col0 (type: string)
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
TableScan
alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -75,10 +75,10 @@ STAGE PLANS:
0 _col1 (type: string), _col0 (type: string)
1 _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/subquery_in.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/subquery_in.q.out b/ql/src/test/results/clientpositive/subquery_in.q.out
index 7c53638..f82c799 100644
--- a/ql/src/test/results/clientpositive/subquery_in.q.out
+++ b/ql/src/test/results/clientpositive/subquery_in.q.out
@@ -26,17 +26,17 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ predicate: (key > '9') (type: boolean)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
TableScan
alias: src
@@ -66,10 +66,10 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -136,17 +136,17 @@ STAGE PLANS:
alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key is not null and value is not null) (type: boolean)
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((key > '9') and value is not null) (type: boolean)
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
TableScan
alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -175,10 +175,10 @@ STAGE PLANS:
0 _col0 (type: string), _col1 (type: string)
1 _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -671,17 +671,17 @@ STAGE PLANS:
alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key is not null and value is not null) (type: boolean)
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((key > '9') and value is not null) (type: boolean)
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
TableScan
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
@@ -696,10 +696,10 @@ STAGE PLANS:
0 _col0 (type: string), _col1 (type: string)
1 _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/subquery_in_having.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/subquery_in_having.q.out b/ql/src/test/results/clientpositive/subquery_in_having.q.out
index 8cabfa7..da1da06 100644
--- a/ql/src/test/results/clientpositive/subquery_in_having.q.out
+++ b/ql/src/test/results/clientpositive/subquery_in_having.q.out
@@ -782,17 +782,17 @@ STAGE PLANS:
alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ predicate: (key > '8') (type: boolean)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
TableScan
alias: b
@@ -822,13 +822,13 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
keys: _col0 (type: string), _col1 (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -844,7 +844,7 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
value expressions: _col2 (type: bigint)
Reduce Operator Tree:
Group By Operator
@@ -852,10 +852,10 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: _col2 is not null (type: boolean)
- Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -871,7 +871,7 @@ STAGE PLANS:
key expressions: _col2 (type: bigint)
sort order: +
Map-reduce partition columns: _col2 (type: bigint)
- Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: string), _col1 (type: string)
TableScan
Reduce Output Operator
@@ -887,10 +887,10 @@ STAGE PLANS:
0 _col2 (type: bigint)
1 _col0 (type: bigint)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -1079,10 +1079,10 @@ STAGE PLANS:
0 _col2 (type: bigint)
1 _col0 (type: bigint)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -1098,7 +1098,7 @@ STAGE PLANS:
key expressions: _col2 (type: bigint)
sort order: +
Map-reduce partition columns: _col2 (type: bigint)
- Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: string), _col1 (type: string)
TableScan
Reduce Output Operator
@@ -1114,10 +1114,10 @@ STAGE PLANS:
0 _col2 (type: bigint)
1 _col0 (type: bigint)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -1158,12 +1158,12 @@ STAGE PLANS:
alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ predicate: (key > '8') (type: boolean)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Left Semi Join 0 to 1
@@ -1171,18 +1171,18 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
keys: _col0 (type: string), _col1 (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
value expressions: _col2 (type: bigint)
Local Work:
Map Reduce Local Work
@@ -1192,10 +1192,10 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: _col2 is not null (type: boolean)
- Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out b/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out
index 5d6d4a8..76d7503 100644
--- a/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out
+++ b/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out
@@ -52,7 +52,7 @@ STAGE PLANS:
alias: src11
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: (key1 is not null and value1 is not null) (type: boolean)
+ predicate: ((key1 > '9') and value1 is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: key1 (type: string), value1 (type: string)
@@ -122,17 +122,17 @@ STAGE PLANS:
alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key is not null and value is not null) (type: boolean)
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((key > '9') and value is not null) (type: boolean)
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
TableScan
alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -161,10 +161,10 @@ STAGE PLANS:
0 _col0 (type: string), _col1 (type: string)
1 _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -588,17 +588,17 @@ STAGE PLANS:
alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key is not null and value is not null) (type: boolean)
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((key > '9') and value is not null) (type: boolean)
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
TableScan
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
@@ -613,10 +613,10 @@ STAGE PLANS:
0 _col0 (type: string), _col1 (type: string)
1 _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/subquery_views.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/subquery_views.q.out b/ql/src/test/results/clientpositive/subquery_views.q.out
index 41834a3..c59d86e 100644
--- a/ql/src/test/results/clientpositive/subquery_views.q.out
+++ b/ql/src/test/results/clientpositive/subquery_views.q.out
@@ -70,7 +70,7 @@ POSTHOOK: Input: default@src
POSTHOOK: Output: database:default
POSTHOOK: Output: default@cv2
Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
-Warning: Shuffle Join JOIN[50][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-6:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[52][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-6:MAPRED' is a cross product
PREHOOK: query: explain
select *
from cv2 where cv2.key in (select key from cv2 c where c.key < '11')
@@ -97,7 +97,7 @@ STAGE PLANS:
alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((value > 'val_11') and (key is null or value is null)) (type: boolean)
+ predicate: ((value > 'val_11') and (key is null or value is null or key is null)) (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Select Operator
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
@@ -135,15 +135,15 @@ STAGE PLANS:
alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ predicate: (key < '11') (type: boolean)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: string), _col1 (type: string)
TableScan
Reduce Output Operator
@@ -157,7 +157,7 @@ STAGE PLANS:
0
1
outputColumnNames: _col0, _col1
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -173,22 +173,22 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: string), _col0 (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col0 (type: string)
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
TableScan
alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((value > 'val_11') and key is not null) (type: boolean)
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ predicate: (((value > 'val_11') and (key < '11')) and key is not null) (type: boolean)
+ Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string), key (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Join Operator
condition map:
@@ -197,14 +197,14 @@ STAGE PLANS:
0 _col0 (type: string), _col1 (type: string), _col0 (type: string)
1 _col0 (type: string), _col1 (type: string), _col2 (type: string)
outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 200 Data size: 2132 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: _col3 is null (type: boolean)
- Statistics: Num rows: 151 Data size: 1606 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 100 Data size: 1066 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 151 Data size: 1606 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 100 Data size: 1066 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -220,7 +220,7 @@ STAGE PLANS:
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 151 Data size: 1606 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 100 Data size: 1066 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
TableScan
Reduce Output Operator
@@ -236,10 +236,10 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 166 Data size: 1766 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 110 Data size: 1172 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 166 Data size: 1766 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 110 Data size: 1172 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -252,7 +252,7 @@ STAGE PLANS:
alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((value > 'val_11') and (key is null or value is null)) (type: boolean)
+ predicate: ((value > 'val_11') and (key is null or value is null or key is null)) (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Select Operator
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
@@ -379,7 +379,7 @@ STAGE PLANS:
ListSink
Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
-Warning: Shuffle Join JOIN[50][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-6:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[52][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-6:MAPRED' is a cross product
PREHOOK: query: select *
from cv2 where cv2.key in (select key from cv2 c where c.key < '11')
PREHOOK: type: QUERY
[23/28] hive git commit: HIVE-11700: exception in logs in Tez test with new logger (Prasanth Jayachandran reviewed by Sergey Shelukhin)
Posted by se...@apache.org.
HIVE-11700: exception in logs in Tez test with new logger (Prasanth Jayachandran reviewed by Sergey Shelukhin)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1fc9320f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1fc9320f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1fc9320f
Branch: refs/heads/llap
Commit: 1fc9320f07b066e4850a04958a2c73643b5ad5b1
Parents: 5a1957f
Author: Prasanth Jayachandran <j....@gmail.com>
Authored: Wed Sep 2 15:56:15 2015 -0500
Committer: Prasanth Jayachandran <j....@gmail.com>
Committed: Wed Sep 2 15:56:15 2015 -0500
----------------------------------------------------------------------
data/conf/hive-log4j2.xml | 3 ---
1 file changed, 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/1fc9320f/data/conf/hive-log4j2.xml
----------------------------------------------------------------------
diff --git a/data/conf/hive-log4j2.xml b/data/conf/hive-log4j2.xml
index 11c8e79..452f01f 100644
--- a/data/conf/hive-log4j2.xml
+++ b/data/conf/hive-log4j2.xml
@@ -94,9 +94,6 @@
<Logger name="org.apache.zookeeper.ClientCnxnSocketNIO" level="WARN">
<AppenderRef ref="${sys:hive.root.logger}"/>
</Logger>
- <Logger name="org.apache.hadoop.hive.ql.log.PerfLogger" level="${sys:hive.ql.log.PerfLogger.level}">
- <AppenderRef ref="${sys:hive.ql.log.PerfLogger.logger}"/>
- </Logger>
<Logger name="org.apache.hadoop.hive.ql.exec.Operator" level="INFO">
<AppenderRef ref="${sys:hive.root.logger}"/>
</Logger>
[21/28] hive git commit: HIVE-11604: HIVE returns wrong results in some queries with PTF function (Yongzhi Chen via Szehon)
Posted by se...@apache.org.
HIVE-11604: HIVE returns wrong results in some queries with PTF function (Yongzhi Chen via Szehon)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/308ae90a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/308ae90a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/308ae90a
Branch: refs/heads/llap
Commit: 308ae90aa396a2d99660f6ccf931c031ce9aa8a1
Parents: 3ff3c6f
Author: Szehon Ho <sz...@cloudera.com>
Authored: Wed Sep 2 11:49:04 2015 -0700
Committer: Szehon Ho <sz...@cloudera.com>
Committed: Wed Sep 2 11:49:04 2015 -0700
----------------------------------------------------------------------
.../ql/optimizer/IdentityProjectRemover.java | 15 +
.../queries/clientpositive/ptfgroupbyjoin.q | 61 +++
.../results/clientpositive/ptfgroupbyjoin.q.out | 519 +++++++++++++++++++
.../clientpositive/tez/explainuser_1.q.out | 31 +-
4 files changed, 612 insertions(+), 14 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/308ae90a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IdentityProjectRemover.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IdentityProjectRemover.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IdentityProjectRemover.java
index e3d3ce6..135b90b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IdentityProjectRemover.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IdentityProjectRemover.java
@@ -30,8 +30,10 @@ import com.google.common.collect.Iterators;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.exec.LateralViewForwardOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.PTFOperator;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.SelectOperator;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
@@ -111,6 +113,19 @@ public class IdentityProjectRemover implements Transform {
// For RS-SEL-RS case. reducer operator in reducer task cannot be null in task compiler
return null;
}
+ List<Operator<? extends OperatorDesc>> ancestorList = new ArrayList<Operator<? extends OperatorDesc>>();
+ ancestorList.addAll(sel.getParentOperators());
+ while (!ancestorList.isEmpty()) {
+ Operator<? extends OperatorDesc> curParent = ancestorList.remove(0);
+ // PTF need a SelectOp.
+ if ((curParent instanceof PTFOperator)) {
+ return null;
+ }
+ if ((curParent instanceof FilterOperator) && curParent.getParentOperators() != null) {
+ ancestorList.addAll(curParent.getParentOperators());
+ }
+ }
+
if(sel.isIdentitySelect()) {
parent.removeChildAndAdoptItsChildren(sel);
LOG.debug("Identity project remover optimization removed : " + sel);
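For context, the guard added above is a breadth-first walk over the SELECT operator's ancestors: if a PTFOperator is reachable through a chain of FilterOperators, the identity SELECT must stay in the plan, because the PTF's output columns are resolved through it. A minimal standalone sketch of the same logic (the class and method names below are hypothetical, not part of the patch):

    import java.util.ArrayDeque;
    import java.util.Deque;

    import org.apache.hadoop.hive.ql.exec.FilterOperator;
    import org.apache.hadoop.hive.ql.exec.Operator;
    import org.apache.hadoop.hive.ql.exec.PTFOperator;
    import org.apache.hadoop.hive.ql.plan.OperatorDesc;

    final class PtfGuardSketch {
      // Returns true when the given SELECT sits above a PTF, possibly with
      // filters in between; in that case IdentityProjectRemover must skip it.
      static boolean feedsFromPtf(Operator<? extends OperatorDesc> sel) {
        Deque<Operator<? extends OperatorDesc>> pending =
            new ArrayDeque<>(sel.getParentOperators());
        while (!pending.isEmpty()) {
          Operator<? extends OperatorDesc> parent = pending.poll();
          if (parent instanceof PTFOperator) {
            return true;                          // keep the identity SELECT
          }
          if (parent instanceof FilterOperator
              && parent.getParentOperators() != null) {
            pending.addAll(parent.getParentOperators());
          }
        }
        return false;                             // removal is safe
      }
    }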
http://git-wip-us.apache.org/repos/asf/hive/blob/308ae90a/ql/src/test/queries/clientpositive/ptfgroupbyjoin.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/ptfgroupbyjoin.q b/ql/src/test/queries/clientpositive/ptfgroupbyjoin.q
new file mode 100644
index 0000000..61d034e
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/ptfgroupbyjoin.q
@@ -0,0 +1,61 @@
+create table tlb1 (id int, fkey int, val string);
+create table tlb2 (fid int, name string);
+insert into table tlb1 values(100,1,'abc');
+insert into table tlb1 values(200,1,'efg');
+insert into table tlb2 values(1, 'key1');
+
+explain
+select ddd.id, ddd.fkey, aaa.name
+from (
+ select id, fkey,
+ row_number() over (partition by id, fkey) as rnum
+ from tlb1 group by id, fkey
+ ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey;
+
+select ddd.id, ddd.fkey, aaa.name
+from (
+ select id, fkey,
+ row_number() over (partition by id, fkey) as rnum
+ from tlb1 group by id, fkey
+ ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey;
+
+explain
+select ddd.id, ddd.fkey, aaa.name, ddd.rnum
+from (
+ select id, fkey,
+ row_number() over (partition by id, fkey) as rnum
+ from tlb1 group by id, fkey
+ ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey;
+
+select ddd.id, ddd.fkey, aaa.name, ddd.rnum
+from (
+ select id, fkey,
+ row_number() over (partition by id, fkey) as rnum
+ from tlb1 group by id, fkey
+ ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey;
+
+
+set hive.optimize.ppd=false;
+
+explain
+select ddd.id, ddd.fkey, aaa.name
+from (
+ select id, fkey,
+ row_number() over (partition by id, fkey) as rnum
+ from tlb1 group by id, fkey
+ ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey;
+
+select ddd.id, ddd.fkey, aaa.name
+from (
+ select id, fkey,
+ row_number() over (partition by id, fkey) as rnum
+ from tlb1 group by id, fkey
+ ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey;
+
+
http://git-wip-us.apache.org/repos/asf/hive/blob/308ae90a/ql/src/test/results/clientpositive/ptfgroupbyjoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/ptfgroupbyjoin.q.out b/ql/src/test/results/clientpositive/ptfgroupbyjoin.q.out
new file mode 100644
index 0000000..9368df9
--- /dev/null
+++ b/ql/src/test/results/clientpositive/ptfgroupbyjoin.q.out
@@ -0,0 +1,519 @@
+PREHOOK: query: create table tlb1 (id int, fkey int, val string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tlb1
+POSTHOOK: query: create table tlb1 (id int, fkey int, val string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tlb1
+PREHOOK: query: create table tlb2 (fid int, name string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tlb2
+POSTHOOK: query: create table tlb2 (fid int, name string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tlb2
+PREHOOK: query: insert into table tlb1 values(100,1,'abc')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@tlb1
+POSTHOOK: query: insert into table tlb1 values(100,1,'abc')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@tlb1
+POSTHOOK: Lineage: tlb1.fkey EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: tlb1.id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: tlb1.val SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+PREHOOK: query: insert into table tlb1 values(200,1,'efg')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@tlb1
+POSTHOOK: query: insert into table tlb1 values(200,1,'efg')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@tlb1
+POSTHOOK: Lineage: tlb1.fkey EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: tlb1.id EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: tlb1.val SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+PREHOOK: query: insert into table tlb2 values(1, 'key1')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__3
+PREHOOK: Output: default@tlb2
+POSTHOOK: query: insert into table tlb2 values(1, 'key1')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__3
+POSTHOOK: Output: default@tlb2
+POSTHOOK: Lineage: tlb2.fid EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: tlb2.name SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: explain
+select ddd.id, ddd.fkey, aaa.name
+from (
+ select id, fkey,
+ row_number() over (partition by id, fkey) as rnum
+ from tlb1 group by id, fkey
+ ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select ddd.id, ddd.fkey, aaa.name
+from (
+ select id, fkey,
+ row_number() over (partition by id, fkey) as rnum
+ from tlb1 group by id, fkey
+ ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: tlb1
+ Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: id (type: int), fkey (type: int)
+ outputColumnNames: id, fkey
+ Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: id (type: int), fkey (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: int, _col1: int
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col0, _col1
+ partition by: _col0, _col1
+ raw input shape:
+ window functions:
+ window function definition
+ alias: row_number_window_0
+ name: row_number
+ window function: GenericUDAFRowNumberEvaluator
+ window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col1 is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ TableScan
+ alias: aaa
+ Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: fid is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: fid (type: int)
+ sort order: +
+ Map-reduce partition columns: fid (type: int)
+ Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ value expressions: name (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 fid (type: int)
+ outputColumnNames: _col0, _col1, _col4
+ Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), _col4 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select ddd.id, ddd.fkey, aaa.name
+from (
+ select id, fkey,
+ row_number() over (partition by id, fkey) as rnum
+ from tlb1 group by id, fkey
+ ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tlb1
+PREHOOK: Input: default@tlb2
+#### A masked pattern was here ####
+POSTHOOK: query: select ddd.id, ddd.fkey, aaa.name
+from (
+ select id, fkey,
+ row_number() over (partition by id, fkey) as rnum
+ from tlb1 group by id, fkey
+ ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tlb1
+POSTHOOK: Input: default@tlb2
+#### A masked pattern was here ####
+200 1 key1
+100 1 key1
+PREHOOK: query: explain
+select ddd.id, ddd.fkey, aaa.name, ddd.rnum
+from (
+ select id, fkey,
+ row_number() over (partition by id, fkey) as rnum
+ from tlb1 group by id, fkey
+ ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select ddd.id, ddd.fkey, aaa.name, ddd.rnum
+from (
+ select id, fkey,
+ row_number() over (partition by id, fkey) as rnum
+ from tlb1 group by id, fkey
+ ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: tlb1
+ Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: id (type: int), fkey (type: int)
+ outputColumnNames: id, fkey
+ Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: id (type: int), fkey (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: int, _col1: int
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col0, _col1
+ partition by: _col0, _col1
+ raw input shape:
+ window functions:
+ window function definition
+ alias: row_number_window_0
+ name: row_number
+ window function: GenericUDAFRowNumberEvaluator
+ window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col1 is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), row_number_window_0 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: int)
+ TableScan
+ alias: aaa
+ Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: fid is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: fid (type: int)
+ sort order: +
+ Map-reduce partition columns: fid (type: int)
+ Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ value expressions: name (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 fid (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col4
+ Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), _col4 (type: string), _col2 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select ddd.id, ddd.fkey, aaa.name, ddd.rnum
+from (
+ select id, fkey,
+ row_number() over (partition by id, fkey) as rnum
+ from tlb1 group by id, fkey
+ ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tlb1
+PREHOOK: Input: default@tlb2
+#### A masked pattern was here ####
+POSTHOOK: query: select ddd.id, ddd.fkey, aaa.name, ddd.rnum
+from (
+ select id, fkey,
+ row_number() over (partition by id, fkey) as rnum
+ from tlb1 group by id, fkey
+ ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tlb1
+POSTHOOK: Input: default@tlb2
+#### A masked pattern was here ####
+200 1 key1 1
+100 1 key1 1
+PREHOOK: query: explain
+select ddd.id, ddd.fkey, aaa.name
+from (
+ select id, fkey,
+ row_number() over (partition by id, fkey) as rnum
+ from tlb1 group by id, fkey
+ ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select ddd.id, ddd.fkey, aaa.name
+from (
+ select id, fkey,
+ row_number() over (partition by id, fkey) as rnum
+ from tlb1 group by id, fkey
+ ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: tlb1
+ Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: id (type: int), fkey (type: int)
+ outputColumnNames: id, fkey
+ Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: id (type: int), fkey (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: int, _col1: int
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col0, _col1
+ partition by: _col0, _col1
+ raw input shape:
+ window functions:
+ window function definition
+ alias: row_number_window_0
+ name: row_number
+ window function: GenericUDAFRowNumberEvaluator
+ window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col1 is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ TableScan
+ alias: aaa
+ Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: fid is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: fid (type: int)
+ sort order: +
+ Map-reduce partition columns: fid (type: int)
+ Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ value expressions: name (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 fid (type: int)
+ outputColumnNames: _col0, _col1, _col4
+ Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), _col4 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select ddd.id, ddd.fkey, aaa.name
+from (
+ select id, fkey,
+ row_number() over (partition by id, fkey) as rnum
+ from tlb1 group by id, fkey
+ ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tlb1
+PREHOOK: Input: default@tlb2
+#### A masked pattern was here ####
+POSTHOOK: query: select ddd.id, ddd.fkey, aaa.name
+from (
+ select id, fkey,
+ row_number() over (partition by id, fkey) as rnum
+ from tlb1 group by id, fkey
+ ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tlb1
+POSTHOOK: Input: default@tlb2
+#### A masked pattern was here ####
+200 1 key1
+100 1 key1
http://git-wip-us.apache.org/repos/asf/hive/blob/308ae90a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
index 9756b0c..4d797f2 100644
--- a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
@@ -6967,22 +6967,25 @@ Stage-0
Map-reduce partition columns:rand() (type: double)
sort order:+++
Statistics:Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
- PTF Operator [PTF_3]
- Function definitions:[{"Input definition":{"type:":"TABLE"}},{"Partition table definition":{"partition by:":"_col2","name:":"noop","order by:":"_col1"}}]
+ Select Operator [SEL_4]
+ outputColumnNames:["_col1","_col2","_col5"]
Statistics:Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator [SEL_2]
- | outputColumnNames:["_col1","_col2","_col5"]
- | Statistics:Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
- |<-Map 1 [SIMPLE_EDGE]
- Reduce Output Operator [RS_1]
- key expressions:p_mfgr (type: string), p_name (type: string)
- Map-reduce partition columns:p_mfgr (type: string)
- sort order:++
- Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions:p_size (type: int)
- TableScan [TS_0]
- alias:part
+ PTF Operator [PTF_3]
+ Function definitions:[{"Input definition":{"type:":"TABLE"}},{"Partition table definition":{"partition by:":"_col2","name:":"noop","order by:":"_col1"}}]
+ Statistics:Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator [SEL_2]
+ | outputColumnNames:["_col1","_col2","_col5"]
+ | Statistics:Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
+ |<-Map 1 [SIMPLE_EDGE]
+ Reduce Output Operator [RS_1]
+ key expressions:p_mfgr (type: string), p_name (type: string)
+ Map-reduce partition columns:p_mfgr (type: string)
+ sort order:++
Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions:p_size (type: int)
+ TableScan [TS_0]
+ alias:part
+ Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE
PREHOOK: query: explain
select abc.*
[02/28] hive git commit: HIVE-11669: OrcFileDump service should support directories (Prasanth Jayachandran reviewed by Sergey Shelukhin)
Posted by se...@apache.org.
HIVE-11669: OrcFileDump service should support directories (Prasanth Jayachandran reviewed by Sergey Shelukhin)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8e712da0
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8e712da0
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8e712da0
Branch: refs/heads/llap
Commit: 8e712da0d8464173e0977b61661bbd00960b08d8
Parents: 2ef40ca
Author: Prasanth Jayachandran <j....@gmail.com>
Authored: Fri Aug 28 13:13:18 2015 -0500
Committer: Prasanth Jayachandran <j....@gmail.com>
Committed: Fri Aug 28 13:13:18 2015 -0500
----------------------------------------------------------------------
.../apache/hadoop/hive/ql/io/orc/FileDump.java | 48 ++++++++++++++++++--
1 file changed, 44 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/8e712da0/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
index 4acb810..76ecb33 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
@@ -22,7 +22,7 @@ import java.io.OutputStreamWriter;
import java.io.PrintStream;
import java.text.DecimalFormat;
import java.util.ArrayList;
-import java.util.Arrays;
+import java.util.Collection;
import java.util.List;
import java.util.Map;
@@ -32,8 +32,10 @@ import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.io.filters.BloomFilterIO;
import org.apache.hadoop.hive.ql.io.orc.OrcProto.RowIndex;
import org.apache.hadoop.hive.ql.io.orc.OrcProto.RowIndexEntry;
@@ -47,6 +49,9 @@ import org.apache.hadoop.io.LongWritable;
import org.codehaus.jettison.json.JSONException;
import org.codehaus.jettison.json.JSONWriter;
+import com.google.common.base.Strings;
+import com.google.common.collect.Lists;
+
/**
* A tool for printing out the file structure of ORC files.
*/
@@ -86,23 +91,55 @@ public final class FileDump {
System.err.println("Error : ORC files are not specified");
return;
}
+
+ // if the specified path is directory, iterate through all files and print the file dump
+ List<String> filesInPath = Lists.newArrayList();
+ for (String filename : files) {
+ Path path = new Path(filename);
+ filesInPath.addAll(getAllFilesInPath(path, conf));
+ }
+
if (dumpData) {
- printData(Arrays.asList(files), conf);
+ printData(filesInPath, conf);
} else {
if (jsonFormat) {
boolean prettyPrint = cli.hasOption('p');
- JsonFileDump.printJsonMetaData(Arrays.asList(files), conf, rowIndexCols, prettyPrint,
+ JsonFileDump.printJsonMetaData(filesInPath, conf, rowIndexCols, prettyPrint,
printTimeZone);
} else {
- printMetaData(Arrays.asList(files), conf, rowIndexCols, printTimeZone);
+ printMetaData(filesInPath, conf, rowIndexCols, printTimeZone);
+ }
+ }
+ }
+
+ private static Collection<? extends String> getAllFilesInPath(final Path path,
+ final Configuration conf) throws IOException {
+ List<String> filesInPath = Lists.newArrayList();
+ FileSystem fs = path.getFileSystem(conf);
+ FileStatus fileStatus = fs.getFileStatus(path);
+ if (fileStatus.isDir()) {
+ FileStatus[] fileStatuses = fs.listStatus(path, AcidUtils.hiddenFileFilter);
+ for (FileStatus fileInPath : fileStatuses) {
+ if (fileInPath.isDir()) {
+ filesInPath.addAll(getAllFilesInPath(fileInPath.getPath(), conf));
+ } else {
+ filesInPath.add(fileInPath.getPath().toString());
+ }
}
+ } else {
+ filesInPath.add(path.toString());
}
+
+ return filesInPath;
}
private static void printData(List<String> files, Configuration conf) throws IOException,
JSONException {
for (String file : files) {
printJsonData(conf, file);
+ if (files.size() > 1) {
+ System.out.println(Strings.repeat("=", 80) + "\n");
+ }
}
}
@@ -204,6 +241,9 @@ public final class FileDump {
System.out.println("Padding length: " + paddedBytes + " bytes");
System.out.println("Padding ratio: " + format.format(percentPadding) + "%");
rows.close();
+ if (files.size() > 1) {
+ System.out.println(Strings.repeat("=", 80) + "\n");
+ }
}
}
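With this change, each command-line argument is expanded through the new getAllFilesInPath helper, which recurses into subdirectories and skips hidden files via AcidUtils.hiddenFileFilter, so the dump now works on whole table or partition directories. A minimal sketch of driving the tool against a directory (the driver class and the warehouse path are assumed examples, not part of the patch):

    import org.apache.hadoop.hive.ql.io.orc.FileDump;

    public class OrcDirDump {
      public static void main(String[] args) throws Exception {
        // After HIVE-11669, a directory argument is walked recursively and
        // every non-hidden file under it is dumped; a "=" separator line is
        // printed between files when more than one is processed.
        FileDump.main(new String[] { "/warehouse/orc_table" });  // assumed path
      }
    }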
[14/28] hive git commit: Added HIVE-11536 to errata
Posted by se...@apache.org.
Added HIVE-11536 to errata
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b22e54ec
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b22e54ec
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b22e54ec
Branch: refs/heads/llap
Commit: b22e54ecec96d1b86fea1b53dae4c8bb68a11f9c
Parents: d597765
Author: Alan Gates <ga...@hortonworks.com>
Authored: Tue Sep 1 09:49:47 2015 -0700
Committer: Alan Gates <ga...@hortonworks.com>
Committed: Tue Sep 1 09:49:47 2015 -0700
----------------------------------------------------------------------
errata.txt | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/b22e54ec/errata.txt
----------------------------------------------------------------------
diff --git a/errata.txt b/errata.txt
index 70992ad..e5a1748 100644
--- a/errata.txt
+++ b/errata.txt
@@ -1,4 +1,4 @@
-Commits with the wrong JIRA referenced:
+Commits with the wrong or no JIRA referenced:
git commit branch jira url
5a576b6fbf1680ab4dd8f275cad484a2614ef2c1 master HIVE-10391 https://issues.apache.org/jira/browse/HIVE-10391
@@ -7,3 +7,4 @@ git commit branch jira url
09100831adff7589ee48e735a4beac6ebb25cb3e master HIVE-10885 https://issues.apache.org/jira/browse/HIVE-10885
f3ab5fda6af57afff31c29ad048d906fd095d5fb branch-1.2 HIVE-10885 https://issues.apache.org/jira/browse/HIVE-10885
dcf21cd6fa98fb5db01ef661bb3b9f94d9ca2d15 master HIVE-10021 https://issues.apache.org/jira/browse/HIVE-10021
+9763c9dd31bd5939db3ca50e75bb97955b411f6d master HIVE-11536 https://issues.apache.org/jira/browse/HIVE-11536
[18/28] hive git commit: HIVE-11440: Create Parquet predicate push down (PPD) unit tests and q-tests (Ferdinand Xu, reviewed by Sergio Pena)
Posted by se...@apache.org.
HIVE-11440: Create Parquet predicate push down (PPD) unit tests and q-tests (Ferdinand Xu, reviewed by Sergio Pena)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f2056a13
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f2056a13
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f2056a13
Branch: refs/heads/llap
Commit: f2056a13e734ed2a00e185c069a85e514bb175b1
Parents: f530f44
Author: Ferdinand Xu <ch...@intel.com>
Authored: Wed Sep 2 00:34:45 2015 -0400
Committer: Ferdinand Xu <ch...@intel.com>
Committed: Wed Sep 2 00:39:21 2015 -0400
----------------------------------------------------------------------
.../clientpositive/parquet_ppd_boolean.q | 35 ++
.../queries/clientpositive/parquet_ppd_char.q | 76 +++
.../queries/clientpositive/parquet_ppd_date.q | 101 ++++
.../clientpositive/parquet_ppd_decimal.q | 163 ++++++
.../clientpositive/parquet_ppd_timestamp.q | 98 ++++
.../clientpositive/parquet_ppd_varchar.q | 76 +++
.../clientpositive/parquet_ppd_boolean.q.out | 200 ++++++++
.../clientpositive/parquet_ppd_char.q.out | 220 +++++++++
.../clientpositive/parquet_ppd_date.q.out | 301 ++++++++++++
.../clientpositive/parquet_ppd_decimal.q.out | 490 +++++++++++++++++++
.../clientpositive/parquet_ppd_timestamp.q.out | 292 +++++++++++
.../clientpositive/parquet_ppd_varchar.q.out | 220 +++++++++
12 files changed, 2272 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/f2056a13/ql/src/test/queries/clientpositive/parquet_ppd_boolean.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/parquet_ppd_boolean.q b/ql/src/test/queries/clientpositive/parquet_ppd_boolean.q
new file mode 100644
index 0000000..05c6c50
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/parquet_ppd_boolean.q
@@ -0,0 +1,35 @@
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SET mapred.min.split.size=1000;
+SET mapred.max.split.size=5000;
+
+create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), b boolean) stored as parquet;
+
+insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, true from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, false from src src2) uniontbl;
+
+SET hive.optimize.ppd=true;
+SET hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where b=true;
+select sum(hash(*)) from newtypestbl where b!=true;
+select sum(hash(*)) from newtypestbl where b<true;
+select sum(hash(*)) from newtypestbl where b>true;
+select sum(hash(*)) from newtypestbl where b<=true;
+
+select sum(hash(*)) from newtypestbl where b=false;
+select sum(hash(*)) from newtypestbl where b!=false;
+select sum(hash(*)) from newtypestbl where b<false;
+select sum(hash(*)) from newtypestbl where b>false;
+select sum(hash(*)) from newtypestbl where b<=false;
+
+
+SET hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where b=true;
+select sum(hash(*)) from newtypestbl where b!=true;
+select sum(hash(*)) from newtypestbl where b<true;
+select sum(hash(*)) from newtypestbl where b>true;
+select sum(hash(*)) from newtypestbl where b<=true;
+
+select sum(hash(*)) from newtypestbl where b=false;
+select sum(hash(*)) from newtypestbl where b!=false;
+select sum(hash(*)) from newtypestbl where b<false;
+select sum(hash(*)) from newtypestbl where b>false;
+select sum(hash(*)) from newtypestbl where b<=false;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/f2056a13/ql/src/test/queries/clientpositive/parquet_ppd_char.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/parquet_ppd_char.q b/ql/src/test/queries/clientpositive/parquet_ppd_char.q
new file mode 100644
index 0000000..b01612c
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/parquet_ppd_char.q
@@ -0,0 +1,76 @@
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SET hive.optimize.ppd=true;
+SET mapred.min.split.size=1000;
+SET mapred.max.split.size=5000;
+
+create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), da date) stored as parquet;
+
+insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2) uniontbl;
+
+set hive.optimize.index.filter=false;
+
+-- char data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests)
+select sum(hash(*)) from newtypestbl where c="apple";
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where c="apple";
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where c!="apple";
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where c!="apple";
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where c<"hello";
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where c<"hello";
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where c<="hello";
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where c<="hello";
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where c="apple ";
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where c="apple ";
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where c in ("apple", "carrot");
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where c in ("apple", "carrot");
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where c in ("apple", "hello");
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where c in ("apple", "hello");
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where c in ("carrot");
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where c in ("carrot");
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where c between "apple" and "carrot";
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where c between "apple" and "carrot";
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where c between "apple" and "zombie";
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where c between "apple" and "zombie";
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where c between "carrot" and "carrot1";
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where c between "carrot" and "carrot1";
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/f2056a13/ql/src/test/queries/clientpositive/parquet_ppd_date.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/parquet_ppd_date.q b/ql/src/test/queries/clientpositive/parquet_ppd_date.q
new file mode 100644
index 0000000..a18a9cf
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/parquet_ppd_date.q
@@ -0,0 +1,101 @@
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SET hive.optimize.ppd=true;
+SET mapred.min.split.size=1000;
+SET mapred.max.split.size=5000;
+
+create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), da date) stored as parquet;
+
+insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2) uniontbl;
+
+-- date data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests)
+select sum(hash(*)) from newtypestbl where da='1970-02-20';
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where da='1970-02-20';
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where da= date '1970-02-20';
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where da=cast('1970-02-20' as date);
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where da=cast('1970-02-20' as date);
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where da=cast('1970-02-20' as varchar(20));
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where da=cast('1970-02-20' as varchar(20));
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where da!='1970-02-20';
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where da!='1970-02-20';
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where da<'1970-02-27';
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where da<'1970-02-27';
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where da<'1970-02-29';
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where da<'1970-02-29';
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where da<'1970-02-15';
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where da<'1970-02-15';
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where da<='1970-02-20';
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where da<='1970-02-20';
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where da<='1970-02-27';
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where da<='1970-02-27';
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where da in (cast('1970-02-21' as date), cast('1970-02-27' as date));
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where da in (cast('1970-02-21' as date), cast('1970-02-27' as date));
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where da in (cast('1970-02-20' as date), cast('1970-02-27' as date));
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where da in (cast('1970-02-20' as date), cast('1970-02-27' as date));
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where da in (cast('1970-02-21' as date), cast('1970-02-22' as date));
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where da in (cast('1970-02-21' as date), cast('1970-02-22' as date));
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where da between '1970-02-19' and '1970-02-22';
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where da between '1970-02-19' and '1970-02-22';
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where da between '1970-02-19' and '1970-02-28';
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where da between '1970-02-19' and '1970-02-28';
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where da between '1970-02-18' and '1970-02-19';
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where da between '1970-02-18' and '1970-02-19';
http://git-wip-us.apache.org/repos/asf/hive/blob/f2056a13/ql/src/test/queries/clientpositive/parquet_ppd_decimal.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/parquet_ppd_decimal.q b/ql/src/test/queries/clientpositive/parquet_ppd_decimal.q
new file mode 100644
index 0000000..679164b
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/parquet_ppd_decimal.q
@@ -0,0 +1,163 @@
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SET mapred.min.split.size=1000;
+SET mapred.max.split.size=5000;
+
+create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), da date) stored as parquet;
+
+insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2) uniontbl;
+
+-- decimal data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests)
+select sum(hash(*)) from newtypestbl where d=0.22;
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d=0.22;
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d='0.22';
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d='0.22';
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d=cast('0.22' as float);
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d=cast('0.22' as float);
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d!=0.22;
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d!=0.22;
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d!='0.22';
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d!='0.22';
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d!=cast('0.22' as float);
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d!=cast('0.22' as float);
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d<11.22;
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d<11.22;
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d<'11.22';
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d<'11.22';
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d<cast('11.22' as float);
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d<cast('11.22' as float);
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d<1;
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d<1;
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d<=11.22;
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d<=11.22;
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d<='11.22';
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d<='11.22';
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d<=cast('11.22' as float);
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d<=cast('11.22' as float);
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d<=cast('11.22' as decimal);
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d<=cast('11.22' as decimal);
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d<=11.22BD;
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d<=11.22BD;
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d<=12;
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d<=12;
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d in ('0.22', '1.0');
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d in ('0.22', '1.0');
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d in ('0.22', '11.22');
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d in ('0.22', '11.22');
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d in ('0.9', '1.0');
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d in ('0.9', '1.0');
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d in ('0.9', 0.22);
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d in ('0.9', 0.22);
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d in ('0.9', 0.22, cast('11.22' as float));
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d in ('0.9', 0.22, cast('11.22' as float));
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d between 0 and 1;
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d between 0 and 1;
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d between 0 and 1000;
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d between 0 and 1000;
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d between 0 and '2.0';
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d between 0 and '2.0';
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d between 0 and cast(3 as float);
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d between 0 and cast(3 as float);
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where d between 1 and cast(30 as char(10));
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where d between 1 and cast(30 as char(10));
http://git-wip-us.apache.org/repos/asf/hive/blob/f2056a13/ql/src/test/queries/clientpositive/parquet_ppd_timestamp.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/parquet_ppd_timestamp.q b/ql/src/test/queries/clientpositive/parquet_ppd_timestamp.q
new file mode 100644
index 0000000..e0802a0
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/parquet_ppd_timestamp.q
@@ -0,0 +1,98 @@
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SET hive.optimize.ppd=true;
+SET mapred.min.split.size=1000;
+SET mapred.max.split.size=5000;
+
+create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), ts timestamp) stored as parquet;
+
+insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("2011-01-01 01:01:01" as timestamp) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("2011-01-20 01:01:01" as timestamp) from src src2) uniontbl;
+
+-- timestamp data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests)
+select sum(hash(*)) from newtypestbl where cast(ts as string)='2011-01-01 01:01:01';
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where cast(ts as string)='2011-01-01 01:01:01';
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where ts=cast('2011-01-01 01:01:01' as timestamp);
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where ts=cast('2011-01-01 01:01:01' as timestamp);
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where ts=cast('2011-01-01 01:01:01' as varchar(20));
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where ts=cast('2011-01-01 01:01:01' as varchar(20));
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where ts!=cast('2011-01-01 01:01:01' as timestamp);
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where ts!=cast('2011-01-01 01:01:01' as timestamp);
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where ts<cast('2011-01-20 01:01:01' as timestamp);
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where ts<cast('2011-01-20 01:01:01' as timestamp);
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where ts<cast('2011-01-22 01:01:01' as timestamp);
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where ts<cast('2011-01-22 01:01:01' as timestamp);
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where ts<cast('2010-10-01 01:01:01' as timestamp);
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where ts<cast('2010-10-01 01:01:01' as timestamp);
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where ts<=cast('2011-01-01 01:01:01' as timestamp);
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where ts<=cast('2011-01-01 01:01:01' as timestamp);
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where ts<=cast('2011-01-20 01:01:01' as timestamp);
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where ts<=cast('2011-01-20 01:01:01' as timestamp);
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-02 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp));
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-02 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp));
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-01 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp));
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-01 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp));
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-02 01:01:01' as timestamp), cast('2011-01-08 01:01:01' as timestamp));
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-02 01:01:01' as timestamp), cast('2011-01-08 01:01:01' as timestamp));
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-08 01:01:01' as timestamp);
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-08 01:01:01' as timestamp);
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-25 01:01:01' as timestamp);
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-25 01:01:01' as timestamp);
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2010-11-01 01:01:01' as timestamp);
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2010-11-01 01:01:01' as timestamp);
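
[Editor's note] A note on the common preamble of these .q files (visible at the top of the diff above): pinning HiveInputFormat and forcing small min/max split sizes breaks even this tiny Parquet data set into several splits, so filter pushdown actually has splits and row groups to eliminate. An annotated copy of the preamble follows; the comments are interpretation, not part of the patch:

SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;  -- input format used by the pushdown path
SET hive.optimize.ppd=true;          -- enable predicate pushdown rewriting
SET mapred.min.split.size=1000;      -- keep splits small so elimination is observable
SET mapred.max.split.size=5000;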
http://git-wip-us.apache.org/repos/asf/hive/blob/f2056a13/ql/src/test/queries/clientpositive/parquet_ppd_varchar.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/parquet_ppd_varchar.q b/ql/src/test/queries/clientpositive/parquet_ppd_varchar.q
new file mode 100644
index 0000000..be50ca2
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/parquet_ppd_varchar.q
@@ -0,0 +1,76 @@
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SET hive.optimize.ppd=true;
+SET mapred.min.split.size=1000;
+SET mapred.max.split.size=5000;
+
+create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), da date) stored as parquet;
+
+insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2) uniontbl;
+
+set hive.optimize.index.filter=false;
+
+-- varchar data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests)
+select sum(hash(*)) from newtypestbl where v="bee";
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where v="bee";
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where v!="bee";
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where v!="bee";
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where v<"world";
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where v<"world";
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where v<="world";
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where v<="world";
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where v="bee ";
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where v="bee ";
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where v in ("bee", "orange");
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where v in ("bee", "orange");
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where v in ("bee", "world");
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where v in ("bee", "world");
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where v in ("orange");
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where v in ("orange");
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where v between "bee" and "orange";
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where v between "bee" and "orange";
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where v between "bee" and "zombie";
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where v between "bee" and "zombie";
+
+set hive.optimize.index.filter=false;
+select sum(hash(*)) from newtypestbl where v between "orange" and "pine";
+
+set hive.optimize.index.filter=true;
+select sum(hash(*)) from newtypestbl where v between "orange" and "pine";
\ No newline at end of file
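
[Editor's note] A side note on the padded-literal case above (v="bee "): varchar keeps trailing spaces significant, so that predicate should select no rows, and the PPD run must reach the same empty result rather than loosely matching "bee". A hedged sketch of the expectation, with a hypothetical table t(v varchar(10)):

-- Trailing space is significant for varchar, so the padded literal matches
-- nothing; sum() over zero rows is NULL. Hypothetical table t.
select sum(hash(*)) from t where v = 'bee ';   -- expected: NULL (zero rows)
select sum(hash(*)) from t where v = 'bee';    -- expected: the non-empty checksum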
http://git-wip-us.apache.org/repos/asf/hive/blob/f2056a13/ql/src/test/results/clientpositive/parquet_ppd_boolean.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/parquet_ppd_boolean.q.out b/ql/src/test/results/clientpositive/parquet_ppd_boolean.q.out
new file mode 100644
index 0000000..78b7aa6
--- /dev/null
+++ b/ql/src/test/results/clientpositive/parquet_ppd_boolean.q.out
@@ -0,0 +1,200 @@
+PREHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), b boolean) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@newtypestbl
+POSTHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), b boolean) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@newtypestbl
+PREHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, true from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, false from src src2) uniontbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@newtypestbl
+POSTHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, true from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, false from src src2) uniontbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@newtypestbl
+POSTHOOK: Lineage: newtypestbl.b EXPRESSION []
+POSTHOOK: Lineage: newtypestbl.c EXPRESSION []
+POSTHOOK: Lineage: newtypestbl.d EXPRESSION []
+POSTHOOK: Lineage: newtypestbl.v EXPRESSION []
+PREHOOK: query: select sum(hash(*)) from newtypestbl where b=true
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where b=true
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951953500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where b!=true
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where b!=true
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427776000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where b<true
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where b<true
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427776000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where b>true
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where b>true
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where b<=true
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where b<=true
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475822500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where b=false
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where b=false
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427776000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where b!=false
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where b!=false
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951953500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where b<false
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where b<false
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where b>false
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where b>false
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951953500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where b<=false
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where b<=false
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427776000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where b=true
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where b=true
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951953500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where b!=true
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where b!=true
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427776000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where b<true
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where b<true
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427776000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where b>true
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where b>true
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where b<=true
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where b<=true
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475822500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where b=false
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where b=false
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427776000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where b!=false
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where b!=false
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951953500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where b<false
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where b<false
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where b>false
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where b>false
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951953500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where b<=false
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where b<=false
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427776000
http://git-wip-us.apache.org/repos/asf/hive/blob/f2056a13/ql/src/test/results/clientpositive/parquet_ppd_char.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/parquet_ppd_char.q.out b/ql/src/test/results/clientpositive/parquet_ppd_char.q.out
new file mode 100644
index 0000000..e62462c
--- /dev/null
+++ b/ql/src/test/results/clientpositive/parquet_ppd_char.q.out
@@ -0,0 +1,220 @@
+PREHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), da date) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@newtypestbl
+POSTHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), da date) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@newtypestbl
+PREHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2) uniontbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@newtypestbl
+POSTHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2) uniontbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@newtypestbl
+POSTHOOK: Lineage: newtypestbl.c EXPRESSION []
+POSTHOOK: Lineage: newtypestbl.d EXPRESSION []
+POSTHOOK: Lineage: newtypestbl.da EXPRESSION []
+POSTHOOK: Lineage: newtypestbl.v EXPRESSION []
+PREHOOK: query: -- char data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests)
+select sum(hash(*)) from newtypestbl where c="apple"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: -- char data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests)
+select sum(hash(*)) from newtypestbl where c="apple"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where c="apple"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where c="apple"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where c!="apple"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where c!="apple"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427804500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where c!="apple"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where c!="apple"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427804500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where c<"hello"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where c<"hello"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where c<"hello"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where c<"hello"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where c<="hello"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where c<="hello"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where c<="hello"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where c<="hello"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where c="apple "
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where c="apple "
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where c="apple "
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where c="apple "
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where c in ("apple", "carrot")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where c in ("apple", "carrot")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where c in ("apple", "carrot")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where c in ("apple", "carrot")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where c in ("apple", "hello")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where c in ("apple", "hello")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where c in ("apple", "hello")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where c in ("apple", "hello")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where c in ("carrot")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where c in ("carrot")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where c in ("carrot")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where c in ("carrot")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where c between "apple" and "carrot"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where c between "apple" and "carrot"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where c between "apple" and "carrot"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where c between "apple" and "carrot"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where c between "apple" and "zombie"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where c between "apple" and "zombie"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where c between "apple" and "zombie"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where c between "apple" and "zombie"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where c between "carrot" and "carrot1"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where c between "carrot" and "carrot1"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where c between "carrot" and "carrot1"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where c between "carrot" and "carrot1"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
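
[Editor's note] Note the c="apple " blocks above: per this golden output, the padded char literal selects no rows in both the unfiltered and the filtered run. The property the test pins down is not the padding semantics themselves but that the two plans agree. Hedged recap, hypothetical table t(c char(10)):

-- As recorded in the golden output above, the padded literal yields an empty
-- result in both runs; both plans must agree. Hypothetical table t.
select sum(hash(*)) from t where c = 'apple ';  -- expected here: NULL in both runs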
http://git-wip-us.apache.org/repos/asf/hive/blob/f2056a13/ql/src/test/results/clientpositive/parquet_ppd_date.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/parquet_ppd_date.q.out b/ql/src/test/results/clientpositive/parquet_ppd_date.q.out
new file mode 100644
index 0000000..aba302e
--- /dev/null
+++ b/ql/src/test/results/clientpositive/parquet_ppd_date.q.out
@@ -0,0 +1,301 @@
+PREHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), da date) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@newtypestbl
+POSTHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), da date) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@newtypestbl
+PREHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2) uniontbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@newtypestbl
+POSTHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2) uniontbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@newtypestbl
+POSTHOOK: Lineage: newtypestbl.c EXPRESSION []
+POSTHOOK: Lineage: newtypestbl.d EXPRESSION []
+POSTHOOK: Lineage: newtypestbl.da EXPRESSION []
+POSTHOOK: Lineage: newtypestbl.v EXPRESSION []
+PREHOOK: query: -- date data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests)
+select sum(hash(*)) from newtypestbl where da='1970-02-20'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: -- date data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests)
+select sum(hash(*)) from newtypestbl where da='1970-02-20'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da='1970-02-20'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da='1970-02-20'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da= date '1970-02-20'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da= date '1970-02-20'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da=cast('1970-02-20' as date)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da=cast('1970-02-20' as date)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da=cast('1970-02-20' as date)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da=cast('1970-02-20' as date)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da=cast('1970-02-20' as varchar(20))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da=cast('1970-02-20' as varchar(20))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da=cast('1970-02-20' as varchar(20))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da=cast('1970-02-20' as varchar(20))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da!='1970-02-20'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da!='1970-02-20'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427804500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da!='1970-02-20'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da!='1970-02-20'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427804500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da<'1970-02-27'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da<'1970-02-27'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da<'1970-02-27'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da<'1970-02-27'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da<'1970-02-29'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da<'1970-02-29'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da<'1970-02-29'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da<'1970-02-29'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da<'1970-02-15'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da<'1970-02-15'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da<'1970-02-15'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da<'1970-02-15'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da<='1970-02-20'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da<='1970-02-20'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da<='1970-02-20'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da<='1970-02-20'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da<='1970-02-27'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da<='1970-02-27'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da<='1970-02-27'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da<='1970-02-27'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da in (cast('1970-02-21' as date), cast('1970-02-27' as date))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da in (cast('1970-02-21' as date), cast('1970-02-27' as date))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427804500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da in (cast('1970-02-21' as date), cast('1970-02-27' as date))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da in (cast('1970-02-21' as date), cast('1970-02-27' as date))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427804500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da in (cast('1970-02-20' as date), cast('1970-02-27' as date))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da in (cast('1970-02-20' as date), cast('1970-02-27' as date))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da in (cast('1970-02-20' as date), cast('1970-02-27' as date))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da in (cast('1970-02-20' as date), cast('1970-02-27' as date))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da in (cast('1970-02-21' as date), cast('1970-02-22' as date))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da in (cast('1970-02-21' as date), cast('1970-02-22' as date))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da in (cast('1970-02-21' as date), cast('1970-02-22' as date))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da in (cast('1970-02-21' as date), cast('1970-02-22' as date))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da between '1970-02-19' and '1970-02-22'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da between '1970-02-19' and '1970-02-22'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da between '1970-02-19' and '1970-02-22'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da between '1970-02-19' and '1970-02-22'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da between '1970-02-19' and '1970-02-28'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da between '1970-02-19' and '1970-02-28'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da between '1970-02-19' and '1970-02-28'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da between '1970-02-19' and '1970-02-28'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da between '1970-02-18' and '1970-02-19'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da between '1970-02-18' and '1970-02-19'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where da between '1970-02-18' and '1970-02-19'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where da between '1970-02-18' and '1970-02-19'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
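
[Editor's note] The date block above exercises three spellings of the same literal, and all three must prune to the identical checksum (-252951929000 on this data set), confirming the pushdown leaf is built the same way regardless of literal form. A hedged recap with a hypothetical table t(da date):

-- Three equivalent date literals; PPD must treat them identically.
select sum(hash(*)) from t where da = '1970-02-20';
select sum(hash(*)) from t where da = date '1970-02-20';
select sum(hash(*)) from t where da = cast('1970-02-20' as date);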
http://git-wip-us.apache.org/repos/asf/hive/blob/f2056a13/ql/src/test/results/clientpositive/parquet_ppd_decimal.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/parquet_ppd_decimal.q.out b/ql/src/test/results/clientpositive/parquet_ppd_decimal.q.out
new file mode 100644
index 0000000..9e48df8
--- /dev/null
+++ b/ql/src/test/results/clientpositive/parquet_ppd_decimal.q.out
@@ -0,0 +1,490 @@
+PREHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), da date) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@newtypestbl
+POSTHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), da date) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@newtypestbl
+PREHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2) uniontbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@newtypestbl
+POSTHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2) uniontbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@newtypestbl
+POSTHOOK: Lineage: newtypestbl.c EXPRESSION []
+POSTHOOK: Lineage: newtypestbl.d EXPRESSION []
+POSTHOOK: Lineage: newtypestbl.da EXPRESSION []
+POSTHOOK: Lineage: newtypestbl.v EXPRESSION []
+PREHOOK: query: -- decimal data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests)
+select sum(hash(*)) from newtypestbl where d=0.22
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: -- decimal data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests)
+select sum(hash(*)) from newtypestbl where d=0.22
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d=0.22
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d=0.22
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d='0.22'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d='0.22'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d='0.22'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d='0.22'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d=cast('0.22' as float)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d=cast('0.22' as float)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d=cast('0.22' as float)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d=cast('0.22' as float)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d!=0.22
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d!=0.22
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427804500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d!=0.22
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d!=0.22
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427804500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d!='0.22'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d!='0.22'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427804500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d!='0.22'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d!='0.22'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427804500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d!=cast('0.22' as float)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d!=cast('0.22' as float)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427804500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d!=cast('0.22' as float)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d!=cast('0.22' as float)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427804500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d<11.22
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d<11.22
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d<11.22
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d<11.22
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d<'11.22'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d<'11.22'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d<'11.22'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d<'11.22'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d<cast('11.22' as float)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d<cast('11.22' as float)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d<cast('11.22' as float)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d<cast('11.22' as float)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d<1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d<1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d<1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d<1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d<=11.22
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d<=11.22
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d<=11.22
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d<=11.22
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d<='11.22'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d<='11.22'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d<='11.22'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d<='11.22'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d<=cast('11.22' as float)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d<=cast('11.22' as float)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d<=cast('11.22' as float)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d<=cast('11.22' as float)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d<=cast('11.22' as decimal)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d<=cast('11.22' as decimal)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d<=cast('11.22' as decimal)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d<=cast('11.22' as decimal)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d<=11.22BD
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d<=11.22BD
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d<=11.22BD
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d<=11.22BD
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d<=12
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d<=12
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d<=12
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d<=12
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d in ('0.22', '1.0')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d in ('0.22', '1.0')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d in ('0.22', '1.0')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d in ('0.22', '1.0')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d in ('0.22', '11.22')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d in ('0.22', '11.22')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d in ('0.22', '11.22')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d in ('0.22', '11.22')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d in ('0.9', '1.0')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d in ('0.9', '1.0')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d in ('0.9', '1.0')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d in ('0.9', '1.0')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d in ('0.9', 0.22)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d in ('0.9', 0.22)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d in ('0.9', 0.22)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d in ('0.9', 0.22)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d in ('0.9', 0.22, cast('11.22' as float))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d in ('0.9', 0.22, cast('11.22' as float))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d in ('0.9', 0.22, cast('11.22' as float))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d in ('0.9', 0.22, cast('11.22' as float))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d between 0 and 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d between 0 and 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d between 0 and 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d between 0 and 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d between 0 and 1000
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d between 0 and 1000
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d between 0 and 1000
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d between 0 and 1000
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+81475875500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d between 0 and '2.0'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d between 0 and '2.0'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d between 0 and '2.0'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d between 0 and '2.0'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d between 0 and cast(3 as float)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d between 0 and cast(3 as float)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d between 0 and cast(3 as float)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d between 0 and cast(3 as float)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+-252951929000
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d between 1 and cast(30 as char(10))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d between 1 and cast(30 as char(10))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427804500
+PREHOOK: query: select sum(hash(*)) from newtypestbl where d between 1 and cast(30 as char(10))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(hash(*)) from newtypestbl where d between 1 and cast(30 as char(10))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtypestbl
+#### A masked pattern was here ####
+334427804500
[15/28] hive git commit: HIVE-11536 follow up; it seems the earlier commit had strange line endings
Posted by se...@apache.org.
HIVE-11536 follow up; it seems the earlier commit had strange line endings
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c0690a69
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c0690a69
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c0690a69
Branch: refs/heads/llap
Commit: c0690a69dcd4976435f8b33084f9b9b3c0c16889
Parents: b22e54e
Author: Alan Gates <ga...@hortonworks.com>
Authored: Tue Sep 1 09:57:24 2015 -0700
Committer: Alan Gates <ga...@hortonworks.com>
Committed: Tue Sep 1 09:57:24 2015 -0700
----------------------------------------------------------------------
.../src/test/queries/db/rowtype_attribute.sql | 42 ++++----
hplsql/src/test/queries/db/type_attribute.sql | 14 +--
.../local/create_procedure_no_params.sql | 36 +++----
.../test/queries/offline/create_table_ora.sql | 104 +++++++++----------
4 files changed, 98 insertions(+), 98 deletions(-)
----------------------------------------------------------------------
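Every hunk below removes and re-adds each line unchanged, which is the telltale shape of a line-ending normalization (most likely CRLF rewritten as LF, matching the commit message) rather than a content change. A hedged Java one-off for spotting such files; the path is just a placeholder for any of the four files in this commit:

    import java.nio.charset.StandardCharsets;
    import java.nio.file.Files;
    import java.nio.file.Paths;

    class CrlfCheck {
      public static void main(String[] args) throws Exception {
        // Hypothetical path; substitute any file you want to inspect.
        byte[] bytes = Files.readAllBytes(
            Paths.get("hplsql/src/test/queries/db/type_attribute.sql"));
        // ISO_8859_1 decodes every byte sequence, so this never throws.
        boolean hasCrLf = new String(bytes, StandardCharsets.ISO_8859_1).contains("\r\n");
        System.out.println(hasCrLf ? "CRLF endings present" : "LF only");
      }
    }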
http://git-wip-us.apache.org/repos/asf/hive/blob/c0690a69/hplsql/src/test/queries/db/rowtype_attribute.sql
----------------------------------------------------------------------
diff --git a/hplsql/src/test/queries/db/rowtype_attribute.sql b/hplsql/src/test/queries/db/rowtype_attribute.sql
index 6a84b57..2fc65ce 100644
--- a/hplsql/src/test/queries/db/rowtype_attribute.sql
+++ b/hplsql/src/test/queries/db/rowtype_attribute.sql
@@ -1,22 +1,22 @@
-DECLARE
- v1 default.src%ROWTYPE;
- v2 src %ROWTYPE;
- v3 src % ROWTYPE;
- CURSOR c1 IS SELECT 'A' AS key, 'B' AS value FROM src LIMIT 1;
-BEGIN
- SELECT 'A' AS key, 'B' AS value INTO v1 FROM src LIMIT 1;
- PRINT v1.key || v1.value;
-
- OPEN c1;
- FETCH c1 INTO v2;
- PRINT v2.key || v2.value;
- CLOSE c1;
-
- FOR rec IN (SELECT 'A' AS key, 'B' AS value FROM src LIMIT 1)
- LOOP
- PRINT rec.key || rec.value;
- END LOOP;
-
- EXECUTE IMMEDIATE 'SELECT ''A'' AS key, ''B'' AS value FROM src LIMIT 1' INTO v3;
- PRINT v3.key || v3.value;
+DECLARE
+ v1 default.src%ROWTYPE;
+ v2 src %ROWTYPE;
+ v3 src % ROWTYPE;
+ CURSOR c1 IS SELECT 'A' AS key, 'B' AS value FROM src LIMIT 1;
+BEGIN
+ SELECT 'A' AS key, 'B' AS value INTO v1 FROM src LIMIT 1;
+ PRINT v1.key || v1.value;
+
+ OPEN c1;
+ FETCH c1 INTO v2;
+ PRINT v2.key || v2.value;
+ CLOSE c1;
+
+ FOR rec IN (SELECT 'A' AS key, 'B' AS value FROM src LIMIT 1)
+ LOOP
+ PRINT rec.key || rec.value;
+ END LOOP;
+
+ EXECUTE IMMEDIATE 'SELECT ''A'' AS key, ''B'' AS value FROM src LIMIT 1' INTO v3;
+ PRINT v3.key || v3.value;
END
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/c0690a69/hplsql/src/test/queries/db/type_attribute.sql
----------------------------------------------------------------------
diff --git a/hplsql/src/test/queries/db/type_attribute.sql b/hplsql/src/test/queries/db/type_attribute.sql
index 2d93bfd..059c291 100644
--- a/hplsql/src/test/queries/db/type_attribute.sql
+++ b/hplsql/src/test/queries/db/type_attribute.sql
@@ -1,8 +1,8 @@
-DECLARE
- v1 default.src.key%TYPE;
- v2 src.Key %TYPE;
- v3 src.key3 % TYPE;
-BEGIN
- SELECT 'A', 'B', 1 INTO v1, v2, v3 FROM src LIMIT 1;
- PRINT v1 || v2 || v3;
+DECLARE
+ v1 default.src.key%TYPE;
+ v2 src.Key %TYPE;
+ v3 src.key3 % TYPE;
+BEGIN
+ SELECT 'A', 'B', 1 INTO v1, v2, v3 FROM src LIMIT 1;
+ PRINT v1 || v2 || v3;
END
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/c0690a69/hplsql/src/test/queries/local/create_procedure_no_params.sql
----------------------------------------------------------------------
diff --git a/hplsql/src/test/queries/local/create_procedure_no_params.sql b/hplsql/src/test/queries/local/create_procedure_no_params.sql
index 535ba98..c4c2992 100644
--- a/hplsql/src/test/queries/local/create_procedure_no_params.sql
+++ b/hplsql/src/test/queries/local/create_procedure_no_params.sql
@@ -1,19 +1,19 @@
-create procedure sp1
-begin
- print 'a';
-end;
-
-create procedure sp2()
-begin
- print 'b';
-end;
-
-call sp1;
-call sp1();
-sp1;
-sp1();
-
-call sp2;
-call sp2();
-sp2;
+create procedure sp1
+begin
+ print 'a';
+end;
+
+create procedure sp2()
+begin
+ print 'b';
+end;
+
+call sp1;
+call sp1();
+sp1;
+sp1();
+
+call sp2;
+call sp2();
+sp2;
sp2();
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/c0690a69/hplsql/src/test/queries/offline/create_table_ora.sql
----------------------------------------------------------------------
diff --git a/hplsql/src/test/queries/offline/create_table_ora.sql b/hplsql/src/test/queries/offline/create_table_ora.sql
index bb1d9c7..40a543a 100644
--- a/hplsql/src/test/queries/offline/create_table_ora.sql
+++ b/hplsql/src/test/queries/offline/create_table_ora.sql
@@ -1,53 +1,53 @@
-CREATE TABLE ora_t1 (
- n1 NUMBER(3,0),
- v1 VARCHAR2(10)
-);
-
-CREATE TABLE "USER"."EMP"
- ( "EMPNO" NUMBER(4,0),
- "ENAME" VARCHAR2(10 BYTE),
- "JOB" VARCHAR2(9 BYTE),
- "MGR" NUMBER(4,0),
- "HIREDATE" DATE,
- "SAL" NUMBER(7,2),
- "COMM" NUMBER(7,2),
- "DEPTNO" NUMBER(2,0)
- ) SEGMENT CREATION IMMEDIATE
- PCTFREE 10 PCTUSED 40 INITRANS 1 MAXTRANS 255 NOCOMPRESS LOGGING
- STORAGE(INITIAL 65536 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645
- PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL DEFAULT FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT)
- TABLESPACE "USERS" ;
-
-CREATE TABLE language (
- id NUMBER(7) NOT NULL PRIMARY KEY,
- cd CHAR(2) NOT NULL,
- description VARCHAR2(50)
-);
-CREATE TABLE author (
- id NUMBER(7) NOT NULL PRIMARY KEY,
- first_name VARCHAR2(50),
- last_name VARCHAR2(50) NOT NULL,
- date_of_birth DATE,
- year_of_birth NUMBER(7),
- distinguished NUMBER(1)
-);
-CREATE TABLE book (
- id NUMBER(7) NOT NULL PRIMARY KEY,
- author_id NUMBER(7) NOT NULL,
- title VARCHAR2(400) NOT NULL,
- published_in NUMBER(7) NOT NULL,
- language_id NUMBER(7) NOT NULL,
- CONSTRAINT fk_book_author FOREIGN KEY (author_id) REFERENCES author(id),
- CONSTRAINT fk_book_language FOREIGN KEY (language_id) REFERENCES language(id)
-);
-CREATE TABLE book_store (
- name VARCHAR2(400) NOT NULL UNIQUE
-);
-CREATE TABLE book_to_book_store (
- name VARCHAR2(400) NOT NULL,
- book_id INTEGER NOT NULL,
- stock INTEGER,
- PRIMARY KEY(name, book_id),
- CONSTRAINT fk_b2bs_book_store FOREIGN KEY (name) REFERENCES book_store (name) ON DELETE CASCADE,
- CONSTRAINT fk_b2bs_book FOREIGN KEY (book_id) REFERENCES book (id) ON DELETE CASCADE
+CREATE TABLE ora_t1 (
+ n1 NUMBER(3,0),
+ v1 VARCHAR2(10)
+);
+
+CREATE TABLE "USER"."EMP"
+ ( "EMPNO" NUMBER(4,0),
+ "ENAME" VARCHAR2(10 BYTE),
+ "JOB" VARCHAR2(9 BYTE),
+ "MGR" NUMBER(4,0),
+ "HIREDATE" DATE,
+ "SAL" NUMBER(7,2),
+ "COMM" NUMBER(7,2),
+ "DEPTNO" NUMBER(2,0)
+ ) SEGMENT CREATION IMMEDIATE
+ PCTFREE 10 PCTUSED 40 INITRANS 1 MAXTRANS 255 NOCOMPRESS LOGGING
+ STORAGE(INITIAL 65536 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645
+ PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL DEFAULT FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT)
+ TABLESPACE "USERS" ;
+
+CREATE TABLE language (
+ id NUMBER(7) NOT NULL PRIMARY KEY,
+ cd CHAR(2) NOT NULL,
+ description VARCHAR2(50)
+);
+CREATE TABLE author (
+ id NUMBER(7) NOT NULL PRIMARY KEY,
+ first_name VARCHAR2(50),
+ last_name VARCHAR2(50) NOT NULL,
+ date_of_birth DATE,
+ year_of_birth NUMBER(7),
+ distinguished NUMBER(1)
+);
+CREATE TABLE book (
+ id NUMBER(7) NOT NULL PRIMARY KEY,
+ author_id NUMBER(7) NOT NULL,
+ title VARCHAR2(400) NOT NULL,
+ published_in NUMBER(7) NOT NULL,
+ language_id NUMBER(7) NOT NULL,
+ CONSTRAINT fk_book_author FOREIGN KEY (author_id) REFERENCES author(id),
+ CONSTRAINT fk_book_language FOREIGN KEY (language_id) REFERENCES language(id)
+);
+CREATE TABLE book_store (
+ name VARCHAR2(400) NOT NULL UNIQUE
+);
+CREATE TABLE book_to_book_store (
+ name VARCHAR2(400) NOT NULL,
+ book_id INTEGER NOT NULL,
+ stock INTEGER,
+ PRIMARY KEY(name, book_id),
+ CONSTRAINT fk_b2bs_book_store FOREIGN KEY (name) REFERENCES book_store (name) ON DELETE CASCADE,
+ CONSTRAINT fk_b2bs_book FOREIGN KEY (book_id) REFERENCES book (id) ON DELETE CASCADE
);
\ No newline at end of file
[08/28] hive git commit: HIVE-11629: CBO: Calcite Operator To Hive Operator (Calcite Return Path): fix the filter expressions for full outer join and right outer join (Pengcheng Xiong, reviewed by Jesus Camacho Rodriguez)
Posted by se...@apache.org.
HIVE-11629: CBO: Calcite Operator To Hive Operator (Calcite Return Path): fix the filter expressions for full outer join and right outer join (Pengcheng Xiong, reviewed by Jesus Camacho Rodriguez)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5aa16ecb
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5aa16ecb
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5aa16ecb
Branch: refs/heads/llap
Commit: 5aa16ecb3aadbeb1770ae08f1f1d476503cbbb6e
Parents: dcf21cd
Author: Pengcheng Xiong <px...@apache.org>
Authored: Sat Aug 29 23:48:09 2015 -0700
Committer: Pengcheng Xiong <px...@apache.org>
Committed: Sat Aug 29 23:48:09 2015 -0700
----------------------------------------------------------------------
.../calcite/translator/HiveOpConverter.java | 22 +-
.../clientpositive/cbo_rp_outer_join_ppr.q | 40 +
.../cbo_rp_outer_join_ppr.q.java1.7.out | 855 +++++++++++++++++++
3 files changed, 914 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/5aa16ecb/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
index 1931880..9391952 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
@@ -995,7 +995,7 @@ public class HiveOpConverter {
* to be expressed that way.
*/
private static int updateExprNode(ExprNodeDesc expr, final Map<String, Byte> reversedExprs,
- final Map<String, ExprNodeDesc> colExprMap) {
+ final Map<String, ExprNodeDesc> colExprMap) throws SemanticException {
int inputPos = -1;
if (expr instanceof ExprNodeGenericFuncDesc) {
ExprNodeGenericFuncDesc func = (ExprNodeGenericFuncDesc) expr;
@@ -1003,10 +1003,26 @@ public class HiveOpConverter {
for (ExprNodeDesc functionChild : func.getChildren()) {
if (functionChild instanceof ExprNodeColumnDesc) {
String colRef = functionChild.getExprString();
- inputPos = reversedExprs.get(colRef);
+ int pos = reversedExprs.get(colRef);
+ if (pos != -1) {
+ if (inputPos == -1) {
+ inputPos = pos;
+ } else if (inputPos != pos) {
+ throw new SemanticException(
+ "UpdateExprNode is expecting only one position for join operator convert. But there are more than one.");
+ }
+ }
newChildren.add(colExprMap.get(colRef));
} else {
- inputPos = updateExprNode(functionChild, reversedExprs, colExprMap);
+ int pos = updateExprNode(functionChild, reversedExprs, colExprMap);
+ if (pos != -1) {
+ if (inputPos == -1) {
+ inputPos = pos;
+ } else if (inputPos != pos) {
+ throw new SemanticException(
+ "UpdateExprNode is expecting only one position for join operator convert. But there are more than one.");
+ }
+ }
newChildren.add(functionChild);
}
}
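The two new blocks above share one rule: a function over join columns may reference at most one join input, so the recursion folds child positions together and fails on a conflict. A minimal sketch of that folding rule, not the committed code:

    // -1 means "no join input referenced yet"; merging two different
    // non-negative positions is the error case the commit now reports.
    static int mergeInputPos(int current, int candidate) throws SemanticException {
      if (candidate == -1) {
        return current;            // child touched no join input
      }
      if (current == -1 || current == candidate) {
        return candidate;          // first position seen, or consistent with it
      }
      throw new SemanticException(
          "UpdateExprNode is expecting only one position for join operator convert."
              + " But there are more than one.");
    }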
http://git-wip-us.apache.org/repos/asf/hive/blob/5aa16ecb/ql/src/test/queries/clientpositive/cbo_rp_outer_join_ppr.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/cbo_rp_outer_join_ppr.q b/ql/src/test/queries/clientpositive/cbo_rp_outer_join_ppr.q
new file mode 100644
index 0000000..8daf718
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/cbo_rp_outer_join_ppr.q
@@ -0,0 +1,40 @@
+set hive.cbo.enable=true;
+set hive.cbo.returnpath.hiveop=true;
+set hive.optimize.ppd=true;
+
+-- SORT_QUERY_RESULTS
+-- JAVA_VERSION_SPECIFIC_OUTPUT
+
+EXPLAIN EXTENDED
+ FROM
+ src a
+ FULL OUTER JOIN
+ srcpart b
+ ON (a.key = b.key AND b.ds = '2008-04-08')
+ SELECT a.key, a.value, b.key, b.value
+ WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25;
+
+ FROM
+ src a
+ FULL OUTER JOIN
+ srcpart b
+ ON (a.key = b.key AND b.ds = '2008-04-08')
+ SELECT a.key, a.value, b.key, b.value
+ WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25;
+
+EXPLAIN EXTENDED
+ FROM
+ src a
+ FULL OUTER JOIN
+ srcpart b
+ ON (a.key = b.key)
+ SELECT a.key, a.value, b.key, b.value
+ WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08';
+
+ FROM
+ src a
+ FULL OUTER JOIN
+ srcpart b
+ ON (a.key = b.key)
+ SELECT a.key, a.value, b.key, b.value
+ WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08';
http://git-wip-us.apache.org/repos/asf/hive/blob/5aa16ecb/ql/src/test/results/clientpositive/cbo_rp_outer_join_ppr.q.java1.7.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/cbo_rp_outer_join_ppr.q.java1.7.out b/ql/src/test/results/clientpositive/cbo_rp_outer_join_ppr.q.java1.7.out
new file mode 100644
index 0000000..c19b47a
--- /dev/null
+++ b/ql/src/test/results/clientpositive/cbo_rp_outer_join_ppr.q.java1.7.out
@@ -0,0 +1,855 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+-- JAVA_VERSION_SPECIFIC_OUTPUT
+
+EXPLAIN EXTENDED
+ FROM
+ src a
+ FULL OUTER JOIN
+ srcpart b
+ ON (a.key = b.key AND b.ds = '2008-04-08')
+ SELECT a.key, a.value, b.key, b.value
+ WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25
+PREHOOK: type: QUERY
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+-- JAVA_VERSION_SPECIFIC_OUTPUT
+
+EXPLAIN EXTENDED
+ FROM
+ src a
+ FULL OUTER JOIN
+ srcpart b
+ ON (a.key = b.key AND b.ds = '2008-04-08')
+ SELECT a.key, a.value, b.key, b.value
+ WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+ TOK_FROM
+ TOK_FULLOUTERJOIN
+ TOK_TABREF
+ TOK_TABNAME
+ src
+ a
+ TOK_TABREF
+ TOK_TABNAME
+ srcpart
+ b
+ AND
+ =
+ .
+ TOK_TABLE_OR_COL
+ a
+ key
+ .
+ TOK_TABLE_OR_COL
+ b
+ key
+ =
+ .
+ TOK_TABLE_OR_COL
+ b
+ ds
+ '2008-04-08'
+ TOK_INSERT
+ TOK_DESTINATION
+ TOK_DIR
+ TOK_TMP_FILE
+ TOK_SELECT
+ TOK_SELEXPR
+ .
+ TOK_TABLE_OR_COL
+ a
+ key
+ TOK_SELEXPR
+ .
+ TOK_TABLE_OR_COL
+ a
+ value
+ TOK_SELEXPR
+ .
+ TOK_TABLE_OR_COL
+ b
+ key
+ TOK_SELEXPR
+ .
+ TOK_TABLE_OR_COL
+ b
+ value
+ TOK_WHERE
+ AND
+ AND
+ AND
+ >
+ .
+ TOK_TABLE_OR_COL
+ a
+ key
+ 10
+ <
+ .
+ TOK_TABLE_OR_COL
+ a
+ key
+ 20
+ >
+ .
+ TOK_TABLE_OR_COL
+ b
+ key
+ 15
+ <
+ .
+ TOK_TABLE_OR_COL
+ b
+ key
+ 25
+
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ value expressions: value (type: string)
+ auto parallelism: false
+ TableScan
+ alias: b
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Select Operator
+ expressions: key (type: string), value (type: string), ds (type: string)
+ outputColumnNames: key, value, ds
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ value expressions: value (type: string), ds (type: string)
+ auto parallelism: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: src
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.src
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ serialization.ddl struct src { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.src
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ serialization.ddl struct src { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.src
+ name: default.src
+#### A masked pattern was here ####
+ Partition
+ base file name: hr=11
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ hr 11
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ numFiles 1
+ numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
+ rawDataSize 5312
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.srcpart
+ name: default.srcpart
+#### A masked pattern was here ####
+ Partition
+ base file name: hr=12
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ hr 12
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ numFiles 1
+ numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
+ rawDataSize 5312
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.srcpart
+ name: default.srcpart
+#### A masked pattern was here ####
+ Partition
+ base file name: hr=11
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-09
+ hr 11
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ numFiles 1
+ numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
+ rawDataSize 5312
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.srcpart
+ name: default.srcpart
+#### A masked pattern was here ####
+ Partition
+ base file name: hr=12
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-09
+ hr 12
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ numFiles 1
+ numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
+ rawDataSize 5312
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.srcpart
+ name: default.srcpart
+ Truncated Path -> Alias:
+ /src [a]
+ /srcpart/ds=2008-04-08/hr=11 [b]
+ /srcpart/ds=2008-04-08/hr=12 [b]
+ /srcpart/ds=2008-04-09/hr=11 [b]
+ /srcpart/ds=2008-04-09/hr=12 [b]
+ Needs Tagging: true
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Outer Join 0 to 1
+ filter mappings:
+ 1 [0, 1]
+ filter predicates:
+ 0
+ 1 {(VALUE.ds = '2008-04-08')}
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ outputColumnNames: key, value, key0, value0
+ Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) and (UDFToDouble(key0) > 15.0)) and (UDFToDouble(key0) < 25.0)) (type: boolean)
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string), key0 (type: string), value0 (type: string)
+ outputColumnNames: key, value, key0, value0
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns key,value,key0,value0
+ columns.types string:string:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: FROM
+ src a
+ FULL OUTER JOIN
+ srcpart b
+ ON (a.key = b.key AND b.ds = '2008-04-08')
+ SELECT a.key, a.value, b.key, b.value
+ WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+POSTHOOK: query: FROM
+ src a
+ FULL OUTER JOIN
+ srcpart b
+ ON (a.key = b.key AND b.ds = '2008-04-08')
+ SELECT a.key, a.value, b.key, b.value
+ WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+17 val_17 17 val_17
+17 val_17 17 val_17
+18 val_18 18 val_18
+18 val_18 18 val_18
+18 val_18 18 val_18
+18 val_18 18 val_18
+18 val_18 18 val_18
+18 val_18 18 val_18
+18 val_18 18 val_18
+18 val_18 18 val_18
+19 val_19 19 val_19
+19 val_19 19 val_19
+PREHOOK: query: EXPLAIN EXTENDED
+ FROM
+ src a
+ FULL OUTER JOIN
+ srcpart b
+ ON (a.key = b.key)
+ SELECT a.key, a.value, b.key, b.value
+ WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08'
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN EXTENDED
+ FROM
+ src a
+ FULL OUTER JOIN
+ srcpart b
+ ON (a.key = b.key)
+ SELECT a.key, a.value, b.key, b.value
+ WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08'
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+ TOK_FROM
+ TOK_FULLOUTERJOIN
+ TOK_TABREF
+ TOK_TABNAME
+ src
+ a
+ TOK_TABREF
+ TOK_TABNAME
+ srcpart
+ b
+ =
+ .
+ TOK_TABLE_OR_COL
+ a
+ key
+ .
+ TOK_TABLE_OR_COL
+ b
+ key
+ TOK_INSERT
+ TOK_DESTINATION
+ TOK_DIR
+ TOK_TMP_FILE
+ TOK_SELECT
+ TOK_SELEXPR
+ .
+ TOK_TABLE_OR_COL
+ a
+ key
+ TOK_SELEXPR
+ .
+ TOK_TABLE_OR_COL
+ a
+ value
+ TOK_SELEXPR
+ .
+ TOK_TABLE_OR_COL
+ b
+ key
+ TOK_SELEXPR
+ .
+ TOK_TABLE_OR_COL
+ b
+ value
+ TOK_WHERE
+ AND
+ AND
+ AND
+ AND
+ >
+ .
+ TOK_TABLE_OR_COL
+ a
+ key
+ 10
+ <
+ .
+ TOK_TABLE_OR_COL
+ a
+ key
+ 20
+ >
+ .
+ TOK_TABLE_OR_COL
+ b
+ key
+ 15
+ <
+ .
+ TOK_TABLE_OR_COL
+ b
+ key
+ 25
+ =
+ .
+ TOK_TABLE_OR_COL
+ b
+ ds
+ '2008-04-08'
+
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ value expressions: value (type: string)
+ auto parallelism: false
+ TableScan
+ alias: b
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
+ Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ value expressions: value (type: string)
+ auto parallelism: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: src
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.src
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ serialization.ddl struct src { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.src
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ serialization.ddl struct src { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.src
+ name: default.src
+#### A masked pattern was here ####
+ Partition
+ base file name: hr=11
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ hr 11
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ numFiles 1
+ numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
+ rawDataSize 5312
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.srcpart
+ name: default.srcpart
+#### A masked pattern was here ####
+ Partition
+ base file name: hr=12
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ hr 12
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ numFiles 1
+ numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
+ rawDataSize 5312
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.srcpart
+ name: default.srcpart
+ Truncated Path -> Alias:
+ /src [a]
+ /srcpart/ds=2008-04-08/hr=11 [b]
+ /srcpart/ds=2008-04-08/hr=12 [b]
+ Needs Tagging: true
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Right Outer Join0 to 1
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ outputColumnNames: key, value, key0, value0
+ Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
+ Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string), key0 (type: string), value0 (type: string)
+ outputColumnNames: key, value, key0, value0
+ Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns key,value,key0,value0
+ columns.types string:string:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: FROM
+ src a
+ FULL OUTER JOIN
+ srcpart b
+ ON (a.key = b.key)
+ SELECT a.key, a.value, b.key, b.value
+ WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+#### A masked pattern was here ####
+POSTHOOK: query: FROM
+ src a
+ FULL OUTER JOIN
+ srcpart b
+ ON (a.key = b.key)
+ SELECT a.key, a.value, b.key, b.value
+ WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+#### A masked pattern was here ####
+17 val_17 17 val_17
+17 val_17 17 val_17
+18 val_18 18 val_18
+18 val_18 18 val_18
+18 val_18 18 val_18
+18 val_18 18 val_18
+18 val_18 18 val_18
+18 val_18 18 val_18
+18 val_18 18 val_18
+18 val_18 18 val_18
+19 val_19 19 val_19
+19 val_19 19 val_19
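Read side by side, the two plan dumps above illustrate the fix: with b.ds = '2008-04-08' in the ON clause of the FULL OUTER JOIN, no partition can be pruned, so all four srcpart partitions appear under Truncated Path -> Alias and the predicate survives only as a residual entry under "filter predicates" on the Join Operator; with the same predicate in the WHERE clause, pruning keeps just the two 2008-04-08 partitions and the join is rewritten as a Right Outer Join. The row results match because the WHERE conditions on a.key and b.key discard every NULL-padded outer row anyway.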
[03/28] hive git commit: HIVE-11357 ACID enable predicate pushdown for insert-only delta file 2 (Eugene Koifman, reviewed by Alan Gates)
Posted by se...@apache.org.
HIVE-11357 ACID enable predicate pushdown for insert-only delta file 2 (Eugene Koifman, reviewed by Alan Gates)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ed4517cf
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ed4517cf
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ed4517cf
Branch: refs/heads/llap
Commit: ed4517cfb14b90d03f3cf33d653827bec90bcb98
Parents: 8e712da
Author: Eugene Koifman <ek...@hortonworks.com>
Authored: Fri Aug 28 12:19:32 2015 -0700
Committer: Eugene Koifman <ek...@hortonworks.com>
Committed: Fri Aug 28 12:19:32 2015 -0700
----------------------------------------------------------------------
.../hadoop/hive/ql/io/orc/OrcInputFormat.java | 18 ++++-
.../apache/hadoop/hive/ql/TestTxnCommands2.java | 85 ++++++++++++++++----
2 files changed, 88 insertions(+), 15 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/ed4517cf/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index fd6d2ad..8c138b9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -127,7 +127,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
/**
* When picking the hosts for a split that crosses block boundaries,
- * any drop any host that has fewer than MIN_INCLUDED_LOCATION of the
+ * drop any host that has fewer than MIN_INCLUDED_LOCATION of the
* number of bytes available on the host with the most.
* If host1 has 10MB of the split, host2 has 20MB, and host3 has 18MB the
* split will contain host2 (100% of host2) and host3 (90% of host2). Host1
@@ -1283,6 +1283,22 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
} else {
bucket = (int) split.getStart();
reader = null;
+ if(deltas != null && deltas.length > 0) {
+ Path bucketPath = AcidUtils.createBucketFile(deltas[0], bucket);
+ OrcFile.ReaderOptions readerOptions = OrcFile.readerOptions(conf);
+ FileSystem fs = readerOptions.getFilesystem();
+ if(fs == null) {
+ fs = path.getFileSystem(options.getConfiguration());
+ }
+ if(fs.exists(bucketPath)) {
+ /* w/o schema evolution (which ACID doesn't support yet) all delta
+ files have the same schema, so choosing the 1st one*/
+ final List<OrcProto.Type> types =
+ OrcFile.createReader(bucketPath, readerOptions).getTypes();
+ readOptions.include(genIncludedColumns(types, conf, split.isOriginal()));
+ setSearchArgument(readOptions, types, conf, split.isOriginal());
+ }
+ }
}
String txnString = conf.get(ValidTxnList.VALID_TXNS_KEY,
Long.MAX_VALUE + ":");
http://git-wip-us.apache.org/repos/asf/hive/blob/ed4517cf/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
index 58c2fca..5aa2500 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
@@ -22,6 +22,7 @@ import org.apache.commons.io.FileUtils;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.txn.TxnDbUtil;
+import org.apache.hadoop.hive.ql.io.HiveInputFormat;
import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.txn.compactor.Worker;
@@ -51,7 +52,7 @@ public class TestTxnCommands2 {
).getPath().replaceAll("\\\\", "/");
private static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse";
//bucket count for test tables; set it to 1 for easier debugging
- private static int BUCKET_COUNT = 1;
+ private static int BUCKET_COUNT = 2;
@Rule
public TestName testName = new TestName();
private HiveConf hiveConf;
@@ -107,7 +108,6 @@ public class TestTxnCommands2 {
public void tearDown() throws Exception {
try {
if (d != null) {
- // runStatementOnDriver("set autocommit true");
dropTables();
d.destroy();
d.close();
@@ -126,13 +126,51 @@ public class TestTxnCommands2 {
public void testOrcNoPPD() throws Exception {
testOrcPPD(false);
}
- private void testOrcPPD(boolean enablePPD) throws Exception {
+
+ /**
+ * this is run 2 times: 1 with PPD on, 1 with off
+ * Also, the queries are such that if we were to push predicate down to an update/delete delta,
+ * the test would produce wrong results
+ * @param enablePPD
+ * @throws Exception
+ */
+ private void testOrcPPD(boolean enablePPD) throws Exception {
boolean originalPpd = hiveConf.getBoolVar(HiveConf.ConfVars.HIVEOPTINDEXFILTER);
hiveConf.setBoolVar(HiveConf.ConfVars.HIVEOPTINDEXFILTER, enablePPD);//enables ORC PPD
- int[][] tableData = {{1,2},{3,4}};
- runStatementOnDriver("insert into " + Table.ACIDTBL + "(a,b) " + makeValuesClause(tableData));
- List<String> rs2 = runStatementOnDriver("select a,b from " + Table.ACIDTBL + " where a > 1 order by a,b");
- runStatementOnDriver("alter table "+ Table.ACIDTBL + " compact 'MAJOR'");
+ //create delta_0001_0001_0000 (should push predicate here)
+ runStatementOnDriver("insert into " + Table.ACIDTBL + "(a,b) " + makeValuesClause(new int[][]{{1, 2}, {3, 4}}));
+ List<String> explain;
+ String query = "update " + Table.ACIDTBL + " set b = 5 where a = 3";
+ if (enablePPD) {
+ explain = runStatementOnDriver("explain " + query);
+ /*
+ here is a portion of the above "explain". The "filterExpr:" in the TableScan is the pushed predicate
+ w/o PPD, the line is simply not there, otherwise the plan is the same
+ Map Operator Tree:,
+ TableScan,
+ alias: acidtbl,
+ filterExpr: (a = 3) (type: boolean),
+ Filter Operator,
+ predicate: (a = 3) (type: boolean),
+ Select Operator,
+ ...
+ */
+ assertPredicateIsPushed("filterExpr: (a = 3)", explain);
+ }
+ //create delta_0002_0002_0000 (can't push predicate)
+ runStatementOnDriver(query);
+ query = "select a,b from " + Table.ACIDTBL + " where b = 4 order by a,b";
+ if (enablePPD) {
+ /*at this point we have 2 delta files, 1 for insert 1 for update
+ * we should push predicate into 1st one but not 2nd. If the following 'select' were to
+ * push into the 'update' delta, we'd filter out {3,5} before doing merge and thus
+ * produce {3,4} as the value for 2nd row. The right result is 0-rows.*/
+ explain = runStatementOnDriver("explain " + query);
+ assertPredicateIsPushed("filterExpr: (b = 4)", explain);
+ }
+ List<String> rs0 = runStatementOnDriver(query);
+ Assert.assertEquals("Read failed", 0, rs0.size());
+ runStatementOnDriver("alter table " + Table.ACIDTBL + " compact 'MAJOR'");
Worker t = new Worker();
t.setThreadId((int) t.getId());
t.setHiveConf(hiveConf);
@@ -142,18 +180,37 @@ public class TestTxnCommands2 {
t.init(stop, looped);
t.run();
//now we have base_0001 file
- int[][] tableData2 = {{1,7},{5,6},{7,8},{9,10}};
+ int[][] tableData2 = {{1, 7}, {5, 6}, {7, 8}, {9, 10}};
runStatementOnDriver("insert into " + Table.ACIDTBL + "(a,b) " + makeValuesClause(tableData2));
- //now we have delta_0002_0002_0000 with inserts only (ok to push predicate)
+ //now we have delta_0003_0003_0000 with inserts only (ok to push predicate)
+ if (enablePPD) {
+ explain = runStatementOnDriver("explain delete from " + Table.ACIDTBL + " where a=7 and b=8");
+ assertPredicateIsPushed("filterExpr: ((a = 7) and (b = 8))", explain);
+ }
runStatementOnDriver("delete from " + Table.ACIDTBL + " where a=7 and b=8");
- //now we have delta_0003_0003_0000 with delete events (can't push predicate)
- runStatementOnDriver("update " + Table.ACIDTBL + " set b = 11 where a = 9");
- //and another delta with update op
- List<String> rs1 = runStatementOnDriver("select a,b from " + Table.ACIDTBL + " where a > 1 order by a,b");
- int [][] resultData = {{3,4},{5,6},{9,11}};
+ //now we have delta_0004_0004_0000 with delete events
+
+ /*(can't push predicate to 'delete' delta)
+ * if we were to push to 'delete' delta, we'd filter out all rows since the 'row' is always NULL for
+ * delete events and we'd produce data as if the delete never happened*/
+ query = "select a,b from " + Table.ACIDTBL + " where a > 1 order by a,b";
+ if(enablePPD) {
+ explain = runStatementOnDriver("explain " + query);
+ assertPredicateIsPushed("filterExpr: (a > 1)", explain);
+ }
+ List<String> rs1 = runStatementOnDriver(query);
+ int [][] resultData = new int[][] {{3, 5}, {5, 6}, {9, 10}};
Assert.assertEquals("Update failed", stringifyValues(resultData), rs1);
hiveConf.setBoolVar(HiveConf.ConfVars.HIVEOPTINDEXFILTER, originalPpd);
}
+ private static void assertPredicateIsPushed(String ppd, List<String> queryPlan) {
+ for(String line : queryPlan) {
+ if(line != null && line.contains(ppd)) {
+ return;
+ }
+ }
+ Assert.assertFalse("PPD '" + ppd + "' wasn't pushed", true);
+ }
@Ignore("alter table")
@Test
public void testAlterTable() throws Exception {
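One small readability note on the test helper above: the closing Assert.assertFalse("...", true) is an unconditional failure, which JUnit expresses more directly with Assert.fail. A hedged equivalent of the same helper:

    private static void assertPredicateIsPushed(String ppd, List<String> queryPlan) {
      for (String line : queryPlan) {
        if (line != null && line.contains(ppd)) {
          return; // predicate found in the plan; nothing to report
        }
      }
      Assert.fail("PPD '" + ppd + "' wasn't pushed");
    }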
[05/28] hive git commit: HIVE-11595: refactor ORC footer reading to make it usable from outside (Sergey Shelukhin, reviewed by Prasanth Jayachandran)
Posted by se...@apache.org.
HIVE-11595: refactor ORC footer reading to make it usable from outside (Sergey Shelukhin, reviewed by Prasanth Jayachandran)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/22fa9216
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/22fa9216
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/22fa9216
Branch: refs/heads/llap
Commit: 22fa9216d4e32d7681d3c1be8cbedc8c7999e56d
Parents: 97bf32a
Author: Sergey Shelukhin <se...@apache.org>
Authored: Fri Aug 28 18:23:05 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Fri Aug 28 18:23:05 2015 -0700
----------------------------------------------------------------------
.../apache/hadoop/hive/ql/io/orc/Reader.java | 6 +
.../hadoop/hive/ql/io/orc/ReaderImpl.java | 281 +++++++++++++------
2 files changed, 204 insertions(+), 83 deletions(-)
----------------------------------------------------------------------
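For orientation before the diffs: the new tail-parsing path in ReaderImpl walks an ORC file backwards from its last byte, which stores the PostScript length; the PostScript in turn gives the footer and metadata sizes. A hedged sketch of the layout and the offset arithmetic the code below relies on:

    ... stripes ... | metadata | footer | postscript | psLen (1 byte)

    psAbsPos       = lastByteAbsPos - psLen
    footerAbsPos   = psAbsPos - footerSize
    metadataAbsPos = footerAbsPos - metadataSize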
http://git-wip-us.apache.org/repos/asf/hive/blob/22fa9216/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java
index 7bddefc..187924d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java
@@ -22,6 +22,7 @@ import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.List;
+import org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -358,4 +359,9 @@ public interface Reader {
String[] neededColumns) throws IOException;
MetadataReader metadata() throws IOException;
+
+ /** Gets serialized file metadata read from disk for the purposes of caching, etc. */
+ ByteBuffer getSerializedFileFooter();
+
+ Footer getFooter();
}
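The two new accessors exist so a caller (the commit comments mention a metastore cache) can hold on to the raw tail bytes and reparse them later without another filesystem read. A hedged usage sketch; footerCache is hypothetical, conf and path are assumed to be in scope, and per the ReaderImpl field comment the buffer is null when the footer was supplied externally rather than read from disk:

    // Hypothetical caching flow built on the new accessors.
    Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
    ByteBuffer tail = reader.getSerializedFileFooter(); // null if footer came in via ReaderOptions
    if (tail != null) {
      footerCache.put(path, tail); // footerCache: a hypothetical Map<Path, ByteBuffer>
    }
    // Later, parse metadata straight from the cached bytes, no disk access:
    ReaderImpl.extractMetaInfoFromFooter(footerCache.get(path), path);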
http://git-wip-us.apache.org/repos/asf/hive/blob/22fa9216/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
index c990d85..ab539c4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
@@ -35,6 +35,8 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.DiskRange;
import org.apache.hadoop.hive.ql.io.FileFormatException;
+import org.apache.hadoop.hive.ql.io.orc.OrcFile.WriterVersion;
+import org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer;
import org.apache.hadoop.hive.ql.io.orc.OrcProto.Type;
import org.apache.hadoop.hive.ql.io.orc.OrcProto.UserMetadataItem;
import org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl.BufferChunk;
@@ -74,6 +76,9 @@ public class ReaderImpl implements Reader {
// will help avoid cpu cycles spend in deserializing at cost of increased
// memory footprint.
private final ByteBuffer footerByteBuffer;
+ // Same for metastore cache - maintains the same background buffer, but includes postscript.
+ // This will only be set if the file footer/metadata was read from disk.
+ private final ByteBuffer footerMetaAndPsBuffer;
static class StripeInformationImpl
implements StripeInformation {
@@ -166,11 +171,7 @@ public class ReaderImpl implements Reader {
@Override
public List<StripeInformation> getStripes() {
- List<StripeInformation> result = new ArrayList<StripeInformation>();
- for(OrcProto.StripeInformation info: footer.getStripesList()) {
- result.add(new StripeInformationImpl(info));
- }
- return result;
+ return convertProtoStripesToStripes(footer.getStripesList());
}
@Override
@@ -274,7 +275,7 @@ public class ReaderImpl implements Reader {
* Check to see if this ORC file is from a future version and if so,
* warn the user that we may not be able to read all of the column encodings.
* @param log the logger to write any error message to
- * @param path the filename for error messages
+ * @param path the data source path for error messages
* @param version the version of hive that wrote the file.
*/
static void checkOrcVersion(Log log, Path path, List<Integer> version) {
@@ -287,8 +288,7 @@ public class ReaderImpl implements Reader {
if (major > OrcFile.Version.CURRENT.getMajor() ||
(major == OrcFile.Version.CURRENT.getMajor() &&
minor > OrcFile.Version.CURRENT.getMinor())) {
- log.warn("ORC file " + path +
- " was written by a future Hive version " +
+ log.warn(path + " was written by a future Hive version " +
versionString(version) +
". This file may not be readable by this version of Hive.");
}
@@ -313,9 +313,11 @@ public class ReaderImpl implements Reader {
FileMetaInfo footerMetaData;
if (options.getFileMetaInfo() != null) {
footerMetaData = options.getFileMetaInfo();
+ this.footerMetaAndPsBuffer = null;
} else {
footerMetaData = extractMetaInfoFromFooter(fs, path,
options.getMaxLength());
+ this.footerMetaAndPsBuffer = footerMetaData.footerMetaAndPsBuffer;
}
MetaInfoObjExtractor rInfo =
new MetaInfoObjExtractor(footerMetaData.compressionType,
@@ -349,6 +351,111 @@ public class ReaderImpl implements Reader {
return OrcFile.WriterVersion.ORIGINAL;
}
+ /** Extracts the necessary metadata from an externally stored buffer (fullFooterBuffer). */
+ public static FooterInfo extractMetaInfoFromFooter(
+ ByteBuffer bb, Path srcPath) throws IOException {
+ // Read the PostScript. Be very careful as some parts of this historically use bb position
+ // and some use absolute offsets that have to take position into account.
+ int baseOffset = bb.position();
+ int lastByteAbsPos = baseOffset + bb.remaining() - 1;
+ int psLen = bb.get(lastByteAbsPos) & 0xff;
+ int psAbsPos = lastByteAbsPos - psLen;
+ OrcProto.PostScript ps = extractPostScript(bb, srcPath, psLen, psAbsPos);
+ assert baseOffset == bb.position();
+
+ // Extract PS information.
+ int footerSize = (int)ps.getFooterLength(), metadataSize = (int)ps.getMetadataLength(),
+ footerAbsPos = psAbsPos - footerSize, metadataAbsPos = footerAbsPos - metadataSize;
+ String compressionType = ps.getCompression().toString();
+ CompressionCodec codec = WriterImpl.createCodec(CompressionKind.valueOf(compressionType));
+ int bufferSize = (int)ps.getCompressionBlockSize();
+ bb.position(metadataAbsPos);
+ bb.mark();
+
+ // Extract metadata and footer.
+ Metadata metadata = new Metadata(extractMetadata(
+ bb, metadataAbsPos, metadataSize, codec, bufferSize));
+ OrcProto.Footer footer = extractFooter(bb, footerAbsPos, footerSize, codec, bufferSize);
+ bb.position(metadataAbsPos);
+ bb.limit(psAbsPos);
+ // TODO: do we need footer buffer here? FileInfo/FileMetaInfo is a mess...
+ FileMetaInfo fmi = new FileMetaInfo(
+ compressionType, bufferSize, metadataSize, bb, extractWriterVersion(ps));
+ return new FooterInfo(metadata, footer, fmi);
+ }
+
+ private static OrcProto.Footer extractFooter(ByteBuffer bb, int footerAbsPos,
+ int footerSize, CompressionCodec codec, int bufferSize) throws IOException {
+ bb.position(footerAbsPos);
+ bb.limit(footerAbsPos + footerSize);
+ InputStream instream = InStream.create("footer", Lists.<DiskRange>newArrayList(
+ new BufferChunk(bb, 0)), footerSize, codec, bufferSize);
+ return OrcProto.Footer.parseFrom(instream);
+ }
+
+ private static OrcProto.Metadata extractMetadata(ByteBuffer bb, int metadataAbsPos,
+ int metadataSize, CompressionCodec codec, int bufferSize) throws IOException {
+ bb.position(metadataAbsPos);
+ bb.limit(metadataAbsPos + metadataSize);
+ InputStream instream = InStream.create("metadata", Lists.<DiskRange>newArrayList(
+ new BufferChunk(bb, 0)), metadataSize, codec, bufferSize);
+ CodedInputStream in = CodedInputStream.newInstance(instream);
+ int msgLimit = DEFAULT_PROTOBUF_MESSAGE_LIMIT;
+ OrcProto.Metadata meta = null;
+ do {
+ try {
+ in.setSizeLimit(msgLimit);
+ meta = OrcProto.Metadata.parseFrom(in);
+ } catch (InvalidProtocolBufferException e) {
+ if (e.getMessage().contains("Protocol message was too large")) {
+ LOG.warn("Metadata section is larger than " + msgLimit + " bytes. Increasing the max" +
+ " size of the coded input stream." );
+
+ msgLimit = msgLimit << 1;
+ if (msgLimit > PROTOBUF_MESSAGE_MAX_LIMIT) {
+ LOG.error("Metadata section exceeds max protobuf message size of " +
+ PROTOBUF_MESSAGE_MAX_LIMIT + " bytes.");
+ throw e;
+ }
+
+ // we must have failed in the middle of reading instream and instream doesn't support
+ // resetting the stream
+ instream = InStream.create("metadata", Lists.<DiskRange>newArrayList(
+ new BufferChunk(bb, 0)), metadataSize, codec, bufferSize);
+ in = CodedInputStream.newInstance(instream);
+ } else {
+ throw e;
+ }
+ }
+ } while (meta == null);
+ return meta;
+ }
+
+ private static OrcProto.PostScript extractPostScript(ByteBuffer bb, Path path,
+ int psLen, int psAbsOffset) throws IOException {
+ // TODO: when PB is upgraded to 2.6, newInstance(ByteBuffer) method should be used here.
+ assert bb.hasArray();
+ CodedInputStream in = CodedInputStream.newInstance(
+ bb.array(), bb.arrayOffset() + psAbsOffset, psLen);
+ OrcProto.PostScript ps = OrcProto.PostScript.parseFrom(in);
+ checkOrcVersion(LOG, path, ps.getVersionList());
+
+ // Check compression codec.
+ switch (ps.getCompression()) {
+ case NONE:
+ break;
+ case ZLIB:
+ break;
+ case SNAPPY:
+ break;
+ case LZO:
+ break;
+ default:
+ throw new IllegalArgumentException("Unknown compression");
+ }
+ return ps;
+ }
+
private static FileMetaInfo extractMetaInfoFromFooter(FileSystem fs,
Path path,
long maxFileLength
@@ -367,44 +474,24 @@ public class ReaderImpl implements Reader {
int readSize = (int) Math.min(size, DIRECTORY_SIZE_GUESS);
file.seek(size - readSize);
ByteBuffer buffer = ByteBuffer.allocate(readSize);
- file.readFully(buffer.array(), buffer.arrayOffset() + buffer.position(),
- buffer.remaining());
+ assert buffer.position() == 0;
+ file.readFully(buffer.array(), buffer.arrayOffset(), readSize);
+ buffer.position(0);
//read the PostScript
//get length of PostScript
int psLen = buffer.get(readSize - 1) & 0xff;
ensureOrcFooter(file, path, psLen, buffer);
int psOffset = readSize - 1 - psLen;
- CodedInputStream in = CodedInputStream.newInstance(buffer.array(),
- buffer.arrayOffset() + psOffset, psLen);
- OrcProto.PostScript ps = OrcProto.PostScript.parseFrom(in);
-
- checkOrcVersion(LOG, path, ps.getVersionList());
+ OrcProto.PostScript ps = extractPostScript(buffer, path, psLen, psOffset);
int footerSize = (int) ps.getFooterLength();
int metadataSize = (int) ps.getMetadataLength();
- OrcFile.WriterVersion writerVersion;
- if (ps.hasWriterVersion()) {
- writerVersion = getWriterVersion(ps.getWriterVersion());
- } else {
- writerVersion = OrcFile.WriterVersion.ORIGINAL;
- }
+ OrcFile.WriterVersion writerVersion = extractWriterVersion(ps);
- //check compression codec
- switch (ps.getCompression()) {
- case NONE:
- break;
- case ZLIB:
- break;
- case SNAPPY:
- break;
- case LZO:
- break;
- default:
- throw new IllegalArgumentException("Unknown compression");
- }
//check if extra bytes need to be read
+ ByteBuffer fullFooterBuffer = null;
int extra = Math.max(0, psLen + 1 + footerSize + metadataSize - readSize);
if (extra > 0) {
//more bytes need to be read, seek back to the right place and read extra bytes
@@ -417,10 +504,12 @@ public class ReaderImpl implements Reader {
extraBuf.put(buffer);
buffer = extraBuf;
buffer.position(0);
+ fullFooterBuffer = buffer.slice();
buffer.limit(footerSize + metadataSize);
} else {
//footer is already in the bytes in buffer, just adjust position, length
buffer.position(psOffset - footerSize - metadataSize);
+ fullFooterBuffer = buffer.slice();
buffer.limit(psOffset);
}
@@ -435,11 +524,24 @@ public class ReaderImpl implements Reader {
(int) ps.getMetadataLength(),
buffer,
ps.getVersionList(),
- writerVersion
+ writerVersion,
+ fullFooterBuffer
);
}
+ private static OrcFile.WriterVersion extractWriterVersion(OrcProto.PostScript ps) {
+ return (ps.hasWriterVersion()
+ ? getWriterVersion(ps.getWriterVersion()) : OrcFile.WriterVersion.ORIGINAL);
+ }
+ private static List<StripeInformation> convertProtoStripesToStripes(
+ List<OrcProto.StripeInformation> stripes) {
+ List<StripeInformation> result = new ArrayList<StripeInformation>(stripes.size());
+ for (OrcProto.StripeInformation info : stripes) {
+ result.add(new StripeInformationImpl(info));
+ }
+ return result;
+ }
/**
* MetaInfoObjExtractor - has logic to create the values for the fields in ReaderImpl
@@ -467,46 +569,10 @@ public class ReaderImpl implements Reader {
int position = footerBuffer.position();
int footerBufferSize = footerBuffer.limit() - footerBuffer.position() - metadataSize;
- footerBuffer.limit(position + metadataSize);
-
- InputStream instream = InStream.create("metadata", Lists.<DiskRange>newArrayList(
- new BufferChunk(footerBuffer, 0)), metadataSize, codec, bufferSize);
- CodedInputStream in = CodedInputStream.newInstance(instream);
- int msgLimit = DEFAULT_PROTOBUF_MESSAGE_LIMIT;
- OrcProto.Metadata meta = null;
- do {
- try {
- in.setSizeLimit(msgLimit);
- meta = OrcProto.Metadata.parseFrom(in);
- } catch (InvalidProtocolBufferException e) {
- if (e.getMessage().contains("Protocol message was too large")) {
- LOG.warn("Metadata section is larger than " + msgLimit + " bytes. Increasing the max" +
- " size of the coded input stream." );
-
- msgLimit = msgLimit << 1;
- if (msgLimit > PROTOBUF_MESSAGE_MAX_LIMIT) {
- LOG.error("Metadata section exceeds max protobuf message size of " +
- PROTOBUF_MESSAGE_MAX_LIMIT + " bytes.");
- throw e;
- }
-
- // we must have failed in the middle of reading instream and instream doesn't support
- // resetting the stream
- instream = InStream.create("metadata", Lists.<DiskRange>newArrayList(
- new BufferChunk(footerBuffer, 0)), metadataSize, codec, bufferSize);
- in = CodedInputStream.newInstance(instream);
- } else {
- throw e;
- }
- }
- } while (meta == null);
- this.metadata = meta;
- footerBuffer.position(position + metadataSize);
- footerBuffer.limit(position + metadataSize + footerBufferSize);
- instream = InStream.create("footer", Lists.<DiskRange>newArrayList(
- new BufferChunk(footerBuffer, 0)), footerBufferSize, codec, bufferSize);
- this.footer = OrcProto.Footer.parseFrom(instream);
+ this.metadata = extractMetadata(footerBuffer, position, metadataSize, codec, bufferSize);
+ this.footer = extractFooter(
+ footerBuffer, position + metadataSize, footerBufferSize, codec, bufferSize);
footerBuffer.position(position);
this.inspector = OrcStruct.createObjectInspector(0, footer.getTypesList());
@@ -518,7 +584,8 @@ public class ReaderImpl implements Reader {
* that is useful for Reader implementation
*
*/
- static class FileMetaInfo{
+ static class FileMetaInfo {
+ private ByteBuffer footerMetaAndPsBuffer;
final String compressionType;
final int bufferSize;
final int metadataSize;
@@ -526,30 +593,68 @@ public class ReaderImpl implements Reader {
final List<Integer> versionList;
final OrcFile.WriterVersion writerVersion;
+ /** Ctor used when reading splits - no version list or full footer buffer. */
FileMetaInfo(String compressionType, int bufferSize, int metadataSize,
ByteBuffer footerBuffer, OrcFile.WriterVersion writerVersion) {
this(compressionType, bufferSize, metadataSize, footerBuffer, null,
- writerVersion);
+ writerVersion, null);
}
- FileMetaInfo(String compressionType, int bufferSize, int metadataSize,
- ByteBuffer footerBuffer, List<Integer> versionList,
- OrcFile.WriterVersion writerVersion){
+ /** Ctor used when creating file info during init and when getting a new one. */
+ public FileMetaInfo(String compressionType, int bufferSize, int metadataSize,
+ ByteBuffer footerBuffer, List<Integer> versionList, WriterVersion writerVersion,
+ ByteBuffer fullFooterBuffer) {
this.compressionType = compressionType;
this.bufferSize = bufferSize;
this.metadataSize = metadataSize;
this.footerBuffer = footerBuffer;
this.versionList = versionList;
this.writerVersion = writerVersion;
+ this.footerMetaAndPsBuffer = fullFooterBuffer;
}
}
- public FileMetaInfo getFileMetaInfo(){
+ public FileMetaInfo getFileMetaInfo() {
return new FileMetaInfo(compressionKind.toString(), bufferSize,
- metadataSize, footerByteBuffer, versionList, writerVersion);
+ metadataSize, footerByteBuffer, versionList, writerVersion, footerMetaAndPsBuffer);
}
+ /** Same as FileMetaInfo, but with extra fields. FileMetaInfo is serialized for splits,
+ * so we don't just add fields to it; it is already messy and confusing. */
+ public static final class FooterInfo {
+ private final OrcProto.Footer footer;
+ private final Metadata metadata;
+ private final List<StripeInformation> stripes;
+ private final FileMetaInfo fileMetaInfo;
+ private FooterInfo(Metadata metadata, OrcProto.Footer footer, FileMetaInfo fileMetaInfo) {
+ this.metadata = metadata;
+ this.footer = footer;
+ this.fileMetaInfo = fileMetaInfo;
+ this.stripes = convertProtoStripesToStripes(footer.getStripesList());
+ }
+
+ public OrcProto.Footer getFooter() {
+ return footer;
+ }
+
+ public Metadata getMetadata() {
+ return metadata;
+ }
+
+ public FileMetaInfo getFileMetaInfo() {
+ return fileMetaInfo;
+ }
+
+ public List<StripeInformation> getStripes() {
+ return stripes;
+ }
+ }
+
+ @Override
+ public ByteBuffer getSerializedFileFooter() {
+ return footerMetaAndPsBuffer;
+ }
@Override
public RecordReader rows() throws IOException {
@@ -609,14 +714,19 @@ public class ReaderImpl implements Reader {
@Override
public long getRawDataSizeFromColIndices(List<Integer> colIndices) {
+ return getRawDataSizeFromColIndices(colIndices, footer);
+ }
+
+ public static long getRawDataSizeFromColIndices(
+ List<Integer> colIndices, OrcProto.Footer footer) {
long result = 0;
for (int colIdx : colIndices) {
- result += getRawDataSizeOfColumn(colIdx);
+ result += getRawDataSizeOfColumn(colIdx, footer);
}
return result;
}
- private long getRawDataSizeOfColumn(int colIdx) {
+ private static long getRawDataSizeOfColumn(int colIdx, OrcProto.Footer footer) {
OrcProto.ColumnStatistics colStat = footer.getStatistics(colIdx);
long numVals = colStat.getNumberOfValues();
Type type = footer.getTypes(colIdx);
@@ -738,4 +848,9 @@ public class ReaderImpl implements Reader {
public MetadataReader metadata() throws IOException {
return new MetadataReader(fileSystem, path, codec, bufferSize, footer.getTypesCount());
}
+
+ @Override
+ public Footer getFooter() {
+ return footer;
+ }
}
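
The extractMetadata helper in the diff above retries protobuf parsing with a doubled
size limit whenever the metadata section exceeds the current cap, recreating the input
stream on each attempt because a failed parse leaves it mid-read. Reduced to its
essentials, the pattern looks like the sketch below; the method name, the byte[] input,
and the starting and ceiling limits are illustrative assumptions, not Hive's constants:

  import java.io.IOException;
  import com.google.protobuf.CodedInputStream;
  import com.google.protobuf.InvalidProtocolBufferException;

  static OrcProto.Metadata parseWithGrowingLimit(byte[] data) throws IOException {
    int limit = 64 << 20;             // assumed starting cap (64MB)
    final int maxLimit = 1 << 30;     // assumed hard ceiling (1GB)
    while (true) {
      // a fresh stream per attempt: the previous one cannot be reset
      CodedInputStream in = CodedInputStream.newInstance(data);
      in.setSizeLimit(limit);
      try {
        return OrcProto.Metadata.parseFrom(in);
      } catch (InvalidProtocolBufferException e) {
        boolean tooLarge = e.getMessage().contains("Protocol message was too large");
        if (!tooLarge || limit >= maxLimit) {
          throw e;                    // unrelated failure, or ceiling already reached
        }
        limit <<= 1;                  // double the cap and re-parse from the start
      }
    }
  }
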
[07/28] hive git commit: "Alter index rebuild" statements submitted through HiveServer2 fail when Sentry is enabled (Aihua Xu, reviewed by Chao Sun and Ashutosh Chauhan)
Posted by se...@apache.org.
"Alter index rebuild" statements submitted through HiveServer2 fail when Sentry is enabled (Aihua Xu, reviewed by Chao Sun and Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/dcf21cd6
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/dcf21cd6
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/dcf21cd6
Branch: refs/heads/llap
Commit: dcf21cd6fa98fb5db01ef661bb3b9f94d9ca2d15
Parents: af91308
Author: Aihua Xu <ai...@gmail.com>
Authored: Sat Aug 29 12:57:52 2015 -0700
Committer: Chao Sun <su...@apache.org>
Committed: Sat Aug 29 12:57:52 2015 -0700
----------------------------------------------------------------------
.../apache/hadoop/hive/ql/optimizer/IndexUtils.java | 13 +++++++++----
1 file changed, 9 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/dcf21cd6/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IndexUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IndexUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IndexUtils.java
index 92cae67..0b30258 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IndexUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IndexUtils.java
@@ -49,6 +49,7 @@ import org.apache.hadoop.hive.ql.optimizer.physical.index.IndexWhereProcessor;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.session.SessionState;
/**
* Utility class for index support.
@@ -213,13 +214,17 @@ public final class IndexUtils {
return hive.getIndexes(table.getTTable().getDbName(), table.getTTable().getTableName(), max);
}
- public static Task<?> createRootTask(HiveConf builderConf, Set<ReadEntity> inputs,
- Set<WriteEntity> outputs, StringBuilder command,
+ public static Task<?> createRootTask(
+ HiveConf builderConf,
+ Set<ReadEntity> inputs,
+ Set<WriteEntity> outputs,
+ StringBuilder command,
LinkedHashMap<String, String> partSpec,
- String indexTableName, String dbName){
+ String indexTableName,
+ String dbName){
// Don't try to index optimize the query to build the index
HiveConf.setBoolVar(builderConf, HiveConf.ConfVars.HIVEOPTINDEXFILTER, false);
- Driver driver = new Driver(builderConf);
+ Driver driver = new Driver(builderConf, SessionState.get().getUserName());
driver.compile(command.toString(), false);
Task<?> rootTask = driver.getPlan().getRootTasks().get(0);
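
The substance of this fix is the Driver constructor call: the index-rebuild query is
compiled as the session user rather than the HiveServer2 service principal, so
authorization layers such as Sentry evaluate the real caller. A minimal sketch mirroring
the patched code path, where "idx" and "tbl" are placeholder names and an active
SessionState is assumed (getUserName() may be null outside HiveServer2):

  HiveConf conf = new HiveConf();
  // Don't let the index optimizer rewrite the very query that builds the index.
  HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVEOPTINDEXFILTER, false);
  Driver driver = new Driver(conf, SessionState.get().getUserName());
  driver.compile("ALTER INDEX idx ON tbl REBUILD", false);
  Task<?> rootTask = driver.getPlan().getRootTasks().get(0);
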
[22/28] hive git commit: HIVE-11668 : make sure directsql calls pre-query init when needed (Sergey Shelukhin, reviewed by Sushanth Sowmyan)
Posted by se...@apache.org.
HIVE-11668 : make sure directsql calls pre-query init when needed (Sergey Shelukhin, reviewed by Sushanth Sowmyan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5a1957fc
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5a1957fc
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5a1957fc
Branch: refs/heads/llap
Commit: 5a1957fc61da4d5e32c46e8e38bdf596eaeef8a3
Parents: 308ae90
Author: Sergey Shelukhin <se...@apache.org>
Authored: Wed Sep 2 11:05:44 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Wed Sep 2 12:27:11 2015 -0700
----------------------------------------------------------------------
.../hive/metastore/MetaStoreDirectSql.java | 29 ++++++++++++++++----
1 file changed, 23 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/5a1957fc/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
index 522fcc2..1f89b7c 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
@@ -178,7 +178,13 @@ class MetaStoreDirectSql {
private boolean ensureDbInit() {
Transaction tx = pm.currentTransaction();
+ boolean doCommit = false;
+ if (!tx.isActive()) {
+ tx.begin();
+ doCommit = true;
+ }
Query dbQuery = null, tblColumnQuery = null, partColumnQuery = null;
+
try {
// Force the underlying db to initialize.
dbQuery = pm.newQuery(MDatabase.class, "name == ''");
@@ -192,10 +198,14 @@ class MetaStoreDirectSql {
return true;
} catch (Exception ex) {
+ doCommit = false;
LOG.warn("Database initialization failed; direct SQL is disabled", ex);
tx.rollback();
return false;
} finally {
+ if (doCommit) {
+ tx.commit();
+ }
if (dbQuery != null) {
dbQuery.closeAll();
}
@@ -210,23 +220,28 @@ class MetaStoreDirectSql {
private boolean runTestQuery() {
Transaction tx = pm.currentTransaction();
+ boolean doCommit = false;
if (!tx.isActive()) {
tx.begin();
+ doCommit = true;
}
Query query = null;
// Run a self-test query. If it doesn't work, we will self-disable. What a PITA...
String selfTestQuery = "select \"DB_ID\" from \"DBS\"";
try {
+ doDbSpecificInitializationsBeforeQuery();
query = pm.newQuery("javax.jdo.query.SQL", selfTestQuery);
query.execute();
- tx.commit();
return true;
- } catch (Exception ex) {
- LOG.warn("Self-test query [" + selfTestQuery + "] failed; direct SQL is disabled", ex);
+ } catch (Throwable t) {
+ doCommit = false;
+ LOG.warn("Self-test query [" + selfTestQuery + "] failed; direct SQL is disabled", t);
tx.rollback();
return false;
- }
- finally {
+ } finally {
+ if (doCommit) {
+ tx.commit();
+ }
if (query != null) {
query.closeAll();
}
@@ -524,7 +539,6 @@ class MetaStoreDirectSql {
+ "where \"PART_ID\" in (" + partIds + ") order by \"PART_NAME\" asc";
long start = doTrace ? System.nanoTime() : 0;
Query query = pm.newQuery("javax.jdo.query.SQL", queryText);
- @SuppressWarnings("unchecked")
List<Object[]> sqlResult = executeWithArray(query, null, queryText);
long queryTime = doTrace ? System.nanoTime() : 0;
Deadline.checkTimeout();
@@ -1095,6 +1109,7 @@ class MetaStoreDirectSql {
if (colNames.isEmpty()) {
return null;
}
+ doDbSpecificInitializationsBeforeQuery();
boolean doTrace = LOG.isDebugEnabled();
long start = doTrace ? System.nanoTime() : 0;
String queryText = "select " + STATS_COLLIST + " from \"TAB_COL_STATS\" "
@@ -1214,6 +1229,7 @@ class MetaStoreDirectSql {
private List<ColumnStatisticsObj> columnStatisticsObjForPartitions(String dbName,
String tableName, List<String> partNames, List<String> colNames, long partsFound,
boolean useDensityFunctionForNDVEstimation) throws MetaException {
+ doDbSpecificInitializationsBeforeQuery();
// TODO: all the extrapolation logic should be moved out of this class,
// only mechanical data retrieval should remain here.
String commonPrefix = "select \"COLUMN_NAME\", \"COLUMN_TYPE\", "
@@ -1530,6 +1546,7 @@ class MetaStoreDirectSql {
return Lists.newArrayList();
}
boolean doTrace = LOG.isDebugEnabled();
+ doDbSpecificInitializationsBeforeQuery();
long start = doTrace ? System.nanoTime() : 0;
String queryText = "select \"PARTITION_NAME\", " + STATS_COLLIST + " from \"PART_COL_STATS\""
+ " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? and \"COLUMN_NAME\" in ("
[09/28] hive git commit: HIVE-11670 : Strip out password information from TezSessionState configuration (Hari Subramaniyan, reviewed by Vikram Dixit K)
Posted by se...@apache.org.
HIVE-11670 : Strip out password information from TezSessionState configuration (Hari Subramaniyan, reviewed by Vikram Dixit K)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/78e70159
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/78e70159
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/78e70159
Branch: refs/heads/llap
Commit: 78e701590f1cb8b8b07a4871280a31f7c3d35034
Parents: 5aa16ec
Author: Hari Subramaniyan <ha...@apache.org>
Authored: Mon Aug 31 11:55:07 2015 -0700
Committer: Hari Subramaniyan <ha...@apache.org>
Committed: Mon Aug 31 11:55:07 2015 -0700
----------------------------------------------------------------------
.../java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java | 2 ++
1 file changed, 2 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/78e70159/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java
index 8555c6a..568ebbe 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java
@@ -44,6 +44,7 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.hive.shims.Utils;
@@ -165,6 +166,7 @@ public class TezSessionState {
// generate basic tez config
TezConfiguration tezConfig = new TezConfiguration(conf);
tezConfig.set(TezConfiguration.TEZ_AM_STAGING_DIR, tezScratchDir.toUri().toString());
+ Utilities.stripHivePasswordDetails(tezConfig);
if (HiveConf.getBoolVar(conf, ConfVars.HIVE_PREWARM_ENABLED)) {
int n = HiveConf.getIntVar(conf, ConfVars.HIVE_PREWARM_NUM_CONTAINERS);
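
Utilities.stripHivePasswordDetails is applied to the freshly copied TezConfiguration
before it is used any further, so credential values never travel with the session
configuration. The diff does not show Hive's implementation of that method; the general
idea, in a hedged sketch where the key names are illustrative assumptions rather than an
exhaustive Hive list:

  import org.apache.hadoop.conf.Configuration;

  // Not Hive's actual implementation -- an illustration of the idea only.
  static void stripPasswords(Configuration conf) {
    String[] sensitiveKeys = {
        "javax.jdo.option.ConnectionPassword",
        "hive.server2.keystore.password"
    };
    for (String key : sensitiveKeys) {
      conf.unset(key);              // drop the value entirely rather than mask it
    }
  }
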