Posted to commits@hive.apache.org by xu...@apache.org on 2015/09/25 03:39:39 UTC
[01/50] [abbrv] hive git commit: HIVE-11583 : When PTF is used over a large partition the result could be corrupted (Illya Yalovyy via Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/beeline-cli 046c5ebdb -> 6b3e82d39
HIVE-11583 : When PTF is used over a large partition the result could be corrupted (Illya Yalovyy via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8d524e06
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8d524e06
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8d524e06
Branch: refs/heads/beeline-cli
Commit: 8d524e062e6a8ad8c592e6067cea254c054797cd
Parents: 7be02ae
Author: Illya Yalovyy <ya...@amazon.com>
Authored: Mon Sep 14 10:18:00 2015 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Wed Sep 16 18:15:08 2015 -0700
----------------------------------------------------------------------
.../ql/exec/persistence/PTFRowContainer.java | 14 +++++----
.../hive/ql/exec/persistence/RowContainer.java | 12 +++++---
.../exec/persistence/TestPTFRowContainer.java | 31 ++++++++++++++------
3 files changed, 38 insertions(+), 19 deletions(-)
----------------------------------------------------------------------
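PTFRowContainer spills the rows of a windowed partition to local SequenceFiles when the partition no longer fits in memory. A Hive windowing query of roughly this shape exercises that path over a large partition (table and column names here are hypothetical):

  SELECT txn_id,
         SUM(amount) OVER (PARTITION BY customer_id ORDER BY txn_ts) AS running_total
  FROM transactions;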
http://git-wip-us.apache.org/repos/asf/hive/blob/8d524e06/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/PTFRowContainer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/PTFRowContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/PTFRowContainer.java
index d2bfea6..61cc6e8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/PTFRowContainer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/PTFRowContainer.java
@@ -81,8 +81,8 @@ import org.apache.hadoop.util.Progressable;
*/
public class PTFRowContainer<Row extends List<Object>> extends RowContainer<Row> {
- ArrayList<BlockInfo> blockInfos;
- int currentReadBlockStartRow;
+ private ArrayList<BlockInfo> blockInfos;
+ private int currentReadBlockStartRow;
public PTFRowContainer(int bs, Configuration jc, Reporter reporter
) throws HiveException {
@@ -190,14 +190,16 @@ public class PTFRowContainer<Row extends List<Object>> extends RowContainer<Row>
BlockInfo bI = blockInfos.get(blockNum);
int startSplit = bI.startingSplit;
- int endSplit = startSplit;
- if ( blockNum != blockInfos.size() - 1) {
- endSplit = blockInfos.get(blockNum+1).startingSplit;
+ int endSplit;
+ if ( blockNum != blockInfos.size() - 1 ) {
+ endSplit = blockInfos.get(blockNum + 1).startingSplit;
+ } else {
+ endSplit = getLastActualSplit();
}
try {
int readIntoOffset = 0;
- for(int i = startSplit; i <= endSplit; i++ ) {
+ for(int i = startSplit; i <= endSplit && readIntoOffset < getBlockSize(); i++ ) {
org.apache.hadoop.mapred.RecordReader rr = setReaderAtSplit(i);
if ( i == startSplit ) {
((PTFSequenceFileRecordReader)rr).seek(bI.startOffset);
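Before this change the last block reused its starting split as its end split, so a block whose rows spilled into later file splits was only partially read back. The fix bounds the last block by getLastActualSplit() and adds the readIntoOffset < getBlockSize() guard, so the read loop can cross split boundaries without running past the end of the block.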
http://git-wip-us.apache.org/repos/asf/hive/blob/8d524e06/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/RowContainer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/RowContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/RowContainer.java
index 4252bd1..68dc482 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/RowContainer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/RowContainer.java
@@ -103,7 +103,7 @@ public class RowContainer<ROW extends List<Object>>
boolean firstCalled = false; // once called first, it will never be able to
// write again.
- int acutalSplitNum = 0;
+ private int actualSplitNum = 0;
int currentSplitPointer = 0;
org.apache.hadoop.mapred.RecordReader rr = null; // record reader
RecordWriter rw = null;
@@ -220,7 +220,7 @@ public class RowContainer<ROW extends List<Object>>
HiveConf.setVar(localJc, HiveConf.ConfVars.HADOOPMAPREDINPUTDIR,
org.apache.hadoop.util.StringUtils.escapeString(parentFile.getAbsolutePath()));
inputSplits = inputFormat.getSplits(localJc, 1);
- acutalSplitNum = inputSplits.length;
+ actualSplitNum = inputSplits.length;
}
currentSplitPointer = 0;
rr = inputFormat.getRecordReader(inputSplits[currentSplitPointer],
@@ -375,7 +375,7 @@ public class RowContainer<ROW extends List<Object>>
}
}
- if (nextSplit && this.currentSplitPointer < this.acutalSplitNum) {
+ if (nextSplit && this.currentSplitPointer < this.actualSplitNum) {
JobConf localJc = getLocalFSJobConfClone(jc);
// open record reader to read next split
rr = inputFormat.getRecordReader(inputSplits[currentSplitPointer], jobCloneUsingLocalFs,
@@ -421,7 +421,7 @@ public class RowContainer<ROW extends List<Object>>
addCursor = 0;
numFlushedBlocks = 0;
this.readBlockSize = 0;
- this.acutalSplitNum = 0;
+ this.actualSplitNum = 0;
this.currentSplitPointer = -1;
this.firstCalled = false;
this.inputSplits = null;
@@ -606,4 +606,8 @@ public class RowContainer<ROW extends List<Object>>
clearRows();
currentReadBlock = firstReadBlockPointer = currentWriteBlock = null;
}
+
+ protected int getLastActualSplit() {
+ return actualSplitNum - 1;
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/8d524e06/ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestPTFRowContainer.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestPTFRowContainer.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestPTFRowContainer.java
index a404ff0..0611072 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestPTFRowContainer.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestPTFRowContainer.java
@@ -18,12 +18,14 @@
package org.apache.hadoop.hive.ql.exec.persistence;
+import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import java.util.Random;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
@@ -37,11 +39,13 @@ import org.apache.hadoop.io.Text;
import org.junit.BeforeClass;
import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+
@SuppressWarnings("deprecation")
public class TestPTFRowContainer {
- private static final String COL_NAMES = "x,y,z,a,b";
- private static final String COL_TYPES = "int,string,double,int,string";
+ private static final String COL_NAMES = "x,y,z,a,b,v";
+ private static final String COL_TYPES = "int,string,double,int,string,string";
static SerDe serDe;
static Configuration cfg;
@@ -70,7 +74,7 @@ public class TestPTFRowContainer {
return rc;
}
- private void runTest(int sz, int blockSize) throws SerDeException, HiveException {
+ private void runTest(int sz, int blockSize, String value) throws SerDeException, HiveException {
List<Object> row;
PTFRowContainer<List<Object>> rc = rowContainer(blockSize);
@@ -82,16 +86,17 @@ public class TestPTFRowContainer {
row.add(new DoubleWritable(i));
row.add(new IntWritable(i));
row.add(new Text("def " + i));
+ row.add(new Text(value));
rc.addRow(row);
}
// test forward scan
- assert(rc.rowCount() == sz);
+ assertEquals(sz, rc.rowCount());
i = 0;
row = new ArrayList<Object>();
row = rc.first();
while(row != null ) {
- assert(row.get(1).toString().equals("abc " + i));
+ assertEquals("abc " + i, row.get(1).toString());
i++;
row = rc.next();
}
@@ -100,7 +105,7 @@ public class TestPTFRowContainer {
row = rc.first();
for(i = sz - 1; i >= 0; i-- ) {
row = rc.getAt(i);
- assert(row.get(1).toString().equals("abc " + i));
+ assertEquals("abc " + i, row.get(1).toString());
}
Random r = new Random(1000L);
@@ -109,20 +114,23 @@ public class TestPTFRowContainer {
for(i=0; i < 100; i++) {
int j = r.nextInt(sz);
row = rc.getAt(j);
- assert(row.get(1).toString().equals("abc " + j));
+ assertEquals("abc " + j, row.get(1).toString());
}
// intersperse getAt and next calls
for(i=0; i < 100; i++) {
int j = r.nextInt(sz);
row = rc.getAt(j);
- assert(row.get(1).toString().equals("abc " + j));
+ assertEquals("abc " + j, row.get(1).toString());
for(int k = j + 1; k < j + (blockSize/4) && k < sz; k++) {
row = rc.next();
- assert(row.get(4).toString().equals("def " + k));
+ assertEquals("def " + k, row.get(4).toString());
}
}
+ }
+ private void runTest(int sz, int blockSize) throws SerDeException, HiveException {
+ runTest(sz, blockSize, "");
}
@Test
@@ -134,4 +142,9 @@ public class TestPTFRowContainer {
public void testSmallBlockSize() throws SerDeException, HiveException {
runTest(10 * 1000, 5);
}
+
+ @Test
+ public void testBlocksLargerThanSplit() throws SerDeException, HiveException, IOException {
+ runTest(5, 2, new String(new char[(int)FileSystem.getLocal(cfg).getDefaultBlockSize()]));
+ }
}
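The new testBlocksLargerThanSplit case fills a sixth column with a string as long as the local file system's default block size, forcing a single container block to span more than one input split, which is exactly the condition that produced corrupted results before the fix.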
[27/50] [abbrv] hive git commit: HIVE-11783: Extending HPL/SQL parser (Dmitry Tolpeko reviewed by Alan Gates)
Posted by xu...@apache.org.
HIVE-11783: Extending HPL/SQL parser (Dmitry Tolpeko reviewed by Alan Gates)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/06790789
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/06790789
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/06790789
Branch: refs/heads/beeline-cli
Commit: 06790789bddb35f60706071d8d3682d434fa05dd
Parents: d51c62a
Author: Dmitry Tolpeko <dm...@gmail.com>
Authored: Tue Sep 22 06:38:06 2015 -0700
Committer: Dmitry Tolpeko <dm...@gmail.com>
Committed: Tue Sep 22 06:38:06 2015 -0700
----------------------------------------------------------------------
.../antlr4/org/apache/hive/hplsql/Hplsql.g4 | 70 ++--
.../main/java/org/apache/hive/hplsql/Cmp.java | 314 ++++++++++++++++++
.../java/org/apache/hive/hplsql/Column.java | 29 +-
.../main/java/org/apache/hive/hplsql/Conn.java | 21 ++
.../main/java/org/apache/hive/hplsql/Copy.java | 50 ++-
.../main/java/org/apache/hive/hplsql/Exec.java | 66 +++-
.../java/org/apache/hive/hplsql/Expression.java | 33 +-
.../main/java/org/apache/hive/hplsql/File.java | 18 +-
.../main/java/org/apache/hive/hplsql/Meta.java | 28 +-
.../main/java/org/apache/hive/hplsql/Query.java | 18 ++
.../java/org/apache/hive/hplsql/Select.java | 23 +-
.../main/java/org/apache/hive/hplsql/Stmt.java | 8 +-
.../main/java/org/apache/hive/hplsql/Var.java | 110 ++++++-
.../apache/hive/hplsql/functions/Function.java | 6 +-
.../hive/hplsql/functions/FunctionMisc.java | 121 +++++++
.../org/apache/hive/hplsql/TestHplsqlLocal.java | 18 ++
.../apache/hive/hplsql/TestHplsqlOffline.java | 5 +
hplsql/src/test/queries/db/cmp_row_count.sql | 4 +
hplsql/src/test/queries/db/cmp_sum.sql | 3 +
hplsql/src/test/queries/db/copy_to_file.sql | 2 +
hplsql/src/test/queries/db/copy_to_hdfs.sql | 2 +
hplsql/src/test/queries/db/copy_to_table.sql | 2 +
hplsql/src/test/queries/db/part_count.sql | 17 +
hplsql/src/test/queries/db/part_count_by.sql | 4 +
hplsql/src/test/queries/db/schema.sql | 32 ++
hplsql/src/test/queries/db/select_into.sql | 20 +-
hplsql/src/test/queries/db/select_into2.sql | 17 +
.../test/queries/local/create_procedure2.sql | 16 +
hplsql/src/test/queries/local/if2.sql | 5 +
hplsql/src/test/queries/local/include.sql | 2 +
hplsql/src/test/queries/local/include_file.sql | 1 +
hplsql/src/test/queries/local/mult_div.sql | 8 +
hplsql/src/test/queries/offline/select_db2.sql | 5 +
.../src/test/results/db/cmp_row_count.out.txt | 12 +
hplsql/src/test/results/db/cmp_sum.out.txt | 320 +++++++++++++++++++
hplsql/src/test/results/db/copy_to_file.out.txt | 6 +
hplsql/src/test/results/db/copy_to_hdfs.out.txt | 4 +
.../src/test/results/db/copy_to_table.out.txt | 2 +
hplsql/src/test/results/db/part_count.out.txt | 15 +
.../src/test/results/db/part_count_by.out.txt | 13 +
hplsql/src/test/results/db/select_into.out.txt | 58 +++-
hplsql/src/test/results/db/select_into2.out.txt | 19 ++
.../results/local/create_procedure2.out.txt | 10 +
hplsql/src/test/results/local/if2.out.txt | 4 +
hplsql/src/test/results/local/include.out.txt | 8 +
hplsql/src/test/results/local/mult_div.out.txt | 7 +
.../src/test/results/offline/select_db2.out.txt | 6 +
47 files changed, 1471 insertions(+), 91 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/main/antlr4/org/apache/hive/hplsql/Hplsql.g4
----------------------------------------------------------------------
diff --git a/hplsql/src/main/antlr4/org/apache/hive/hplsql/Hplsql.g4 b/hplsql/src/main/antlr4/org/apache/hive/hplsql/Hplsql.g4
index ff772fe..bbe7276 100644
--- a/hplsql/src/main/antlr4/org/apache/hive/hplsql/Hplsql.g4
+++ b/hplsql/src/main/antlr4/org/apache/hive/hplsql/Hplsql.g4
@@ -43,6 +43,7 @@ stmt :
| break_stmt
| call_stmt
| close_stmt
+ | cmp_stmt
| copy_from_local_stmt
| copy_stmt
| commit_stmt
@@ -183,7 +184,7 @@ declare_handler_item : // Condition handler declaration
;
declare_temporary_table_item : // DECLARE TEMPORARY TABLE statement
- T_GLOBAL? T_TEMPORARY T_TABLE ident T_OPEN_P create_table_columns T_CLOSE_P create_table_options?
+ T_GLOBAL? T_TEMPORARY T_TABLE ident (T_AS? T_OPEN_P select_stmt T_CLOSE_P | T_AS? select_stmt | T_OPEN_P create_table_columns T_CLOSE_P) create_table_options?
;
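The extended rule also accepts CTAS-style temporary table declarations; an illustrative statement (object names are hypothetical):

  DECLARE GLOBAL TEMPORARY TABLE tmp_orders AS
    SELECT order_id, amount FROM orders WHERE status = 'OPEN';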
create_table_stmt :
@@ -252,6 +253,7 @@ create_table_options_db2_item :
| T_DISTRIBUTE T_BY T_HASH T_OPEN_P ident (T_COMMA ident)* T_CLOSE_P
| T_LOGGED
| T_NOT T_LOGGED
+ | T_DEFINITION T_ONLY
;
create_table_options_hive_item :
@@ -283,6 +285,7 @@ dtype : // Data types
| T_DATETIME
| T_DEC
| T_DECIMAL
+ | T_DOUBLE T_PRECISION?
| T_FLOAT
| T_INT
| T_INTEGER
@@ -377,7 +380,7 @@ else_block :
;
include_stmt : // INCLUDE statement
- T_INCLUDE file_name
+ T_INCLUDE (file_name | expr)
;
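INCLUDE now also accepts an expression, so the script path can be computed at run time; an illustrative statement (the variable is hypothetical):

  INCLUDE script_dir || '/setup.sql';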
insert_stmt : // INSERT statement
@@ -418,7 +421,7 @@ get_diag_stmt_rowcount_item :
;
grant_stmt :
- T_GRANT grant_stmt_item (T_COMMA grant_stmt_item)* T_TO ident
+ T_GRANT grant_stmt_item (T_COMMA grant_stmt_item)* T_TO T_ROLE ident
;
grant_stmt_item :
@@ -445,12 +448,20 @@ close_stmt : // CLOSE cursor statement
T_CLOSE L_ID
;
+cmp_stmt : // CMP statement
+ T_CMP (T_ROW_COUNT | T_SUM) cmp_source T_COMMA cmp_source
+ ;
+
+cmp_source :
+ (table_name where_clause? | T_OPEN_P select_stmt T_CLOSE_P) (T_AT ident)?
+ ;
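Illustrative uses of the new CMP statement, following the grammar above (table and connection profile names are hypothetical):

  CMP ROW_COUNT orders, orders_backup AT db2conn;
  CMP SUM orders WHERE load_dt = '2015-09-01', (SELECT * FROM orders_stage) AT hiveconn;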
+
copy_from_local_stmt : // COPY FROM LOCAL statement
T_COPY T_FROM T_LOCAL copy_source (T_COMMA copy_source)* T_TO copy_target copy_file_option*
;
copy_stmt : // COPY statement
- T_COPY (table_name | T_OPEN_P select_stmt T_CLOSE_P) T_TO copy_target copy_option*
+ T_COPY (table_name | T_OPEN_P select_stmt T_CLOSE_P) T_TO T_HDFS? copy_target copy_option*
;
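With the optional HDFS keyword, a table or query result can be copied directly to HDFS; an illustrative statement (the path is hypothetical):

  COPY (SELECT * FROM orders) TO HDFS /tmp/orders_export;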
copy_source :
@@ -458,7 +469,7 @@ copy_source :
;
copy_target :
- (ident | expr | L_FILE)
+ (file_name | expr)
;
copy_option :
@@ -615,7 +626,7 @@ select_list_item :
;
select_list_alias :
- {!_input.LT(1).getText().equalsIgnoreCase("FROM")}? T_AS? ident
+ {!_input.LT(1).getText().equalsIgnoreCase("INTO") && !_input.LT(1).getText().equalsIgnoreCase("FROM")}? T_AS? ident
| T_OPEN_P T_TITLE L_S_STRING T_CLOSE_P
;
@@ -642,7 +653,7 @@ from_table_name_clause :
;
from_subselect_clause :
- T_OPEN_P subselect_stmt T_CLOSE_P from_alias_clause?
+ T_OPEN_P select_stmt T_CLOSE_P from_alias_clause?
;
from_join_clause :
@@ -669,7 +680,8 @@ from_alias_clause :
!_input.LT(1).getText().equalsIgnoreCase("EXECUTE") &&
!_input.LT(1).getText().equalsIgnoreCase("GROUP") &&
!_input.LT(1).getText().equalsIgnoreCase("ORDER") &&
- !_input.LT(1).getText().equalsIgnoreCase("LIMIT")}?
+ !_input.LT(1).getText().equalsIgnoreCase("LIMIT") &&
+ !_input.LT(1).getText().equalsIgnoreCase("WITH")}?
T_AS? ident (T_OPEN_P L_ID (T_COMMA L_ID)* T_CLOSE_P)?
;
@@ -699,7 +711,7 @@ select_options :
select_options_item :
T_LIMIT expr
- | T_WITH (T_RR | T_RS | T_CS | T_UR)
+ | T_WITH (T_RR | T_RS | T_CS | T_UR) (T_USE T_AND T_KEEP (T_EXCLUSIVE | T_UPDATE | T_SHARE) T_LOCKS)?
;
update_stmt : // UPDATE statement
@@ -738,7 +750,7 @@ delete_stmt : // DELETE statement
;
bool_expr : // Boolean condition
- T_OPEN_P bool_expr T_CLOSE_P
+ T_NOT? T_OPEN_P bool_expr T_CLOSE_P
| bool_expr bool_expr_logical_operator bool_expr
| bool_expr_atom
;
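A parenthesized condition can now be negated as a whole; an illustrative fragment (names are hypothetical):

  IF NOT (c1 = 1 AND c2 = 2) THEN
    PRINT 'no match';
  END IF;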
@@ -900,6 +912,7 @@ expr_spec_func :
| T_MIN_PART_INT T_OPEN_P expr (T_COMMA expr (T_COMMA expr T_EQUAL expr)*)? T_CLOSE_P
| T_MAX_PART_DATE T_OPEN_P expr (T_COMMA expr (T_COMMA expr T_EQUAL expr)*)? T_CLOSE_P
| T_MIN_PART_DATE T_OPEN_P expr (T_COMMA expr (T_COMMA expr T_EQUAL expr)*)? T_CLOSE_P
+ | T_PART_COUNT T_OPEN_P expr (T_COMMA expr T_EQUAL expr)* T_CLOSE_P
| T_PART_LOC T_OPEN_P expr (T_COMMA expr T_EQUAL expr)+ (T_COMMA expr)? T_CLOSE_P
| T_TRIM T_OPEN_P expr T_CLOSE_P
| T_SUBSTRING T_OPEN_P expr T_FROM expr (T_FOR expr)? T_CLOSE_P
@@ -946,7 +959,7 @@ host_stmt :
;
file_name :
- L_ID | L_FILE
+ L_FILE | '/'? ident ('/' ident)*
;
date_literal : // DATE 'YYYY-MM-DD' literal
@@ -1012,6 +1025,7 @@ non_reserved_words : // Tokens that are not reserved words
| T_CLIENT
| T_CLOSE
| T_CLUSTERED
+ | T_CMP
| T_COLLECTION
| T_COPY
| T_COMMIT
@@ -1043,6 +1057,7 @@ non_reserved_words : // Tokens that are not reserved words
| T_DEFERRED
| T_DEFINED
| T_DEFINER
+ | T_DEFINITION
| T_DELETE
| T_DELIMITED
| T_DELIMITER
@@ -1051,7 +1066,8 @@ non_reserved_words : // Tokens that are not reserved words
| T_DIAGNOSTICS
| T_DISTINCT
| T_DISTRIBUTE
- | T_DO
+ | T_DO
+ | T_DOUBLE
| T_DROP
| T_DYNAMIC
// T_ELSE reserved word
@@ -1062,7 +1078,8 @@ non_reserved_words : // Tokens that are not reserved words
| T_EXCEPT
| T_EXEC
| T_EXECUTE
- | T_EXCEPTION
+ | T_EXCEPTION
+ | T_EXCLUSIVE
| T_EXISTS
| T_EXIT
| T_FETCH
@@ -1085,6 +1102,7 @@ non_reserved_words : // Tokens that are not reserved words
| T_HANDLER
| T_HASH
| T_HAVING
+ | T_HDFS
| T_HIVE
| T_HOST
| T_IDENTITY
@@ -1106,7 +1124,8 @@ non_reserved_words : // Tokens that are not reserved words
| T_ITEMS
| T_IS
| T_ISOPEN
- | T_JOIN
+ | T_JOIN
+ | T_KEEP
| T_KEY
| T_KEYS
| T_LAG
@@ -1121,6 +1140,7 @@ non_reserved_words : // Tokens that are not reserved words
| T_LOCAL
| T_LOCATOR
| T_LOCATORS
+ | T_LOCKS
| T_LOGGED
| T_LOGGING
| T_LOOP
@@ -1157,10 +1177,12 @@ non_reserved_words : // Tokens that are not reserved words
| T_OVER
| T_OVERWRITE
| T_OWNER
+ | T_PART_COUNT
| T_PART_LOC
| T_PARTITION
| T_PCTFREE
- | T_PCTUSED
+ | T_PCTUSED
+ | T_PRECISION
| T_PRESERVE
| T_PRIMARY
| T_PRINT
@@ -1181,7 +1203,8 @@ non_reserved_words : // Tokens that are not reserved words
| T_REVERSE
| T_RIGHT
| T_RLIKE
- | T_RS
+ | T_RS
+ | T_ROLE
| T_ROLLBACK
| T_ROW
| T_ROWS
@@ -1194,6 +1217,7 @@ non_reserved_words : // Tokens that are not reserved words
| T_SELECT
| T_SET
| T_SETS
+ | T_SHARE
| T_SIGNAL
| T_SMALLDATETIME
| T_SMALLINT
@@ -1277,6 +1301,7 @@ T_CHARACTER : C H A R A C T E R ;
T_CLIENT : C L I E N T ;
T_CLOSE : C L O S E ;
T_CLUSTERED : C L U S T E R E D;
+T_CMP : C M P ;
T_COLLECTION : C O L L E C T I O N ;
T_COPY : C O P Y ;
T_COMMIT : C O M M I T ;
@@ -1304,6 +1329,7 @@ T_DEFAULT : D E F A U L T ;
T_DEFERRED : D E F E R R E D ;
T_DEFINED : D E F I N E D ;
T_DEFINER : D E F I N E R ;
+T_DEFINITION : D E F I N I T I O N ;
T_DELETE : D E L E T E ;
T_DELIMITED : D E L I M I T E D ;
T_DELIMITER : D E L I M I T E R ;
@@ -1312,6 +1338,7 @@ T_DIAGNOSTICS : D I A G N O S T I C S ;
T_DISTINCT : D I S T I N C T ;
T_DISTRIBUTE : D I S T R I B U T E ;
T_DO : D O ;
+T_DOUBLE : D O U B L E ;
T_DROP : D R O P ;
T_DYNAMIC : D Y N A M I C ;
T_ELSE : E L S E ;
@@ -1323,6 +1350,7 @@ T_EXCEPT : E X C E P T ;
T_EXEC : E X E C ;
T_EXECUTE : E X E C U T E ;
T_EXCEPTION : E X C E P T I O N ;
+T_EXCLUSIVE : E X C L U S I V E ;
T_EXISTS : E X I S T S ;
T_EXIT : E X I T ;
T_FETCH : F E T C H ;
@@ -1344,6 +1372,7 @@ T_GROUP : G R O U P ;
T_HANDLER : H A N D L E R ;
T_HASH : H A S H ;
T_HAVING : H A V I N G ;
+T_HDFS : H D F S ;
T_HIVE : H I V E ;
T_HOST : H O S T ;
T_IDENTITY : I D E N T I T Y ;
@@ -1366,6 +1395,7 @@ T_IS : I S ;
T_ISOPEN : I S O P E N ;
T_ITEMS : I T E M S ;
T_JOIN : J O I N ;
+T_KEEP : K E E P;
T_KEY : K E Y ;
T_KEYS : K E Y S ;
T_LANGUAGE : L A N G U A G E ;
@@ -1377,6 +1407,7 @@ T_LINES : L I N E S ;
T_LOCAL : L O C A L ;
T_LOCATOR : L O C A T O R ;
T_LOCATORS : L O C A T O R S ;
+T_LOCKS : L O C K S ;
T_LOGGED : L O G G E D ;
T_LOGGING : L O G G I N G ;
T_LOOP : L O O P ;
@@ -1416,6 +1447,7 @@ T_OWNER : O W N E R ;
T_PARTITION : P A R T I T I O N ;
T_PCTFREE : P C T F R E E ;
T_PCTUSED : P C T U S E D ;
+T_PRECISION : P R E C I S I O N ;
T_PRESERVE : P R E S E R V E ;
T_PRIMARY : P R I M A R Y ;
T_PRINT : P R I N T ;
@@ -1434,6 +1466,7 @@ T_RETURNS : R E T U R N S ;
T_REVERSE : R E V E R S E ;
T_RIGHT : R I G H T ;
T_RLIKE : R L I K E ;
+T_ROLE : R O L E ;
T_ROLLBACK : R O L L B A C K ;
T_ROW : R O W ;
T_ROWS : R O W S ;
@@ -1449,6 +1482,7 @@ T_SEL : S E L ;
T_SELECT : S E L E C T ;
T_SET : S E T ;
T_SETS : S E T S;
+T_SHARE : S H A R E ;
T_SIGNAL : S I G N A L ;
T_SMALLDATETIME : S M A L L D A T E T I M E ;
T_SMALLINT : S M A L L I N T ;
@@ -1513,6 +1547,7 @@ T_MAX_PART_INT : M A X '_' P A R T '_' I N T ;
T_MIN_PART_INT : M I N '_' P A R T '_' I N T ;
T_MAX_PART_DATE : M A X '_' P A R T '_' D A T E ;
T_MIN_PART_DATE : M I N '_' P A R T '_' D A T E ;
+T_PART_COUNT : P A R T '_' C O U N T ;
T_PART_LOC : P A R T '_' L O C ;
T_RANK : R A N K ;
T_ROW_NUMBER : R O W '_' N U M B E R;
@@ -1566,8 +1601,7 @@ L_WS : L_BLANK+ -> skip ; // Whites
L_M_COMMENT : '/*' .*? '*/' -> channel(HIDDEN) ; // Multiline comment
L_S_COMMENT : ('--' | '//') .*? '\r'? '\n' -> channel(HIDDEN) ; // Single line comment
-L_FILE : '/'? L_ID ('/' L_ID)* // File path
- | ([a-zA-Z] ':' '\\'?)? L_ID ('\\' L_ID)*
+L_FILE : ([a-zA-Z] ':' '\\'?)? L_ID ('\\' L_ID)* // File path (a/b/c Linux path causes conflicts with division operator and handled at parser level)
;
L_LABEL : ([a-zA-Z] | L_DIGIT | '_')* ':'
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/main/java/org/apache/hive/hplsql/Cmp.java
----------------------------------------------------------------------
diff --git a/hplsql/src/main/java/org/apache/hive/hplsql/Cmp.java b/hplsql/src/main/java/org/apache/hive/hplsql/Cmp.java
new file mode 100644
index 0000000..ee65a88
--- /dev/null
+++ b/hplsql/src/main/java/org/apache/hive/hplsql/Cmp.java
@@ -0,0 +1,314 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.hplsql;
+
+import java.math.BigDecimal;
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.ResultSetMetaData;
+import java.util.ArrayList;
+
+import org.antlr.v4.runtime.ParserRuleContext;
+
+public class Cmp implements Runnable {
+
+ Exec exec;
+ Timer timer = new Timer();
+ boolean trace = false;
+ boolean info = false;
+
+ Query query;
+ String conn;
+ HplsqlParser.Cmp_stmtContext ctx;
+
+ int tests = 0;
+ int failedTests = 0;
+ int failedTestsHighDiff = 0;
+
+ Cmp(Exec e) {
+ exec = e;
+ trace = exec.getTrace();
+ info = exec.getInfo();
+ }
+
+ Cmp(Exec e, HplsqlParser.Cmp_stmtContext c, Query q, String cn) {
+ exec = e;
+ trace = exec.getTrace();
+ info = exec.getInfo();
+ ctx = c;
+ query = q;
+ conn = cn;
+ }
+
+ /**
+ * Run CMP command
+ */
+ Integer run(HplsqlParser.Cmp_stmtContext ctx) {
+ trace(ctx, "CMP");
+ this.ctx = ctx;
+ timer.start();
+ StringBuilder conn1 = new StringBuilder();
+ StringBuilder conn2 = new StringBuilder();
+ Query query1 = new Query();
+ Query query2 = new Query();
+ Boolean equal = null;
+ try {
+ String sql1 = getSql(ctx, conn1, 0);
+ String sql2 = getSql(ctx, conn2, 1);
+ if (trace) {
+ trace(ctx, "Query 1: " + sql1);
+ trace(ctx, "Query 2: " + sql2);
+ }
+ query1.setSql(sql1);
+ query2.setSql(sql2);
+ Cmp cmp1 = new Cmp(exec, ctx, query1, conn1.toString());
+ Cmp cmp2 = new Cmp(exec, ctx, query2, conn2.toString());
+ Thread t1 = new Thread(cmp1);
+ Thread t2 = new Thread(cmp2);
+ t1.start();
+ t2.start();
+ t1.join();
+ t2.join();
+ equal = compare(query1, query2);
+ }
+ catch(Exception e) {
+ exec.signal(e);
+ return -1;
+ }
+ finally {
+ long elapsed = timer.stop();
+ if (info) {
+ String message = "CMP ";
+ if (equal != null) {
+ if (equal) {
+ message += "Equal, " + tests + " tests";
+ }
+ else {
+ message += "Not Equal, " + failedTests + " of " + tests + " tests failed";
+ message += ", " + failedTestsHighDiff + " failed tests with more than 0.01% difference";
+ }
+ }
+ else {
+ message += "Failed";
+ }
+ info(ctx, message + ", " + timer.format());
+ }
+ exec.closeQuery(query1, conn1.toString());
+ exec.closeQuery(query2, conn2.toString());
+ }
+ return 0;
+ }
+
+ /**
+ * Get data for comparison from the source
+ */
+ public void run() {
+ exec.executeQuery(ctx, query, conn);
+ }
+
+ /**
+ * Compare the results
+ */
+ Boolean compare(Query query1, Query query2) {
+ if (query1.error()) {
+ exec.signal(query1);
+ return null;
+ }
+ else if (query2.error()) {
+ exec.signal(query2);
+ return null;
+ }
+ ResultSet rs1 = query1.getResultSet();
+ ResultSet rs2 = query2.getResultSet();
+ if (rs1 == null || rs2 == null) {
+ exec.setSqlCode(-1);
+ return null;
+ }
+ boolean equal = true;
+ tests = 0;
+ failedTests = 0;
+ try {
+ ResultSetMetaData rm1 = rs1.getMetaData();
+ ResultSetMetaData rm2 = rs2.getMetaData();
+ int cnt1 = rm1.getColumnCount();
+ int cnt2 = rm2.getColumnCount();
+ tests = cnt1;
+ while (rs1.next() && rs2.next()) {
+ for (int i = 1; i <= tests; i++) {
+ Var v1 = new Var(Var.Type.DERIVED_TYPE);
+ Var v2 = new Var(Var.Type.DERIVED_TYPE);
+ v1.setValue(rs1, rm1, i);
+ if (i <= cnt2) {
+ v2.setValue(rs2, rm2, i);
+ }
+ boolean e = true;
+ if (!(v1.isNull() && v2.isNull()) && !v1.equals(v2)) {
+ equal = false;
+ e = false;
+ failedTests++;
+ }
+ if (trace || info) {
+ String m = rm1.getColumnName(i) + "\t" + v1.toString() + "\t" + v2.toString();
+ if (!e) {
+ m += "\tNot equal";
+ BigDecimal diff = v1.percentDiff(v2);
+ if (diff != null) {
+ if (diff.compareTo(BigDecimal.ZERO) != 0) {
+ m += ", " + diff + "% difference";
+ failedTestsHighDiff++;
+ }
+ else {
+ m += ", less then 0.01% difference";
+ }
+ }
+ else {
+ failedTestsHighDiff++;
+ }
+ }
+ if (trace) {
+ trace(null, m);
+ }
+ else {
+ info(null, m);
+ }
+ }
+ }
+ if (equal) {
+ exec.setSqlSuccess();
+ }
+ else {
+ exec.setSqlCode(1);
+ }
+ }
+ }
+ catch(Exception e) {
+ exec.signal(e);
+ return null;
+ }
+ return new Boolean(equal);
+ }
+
+ /**
+ * Define the SQL query to access data
+ */
+ String getSql(HplsqlParser.Cmp_stmtContext ctx, StringBuilder conn, int idx) throws Exception {
+ StringBuilder sql = new StringBuilder();
+ String table = null;
+ String query = null;
+ if (ctx.cmp_source(idx).table_name() != null) {
+ table = evalPop(ctx.cmp_source(idx).table_name()).toString();
+ }
+ else {
+ query = evalPop(ctx.cmp_source(idx).select_stmt()).toString();
+ }
+ if (ctx.cmp_source(idx).T_AT() != null) {
+ conn.append(ctx.cmp_source(idx).ident().getText());
+ }
+ else if (table != null) {
+ conn.append(exec.getObjectConnection(ctx.cmp_source(idx).table_name().getText()));
+ }
+ else {
+ conn.append(exec.getStatementConnection());
+ }
+ sql.append("SELECT ");
+ sql.append(getSelectList(ctx, conn.toString(), table, query));
+ sql.append(" FROM ");
+ if (table != null) {
+ sql.append(table);
+ if (ctx.cmp_source(idx).where_clause() != null) {
+ sql.append(" " + evalPop(ctx.cmp_source(idx).where_clause()).toString());
+ }
+ }
+ else {
+ sql.append("(");
+ sql.append(query);
+ sql.append(") t");
+ }
+ return sql.toString();
+ }
+
+ /**
+ * Define SELECT list to access data
+ */
+ String getSelectList(HplsqlParser.Cmp_stmtContext ctx, String conn, String table, String query) throws Exception {
+ StringBuilder sql = new StringBuilder();
+ sql.append("COUNT(1) AS row_count");
+ if (ctx.T_SUM() != null && table != null) {
+ Row row = exec.meta.getRowDataType(ctx, conn, table);
+ if (row != null) {
+ ArrayList<Column> cols = row.getColumns();
+ int cnt = row.size();
+ sql.append(",\n");
+ for (int i = 0; i < cnt; i++) {
+ Column col = cols.get(i);
+ String name = col.getName();
+ Var.Type type = Var.defineType(col.getType());
+ sql.append("COUNT(" + name + ") AS " + name + "_COUNT_NOT_NULL");
+ if (type == Var.Type.STRING) {
+ sql.append(",\n");
+ sql.append("SUM(LENGTH(" + name + ")) AS " + name + "_SUM_LENGTH,\n");
+ sql.append("MIN(LENGTH(" + name + ")) AS " + name + "_MIN_LENGTH,\n");
+ sql.append("MAX(LENGTH(" + name + ")) AS " + name + "_MAX_LENGTH");
+ }
+ else if (type == Var.Type.BIGINT || type == Var.Type.DECIMAL || type == Var.Type.DOUBLE) {
+ sql.append(",\n");
+ sql.append("SUM(" + name + ") AS " + name + "_SUM,\n");
+ sql.append("MIN(" + name + ") AS " + name + "_MIN,\n");
+ sql.append("MAX(" + name + ") AS " + name + "_MAX");
+ }
+ else if (type == Var.Type.DATE || type == Var.Type.TIMESTAMP) {
+ sql.append(",\n");
+ sql.append("SUM(YEAR(" + name + ")) AS " + name + "_SUM_YEAR,\n");
+ sql.append("SUM(MONTH(" + name + ")) AS " + name + "_SUM_MONTH,\n");
+ sql.append("SUM(DAY(" + name + ")) AS " + name + "_SUM_DAY,\n");
+ sql.append("MIN(" + name + ") AS " + name + "_MIN,\n");
+ sql.append("MAX(" + name + ") AS " + name + "_MAX");
+ }
+ if (i + 1 < cnt) {
+ sql.append(",\n");
+ }
+ }
+ }
+ }
+ return sql.toString();
+ }
+
+ /**
+ * Evaluate the expression and pop value from the stack
+ */
+ Var evalPop(ParserRuleContext ctx) {
+ exec.visit(ctx);
+ if (!exec.stack.isEmpty()) {
+ return exec.stackPop();
+ }
+ return Var.Empty;
+ }
+
+ /**
+ * Trace and information
+ */
+ public void trace(ParserRuleContext ctx, String message) {
+ exec.trace(ctx, message);
+ }
+
+ public void info(ParserRuleContext ctx, String message) {
+ exec.info(ctx, message);
+ }
+}
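For reference, for CMP SUM the getSelectList method above builds a per-source aggregate query of roughly this shape (shown for a hypothetical table t with a string column c1 and an integer column c2):

  SELECT COUNT(1) AS row_count,
         COUNT(c1) AS c1_COUNT_NOT_NULL,
         SUM(LENGTH(c1)) AS c1_SUM_LENGTH,
         MIN(LENGTH(c1)) AS c1_MIN_LENGTH,
         MAX(LENGTH(c1)) AS c1_MAX_LENGTH,
         COUNT(c2) AS c2_COUNT_NOT_NULL,
         SUM(c2) AS c2_SUM,
         MIN(c2) AS c2_MIN,
         MAX(c2) AS c2_MAX
  FROM t;

Each side runs in its own thread, and compare() then walks the two single-row result sets column by column.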
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/main/java/org/apache/hive/hplsql/Column.java
----------------------------------------------------------------------
diff --git a/hplsql/src/main/java/org/apache/hive/hplsql/Column.java b/hplsql/src/main/java/org/apache/hive/hplsql/Column.java
index 252a870..e4e914c 100644
--- a/hplsql/src/main/java/org/apache/hive/hplsql/Column.java
+++ b/hplsql/src/main/java/org/apache/hive/hplsql/Column.java
@@ -27,9 +27,36 @@ public class Column {
String type;
Var value;
+ int len;
+ int scale;
+
Column(String name, String type) {
this.name = name;
- this.type = type;
+ len = 0;
+ scale = 0;
+ setType(type);
+ }
+
+ /**
+ * Set the column type with its length/precision
+ */
+ void setType(String type) {
+ int open = type.indexOf('(');
+ if (open == -1) {
+ this.type = type;
+ }
+ else {
+ this.type = type.substring(0, open);
+ int comma = type.indexOf(',', open);
+ int close = type.indexOf(')', open);
+ if (comma == -1) {
+ len = Integer.parseInt(type.substring(open + 1, close));
+ }
+ else {
+ len = Integer.parseInt(type.substring(open + 1, comma));
+ scale = Integer.parseInt(type.substring(comma + 1, close));
+ }
+ }
}
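For example, setType("DECIMAL(10,2)") stores type DECIMAL with len 10 and scale 2, while a plain type name such as INT leaves both at 0.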
/**
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/main/java/org/apache/hive/hplsql/Conn.java
----------------------------------------------------------------------
diff --git a/hplsql/src/main/java/org/apache/hive/hplsql/Conn.java b/hplsql/src/main/java/org/apache/hive/hplsql/Conn.java
index c8cc910..12f43c9 100644
--- a/hplsql/src/main/java/org/apache/hive/hplsql/Conn.java
+++ b/hplsql/src/main/java/org/apache/hive/hplsql/Conn.java
@@ -26,6 +26,7 @@ import java.sql.Connection;
import java.sql.SQLException;
import java.sql.Statement;
import java.sql.ResultSet;
+import java.sql.PreparedStatement;
public class Conn {
@@ -76,6 +77,25 @@ public class Conn {
}
/**
+ * Prepare a SQL query
+ */
+ public Query prepareQuery(Query query, String connName) {
+ try {
+ Connection conn = getConnection(connName);
+ timer.start();
+ PreparedStatement stmt = conn.prepareStatement(query.sql);
+ timer.stop();
+ query.set(conn, stmt);
+ if (info) {
+ exec.info(null, "Prepared statement executed successfully (" + timer.format() + ")");
+ }
+ } catch (Exception e) {
+ query.setError(e);
+ }
+ return query;
+ }
+
+ /**
* Execute a SQL statement
*/
public Query executeSql(String sql, String connName) {
@@ -117,6 +137,7 @@ public class Conn {
if (sqls != null) {
Statement s = conn.createStatement();
for (String sql : sqls) {
+ exec.info(null, "Starting pre-SQL statement");
s.execute(sql);
}
s.close();
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/main/java/org/apache/hive/hplsql/Copy.java
----------------------------------------------------------------------
diff --git a/hplsql/src/main/java/org/apache/hive/hplsql/Copy.java b/hplsql/src/main/java/org/apache/hive/hplsql/Copy.java
index 30b98ca..9968b24 100644
--- a/hplsql/src/main/java/org/apache/hive/hplsql/Copy.java
+++ b/hplsql/src/main/java/org/apache/hive/hplsql/Copy.java
@@ -26,6 +26,7 @@ import java.util.HashMap;
import java.util.Map;
import java.util.List;
import java.io.FileOutputStream;
+import java.io.OutputStream;
import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
@@ -165,8 +166,8 @@ public class Copy {
exec.returnConnection(targetConn, conn);
exec.setRowCount(rows);
long elapsed = timer.stop();
- if (trace) {
- trace(ctx, "COPY completed: " + rows + " row(s), " + timer.format() + ", " + rows/(elapsed/1000) + " rows/sec");
+ if (info) {
+ info(ctx, "COPY completed: " + rows + " row(s), " + timer.format() + ", " + rows/(elapsed/1000) + " rows/sec");
}
}
@@ -192,16 +193,35 @@ public class Copy {
byte[] nullstr = "NULL".getBytes();
int cols = rm.getColumnCount();
int rows = 0;
- if (trace) {
- trace(ctx, "SELECT executed: " + cols + " columns, output file: " + filename);
+ if (trace || info) {
+ String mes = "Query executed: " + cols + " columns, output file: " + filename;
+ if (trace) {
+ trace(ctx, mes);
+ }
+ else {
+ info(ctx, mes);
+ }
}
- java.io.File file = new java.io.File(filename);
- FileOutputStream out = null;
+ java.io.File file = null;
+ File hdfsFile = null;
+ if (ctx.T_HDFS() == null) {
+ file = new java.io.File(filename);
+ }
+ else {
+ hdfsFile = new File();
+ }
+ OutputStream out = null;
+ timer.start();
try {
- if (!file.exists()) {
- file.createNewFile();
+ if (file != null) {
+ if (!file.exists()) {
+ file.createNewFile();
+ }
+ out = new FileOutputStream(file, false /*append*/);
+ }
+ else {
+ out = hdfsFile.create(filename, true /*overwrite*/);
}
- out = new FileOutputStream(file, false /*append*/);
String col;
String sql = "";
if (sqlInsert) {
@@ -237,8 +257,9 @@ public class Copy {
out.close();
}
}
- if (trace) {
- trace(ctx, "COPY rows: " + rows);
+ long elapsed = timer.stop();
+ if (info) {
+ info(ctx, "COPY completed: " + rows + " row(s), " + timer.format() + ", " + rows/elapsed/1000 + " rows/sec");
}
}
@@ -376,7 +397,12 @@ public class Copy {
}
else if (option.T_AT() != null) {
targetConn = option.ident().getText();
- sqlInsertName = ctx.copy_target().ident().getText();
+ if (ctx.copy_target().expr() != null) {
+ sqlInsertName = evalPop(ctx.copy_target().expr()).toString();
+ }
+ else {
+ sqlInsertName = ctx.copy_target().getText();
+ }
}
else if (option.T_BATCHSIZE() != null) {
batchSize = evalPop(option.expr()).intValue();
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/main/java/org/apache/hive/hplsql/Exec.java
----------------------------------------------------------------------
diff --git a/hplsql/src/main/java/org/apache/hive/hplsql/Exec.java b/hplsql/src/main/java/org/apache/hive/hplsql/Exec.java
index f5592e1..38b5380 100644
--- a/hplsql/src/main/java/org/apache/hive/hplsql/Exec.java
+++ b/hplsql/src/main/java/org/apache/hive/hplsql/Exec.java
@@ -506,6 +506,24 @@ public class Exec extends HplsqlBaseVisitor<Integer> {
public Query executeQuery(ParserRuleContext ctx, String sql, String connProfile) {
return executeQuery(ctx, new Query(sql), connProfile);
}
+
+ /**
+ * Prepare a SQL query (SELECT)
+ */
+ public Query prepareQuery(ParserRuleContext ctx, Query query, String connProfile) {
+ if (!exec.offline) {
+ exec.rowCount = 0;
+ exec.conn.prepareQuery(query, connProfile);
+ return query;
+ }
+ setSqlNoData();
+ info(ctx, "Not executed - offline mode set");
+ return query;
+ }
+
+ public Query prepareQuery(ParserRuleContext ctx, String sql, String connProfile) {
+ return prepareQuery(ctx, new Query(sql), connProfile);
+ }
/**
* Execute a SQL statement
@@ -950,6 +968,11 @@ public class Exec extends HplsqlBaseVisitor<Integer> {
}
@Override
+ public Integer visitFrom_subselect_clause(HplsqlParser.From_subselect_clauseContext ctx) {
+ return exec.select.fromSubselect(ctx);
+ }
+
+ @Override
public Integer visitFrom_join_clause(HplsqlParser.From_join_clauseContext ctx) {
return exec.select.fromJoin(ctx);
}
@@ -1162,6 +1185,14 @@ public class Exec extends HplsqlBaseVisitor<Integer> {
}
/**
+ * CMP statement
+ */
+ @Override
+ public Integer visitCmp_stmt(HplsqlParser.Cmp_stmtContext ctx) {
+ return new Cmp(exec).run(ctx);
+ }
+
+ /**
* COPY statement
*/
@Override
@@ -1926,8 +1957,13 @@ public class Exec extends HplsqlBaseVisitor<Integer> {
*/
@Override
public Integer visitDate_literal(HplsqlParser.Date_literalContext ctx) {
- String str = evalPop(ctx.string()).toString();
- stackPush(new Var(Var.Type.DATE, Utils.toDate(str)));
+ if (!exec.buildSql) {
+ String str = evalPop(ctx.string()).toString();
+ stackPush(new Var(Var.Type.DATE, Utils.toDate(str)));
+ }
+ else {
+ stackPush(getFormattedText(ctx));
+ }
return 0;
}
@@ -1936,16 +1972,21 @@ public class Exec extends HplsqlBaseVisitor<Integer> {
*/
@Override
public Integer visitTimestamp_literal(HplsqlParser.Timestamp_literalContext ctx) {
- String str = evalPop(ctx.string()).toString();
- int len = str.length();
- int precision = 0;
- if (len > 19 && len <= 29) {
- precision = len - 20;
- if (precision > 3) {
- precision = 3;
+ if (!exec.buildSql) {
+ String str = evalPop(ctx.string()).toString();
+ int len = str.length();
+ int precision = 0;
+ if (len > 19 && len <= 29) {
+ precision = len - 20;
+ if (precision > 3) {
+ precision = 3;
+ }
}
+ stackPush(new Var(Utils.toTimestamp(str), precision));
+ }
+ else {
+ stackPush(getFormattedText(ctx));
}
- stackPush(new Var(Utils.toTimestamp(str), precision));
return 0;
}
@@ -1979,6 +2020,9 @@ public class Exec extends HplsqlBaseVisitor<Integer> {
* @throws Exception
*/
Connection getConnection(String conn) throws Exception {
+ if (conn == null || conn.equalsIgnoreCase("default")) {
+ conn = exec.conf.defaultConnection;
+ }
return exec.conn.getConnection(conn);
}
@@ -1993,7 +2037,7 @@ public class Exec extends HplsqlBaseVisitor<Integer> {
* Define the database type by profile name
*/
Conn.Type getConnectionType(String conn) {
- return exec.conn.getType(conn);
+ return exec.conn.getTypeByProfile(conn);
}
/**
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/main/java/org/apache/hive/hplsql/Expression.java
----------------------------------------------------------------------
diff --git a/hplsql/src/main/java/org/apache/hive/hplsql/Expression.java b/hplsql/src/main/java/org/apache/hive/hplsql/Expression.java
index 7269798..7c500a8 100644
--- a/hplsql/src/main/java/org/apache/hive/hplsql/Expression.java
+++ b/hplsql/src/main/java/org/apache/hive/hplsql/Expression.java
@@ -49,6 +49,9 @@ public class Expression {
else if (ctx.T_SUB() != null) {
operatorSub(ctx);
}
+ else if (ctx.T_MUL() != null) {
+ operatorMultiply(ctx);
+ }
else if (ctx.T_DIV() != null) {
operatorDiv(ctx);
}
@@ -98,16 +101,17 @@ public class Expression {
* Evaluate a boolean expression
*/
public void execBool(HplsqlParser.Bool_exprContext ctx) {
- if (ctx.T_OPEN_P() != null) {
- eval(ctx.bool_expr(0));
- return;
- }
- else if (ctx.bool_expr_atom() != null) {
+ if (ctx.bool_expr_atom() != null) {
eval(ctx.bool_expr_atom());
return;
}
Var result = evalPop(ctx.bool_expr(0));
- if (ctx.bool_expr_logical_operator() != null) {
+ if (ctx.T_OPEN_P() != null) {
+ if (ctx.T_NOT() != null) {
+ result.negate();
+ }
+ }
+ else if (ctx.bool_expr_logical_operator() != null) {
if (ctx.bool_expr_logical_operator().T_AND() != null) {
if (result.isTrue()) {
result = evalPop(ctx.bool_expr(1));
@@ -359,6 +363,23 @@ public class Expression {
}
/**
+ * Multiplication operator
+ */
+ public void operatorMultiply(HplsqlParser.ExprContext ctx) {
+ Var v1 = evalPop(ctx.expr(0));
+ Var v2 = evalPop(ctx.expr(1));
+ if (v1.value == null || v2.value == null) {
+ evalNull();
+ }
+ else if (v1.type == Type.BIGINT && v2.type == Type.BIGINT) {
+ exec.stackPush(new Var((Long)v1.value * (Long)v2.value));
+ }
+ else {
+ exec.signal(Signal.Type.UNSUPPORTED_OPERATION, "Unsupported data types in multiplication operator");
+ }
+ }
+
+ /**
* Division operator
*/
public void operatorDiv(HplsqlParser.ExprContext ctx) {
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/main/java/org/apache/hive/hplsql/File.java
----------------------------------------------------------------------
diff --git a/hplsql/src/main/java/org/apache/hive/hplsql/File.java b/hplsql/src/main/java/org/apache/hive/hplsql/File.java
index 6a8ddfe..e748772 100644
--- a/hplsql/src/main/java/org/apache/hive/hplsql/File.java
+++ b/hplsql/src/main/java/org/apache/hive/hplsql/File.java
@@ -46,17 +46,27 @@ public class File {
/**
* Create a file
*/
- public void create(String dir, String file, boolean overwrite) {
- path = new Path(dir, file);
+ public FSDataOutputStream create(boolean overwrite) {
try {
if (fs == null) {
- fs = FileSystem.get(new Configuration());
+ fs = createFs();
}
out = fs.create(path, overwrite);
}
catch (IOException e) {
e.printStackTrace();
}
+ return out;
+ }
+
+ public FSDataOutputStream create(String dir, String file, boolean overwrite) {
+ path = new Path(dir, file);
+ return create(overwrite);
+ }
+
+ public FSDataOutputStream create(String file, boolean overwrite) {
+ path = new Path(file);
+ return create(overwrite);
}
/**
@@ -66,7 +76,7 @@ public class File {
path = new Path(dir, file);
try {
if (fs == null) {
- fs = FileSystem.get(new Configuration());
+ fs = createFs();
}
in = fs.open(path);
} catch (IOException e) {
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/main/java/org/apache/hive/hplsql/Meta.java
----------------------------------------------------------------------
diff --git a/hplsql/src/main/java/org/apache/hive/hplsql/Meta.java b/hplsql/src/main/java/org/apache/hive/hplsql/Meta.java
index 485bcdf..2e04ef9 100644
--- a/hplsql/src/main/java/org/apache/hive/hplsql/Meta.java
+++ b/hplsql/src/main/java/org/apache/hive/hplsql/Meta.java
@@ -18,7 +18,9 @@
package org.apache.hive.hplsql;
+import java.sql.PreparedStatement;
import java.sql.ResultSet;
+import java.sql.ResultSetMetaData;
import java.util.ArrayList;
import java.util.HashMap;
@@ -90,12 +92,9 @@ public class Meta {
*/
Row readColumns(ParserRuleContext ctx, String conn, String table, HashMap<String, Row> map) {
Row row = null;
- String sql = null;
Conn.Type connType = exec.getConnectionType(conn);
if (connType == Conn.Type.HIVE) {
- sql = "DESCRIBE " + table;
- }
- if (sql != null) {
+ String sql = "DESCRIBE " + table;
Query query = new Query(sql);
exec.executeQuery(ctx, query, conn);
if (!query.error()) {
@@ -115,6 +114,27 @@ public class Meta {
}
exec.closeQuery(query, conn);
}
+ else {
+ Query query = exec.prepareQuery(ctx, "SELECT * FROM " + table, conn);
+ if (!query.error()) {
+ try {
+ PreparedStatement stmt = query.getPreparedStatement();
+ ResultSetMetaData rm = stmt.getMetaData();
+ int cols = rm.getColumnCount();
+ for (int i = 1; i <= cols; i++) {
+ String col = rm.getColumnName(i);
+ String typ = rm.getColumnTypeName(i);
+ if (row == null) {
+ row = new Row();
+ }
+ row.addColumn(col.toUpperCase(), typ);
+ }
+ map.put(table, row);
+ }
+ catch (Exception e) {}
+ }
+ exec.closeQuery(query, conn);
+ }
return row;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/main/java/org/apache/hive/hplsql/Query.java
----------------------------------------------------------------------
diff --git a/hplsql/src/main/java/org/apache/hive/hplsql/Query.java b/hplsql/src/main/java/org/apache/hive/hplsql/Query.java
index 08cd6a7..e196f86 100644
--- a/hplsql/src/main/java/org/apache/hive/hplsql/Query.java
+++ b/hplsql/src/main/java/org/apache/hive/hplsql/Query.java
@@ -21,6 +21,7 @@ package org.apache.hive.hplsql;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
+import java.sql.PreparedStatement;
import java.sql.Statement;
import org.antlr.v4.runtime.ParserRuleContext;
@@ -32,6 +33,7 @@ public class Query {
Connection conn;
Statement stmt;
+ PreparedStatement pstmt;
ResultSet rs;
Exception exception;
@@ -59,6 +61,11 @@ public class Query {
}
}
+ public void set(Connection conn, PreparedStatement pstmt) {
+ this.conn = conn;
+ this.pstmt = pstmt;
+ }
+
/**
* Set the fetch status
*/
@@ -132,6 +139,10 @@ public class Query {
stmt.close();
stmt = null;
}
+ if(pstmt != null) {
+ pstmt.close();
+ pstmt = null;
+ }
state = State.CLOSE;
} catch (SQLException e) {
e.printStackTrace();
@@ -190,6 +201,13 @@ public class Query {
}
/**
+ * Get the prepared statement object
+ */
+ public PreparedStatement getPreparedStatement() {
+ return pstmt;
+ }
+
+ /**
* Get the connection object
*/
public Connection getConnection() {
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/main/java/org/apache/hive/hplsql/Select.java
----------------------------------------------------------------------
diff --git a/hplsql/src/main/java/org/apache/hive/hplsql/Select.java b/hplsql/src/main/java/org/apache/hive/hplsql/Select.java
index 71ca848..56fbb05 100644
--- a/hplsql/src/main/java/org/apache/hive/hplsql/Select.java
+++ b/hplsql/src/main/java/org/apache/hive/hplsql/Select.java
@@ -218,7 +218,10 @@ public class Select {
sql.append(" " + getText(ctx.order_by_clause()));
}
if (ctx.select_options() != null) {
- sql.append(" " + evalPop(ctx.select_options()));
+ Var opt = evalPop(ctx.select_options());
+ if (!opt.isNull()) {
+ sql.append(" " + opt.toString());
+ }
}
if (ctx.select_list().select_list_limit() != null) {
sql.append(" LIMIT " + evalPop(ctx.select_list().select_list_limit().expr()));
@@ -281,6 +284,21 @@ public class Select {
exec.stackPush(sql);
return 0;
}
+
+ /**
+ * Subselect in FROM
+ */
+ public Integer fromSubselect(HplsqlParser.From_subselect_clauseContext ctx) {
+ StringBuilder sql = new StringBuilder();
+ sql.append("(");
+ sql.append(evalPop(ctx.select_stmt()).toString());
+ sql.append(")");
+ if (ctx.from_alias_clause() != null) {
+ sql.append(" ").append(exec.getText(ctx.from_alias_clause()));
+ }
+ exec.stackPush(sql);
+ return 0;
+ }
/**
* JOIN clause in FROM
@@ -341,10 +359,13 @@ public class Select {
* WHERE clause
*/
public Integer where(HplsqlParser.Where_clauseContext ctx) {
+ boolean oldBuildSql = exec.buildSql;
+ exec.buildSql = true;
StringBuilder sql = new StringBuilder();
sql.append(ctx.T_WHERE().getText());
sql.append(" " + evalPop(ctx.bool_expr()));
exec.stackPush(sql);
+ exec.buildSql = oldBuildSql;
return 0;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/main/java/org/apache/hive/hplsql/Stmt.java
----------------------------------------------------------------------
diff --git a/hplsql/src/main/java/org/apache/hive/hplsql/Stmt.java b/hplsql/src/main/java/org/apache/hive/hplsql/Stmt.java
index 6193f49..db9ea65 100644
--- a/hplsql/src/main/java/org/apache/hive/hplsql/Stmt.java
+++ b/hplsql/src/main/java/org/apache/hive/hplsql/Stmt.java
@@ -435,7 +435,13 @@ public class Stmt {
* INCLUDE statement
*/
public Integer include(HplsqlParser.Include_stmtContext ctx) {
- String file = ctx.file_name().getText();
+ String file;
+ if (ctx.file_name() != null) {
+ file = ctx.file_name().getText();
+ }
+ else {
+ file = evalPop(ctx.expr()).toString();
+ }
trace(ctx, "INCLUDE " + file);
exec.includeFile(file);
return 0;
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/main/java/org/apache/hive/hplsql/Var.java
----------------------------------------------------------------------
diff --git a/hplsql/src/main/java/org/apache/hive/hplsql/Var.java b/hplsql/src/main/java/org/apache/hive/hplsql/Var.java
index b31a14d..150e8b4 100644
--- a/hplsql/src/main/java/org/apache/hive/hplsql/Var.java
+++ b/hplsql/src/main/java/org/apache/hive/hplsql/Var.java
@@ -19,6 +19,7 @@
package org.apache.hive.hplsql;
import java.math.BigDecimal;
+import java.math.RoundingMode;
import java.util.ArrayList;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
@@ -32,7 +33,7 @@ import java.sql.Timestamp;
public class Var {
// Data types
- public enum Type {BOOL, CURSOR, DATE, DECIMAL, DERIVED_TYPE, DERIVED_ROWTYPE, FILE, IDENT, BIGINT, INTERVAL, ROW,
+ public enum Type {BOOL, CURSOR, DATE, DECIMAL, DERIVED_TYPE, DERIVED_ROWTYPE, DOUBLE, FILE, IDENT, BIGINT, INTERVAL, ROW,
RS_LOCATOR, STRING, STRINGLIST, TIMESTAMP, NULL};
public static final String DERIVED_TYPE = "DERIVED%TYPE";
public static final String DERIVED_ROWTYPE = "DERIVED%ROWTYPE";
@@ -79,6 +80,11 @@ public class Var {
this.value = value;
}
+ public Var(Double value) {
+ this.type = Type.DOUBLE;
+ this.value = value;
+ }
+
public Var(Date value) {
this.type = Type.DATE;
this.value = value;
@@ -169,6 +175,9 @@ public class Var {
if (val.type == Type.BIGINT) {
value = BigDecimal.valueOf(val.longValue());
}
+ else if (val.type == Type.DOUBLE) {
+ value = BigDecimal.valueOf(val.doubleValue());
+ }
}
else if (type == Type.DATE) {
value = Utils.toDate(val.toString());
@@ -238,6 +247,9 @@ public class Var {
else if (type == java.sql.Types.DECIMAL || type == java.sql.Types.NUMERIC) {
cast(new Var(rs.getBigDecimal(idx)));
}
+ else if (type == java.sql.Types.FLOAT || type == java.sql.Types.DOUBLE) {
+ cast(new Var(new Double(rs.getDouble(idx))));
+ }
return this;
}
@@ -287,6 +299,9 @@ public class Var {
else if (type.equalsIgnoreCase("DEC") || type.equalsIgnoreCase("DECIMAL") || type.equalsIgnoreCase("NUMERIC")) {
return Type.DECIMAL;
}
+ else if (type.equalsIgnoreCase("FLOAT") || type.toUpperCase().startsWith("DOUBLE")) {
+ return Type.DOUBLE;
+ }
else if (type.equalsIgnoreCase("DATE")) {
return Type.DATE;
}
@@ -332,34 +347,59 @@ public class Var {
scale = 0;
}
- /*
+ /**
* Compare values
*/
@Override
public boolean equals(Object obj) {
- if (this == obj) {
+ if (getClass() != obj.getClass()) {
+ return false;
+ }
+ Var var = (Var)obj;
+ if (this == var) {
return true;
}
- else if (obj == null || this.value == null) {
+ else if (var == null || var.value == null || this.value == null) {
return false;
}
- else if (getClass() != obj.getClass()) {
- return false;
- }
-
- Var var = (Var)obj;
- if (type == Type.BIGINT && var.type == Type.BIGINT &&
- ((Long)value).longValue() == ((Long)var.value).longValue()) {
- return true;
+ if (type == Type.BIGINT) {
+ if (var.type == Type.BIGINT && ((Long)value).longValue() == ((Long)var.value).longValue()) {
+ return true;
+ }
+ else if (var.type == Type.DECIMAL) {
+ return equals((BigDecimal)var.value, (Long)value);
+ }
}
else if (type == Type.STRING && var.type == Type.STRING &&
((String)value).equals((String)var.value)) {
return true;
}
+ else if (type == Type.DECIMAL && var.type == Type.DECIMAL &&
+ ((BigDecimal)value).compareTo((BigDecimal)var.value) == 0) {
+ return true;
+ }
+ else if (type == Type.DOUBLE) {
+ if (var.type == Type.DOUBLE && ((Double)value).compareTo((Double)var.value) == 0) {
+ return true;
+ }
+ else if (var.type == Type.DECIMAL && ((Double)value).compareTo(((BigDecimal)var.value).doubleValue()) == 0) {
+ return true;
+ }
+ }
return false;
}
+
+ /**
+ * Check if variables of different data types are equal
+ */
+ public boolean equals(BigDecimal d, Long i) {
+ if (d.compareTo(new BigDecimal(i)) == 0) {
+ return true;
+ }
+ return false;
+ }
- /*
+ /**
* Compare values
*/
public int compareTo(Var v) {
@@ -377,6 +417,20 @@ public class Var {
}
return -1;
}
+
+ /**
+ * Calculate difference between values in percent
+ */
+ public BigDecimal percentDiff(Var var) {
+ BigDecimal d1 = new Var(Var.Type.DECIMAL).cast(this).decimalValue();
+ BigDecimal d2 = new Var(Var.Type.DECIMAL).cast(var).decimalValue();
+ if (d1 != null && d2 != null) {
+ if (d1.compareTo(BigDecimal.ZERO) != 0) {
+ return d1.subtract(d2).abs().multiply(new BigDecimal(100)).divide(d1, 2, RoundingMode.HALF_UP);
+ }
+ }
+ return null;
+ }
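For example, comparing a value of 200 with 198 yields |200 - 198| * 100 / 200 = 1.00 percent, rounded to two decimal places (HALF_UP); a zero base value returns null.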
/**
* Increment an integer value
@@ -417,6 +471,26 @@ public class Var {
}
return -1;
}
+
+ /**
+ * Return a decimal value
+ */
+ public BigDecimal decimalValue() {
+ if (type == Type.DECIMAL) {
+ return (BigDecimal)value;
+ }
+ return null;
+ }
+
+ /**
+ * Return a double value
+ */
+ public double doubleValue() {
+ if (type == Type.DOUBLE) {
+ return ((Double)value).doubleValue();
+ }
+ return -1;
+ }
/**
* Return true/false for BOOL type
@@ -429,6 +503,16 @@ public class Var {
}
/**
+ * Negate the boolean value
+ */
+ public void negate() {
+ if(type == Type.BOOL && value != null) {
+ boolean v = ((Boolean)value).booleanValue();
+ value = Boolean.valueOf(!v);
+ }
+ }
+
+ /**
* Check if the variable contains NULL
*/
public boolean isNull() {
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/main/java/org/apache/hive/hplsql/functions/Function.java
----------------------------------------------------------------------
diff --git a/hplsql/src/main/java/org/apache/hive/hplsql/functions/Function.java b/hplsql/src/main/java/org/apache/hive/hplsql/functions/Function.java
index ae7acae..aa40a0a 100644
--- a/hplsql/src/main/java/org/apache/hive/hplsql/functions/Function.java
+++ b/hplsql/src/main/java/org/apache/hive/hplsql/functions/Function.java
@@ -88,7 +88,7 @@ public class Function {
if (trace && ctx.parent.parent instanceof HplsqlParser.Expr_stmtContext) {
trace(ctx, "FUNC " + name);
}
- FuncCommand func = map.get(name);
+ FuncCommand func = map.get(name.toUpperCase());
if (func != null) {
func.run(ctx);
}
@@ -693,6 +693,10 @@ public class Function {
exec.stackPush(new Var(i));
}
+ void evalInt(int i) {
+ evalInt(new Long(i));
+ }
+
/**
* Evaluate the expression to specified Date value
*/
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/main/java/org/apache/hive/hplsql/functions/FunctionMisc.java
----------------------------------------------------------------------
diff --git a/hplsql/src/main/java/org/apache/hive/hplsql/functions/FunctionMisc.java b/hplsql/src/main/java/org/apache/hive/hplsql/functions/FunctionMisc.java
index e022024..091552f 100644
--- a/hplsql/src/main/java/org/apache/hive/hplsql/functions/FunctionMisc.java
+++ b/hplsql/src/main/java/org/apache/hive/hplsql/functions/FunctionMisc.java
@@ -18,6 +18,12 @@
package org.apache.hive.hplsql.functions;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Map;
+
import org.apache.hive.hplsql.*;
public class FunctionMisc extends Function {
@@ -34,11 +40,13 @@ public class FunctionMisc extends Function {
f.map.put("DECODE", new FuncCommand() { public void run(HplsqlParser.Expr_func_paramsContext ctx) { decode(ctx); }});
f.map.put("NVL", new FuncCommand() { public void run(HplsqlParser.Expr_func_paramsContext ctx) { nvl(ctx); }});
f.map.put("NVL2", new FuncCommand() { public void run(HplsqlParser.Expr_func_paramsContext ctx) { nvl2(ctx); }});
+ f.map.put("PART_COUNT_BY", new FuncCommand() { public void run(HplsqlParser.Expr_func_paramsContext ctx) { partCountBy(ctx); }});
f.specMap.put("ACTIVITY_COUNT", new FuncSpecCommand() { public void run(HplsqlParser.Expr_spec_funcContext ctx) { activityCount(ctx); }});
f.specMap.put("CAST", new FuncSpecCommand() { public void run(HplsqlParser.Expr_spec_funcContext ctx) { cast(ctx); }});
f.specMap.put("CURRENT", new FuncSpecCommand() { public void run(HplsqlParser.Expr_spec_funcContext ctx) { current(ctx); }});
f.specMap.put("CURRENT_USER", new FuncSpecCommand() { public void run(HplsqlParser.Expr_spec_funcContext ctx) { currentUser(ctx); }});
+ f.specMap.put("PART_COUNT", new FuncSpecCommand() { public void run(HplsqlParser.Expr_spec_funcContext ctx) { partCount(ctx); }});
f.specMap.put("USER", new FuncSpecCommand() { public void run(HplsqlParser.Expr_spec_funcContext ctx) { currentUser(ctx); }});
f.specSqlMap.put("CURRENT", new FuncSpecCommand() { public void run(HplsqlParser.Expr_spec_funcContext ctx) { currentSql(ctx); }});
@@ -185,4 +193,117 @@ public class FunctionMisc extends Function {
evalNull();
}
}
+
+ /**
+ * PART_COUNT function
+ */
+ public void partCount(HplsqlParser.Expr_spec_funcContext ctx) {
+ String tabname = evalPop(ctx.expr(0)).toString();
+ StringBuilder sql = new StringBuilder();
+ sql.append("SHOW PARTITIONS ");
+ sql.append(tabname);
+ int cnt = ctx.expr().size();
+ if (cnt > 1) {
+ sql.append(" PARTITION (");
+ int i = 1;
+ while (i + 1 < cnt) {
+ String col = evalPop(ctx.expr(i)).toString();
+ String val = evalPop(ctx.expr(i + 1)).toSqlString();
+ if (i > 2) {
+ sql.append(", ");
+ }
+ sql.append(col);
+ sql.append("=");
+ sql.append(val);
+ i += 2;
+ }
+ sql.append(")");
+ }
+ if (trace) {
+ trace(ctx, "Query: " + sql);
+ }
+ if (exec.getOffline()) {
+ evalNull();
+ return;
+ }
+ Query query = exec.executeQuery(ctx, sql.toString(), exec.conf.defaultConnection);
+ if (query.error()) {
+ evalNullClose(query, exec.conf.defaultConnection);
+ return;
+ }
+ int result = 0;
+ ResultSet rs = query.getResultSet();
+ try {
+ while (rs.next()) {
+ result++;
+ }
+ } catch (SQLException e) {
+ evalNullClose(query, exec.conf.defaultConnection);
+ return;
+ }
+ evalInt(result);
+ exec.closeQuery(query, exec.conf.defaultConnection);
+ }
+
+ /**
+ * PART_COUNT_BY function
+ */
+ public void partCountBy(HplsqlParser.Expr_func_paramsContext ctx) {
+ int cnt = ctx.func_param().size();
+ if (cnt < 1 || exec.getOffline()) {
+ return;
+ }
+ String tabname = evalPop(ctx.func_param(0).expr()).toString();
+ ArrayList<String> keys = null;
+ if (cnt > 1) {
+ keys = new ArrayList<String>();
+ for (int i = 1; i < cnt; i++) {
+ keys.add(evalPop(ctx.func_param(i).expr()).toString().toUpperCase());
+ }
+ }
+ String sql = "SHOW PARTITIONS " + tabname;
+ Query query = exec.executeQuery(ctx, sql, exec.conf.defaultConnection);
+ if (query.error()) {
+ exec.closeQuery(query, exec.conf.defaultConnection);
+ return;
+ }
+ ResultSet rs = query.getResultSet();
+ HashMap<String, Integer> group = new HashMap<String, Integer>();
+ try {
+ while (rs.next()) {
+ String part = rs.getString(1);
+ String[] parts = part.split("/");
+ String key = parts[0];
+ if (cnt > 1) {
+ StringBuilder k = new StringBuilder();
+ for (int i = 0; i < parts.length; i++) {
+ if (keys.contains(parts[i].split("=")[0].toUpperCase())) {
+ if (k.length() > 0) {
+ k.append("/");
+ }
+ k.append(parts[i]);
+ }
+ }
+ key = k.toString();
+ }
+ Integer count = group.get(key);
+ if (count == null) {
+ count = new Integer(0);
+ }
+ group.put(key, count + 1);
+ }
+ } catch (SQLException e) {
+ exec.closeQuery(query, exec.conf.defaultConnection);
+ return;
+ }
+ if (cnt == 1) {
+ evalInt(group.size());
+ }
+ else {
+ for (Map.Entry<String, Integer> i : group.entrySet()) {
+ System.out.println(i.getKey() + '\t' + i.getValue());
+ }
+ }
+ exec.closeQuery(query, exec.conf.defaultConnection);
+ }
}
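To summarize the two new functions above: PART_COUNT builds a SHOW PARTITIONS statement, optionally narrowed by a PARTITION (col=val, ...) clause, and counts the returned rows, evaluating to NULL offline or on error. PART_COUNT_BY walks the same SHOW PARTITIONS output, where each row looks like dt=2000-01-01/region=1, and either counts distinct leading keys or regroups the rows by the requested key columns. A self-contained sketch of that regrouping step over canned partition strings:

    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    public final class PartCountBySketch {
      public static void main(String[] args) {
        List<String> partitions = Arrays.asList(
            "dt=2000-01-01/region=1", "dt=2000-01-01/region=2",
            "dt=2013-08-08/region=1", "dt=2013-08-08/region=10");
        List<String> keys = Arrays.asList("REGION");  // PART_COUNT_BY(tab, region)

        Map<String, Integer> group = new HashMap<>();
        for (String part : partitions) {
          StringBuilder k = new StringBuilder();
          for (String kv : part.split("/")) {
            // Keep only the key=value pieces the caller asked to group by.
            if (keys.contains(kv.split("=")[0].toUpperCase())) {
              if (k.length() > 0) {
                k.append("/");
              }
              k.append(kv);
            }
          }
          Integer count = group.get(k.toString());
          group.put(k.toString(), count == null ? 1 : count + 1);
        }
        System.out.println(group);  // e.g. {region=1=2, region=2=1, region=10=1}
      }
    }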
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/test/java/org/apache/hive/hplsql/TestHplsqlLocal.java
----------------------------------------------------------------------
diff --git a/hplsql/src/test/java/org/apache/hive/hplsql/TestHplsqlLocal.java b/hplsql/src/test/java/org/apache/hive/hplsql/TestHplsqlLocal.java
index 6a67cd0..8299828 100644
--- a/hplsql/src/test/java/org/apache/hive/hplsql/TestHplsqlLocal.java
+++ b/hplsql/src/test/java/org/apache/hive/hplsql/TestHplsqlLocal.java
@@ -94,6 +94,10 @@ public class TestHplsqlLocal {
}
@Test
+ public void testCreateProcedure2() throws Exception {
+ run("create_procedure2");
+ }
+ @Test
public void testCreateProcedureNoParams() throws Exception {
run("create_procedure_no_params");
}
@@ -162,8 +166,17 @@ public class TestHplsqlLocal {
public void testIf() throws Exception {
run("if");
}
+
+ @Test
+ public void testIf2() throws Exception {
+ run("if2");
+ }
@Test
+ public void testInclude() throws Exception {
+ run("include");
+ }
+ @Test
public void testInstr() throws Exception {
run("instr");
}
@@ -199,6 +212,11 @@ public class TestHplsqlLocal {
}
@Test
+ public void testMultDiv() throws Exception {
+ run("mult_div");
+ }
+
+ @Test
public void testNvl() throws Exception {
run("nvl");
}
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/test/java/org/apache/hive/hplsql/TestHplsqlOffline.java
----------------------------------------------------------------------
diff --git a/hplsql/src/test/java/org/apache/hive/hplsql/TestHplsqlOffline.java b/hplsql/src/test/java/org/apache/hive/hplsql/TestHplsqlOffline.java
index eeaa395..55238ed 100644
--- a/hplsql/src/test/java/org/apache/hive/hplsql/TestHplsqlOffline.java
+++ b/hplsql/src/test/java/org/apache/hive/hplsql/TestHplsqlOffline.java
@@ -43,6 +43,11 @@ public class TestHplsqlOffline {
run("create_table_ora");
}
+ @Test
+ public void testSelectDb2() throws Exception {
+ run("select_db2");
+ }
+
/**
* Run a test file
*/
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/test/queries/db/cmp_row_count.sql
----------------------------------------------------------------------
diff --git a/hplsql/src/test/queries/db/cmp_row_count.sql b/hplsql/src/test/queries/db/cmp_row_count.sql
new file mode 100644
index 0000000..b33d841
--- /dev/null
+++ b/hplsql/src/test/queries/db/cmp_row_count.sql
@@ -0,0 +1,4 @@
+cmp row_count src, src at hive2conn;
+cmp row_count src where 1=1, src at hive2conn;
+cmp row_count (select 'A' from src), src where 2=2 at hive2conn;
+
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/test/queries/db/cmp_sum.sql
----------------------------------------------------------------------
diff --git a/hplsql/src/test/queries/db/cmp_sum.sql b/hplsql/src/test/queries/db/cmp_sum.sql
new file mode 100644
index 0000000..32347e1
--- /dev/null
+++ b/hplsql/src/test/queries/db/cmp_sum.sql
@@ -0,0 +1,3 @@
+cmp sum src_dt, src_dt at hive2conn;
+cmp sum src_dt where 1=1, src_dt at hive2conn;
+
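As the expected output later in this commit shows, CMP SUM expands each table reference into one wide aggregate query and compares the two result rows column by column: string columns are measured through LENGTH(), numeric columns get SUM/MIN/MAX, and date/timestamp columns are summarized via YEAR/MONTH/DAY sums. A rough sketch of how such a query might be assembled (buildQuery is an illustrative helper, not the HPL/SQL implementation):

    import java.util.Arrays;
    import java.util.List;

    public final class CmpSumSketch {
      // Sketch only: string columns via LENGTH(), numeric columns via SUM.
      static String buildQuery(String table, List<String> stringCols,
                               List<String> numericCols) {
        StringBuilder sql = new StringBuilder("SELECT COUNT(1) AS row_count");
        for (String c : stringCols) {
          sql.append(",\nSUM(LENGTH(").append(c).append(")) AS ")
             .append(c).append("_SUM_LENGTH");
        }
        for (String c : numericCols) {
          sql.append(",\nSUM(").append(c).append(") AS ").append(c).append("_SUM");
        }
        return sql.append(" FROM ").append(table).toString();
      }

      public static void main(String[] args) {
        System.out.println(buildQuery("src_dt",
            Arrays.asList("C1"), Arrays.asList("C4", "C5")));
      }
    }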
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/test/queries/db/copy_to_file.sql
----------------------------------------------------------------------
diff --git a/hplsql/src/test/queries/db/copy_to_file.sql b/hplsql/src/test/queries/db/copy_to_file.sql
new file mode 100644
index 0000000..6135471
--- /dev/null
+++ b/hplsql/src/test/queries/db/copy_to_file.sql
@@ -0,0 +1,2 @@
+copy src to target/tmp/src.txt;
+copy (select * from src) to target/tmp/src2.txt sqlinsert src2;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/test/queries/db/copy_to_hdfs.sql
----------------------------------------------------------------------
diff --git a/hplsql/src/test/queries/db/copy_to_hdfs.sql b/hplsql/src/test/queries/db/copy_to_hdfs.sql
new file mode 100644
index 0000000..fd01d7b
--- /dev/null
+++ b/hplsql/src/test/queries/db/copy_to_hdfs.sql
@@ -0,0 +1,2 @@
+--copy src to hdfs src.txt;
+copy (select * from src) to hdfs /user/hplsql/src2.txt delimiter '\01';
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/test/queries/db/copy_to_table.sql
----------------------------------------------------------------------
diff --git a/hplsql/src/test/queries/db/copy_to_table.sql b/hplsql/src/test/queries/db/copy_to_table.sql
new file mode 100644
index 0000000..674c0fc
--- /dev/null
+++ b/hplsql/src/test/queries/db/copy_to_table.sql
@@ -0,0 +1,2 @@
+copy src to src2 at mysqlconn;
+copy (select * from src) to src2 at mysqlconn;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/test/queries/db/part_count.sql
----------------------------------------------------------------------
diff --git a/hplsql/src/test/queries/db/part_count.sql b/hplsql/src/test/queries/db/part_count.sql
new file mode 100644
index 0000000..9d62c38
--- /dev/null
+++ b/hplsql/src/test/queries/db/part_count.sql
@@ -0,0 +1,17 @@
+if part_count(partition_date_1) = 5 then
+ print 'success';
+else
+ print 'failed';
+end if;
+
+if part_count(partition_date_1, region='1') = 2 then
+ print 'success';
+else
+ print 'failed';
+end if;
+
+if part_count(partition_date_1a) is null then -- table does not exist
+ print 'success';
+else
+ print 'failed';
+end if;
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/test/queries/db/part_count_by.sql
----------------------------------------------------------------------
diff --git a/hplsql/src/test/queries/db/part_count_by.sql b/hplsql/src/test/queries/db/part_count_by.sql
new file mode 100644
index 0000000..599dc5b
--- /dev/null
+++ b/hplsql/src/test/queries/db/part_count_by.sql
@@ -0,0 +1,4 @@
+part_count_by(partition_date_1);
+part_count_by(partition_date_1, dt);
+part_count_by(partition_date_1, dt, region);
+part_count_by(partition_date_1, region);
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/test/queries/db/schema.sql
----------------------------------------------------------------------
diff --git a/hplsql/src/test/queries/db/schema.sql b/hplsql/src/test/queries/db/schema.sql
new file mode 100644
index 0000000..0c41569
--- /dev/null
+++ b/hplsql/src/test/queries/db/schema.sql
@@ -0,0 +1,32 @@
+drop table if exists src_dt;
+
+create table src_dt (
+ c1 string,
+ c2 varchar(30),
+ c3 char(30),
+ c4 tinyint,
+ c5 smallint,
+ c6 int,
+ c7 bigint,
+ c8 decimal(19,4),
+ c9 float,
+ c10 double,
+ c11 date,
+ c12 timestamp
+);
+
+insert overwrite table src_dt
+select
+ value c1,
+ value c2,
+ value c3,
+ cast(key as tinyint) c4,
+ cast(key as smallint) c5,
+ cast(key as int) c6,
+ cast(key as bigint) c7,
+ cast(key as decimal)/10 c8,
+ cast(key as float)/10 c9,
+ cast(key as double)/10 c10,
+ date '2015-09-07' c11,
+ cast(date '2015-09-07' as timestamp) c12
+from src;
\ No newline at end of file
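One detail of this schema pays off in the cmp_sum expected output further down: c4 is cast(key as tinyint), and src keys run from 0 to 498, well past the tinyint range, so the failing casts come back as NULL. That is why c4_count_not_null reports 106 while every other column reports 500. A rough sketch of the assumed string-to-tinyint semantics (Hive's actual cast path is not reproduced here):

    public final class TinyintCastSketch {
      // Assumption: a string key that does not parse as a byte becomes NULL.
      static Byte castToTinyint(String key) {
        try {
          return Byte.valueOf(key);  // only -128..127 parse successfully
        } catch (NumberFormatException e) {
          return null;               // e.g. "200" -> NULL
        }
      }

      public static void main(String[] args) {
        System.out.println(castToTinyint("97"));   // 97
        System.out.println(castToTinyint("200"));  // null
      }
    }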
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/test/queries/db/select_into.sql
----------------------------------------------------------------------
diff --git a/hplsql/src/test/queries/db/select_into.sql b/hplsql/src/test/queries/db/select_into.sql
index 3995ba2..1da610a 100644
--- a/hplsql/src/test/queries/db/select_into.sql
+++ b/hplsql/src/test/queries/db/select_into.sql
@@ -1,17 +1,33 @@
+DECLARE v_bint BIGINT;
DECLARE v_int INT;
+DECLARE v_sint SMALLINT;
+DECLARE v_tint TINYINT;
DECLARE v_dec DECIMAL(18,2);
DECLARE v_dec0 DECIMAL(18,0);
+DECLARE v_str STRING;
SELECT TOP 1
+ CAST(1 AS BIGINT),
CAST(1 AS INT),
+ CAST(1 AS SMALLINT),
+ CAST(1 AS TINYINT),
CAST(1.1 AS DECIMAL(18,2)),
CAST(1.1 AS DECIMAL(18,0))
INTO
+ v_bint,
v_int,
+ v_sint,
+ v_tint,
v_dec,
v_dec0
-FROM src ;
+FROM src;
+PRINT 'BIGINT: ' || v_bint;
PRINT 'INT: ' || v_int;
+PRINT 'SMALLINT: ' || v_sint;
+PRINT 'TINYINT: ' || v_tint;
PRINT 'DECIMAL: ' || v_dec;
-PRINT 'DECIMAL0: ' || v_dec0;
\ No newline at end of file
+PRINT 'DECIMAL0: ' || v_dec0;
+
+select 'a' into v_str from src limit 1;
+print 'string: ' || v_str;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/test/queries/db/select_into2.sql
----------------------------------------------------------------------
diff --git a/hplsql/src/test/queries/db/select_into2.sql b/hplsql/src/test/queries/db/select_into2.sql
new file mode 100644
index 0000000..e0f738c
--- /dev/null
+++ b/hplsql/src/test/queries/db/select_into2.sql
@@ -0,0 +1,17 @@
+declare v_float float;
+declare v_double double;
+declare v_double2 double precision;
+
+select
+ cast(1.1 as float),
+ cast(1.1 as double),
+ cast(1.1 as double)
+into
+ v_float,
+ v_double,
+ v_double2
+from src limit 1;
+
+print 'float: ' || v_float;
+print 'double: ' || v_double;
+print 'double precision: ' || v_double2;
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/test/queries/local/create_procedure2.sql
----------------------------------------------------------------------
diff --git a/hplsql/src/test/queries/local/create_procedure2.sql b/hplsql/src/test/queries/local/create_procedure2.sql
new file mode 100644
index 0000000..8875c6a
--- /dev/null
+++ b/hplsql/src/test/queries/local/create_procedure2.sql
@@ -0,0 +1,16 @@
+CREATE PROCEDURE set_message(IN name STRING, OUT result STRING)
+BEGIN
+ DECLARE str STRING DEFAULT 'Hello, ' || name || '!';
+ Work: begin
+ declare continue handler for sqlexception begin
+ set result = null;
+ print 'error';
+ end;
+ set result = str;
+ end;
+END;
+
+DECLARE str STRING;
+CALL set_message('world', str);
+PRINT str;
+
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/test/queries/local/if2.sql
----------------------------------------------------------------------
diff --git a/hplsql/src/test/queries/local/if2.sql b/hplsql/src/test/queries/local/if2.sql
new file mode 100644
index 0000000..b645b86
--- /dev/null
+++ b/hplsql/src/test/queries/local/if2.sql
@@ -0,0 +1,5 @@
+if not (coalesce(1,0) between 3 and 5) then
+ print 'correct';
+else
+ print 'failed';
+end if;
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/test/queries/local/include.sql
----------------------------------------------------------------------
diff --git a/hplsql/src/test/queries/local/include.sql b/hplsql/src/test/queries/local/include.sql
new file mode 100644
index 0000000..c1dfb96
--- /dev/null
+++ b/hplsql/src/test/queries/local/include.sql
@@ -0,0 +1,2 @@
+include src/test/queries/local/include_file.sql
+include 'src/test/queries/local/include_file' || '.sql'
\ No newline at end of file
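Both INCLUDE forms above resolve to the same file: the target can be a bare path or any string expression, so 'src/test/queries/local/include_file' || '.sql' is concatenated before the file is read, and the expected output further down shows the included print running twice. A small sketch of that resolve-then-read step, with evalToString standing in for the real expression evaluator:

    import java.io.IOException;
    import java.nio.file.Files;
    import java.nio.file.Paths;

    public final class IncludeSketch {
      // Illustrative stand-in for evaluating the include target expression.
      static String evalToString(String... exprParts) {
        StringBuilder sb = new StringBuilder();
        for (String p : exprParts) {
          sb.append(p);  // models '...' || '.sql' concatenation
        }
        return sb.toString();
      }

      public static void main(String[] args) throws IOException {
        String path = evalToString("src/test/queries/local/include_file", ".sql");
        // Assumes the file exists relative to the working directory.
        String content = new String(Files.readAllBytes(Paths.get(path)), "UTF-8");
        System.out.println(content);  // statements would then be executed
      }
    }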
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/test/queries/local/include_file.sql
----------------------------------------------------------------------
diff --git a/hplsql/src/test/queries/local/include_file.sql b/hplsql/src/test/queries/local/include_file.sql
new file mode 100644
index 0000000..ac5e0f0
--- /dev/null
+++ b/hplsql/src/test/queries/local/include_file.sql
@@ -0,0 +1 @@
+print 'file included successfully';
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/test/queries/local/mult_div.sql
----------------------------------------------------------------------
diff --git a/hplsql/src/test/queries/local/mult_div.sql b/hplsql/src/test/queries/local/mult_div.sql
new file mode 100644
index 0000000..ebad8f4
--- /dev/null
+++ b/hplsql/src/test/queries/local/mult_div.sql
@@ -0,0 +1,8 @@
+declare a int default 8;
+declare b int default 4;
+declare c int default 2;
+
+print a/b/c;
+
+set a = 4 * 2 / cast(4 as int) /2;
+set b = 4 * 2 /cast(4 as int)/2;
\ No newline at end of file
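The point of this test is operator grouping rather than arithmetic: * and / are left-associative, so a/b/c parses as (a/b)/c = (8/4)/2 = 1, and both spellings of 4 * 2 / cast(4 as int) / 2 reduce to 1 whatever the spacing, which is exactly what the expected output further down prints. The same grouping in plain Java:

    public final class MultDivSketch {
      public static void main(String[] args) {
        int a = 8, b = 4, c = 2;
        System.out.println(a / b / c);      // (8 / 4) / 2 = 1
        System.out.println(4 * 2 / 4 / 2);  // ((4 * 2) / 4) / 2 = 1
      }
    }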
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/test/queries/offline/select_db2.sql
----------------------------------------------------------------------
diff --git a/hplsql/src/test/queries/offline/select_db2.sql b/hplsql/src/test/queries/offline/select_db2.sql
new file mode 100644
index 0000000..a0d2da5
--- /dev/null
+++ b/hplsql/src/test/queries/offline/select_db2.sql
@@ -0,0 +1,5 @@
+select coalesce(max(info_id)+1,0) into NextID from sproc_info with rr use and keep exclusive locks;
+
+select cd, cd + inc days, cd - inc days + coalesce(inc, 0) days
+from (select date '2015-09-02' as cd, 3 as inc from sysibm.sysdummy1);
+
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/test/results/db/cmp_row_count.out.txt
----------------------------------------------------------------------
diff --git a/hplsql/src/test/results/db/cmp_row_count.out.txt b/hplsql/src/test/results/db/cmp_row_count.out.txt
new file mode 100644
index 0000000..16fadfd
--- /dev/null
+++ b/hplsql/src/test/results/db/cmp_row_count.out.txt
@@ -0,0 +1,12 @@
+Ln:1 CMP
+Ln:1 Query 1: SELECT COUNT(1) AS row_count FROM src
+Ln:1 Query 2: SELECT COUNT(1) AS row_count FROM src
+row_count 500 500
+Ln:2 CMP
+Ln:2 Query 1: SELECT COUNT(1) AS row_count FROM src where 1 = 1
+Ln:2 Query 2: SELECT COUNT(1) AS row_count FROM src
+row_count 500 500
+Ln:3 CMP
+Ln:3 Query 1: SELECT COUNT(1) AS row_count FROM (select 'A' from src) t
+Ln:3 Query 2: SELECT COUNT(1) AS row_count FROM src where 2 = 2
+row_count 500 500
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/test/results/db/cmp_sum.out.txt
----------------------------------------------------------------------
diff --git a/hplsql/src/test/results/db/cmp_sum.out.txt b/hplsql/src/test/results/db/cmp_sum.out.txt
new file mode 100644
index 0000000..fad64b7
--- /dev/null
+++ b/hplsql/src/test/results/db/cmp_sum.out.txt
@@ -0,0 +1,320 @@
+Ln:1 CMP
+Ln:1 Query 1: SELECT COUNT(1) AS row_count,
+COUNT(C1) AS C1_COUNT_NOT_NULL,
+SUM(LENGTH(C1)) AS C1_SUM_LENGTH,
+MIN(LENGTH(C1)) AS C1_MIN_LENGTH,
+MAX(LENGTH(C1)) AS C1_MAX_LENGTH,
+COUNT(C2) AS C2_COUNT_NOT_NULL,
+SUM(LENGTH(C2)) AS C2_SUM_LENGTH,
+MIN(LENGTH(C2)) AS C2_MIN_LENGTH,
+MAX(LENGTH(C2)) AS C2_MAX_LENGTH,
+COUNT(C3) AS C3_COUNT_NOT_NULL,
+SUM(LENGTH(C3)) AS C3_SUM_LENGTH,
+MIN(LENGTH(C3)) AS C3_MIN_LENGTH,
+MAX(LENGTH(C3)) AS C3_MAX_LENGTH,
+COUNT(C4) AS C4_COUNT_NOT_NULL,
+SUM(C4) AS C4_SUM,
+MIN(C4) AS C4_MIN,
+MAX(C4) AS C4_MAX,
+COUNT(C5) AS C5_COUNT_NOT_NULL,
+SUM(C5) AS C5_SUM,
+MIN(C5) AS C5_MIN,
+MAX(C5) AS C5_MAX,
+COUNT(C6) AS C6_COUNT_NOT_NULL,
+SUM(C6) AS C6_SUM,
+MIN(C6) AS C6_MIN,
+MAX(C6) AS C6_MAX,
+COUNT(C7) AS C7_COUNT_NOT_NULL,
+SUM(C7) AS C7_SUM,
+MIN(C7) AS C7_MIN,
+MAX(C7) AS C7_MAX,
+COUNT(C8) AS C8_COUNT_NOT_NULL,
+SUM(C8) AS C8_SUM,
+MIN(C8) AS C8_MIN,
+MAX(C8) AS C8_MAX,
+COUNT(C9) AS C9_COUNT_NOT_NULL,
+SUM(C9) AS C9_SUM,
+MIN(C9) AS C9_MIN,
+MAX(C9) AS C9_MAX,
+COUNT(C10) AS C10_COUNT_NOT_NULL,
+SUM(C10) AS C10_SUM,
+MIN(C10) AS C10_MIN,
+MAX(C10) AS C10_MAX,
+COUNT(C11) AS C11_COUNT_NOT_NULL,
+SUM(YEAR(C11)) AS C11_SUM_YEAR,
+SUM(MONTH(C11)) AS C11_SUM_MONTH,
+SUM(DAY(C11)) AS C11_SUM_DAY,
+MIN(C11) AS C11_MIN,
+MAX(C11) AS C11_MAX,
+COUNT(C12) AS C12_COUNT_NOT_NULL,
+SUM(YEAR(C12)) AS C12_SUM_YEAR,
+SUM(MONTH(C12)) AS C12_SUM_MONTH,
+SUM(DAY(C12)) AS C12_SUM_DAY,
+MIN(C12) AS C12_MIN,
+MAX(C12) AS C12_MAX FROM src_dt
+Ln:1 Query 2: SELECT COUNT(1) AS row_count,
+COUNT(C1) AS C1_COUNT_NOT_NULL,
+SUM(LENGTH(C1)) AS C1_SUM_LENGTH,
+MIN(LENGTH(C1)) AS C1_MIN_LENGTH,
+MAX(LENGTH(C1)) AS C1_MAX_LENGTH,
+COUNT(C2) AS C2_COUNT_NOT_NULL,
+SUM(LENGTH(C2)) AS C2_SUM_LENGTH,
+MIN(LENGTH(C2)) AS C2_MIN_LENGTH,
+MAX(LENGTH(C2)) AS C2_MAX_LENGTH,
+COUNT(C3) AS C3_COUNT_NOT_NULL,
+SUM(LENGTH(C3)) AS C3_SUM_LENGTH,
+MIN(LENGTH(C3)) AS C3_MIN_LENGTH,
+MAX(LENGTH(C3)) AS C3_MAX_LENGTH,
+COUNT(C4) AS C4_COUNT_NOT_NULL,
+SUM(C4) AS C4_SUM,
+MIN(C4) AS C4_MIN,
+MAX(C4) AS C4_MAX,
+COUNT(C5) AS C5_COUNT_NOT_NULL,
+SUM(C5) AS C5_SUM,
+MIN(C5) AS C5_MIN,
+MAX(C5) AS C5_MAX,
+COUNT(C6) AS C6_COUNT_NOT_NULL,
+SUM(C6) AS C6_SUM,
+MIN(C6) AS C6_MIN,
+MAX(C6) AS C6_MAX,
+COUNT(C7) AS C7_COUNT_NOT_NULL,
+SUM(C7) AS C7_SUM,
+MIN(C7) AS C7_MIN,
+MAX(C7) AS C7_MAX,
+COUNT(C8) AS C8_COUNT_NOT_NULL,
+SUM(C8) AS C8_SUM,
+MIN(C8) AS C8_MIN,
+MAX(C8) AS C8_MAX,
+COUNT(C9) AS C9_COUNT_NOT_NULL,
+SUM(C9) AS C9_SUM,
+MIN(C9) AS C9_MIN,
+MAX(C9) AS C9_MAX,
+COUNT(C10) AS C10_COUNT_NOT_NULL,
+SUM(C10) AS C10_SUM,
+MIN(C10) AS C10_MIN,
+MAX(C10) AS C10_MAX,
+COUNT(C11) AS C11_COUNT_NOT_NULL,
+SUM(YEAR(C11)) AS C11_SUM_YEAR,
+SUM(MONTH(C11)) AS C11_SUM_MONTH,
+SUM(DAY(C11)) AS C11_SUM_DAY,
+MIN(C11) AS C11_MIN,
+MAX(C11) AS C11_MAX,
+COUNT(C12) AS C12_COUNT_NOT_NULL,
+SUM(YEAR(C12)) AS C12_SUM_YEAR,
+SUM(MONTH(C12)) AS C12_SUM_MONTH,
+SUM(DAY(C12)) AS C12_SUM_DAY,
+MIN(C12) AS C12_MIN,
+MAX(C12) AS C12_MAX FROM src_dt
+row_count 500 500
+c1_count_not_null 500 500
+c1_sum_length 3406 3406
+c1_min_length 5 5
+c1_max_length 7 7
+c2_count_not_null 500 500
+c2_sum_length 3406 3406
+c2_min_length 5 5
+c2_max_length 7 7
+c3_count_not_null 500 500
+c3_sum_length 3406 3406
+c3_min_length 5 5
+c3_max_length 7 7
+c4_count_not_null 106 106
+c4_sum 6697 6697
+c4_min 0 0
+c4_max 126 126
+c5_count_not_null 500 500
+c5_sum 130091 130091
+c5_min 0 0
+c5_max 498 498
+c6_count_not_null 500 500
+c6_sum 130091 130091
+c6_min 0 0
+c6_max 498 498
+c7_count_not_null 500 500
+c7_sum 130091 130091
+c7_min 0 0
+c7_max 498 498
+c8_count_not_null 500 500
+c8_sum 13009.1 13009.1
+c8_min 0 0
+c8_max 49.8 49.8
+c9_count_not_null 500 500
+c9_sum 13009.10001783073 13009.10001783073
+c9_min 0.0 0.0
+c9_max 49.79999923706055 49.79999923706055
+c10_count_not_null 500 500
+c10_sum 13009.09999999999 13009.09999999999
+c10_min 0.0 0.0
+c10_max 49.8 49.8
+c11_count_not_null 500 500
+c11_sum_year 1007500 1007500
+c11_sum_month 4500 4500
+c11_sum_day 3500 3500
+c11_min null null
+c11_max null null
+c12_count_not_null 500 500
+c12_sum_year 1007500 1007500
+c12_sum_month 4500 4500
+c12_sum_day 3500 3500
+c12_min null null
+c12_max null null
+Ln:2 CMP
+Ln:2 Query 1: SELECT COUNT(1) AS row_count,
+COUNT(C1) AS C1_COUNT_NOT_NULL,
+SUM(LENGTH(C1)) AS C1_SUM_LENGTH,
+MIN(LENGTH(C1)) AS C1_MIN_LENGTH,
+MAX(LENGTH(C1)) AS C1_MAX_LENGTH,
+COUNT(C2) AS C2_COUNT_NOT_NULL,
+SUM(LENGTH(C2)) AS C2_SUM_LENGTH,
+MIN(LENGTH(C2)) AS C2_MIN_LENGTH,
+MAX(LENGTH(C2)) AS C2_MAX_LENGTH,
+COUNT(C3) AS C3_COUNT_NOT_NULL,
+SUM(LENGTH(C3)) AS C3_SUM_LENGTH,
+MIN(LENGTH(C3)) AS C3_MIN_LENGTH,
+MAX(LENGTH(C3)) AS C3_MAX_LENGTH,
+COUNT(C4) AS C4_COUNT_NOT_NULL,
+SUM(C4) AS C4_SUM,
+MIN(C4) AS C4_MIN,
+MAX(C4) AS C4_MAX,
+COUNT(C5) AS C5_COUNT_NOT_NULL,
+SUM(C5) AS C5_SUM,
+MIN(C5) AS C5_MIN,
+MAX(C5) AS C5_MAX,
+COUNT(C6) AS C6_COUNT_NOT_NULL,
+SUM(C6) AS C6_SUM,
+MIN(C6) AS C6_MIN,
+MAX(C6) AS C6_MAX,
+COUNT(C7) AS C7_COUNT_NOT_NULL,
+SUM(C7) AS C7_SUM,
+MIN(C7) AS C7_MIN,
+MAX(C7) AS C7_MAX,
+COUNT(C8) AS C8_COUNT_NOT_NULL,
+SUM(C8) AS C8_SUM,
+MIN(C8) AS C8_MIN,
+MAX(C8) AS C8_MAX,
+COUNT(C9) AS C9_COUNT_NOT_NULL,
+SUM(C9) AS C9_SUM,
+MIN(C9) AS C9_MIN,
+MAX(C9) AS C9_MAX,
+COUNT(C10) AS C10_COUNT_NOT_NULL,
+SUM(C10) AS C10_SUM,
+MIN(C10) AS C10_MIN,
+MAX(C10) AS C10_MAX,
+COUNT(C11) AS C11_COUNT_NOT_NULL,
+SUM(YEAR(C11)) AS C11_SUM_YEAR,
+SUM(MONTH(C11)) AS C11_SUM_MONTH,
+SUM(DAY(C11)) AS C11_SUM_DAY,
+MIN(C11) AS C11_MIN,
+MAX(C11) AS C11_MAX,
+COUNT(C12) AS C12_COUNT_NOT_NULL,
+SUM(YEAR(C12)) AS C12_SUM_YEAR,
+SUM(MONTH(C12)) AS C12_SUM_MONTH,
+SUM(DAY(C12)) AS C12_SUM_DAY,
+MIN(C12) AS C12_MIN,
+MAX(C12) AS C12_MAX FROM src_dt where 1 = 1
+Ln:2 Query 2: SELECT COUNT(1) AS row_count,
+COUNT(C1) AS C1_COUNT_NOT_NULL,
+SUM(LENGTH(C1)) AS C1_SUM_LENGTH,
+MIN(LENGTH(C1)) AS C1_MIN_LENGTH,
+MAX(LENGTH(C1)) AS C1_MAX_LENGTH,
+COUNT(C2) AS C2_COUNT_NOT_NULL,
+SUM(LENGTH(C2)) AS C2_SUM_LENGTH,
+MIN(LENGTH(C2)) AS C2_MIN_LENGTH,
+MAX(LENGTH(C2)) AS C2_MAX_LENGTH,
+COUNT(C3) AS C3_COUNT_NOT_NULL,
+SUM(LENGTH(C3)) AS C3_SUM_LENGTH,
+MIN(LENGTH(C3)) AS C3_MIN_LENGTH,
+MAX(LENGTH(C3)) AS C3_MAX_LENGTH,
+COUNT(C4) AS C4_COUNT_NOT_NULL,
+SUM(C4) AS C4_SUM,
+MIN(C4) AS C4_MIN,
+MAX(C4) AS C4_MAX,
+COUNT(C5) AS C5_COUNT_NOT_NULL,
+SUM(C5) AS C5_SUM,
+MIN(C5) AS C5_MIN,
+MAX(C5) AS C5_MAX,
+COUNT(C6) AS C6_COUNT_NOT_NULL,
+SUM(C6) AS C6_SUM,
+MIN(C6) AS C6_MIN,
+MAX(C6) AS C6_MAX,
+COUNT(C7) AS C7_COUNT_NOT_NULL,
+SUM(C7) AS C7_SUM,
+MIN(C7) AS C7_MIN,
+MAX(C7) AS C7_MAX,
+COUNT(C8) AS C8_COUNT_NOT_NULL,
+SUM(C8) AS C8_SUM,
+MIN(C8) AS C8_MIN,
+MAX(C8) AS C8_MAX,
+COUNT(C9) AS C9_COUNT_NOT_NULL,
+SUM(C9) AS C9_SUM,
+MIN(C9) AS C9_MIN,
+MAX(C9) AS C9_MAX,
+COUNT(C10) AS C10_COUNT_NOT_NULL,
+SUM(C10) AS C10_SUM,
+MIN(C10) AS C10_MIN,
+MAX(C10) AS C10_MAX,
+COUNT(C11) AS C11_COUNT_NOT_NULL,
+SUM(YEAR(C11)) AS C11_SUM_YEAR,
+SUM(MONTH(C11)) AS C11_SUM_MONTH,
+SUM(DAY(C11)) AS C11_SUM_DAY,
+MIN(C11) AS C11_MIN,
+MAX(C11) AS C11_MAX,
+COUNT(C12) AS C12_COUNT_NOT_NULL,
+SUM(YEAR(C12)) AS C12_SUM_YEAR,
+SUM(MONTH(C12)) AS C12_SUM_MONTH,
+SUM(DAY(C12)) AS C12_SUM_DAY,
+MIN(C12) AS C12_MIN,
+MAX(C12) AS C12_MAX FROM src_dt
+row_count 500 500
+c1_count_not_null 500 500
+c1_sum_length 3406 3406
+c1_min_length 5 5
+c1_max_length 7 7
+c2_count_not_null 500 500
+c2_sum_length 3406 3406
+c2_min_length 5 5
+c2_max_length 7 7
+c3_count_not_null 500 500
+c3_sum_length 3406 3406
+c3_min_length 5 5
+c3_max_length 7 7
+c4_count_not_null 106 106
+c4_sum 6697 6697
+c4_min 0 0
+c4_max 126 126
+c5_count_not_null 500 500
+c5_sum 130091 130091
+c5_min 0 0
+c5_max 498 498
+c6_count_not_null 500 500
+c6_sum 130091 130091
+c6_min 0 0
+c6_max 498 498
+c7_count_not_null 500 500
+c7_sum 130091 130091
+c7_min 0 0
+c7_max 498 498
+c8_count_not_null 500 500
+c8_sum 13009.1 13009.1
+c8_min 0 0
+c8_max 49.8 49.8
+c9_count_not_null 500 500
+c9_sum 13009.10001783073 13009.10001783073
+c9_min 0.0 0.0
+c9_max 49.79999923706055 49.79999923706055
+c10_count_not_null 500 500
+c10_sum 13009.09999999999 13009.09999999999
+c10_min 0.0 0.0
+c10_max 49.8 49.8
+c11_count_not_null 500 500
+c11_sum_year 1007500 1007500
+c11_sum_month 4500 4500
+c11_sum_day 3500 3500
+c11_min null null
+c11_max null null
+c12_count_not_null 500 500
+c12_sum_year 1007500 1007500
+c12_sum_month 4500 4500
+c12_sum_day 3500 3500
+c12_min null null
+c12_max null null
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/test/results/db/copy_to_file.out.txt
----------------------------------------------------------------------
diff --git a/hplsql/src/test/results/db/copy_to_file.out.txt b/hplsql/src/test/results/db/copy_to_file.out.txt
new file mode 100644
index 0000000..e571d36
--- /dev/null
+++ b/hplsql/src/test/results/db/copy_to_file.out.txt
@@ -0,0 +1,6 @@
+Ln:1 COPY
+Ln:1 Query executed: 2 columns, output file: target/tmp/src.txt
+Ln:2 COPY
+Ln:2 Statement:
+select * from src
+Ln:2 Query executed: 2 columns, output file: target/tmp/src2.txt
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/test/results/db/copy_to_hdfs.out.txt
----------------------------------------------------------------------
diff --git a/hplsql/src/test/results/db/copy_to_hdfs.out.txt b/hplsql/src/test/results/db/copy_to_hdfs.out.txt
new file mode 100644
index 0000000..23c0cb2
--- /dev/null
+++ b/hplsql/src/test/results/db/copy_to_hdfs.out.txt
@@ -0,0 +1,4 @@
+Ln:2 COPY
+Ln:2 Statement:
+select * from src
+Ln:2 Query executed: 2 columns, output file: /user/hplsql/src2.txt
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/test/results/db/copy_to_table.out.txt
----------------------------------------------------------------------
diff --git a/hplsql/src/test/results/db/copy_to_table.out.txt b/hplsql/src/test/results/db/copy_to_table.out.txt
new file mode 100644
index 0000000..411b425
--- /dev/null
+++ b/hplsql/src/test/results/db/copy_to_table.out.txt
@@ -0,0 +1,2 @@
+Ln:1 COPY
+Ln:1 SELECT executed: 2 columns
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/test/results/db/part_count.out.txt
----------------------------------------------------------------------
diff --git a/hplsql/src/test/results/db/part_count.out.txt b/hplsql/src/test/results/db/part_count.out.txt
new file mode 100644
index 0000000..485ffe1
--- /dev/null
+++ b/hplsql/src/test/results/db/part_count.out.txt
@@ -0,0 +1,15 @@
+Ln:1 IF
+Ln:1 Query: SHOW PARTITIONS partition_date_1
+Ln:1 IF TRUE executed
+Ln:2 PRINT
+success
+Ln:7 IF
+Ln:7 Query: SHOW PARTITIONS partition_date_1 PARTITION (region='1')
+Ln:7 IF TRUE executed
+Ln:8 PRINT
+success
+Ln:13 IF
+Ln:13 Query: SHOW PARTITIONS partition_date_1a
+Ln:13 IF TRUE executed
+Ln:14 PRINT
+success
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/test/results/db/part_count_by.out.txt
----------------------------------------------------------------------
diff --git a/hplsql/src/test/results/db/part_count_by.out.txt b/hplsql/src/test/results/db/part_count_by.out.txt
new file mode 100644
index 0000000..61f51cd
--- /dev/null
+++ b/hplsql/src/test/results/db/part_count_by.out.txt
@@ -0,0 +1,13 @@
+3
+dt=2000-01-01 2
+dt=2013-12-10 1
+dt=2013-08-08 2
+dt=2013-08-08/region=1 1
+dt=2000-01-01/region=1 1
+dt=2013-12-10/region=2020-20-20 1
+dt=2000-01-01/region=2 1
+dt=2013-08-08/region=10 1
+region=10 1
+region=2020-20-20 1
+region=2 1
+region=1 2
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/test/results/db/select_into.out.txt
----------------------------------------------------------------------
diff --git a/hplsql/src/test/results/db/select_into.out.txt b/hplsql/src/test/results/db/select_into.out.txt
index 80d067e..3f4ae31 100644
--- a/hplsql/src/test/results/db/select_into.out.txt
+++ b/hplsql/src/test/results/db/select_into.out.txt
@@ -1,19 +1,43 @@
-Ln:1 DECLARE v_int INT
-Ln:2 DECLARE v_dec DECIMAL
-Ln:3 DECLARE v_dec0 DECIMAL
-Ln:5 SELECT
-Ln:5 SELECT CAST(1 AS INT), CAST(1.1 AS DECIMAL(18,2)), CAST(1.1 AS DECIMAL(18,0)) FROM src LIMIT 1
-Ln:5 SELECT completed successfully
-Ln:5 SELECT INTO statement executed
-Ln:5 COLUMN: _c0, int
-Ln:5 SET v_int = 1
-Ln:5 COLUMN: _c1, decimal
-Ln:5 SET v_dec = 1.1
-Ln:5 COLUMN: _c2, decimal
-Ln:5 SET v_dec0 = 1
-Ln:15 PRINT
+Ln:1 DECLARE v_bint BIGINT
+Ln:2 DECLARE v_int INT
+Ln:3 DECLARE v_sint SMALLINT
+Ln:4 DECLARE v_tint TINYINT
+Ln:5 DECLARE v_dec DECIMAL
+Ln:6 DECLARE v_dec0 DECIMAL
+Ln:7 DECLARE v_str STRING
+Ln:9 SELECT
+Ln:9 SELECT CAST(1 AS BIGINT), CAST(1 AS INT), CAST(1 AS SMALLINT), CAST(1 AS TINYINT), CAST(1.1 AS DECIMAL(18,2)), CAST(1.1 AS DECIMAL(18,0)) FROM src LIMIT 1
+Ln:9 SELECT completed successfully
+Ln:9 SELECT INTO statement executed
+Ln:9 COLUMN: _c0, bigint
+Ln:9 SET v_bint = 1
+Ln:9 COLUMN: _c1, int
+Ln:9 SET v_int = 1
+Ln:9 COLUMN: _c2, smallint
+Ln:9 SET v_sint = 1
+Ln:9 COLUMN: _c3, tinyint
+Ln:9 SET v_tint = 1
+Ln:9 COLUMN: _c4, decimal
+Ln:9 SET v_dec = 1.1
+Ln:9 COLUMN: _c5, decimal
+Ln:9 SET v_dec0 = 1
+Ln:25 PRINT
+BIGINT: 1
+Ln:26 PRINT
INT: 1
-Ln:16 PRINT
+Ln:27 PRINT
+SMALLINT: 1
+Ln:28 PRINT
+TINYINT: 1
+Ln:29 PRINT
DECIMAL: 1.1
-Ln:17 PRINT
-DECIMAL0: 1
\ No newline at end of file
+Ln:30 PRINT
+DECIMAL0: 1
+Ln:32 SELECT
+Ln:32 select 'a' from src LIMIT 1
+Ln:32 SELECT completed successfully
+Ln:32 SELECT INTO statement executed
+Ln:32 COLUMN: _c0, string
+Ln:32 SET v_str = a
+Ln:33 PRINT
+string: a
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/test/results/db/select_into2.out.txt
----------------------------------------------------------------------
diff --git a/hplsql/src/test/results/db/select_into2.out.txt b/hplsql/src/test/results/db/select_into2.out.txt
new file mode 100644
index 0000000..03e67ad
--- /dev/null
+++ b/hplsql/src/test/results/db/select_into2.out.txt
@@ -0,0 +1,19 @@
+Ln:1 DECLARE v_float float
+Ln:2 DECLARE v_double double
+Ln:3 DECLARE v_double2 double precision
+Ln:5 SELECT
+Ln:5 select cast(1.1 as float), cast(1.1 as double), cast(1.1 as double) from src LIMIT 1
+Ln:5 SELECT completed successfully
+Ln:5 SELECT INTO statement executed
+Ln:5 COLUMN: _c0, float
+Ln:5 SET v_float = 1.100000023841858
+Ln:5 COLUMN: _c1, double
+Ln:5 SET v_double = 1.1
+Ln:5 COLUMN: _c2, double
+Ln:5 SET v_double2 = 1.1
+Ln:15 PRINT
+float: 1.100000023841858
+Ln:16 PRINT
+double: 1.1
+Ln:17 PRINT
+double precision: 1.1
\ No newline at end of file
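The float line above is expected, not a SELECT INTO bug: 1.1 has no exact binary representation, so the single-precision value, widened back to double for printing, surfaces as 1.100000023841858, while the genuine doubles stay at 1.1. The same effect reproduced in Java (whose default formatting shows one more digit of the identical value):

    public final class FloatWideningSketch {
      public static void main(String[] args) {
        float f = (float) 1.1;
        System.out.println((double) f);  // 1.1000000238418579
        System.out.println(1.1);         // 1.1
      }
    }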
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/test/results/local/create_procedure2.out.txt
----------------------------------------------------------------------
diff --git a/hplsql/src/test/results/local/create_procedure2.out.txt b/hplsql/src/test/results/local/create_procedure2.out.txt
new file mode 100644
index 0000000..765faa9
--- /dev/null
+++ b/hplsql/src/test/results/local/create_procedure2.out.txt
@@ -0,0 +1,10 @@
+Ln:1 CREATE PROCEDURE set_message
+Ln:13 DECLARE str STRING
+Ln:14 EXEC PROCEDURE set_message
+Ln:14 SET PARAM name = world
+Ln:14 SET PARAM result = null
+Ln:3 DECLARE str STRING = 'Hello, world!'
+Ln:5 DECLARE HANDLER
+Ln:9 SET result = 'Hello, world!'
+Ln:15 PRINT
+Hello, world!
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/test/results/local/if2.out.txt
----------------------------------------------------------------------
diff --git a/hplsql/src/test/results/local/if2.out.txt b/hplsql/src/test/results/local/if2.out.txt
new file mode 100644
index 0000000..63a6213
--- /dev/null
+++ b/hplsql/src/test/results/local/if2.out.txt
@@ -0,0 +1,4 @@
+Ln:1 IF
+Ln:1 IF TRUE executed
+Ln:2 PRINT
+correct
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/test/results/local/include.out.txt
----------------------------------------------------------------------
diff --git a/hplsql/src/test/results/local/include.out.txt b/hplsql/src/test/results/local/include.out.txt
new file mode 100644
index 0000000..86cfa05
--- /dev/null
+++ b/hplsql/src/test/results/local/include.out.txt
@@ -0,0 +1,8 @@
+Ln:1 INCLUDE src/test/queries/local/include_file.sql
+INCLUDE CONTENT src/test/queries/local/include_file.sql (non-empty)
+Ln:1 PRINT
+file included successfully
+Ln:2 INCLUDE src/test/queries/local/include_file.sql
+INCLUDE CONTENT src/test/queries/local/include_file.sql (non-empty)
+Ln:1 PRINT
+file included successfully
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/test/results/local/mult_div.out.txt
----------------------------------------------------------------------
diff --git a/hplsql/src/test/results/local/mult_div.out.txt b/hplsql/src/test/results/local/mult_div.out.txt
new file mode 100644
index 0000000..cd17c16
--- /dev/null
+++ b/hplsql/src/test/results/local/mult_div.out.txt
@@ -0,0 +1,7 @@
+Ln:1 DECLARE a int = 8
+Ln:2 DECLARE b int = 4
+Ln:3 DECLARE c int = 2
+Ln:5 PRINT
+1
+Ln:7 SET a = 1
+Ln:8 SET b = 1
\ No newline at end of file
[05/50] [abbrv] hive git commit: HIVE-11815 : Correct the
column/table names in subquery expression when creating a view (Pengcheng
Xiong, reviewed by Ashutosh Chauhan)
Posted by xu...@apache.org.
HIVE-11815 : Correct the column/table names in subquery expression when creating a view (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8da2ed30
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8da2ed30
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8da2ed30
Branch: refs/heads/beeline-cli
Commit: 8da2ed304891dc8483fe3d78eda4c9f70c54ae18
Parents: a12e5f5
Author: Pengcheng Xiong <px...@apache.org>
Authored: Thu Sep 17 13:20:00 2015 -0700
Committer: Pengcheng Xiong <px...@apache.org>
Committed: Thu Sep 17 13:20:00 2015 -0700
----------------------------------------------------------------------
.../apache/hadoop/hive/ql/parse/QBSubQuery.java | 7 --
.../hadoop/hive/ql/parse/SubQueryUtils.java | 11 --
.../queries/clientpositive/subquery_views.q | 22 +++-
.../subquery_exists_implicit_gby.q.out | 8 +-
.../subquery_nested_subquery.q.out | 4 +-
.../subquery_notexists_implicit_gby.q.out | 8 +-
.../subquery_windowing_corr.q.out | 7 +-
.../results/clientpositive/subquery_views.q.out | 116 +++++++++++++++++++
8 files changed, 141 insertions(+), 42 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/8da2ed30/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java
index 92cbabc..f95ee8d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java
@@ -401,7 +401,6 @@ public class QBSubQuery implements ISubQueryJoinInfo {
CNT_ALIAS,
subQryCorrExprs,
sqRR);
- SubQueryUtils.setOriginDeep(ast, QBSubQuery.this.originalSQASTOrigin);
return ast;
}
@@ -416,7 +415,6 @@ public class QBSubQuery implements ISubQueryJoinInfo {
public ASTNode getJoinConditionAST() {
ASTNode ast =
SubQueryUtils.buildNotInNullJoinCond(getAlias(), CNT_ALIAS);
- SubQueryUtils.setOriginDeep(ast, QBSubQuery.this.originalSQASTOrigin);
return ast;
}
@@ -576,8 +574,6 @@ public class QBSubQuery implements ISubQueryJoinInfo {
rewrite(outerQueryRR, forHavingClause, outerQueryAlias, insertClause, selectClause);
- SubQueryUtils.setOriginDeep(subQueryAST, originalSQASTOrigin);
-
/*
* Restriction.13.m :: In the case of an implied Group By on a
* correlated SubQuery, the SubQuery always returns 1 row.
@@ -696,8 +692,6 @@ public class QBSubQuery implements ISubQueryJoinInfo {
}
}
- SubQueryUtils.setOriginDeep(joinConditionAST, originalSQASTOrigin);
- SubQueryUtils.setOriginDeep(postJoinConditionAST, originalSQASTOrigin);
}
ASTNode updateOuterQueryFilter(ASTNode outerQryFilter) {
@@ -711,7 +705,6 @@ public class QBSubQuery implements ISubQueryJoinInfo {
return postJoinConditionAST;
}
ASTNode node = SubQueryUtils.andAST(outerQryFilter, postJoinConditionAST);
- node.setOrigin(originalSQASTOrigin);
return node;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/8da2ed30/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java
index 87a7ced..362a285 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java
@@ -467,17 +467,6 @@ public class SubQueryUtils {
return check;
}
- static void setOriginDeep(ASTNode node, ASTNodeOrigin origin) {
- if ( node == null ) {
- return;
- }
- node.setOrigin(origin);
- int childCnt = node.getChildCount();
- for(int i=0; i<childCnt; i++) {
- setOriginDeep((ASTNode)node.getChild(i), origin);
- }
- }
-
/*
* Set of functions to create the Null Check Query for Not-In SubQuery predicates.
* For a SubQuery predicate like:
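Background on this HIVE-11815 hunk: the deleted setOriginDeep helper recursively stamped a single ASTNodeOrigin over an entire rewritten subtree, which erased each token's own origin. Since view expansion decides how to re-qualify column and table names from those per-token origins, the blanket stamp appears to be how wrong names crept into CREATE VIEW text for subquery expressions; dropping the helper and its call sites keeps the original origins intact. The shape of the removed recursion, as a stand-alone sketch:

    import java.util.ArrayList;
    import java.util.List;

    public final class OriginStampSketch {
      static final class Node {
        String origin;  // stand-in for ASTNodeOrigin
        final List<Node> children = new ArrayList<>();
      }

      // Mirrors the removed SubQueryUtils.setOriginDeep: one origin
      // overwrites every descendant, losing per-token provenance.
      static void setOriginDeep(Node node, String origin) {
        if (node == null) {
          return;
        }
        node.origin = origin;
        for (Node child : node.children) {
          setOriginDeep(child, origin);
        }
      }

      public static void main(String[] args) {
        Node root = new Node();
        root.children.add(new Node());
        setOriginDeep(root, "subquery-rewrite");
        System.out.println(root.children.get(0).origin);  // subquery-rewrite
      }
    }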
http://git-wip-us.apache.org/repos/asf/hive/blob/8da2ed30/ql/src/test/queries/clientpositive/subquery_views.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/subquery_views.q b/ql/src/test/queries/clientpositive/subquery_views.q
index f15d41b..e646310 100644
--- a/ql/src/test/queries/clientpositive/subquery_views.q
+++ b/ql/src/test/queries/clientpositive/subquery_views.q
@@ -10,6 +10,8 @@ where exists
where b.value = a.value and a.key = b.key and a.value > 'val_9')
;
+describe extended cv1;
+
select *
from cv1 where cv1.key in (select key from cv1 c where c.key > '95');
;
@@ -26,6 +28,8 @@ where b.key not in
)
;
+describe extended cv2;
+
explain
select *
from cv2 where cv2.key in (select key from cv2 c where c.key < '11');
@@ -44,10 +48,26 @@ group by key, value
having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key )
;
+describe extended cv3;
+
select * from cv3;
-- join of subquery views
select *
from cv3
-where cv3.key in (select key from cv1);
\ No newline at end of file
+where cv3.key in (select key from cv1);
+
+drop table tc;
+
+create table tc (`@d` int);
+
+insert overwrite table tc select 1 from src limit 1;
+
+drop view tcv;
+
+create view tcv as select * from tc b where exists (select a.`@d` from tc a where b.`@d`=a.`@d`);
+
+describe extended tcv;
+
+select * from tcv;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/8da2ed30/ql/src/test/results/clientnegative/subquery_exists_implicit_gby.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/subquery_exists_implicit_gby.q.out b/ql/src/test/results/clientnegative/subquery_exists_implicit_gby.q.out
index 4830c00..f7251e3 100644
--- a/ql/src/test/results/clientnegative/subquery_exists_implicit_gby.q.out
+++ b/ql/src/test/results/clientnegative/subquery_exists_implicit_gby.q.out
@@ -1,7 +1 @@
-FAILED: SemanticException Line 7:7 Invalid SubQuery expression 'key' in definition of SubQuery sq_1 [
-exists
- (select count(*)
- from src a
- where b.value = a.value and a.key = b.key and a.value > 'val_9'
- )
-] used as sq_1 at Line 5:6: An Exists predicate on SubQuery with implicit Aggregation(no Group By clause) cannot be rewritten. (predicate will always return true).
+FAILED: SemanticException [Error 10250]: Line 7:7 Invalid SubQuery expression 'key': An Exists predicate on SubQuery with implicit Aggregation(no Group By clause) cannot be rewritten. (predicate will always return true).
http://git-wip-us.apache.org/repos/asf/hive/blob/8da2ed30/ql/src/test/results/clientnegative/subquery_nested_subquery.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/subquery_nested_subquery.q.out b/ql/src/test/results/clientnegative/subquery_nested_subquery.q.out
index ae3bc8f..140b093 100644
--- a/ql/src/test/results/clientnegative/subquery_nested_subquery.q.out
+++ b/ql/src/test/results/clientnegative/subquery_nested_subquery.q.out
@@ -1,3 +1 @@
-FAILED: SemanticException Line 3:53 Unsupported SubQuery Expression 'p_name' in definition of SubQuery sq_1 [
-x.p_name in (select y.p_name from part y where exists (select z.p_name from part z where y.p_name = z.p_name))
-] used as sq_1 at Line 3:15: Nested SubQuery expressions are not supported.
+FAILED: SemanticException [Error 10249]: Line 3:53 Unsupported SubQuery Expression 'p_name': Nested SubQuery expressions are not supported.
http://git-wip-us.apache.org/repos/asf/hive/blob/8da2ed30/ql/src/test/results/clientnegative/subquery_notexists_implicit_gby.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/subquery_notexists_implicit_gby.q.out b/ql/src/test/results/clientnegative/subquery_notexists_implicit_gby.q.out
index 74422af..6d9fa0a 100644
--- a/ql/src/test/results/clientnegative/subquery_notexists_implicit_gby.q.out
+++ b/ql/src/test/results/clientnegative/subquery_notexists_implicit_gby.q.out
@@ -1,7 +1 @@
-FAILED: SemanticException Line 7:7 Invalid SubQuery expression 'key' in definition of SubQuery sq_1 [
-exists
- (select sum(1)
- from src a
- where b.value = a.value and a.key = b.key and a.value > 'val_9'
- )
-] used as sq_1 at Line 5:10: A Not Exists predicate on SubQuery with implicit Aggregation(no Group By clause) cannot be rewritten. (predicate will always return false).
+FAILED: SemanticException [Error 10250]: Line 7:7 Invalid SubQuery expression 'key': A Not Exists predicate on SubQuery with implicit Aggregation(no Group By clause) cannot be rewritten. (predicate will always return false).
http://git-wip-us.apache.org/repos/asf/hive/blob/8da2ed30/ql/src/test/results/clientnegative/subquery_windowing_corr.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/subquery_windowing_corr.q.out b/ql/src/test/results/clientnegative/subquery_windowing_corr.q.out
index 647a535..dcd3026 100644
--- a/ql/src/test/results/clientnegative/subquery_windowing_corr.q.out
+++ b/ql/src/test/results/clientnegative/subquery_windowing_corr.q.out
@@ -1,6 +1 @@
-FAILED: SemanticException Line 6:8 Unsupported SubQuery Expression '1' in definition of SubQuery sq_1 [
-a.p_size in
- (select first_value(p_size) over(partition by p_mfgr order by p_size)
- from part b
- where a.p_brand = b.p_brand)
-] used as sq_1 at Line 4:15: Correlated Sub Queries cannot contain Windowing clauses.
+FAILED: SemanticException [Error 10249]: Line 6:8 Unsupported SubQuery Expression '1': Correlated Sub Queries cannot contain Windowing clauses.
http://git-wip-us.apache.org/repos/asf/hive/blob/8da2ed30/ql/src/test/results/clientpositive/subquery_views.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/subquery_views.q.out b/ql/src/test/results/clientpositive/subquery_views.q.out
index cfa7339..470fa83 100644
--- a/ql/src/test/results/clientpositive/subquery_views.q.out
+++ b/ql/src/test/results/clientpositive/subquery_views.q.out
@@ -26,6 +26,26 @@ POSTHOOK: type: CREATEVIEW
POSTHOOK: Input: default@src
POSTHOOK: Output: database:default
POSTHOOK: Output: default@cv1
+PREHOOK: query: describe extended cv1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@cv1
+POSTHOOK: query: describe extended cv1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@cv1
+key string
+value string
+
+#### A masked pattern was here ####
+from src b
+where exists
+ (select a.key
+ from src a
+ where b.value = a.value and a.key = b.key and a.value > 'val_9'), viewExpandedText:select `b`.`key`, `b`.`value`
+from `default`.`src` `b`
+where exists
+ (select `a`.`key`
+ from `default`.`src` `a`
+ where `b`.`value` = `a`.`value` and `a`.`key` = `b`.`key` and `a`.`value` > 'val_9'), tableType:VIRTUAL_VIEW)
PREHOOK: query: select *
from cv1 where cv1.key in (select key from cv1 c where c.key > '95')
PREHOOK: type: QUERY
@@ -69,6 +89,28 @@ POSTHOOK: type: CREATEVIEW
POSTHOOK: Input: default@src
POSTHOOK: Output: database:default
POSTHOOK: Output: default@cv2
+PREHOOK: query: describe extended cv2
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@cv2
+POSTHOOK: query: describe extended cv2
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@cv2
+key string
+value string
+
+#### A masked pattern was here ####
+from src b
+where b.key not in
+ (select a.key
+ from src a
+ where b.value = a.value and a.key = b.key and a.value > 'val_11'
+ ), viewExpandedText:select `b`.`key`, `b`.`value`
+from `default`.`src` `b`
+where `b`.`key` not in
+ (select `a`.`key`
+ from `default`.`src` `a`
+ where `b`.`value` = `a`.`value` and `a`.`key` = `b`.`key` and `a`.`value` > 'val_11'
+ ), tableType:VIRTUAL_VIEW)
Warning: Shuffle Join JOIN[20][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
Warning: Shuffle Join JOIN[50][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-6:MAPRED' is a cross product
PREHOOK: query: explain
@@ -425,6 +467,25 @@ POSTHOOK: type: CREATEVIEW
POSTHOOK: Input: default@src
POSTHOOK: Output: database:default
POSTHOOK: Output: default@cv3
+PREHOOK: query: describe extended cv3
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@cv3
+POSTHOOK: query: describe extended cv3
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@cv3
+key string
+value string
+_c2 bigint
+
+#### A masked pattern was here ####
+from src b
+where b.key in (select key from src where src.key > '8')
+group by key, value
+having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key ), viewExpandedText:select `b`.`key`, `b`.`value`, count(*)
+from `default`.`src` `b`
+where `b`.`key` in (select `src`.`key` from `default`.`src` where `src`.`key` > '8')
+group by `b`.`key`, `b`.`value`
+having count(*) in (select count(*) from `default`.`src` `s1` where `s1`.`key` > '9' group by `s1`.`key` ), tableType:VIRTUAL_VIEW)
PREHOOK: query: select * from cv3
PREHOOK: type: QUERY
PREHOOK: Input: default@cv3
@@ -473,3 +534,58 @@ POSTHOOK: Input: default@src
96 val_96 1
97 val_97 2
98 val_98 2
+PREHOOK: query: drop table tc
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table tc
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table tc (`@d` int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tc
+POSTHOOK: query: create table tc (`@d` int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tc
+PREHOOK: query: insert overwrite table tc select 1 from src limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@tc
+POSTHOOK: query: insert overwrite table tc select 1 from src limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@tc
+POSTHOOK: Lineage: tc.@d SIMPLE []
+PREHOOK: query: drop view tcv
+PREHOOK: type: DROPVIEW
+POSTHOOK: query: drop view tcv
+POSTHOOK: type: DROPVIEW
+PREHOOK: query: create view tcv as select * from tc b where exists (select a.`@d` from tc a where b.`@d`=a.`@d`)
+PREHOOK: type: CREATEVIEW
+PREHOOK: Input: default@tc
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tcv
+POSTHOOK: query: create view tcv as select * from tc b where exists (select a.`@d` from tc a where b.`@d`=a.`@d`)
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Input: default@tc
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tcv
+PREHOOK: query: describe extended tcv
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@tcv
+POSTHOOK: query: describe extended tcv
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@tcv
+@d int
+
+#### A masked pattern was here ####
+PREHOOK: query: select * from tcv
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tc
+PREHOOK: Input: default@tcv
+#### A masked pattern was here ####
+POSTHOOK: query: select * from tcv
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tc
+POSTHOOK: Input: default@tcv
+#### A masked pattern was here ####
+1
[24/50] [abbrv] hive git commit: HIVE-11711: Merge hbase-metastore
branch to trunk
Posted by xu...@apache.org.
HIVE-11711: Merge hbase-metastore branch to trunk
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4c17ecfd
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4c17ecfd
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4c17ecfd
Branch: refs/heads/beeline-cli
Commit: 4c17ecfda525f2f65a29dab40563c50267e46eba
Parents: 76828e0
Author: Daniel Dai <da...@hortonworks.com>
Authored: Mon Sep 21 21:54:52 2015 -0700
Committer: Daniel Dai <da...@hortonworks.com>
Committed: Mon Sep 21 21:54:52 2015 -0700
----------------------------------------------------------------------
.../org/apache/hadoop/hive/conf/HiveConf.java | 18 +++++++++++++++++-
1 file changed, 17 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/4c17ecfd/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 1d98766..0d07173 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -177,7 +177,23 @@ public class HiveConf extends Configuration {
HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_MAX_WRITER_WAIT,
HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_MAX_READER_WAIT,
HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_MAX_FULL,
- HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_CLEAN_UNTIL
+ HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_CLEAN_UNTIL,
+ HiveConf.ConfVars.METASTORE_FASTPATH,
+ HiveConf.ConfVars.METASTORE_HBASE_CATALOG_CACHE_SIZE,
+ HiveConf.ConfVars.METASTORE_HBASE_AGGREGATE_STATS_CACHE_SIZE,
+ HiveConf.ConfVars.METASTORE_HBASE_AGGREGATE_STATS_CACHE_MAX_PARTITIONS,
+ HiveConf.ConfVars.METASTORE_HBASE_AGGREGATE_STATS_CACHE_FALSE_POSITIVE_PROBABILITY,
+ HiveConf.ConfVars.METASTORE_HBASE_AGGREGATE_STATS_CACHE_MAX_VARIANCE,
+ HiveConf.ConfVars.METASTORE_HBASE_CACHE_TIME_TO_LIVE,
+ HiveConf.ConfVars.METASTORE_HBASE_CACHE_MAX_WRITER_WAIT,
+ HiveConf.ConfVars.METASTORE_HBASE_CACHE_MAX_READER_WAIT,
+ HiveConf.ConfVars.METASTORE_HBASE_CACHE_MAX_FULL,
+ HiveConf.ConfVars.METASTORE_HBASE_CACHE_CLEAN_UNTIL,
+ HiveConf.ConfVars.METASTORE_HBASE_CONNECTION_CLASS,
+ HiveConf.ConfVars.METASTORE_HBASE_AGGR_STATS_CACHE_ENTRIES,
+ HiveConf.ConfVars.METASTORE_HBASE_AGGR_STATS_MEMORY_TTL,
+ HiveConf.ConfVars.METASTORE_HBASE_AGGR_STATS_INVALIDATOR_FREQUENCY,
+ HiveConf.ConfVars.METASTORE_HBASE_AGGR_STATS_HBASE_TTL
};
/**
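This HIVE-11711 hunk registers the new HBase metastore settings in HiveConf's metaVars array alongside the existing aggregate-stats cache entries. Judging from the existing members, metaVars enumerates the properties that define a metastore connection, and the working assumption here is that callers compare these values to decide when a cached metastore client must be rebuilt. A sketch of that kind of staleness check (metaConfUnchanged and the property names are illustrative, not Hive API):

    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    public final class MetaVarsSketch {
      static final List<String> META_VARS = Arrays.asList(
          "hive.metastore.fastpath",                  // illustrative names only
          "hive.metastore.hbase.connection.class");

      // Hypothetical helper: a cached client is reusable only while none
      // of the metastore-relevant properties differ between the two confs.
      static boolean metaConfUnchanged(Map<String, String> oldConf,
                                       Map<String, String> newConf) {
        for (String var : META_VARS) {
          String a = oldConf.get(var);
          String b = newConf.get(var);
          if (a == null ? b != null : !a.equals(b)) {
            return false;
          }
        }
        return true;
      }

      public static void main(String[] args) {
        Map<String, String> oldConf = new HashMap<>();
        Map<String, String> newConf = new HashMap<>();
        oldConf.put("hive.metastore.fastpath", "true");
        newConf.put("hive.metastore.fastpath", "false");
        System.out.println(metaConfUnchanged(oldConf, newConf));  // false
      }
    }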
[46/50] [abbrv] hive git commit: HIVE-11517 Vectorized
auto_smb_mapjoin_14.q produces different results (Matt McCline,
reviewed by Vikram Dixit K)
Posted by xu...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/461e38ec/ql/src/test/results/clientpositive/vector_auto_smb_mapjoin_14.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_auto_smb_mapjoin_14.q.out b/ql/src/test/results/clientpositive/vector_auto_smb_mapjoin_14.q.out
new file mode 100644
index 0000000..827e6b5
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_auto_smb_mapjoin_14.q.out
@@ -0,0 +1,1792 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl1
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl1
+PREHOOK: query: CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl2
+POSTHOOK: query: CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl2
+PREHOOK: query: insert overwrite table tbl1
+select * from src where key < 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@tbl1
+POSTHOOK: query: insert overwrite table tbl1
+select * from src where key < 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@tbl1
+POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table tbl2
+select * from src where key < 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@tbl2
+POSTHOOK: query: insert overwrite table tbl2
+select * from src where key < 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@tbl2
+POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- The join is being performed as part of a sub-query. It should be converted to a sort-merge join
+explain
+select count(*) from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The join is being performed as part of a sub-query. It should be converted to a sort-merge join
+explain
+select count(*) from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+22
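The 22 above is easy to sanity-check against src: the keys below 10 are 0, 2, 4, 5, 8, 9 with multiplicities 3, 1, 1, 3, 1, 1, and an inner self-join contributes cnt*cnt rows per key:

    3*3 + 1*1 + 1*1 + 3*3 + 1*1 + 1*1 = 22

Grouping the same join by key then leaves the 6 distinct keys that the next query reports.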
+PREHOOK: query: -- The join is being performed as part of more than one sub-query. It should be converted to a sort-merge join
+explain
+select count(*) from
+(
+ select key, count(*) from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1
+ group by key
+) subq2
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The join is being performed as part of more than one sub-query. It should be converted to a sort-merge join
+explain
+select count(*) from
+(
+ select key, count(*) from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1
+ group by key
+) subq2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ outputColumnNames: _col0
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from
+(
+ select key, count(*) from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1
+ group by key
+) subq2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from
+(
+ select key, count(*) from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1
+ group by key
+) subq2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+6
+PREHOOK: query: -- A join is being performed across different sub-queries, each of which performs a join itself.
+-- Each sub-query should be converted to a sort-merge join.
+explain
+select src1.key, src1.cnt1, src2.cnt1 from
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1 group by key
+) src1
+join
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq2 group by key
+) src2
+on src1.key = src2.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- A join is being performed across different sub-queries, each of which performs a join itself.
+-- Each sub-query should be converted to a sort-merge join.
+explain
+select src1.key, src1.cnt1, src2.cnt1 from
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1 group by key
+) src1
+join
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq2 group by key
+) src2
+on src1.key = src2.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-4
+ Stage-4 is a root stage
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ outputColumnNames: _col0
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ value expressions: _col1 (type: bigint)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ value expressions: _col1 (type: bigint)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col3
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ outputColumnNames: _col0
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select src1.key, src1.cnt1, src2.cnt1 from
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1 group by key
+) src1
+join
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq2 group by key
+) src2
+on src1.key = src2.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select src1.key, src1.cnt1, src2.cnt1 from
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1 group by key
+) src1
+join
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq2 group by key
+) src2
+on src1.key = src2.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+0 9 9
+2 1 1
+4 1 1
+5 9 9
+8 1 1
+9 1 1
+PREHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should
+-- be converted to a sort-merge join.
+explain
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on subq1.key = subq2.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should
+-- be converted to a sort-merge join.
+explain
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on subq1.key = subq2.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key < 6) (type: boolean)
+ Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on subq1.key = subq2.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on subq1.key = subq2.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+20
+PREHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should
+-- be converted to a sort-merge join, although there is more than one level of sub-query
+explain
+select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join tbl2 b
+ on subq2.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should
+-- be converted to a sort-merge join, although there is more than one level of sub-query
+explain
+select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join tbl2 b
+ on subq2.key = b.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean)
+ Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 key (type: int)
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join tbl2 b
+ on subq2.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join tbl2 b
+ on subq2.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+20
+PREHOOK: query: -- Both tables are nested sub-queries, i.e. more than one level of sub-query.
+-- The join should be converted to a sort-merge join
+explain
+select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq3
+ where key < 6
+ ) subq4
+ on subq2.key = subq4.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Both tables are nested sub-queries, i.e. more than one level of sub-query.
+-- The join should be converted to a sort-merge join
+explain
+select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq3
+ where key < 6
+ ) subq4
+ on subq2.key = subq4.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean)
+ Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq3
+ where key < 6
+ ) subq4
+ on subq2.key = subq4.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq3
+ where key < 6
+ ) subq4
+ on subq2.key = subq4.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+#### A masked pattern was here ####
+20
+PREHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters and the join key
+-- is not modified, it should be converted to a sort-merge join. Note that the sub-query modifies one
+-- item, but that is not part of the join key.
+explain
+select count(*) from
+ (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1
+ join
+ (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2
+ on subq1.key = subq2.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters and the join key
+-- is not modified, it should be converted to a sort-merge join. Note that the sub-query modifies one
+-- item, but that is not part of the join key.
+explain
+select count(*) from
+ (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1
+ join
+ (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2
+ on subq1.key = subq2.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key < 8) (type: boolean)
+ Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from
+ (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1
+ join
+ (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2
+ on subq1.key = subq2.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from
+ (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1
+ join
+ (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2
+ on subq1.key = subq2.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+20
+PREHOOK: query: -- Since the join key is modified by the sub-query, neither a sort-merge join nor a bucketized map-side
+-- join should be performed
+explain
+select count(*) from
+ (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1
+ join
+ (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2
+ on subq1.key = subq2.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Since the join key is modified by the sub-query, neither a sort-merge join nor a bucketized map-side
+-- join should be performed
+explain
+select count(*) from
+ (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1
+ join
+ (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2
+ on subq1.key = subq2.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: (key + 1) (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col0 is not null (type: boolean)
+ Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: (key + 1) (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col0 is not null (type: boolean)
+ Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Statistics: Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from
+ (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1
+ join
+ (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2
+ on subq1.key = subq2.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from
+ (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1
+ join
+ (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2
+ on subq1.key = subq2.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+22
+PREHOOK: query: -- One of the tables is a sub-query and the other is not.
+-- It should be converted to a sort-merge join.
+explain
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join tbl2 a on subq1.key = a.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- One of the tables is a sub-query and the other is not.
+-- It should be converted to a sort-merge join.
+explain
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join tbl2 a on subq1.key = a.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key < 6) (type: boolean)
+ Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 key (type: int)
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join tbl2 a on subq1.key = a.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join tbl2 a on subq1.key = a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+20
+PREHOOK: query: -- There are more than 2 inputs to the join, all of them being sub-queries.
+-- It should be converted to a sort-merge join
+explain
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on (subq1.key = subq2.key)
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq3
+ on (subq1.key = subq3.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- There are more than 2 inputs to the join, all of them being sub-queries.
+-- It should be converted to a sort-merge join
+explain
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on (subq1.key = subq2.key)
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq3
+ on (subq1.key = subq3.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key < 6) (type: boolean)
+ Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ 2 _col0 (type: int)
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on subq1.key = subq2.key
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq3
+ on (subq1.key = subq3.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on subq1.key = subq2.key
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq3
+ on (subq1.key = subq3.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+56
+PREHOOK: query: -- The join is being performed on a nested sub-query, and an aggregation is performed after that.
+-- The join should be converted to a sort-merge join
+explain
+select count(*) from (
+ select subq2.key as key, subq2.value as value1, b.value as value2 from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+join tbl2 b
+on subq2.key = b.key) a
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The join is being performed on a nested sub-query, and an aggregation is performed after that.
+-- The join should be converted to a sort-merge join
+explain
+select count(*) from (
+ select subq2.key as key, subq2.value as value1, b.value as value2 from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+join tbl2 b
+on subq2.key = b.key) a
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean)
+ Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 key (type: int)
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from (
+ select subq2.key as key, subq2.value as value1, b.value as value2 from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+join tbl2 b
+on subq2.key = b.key) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (
+ select subq2.key as key, subq2.value as value1, b.value as value2 from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+join tbl2 b
+on subq2.key = b.key) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+20
+PREHOOK: query: CREATE TABLE dest1(key int, value string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest1
+POSTHOOK: query: CREATE TABLE dest1(key int, value string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest1
+PREHOOK: query: CREATE TABLE dest2(key int, val1 string, val2 string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest2
+POSTHOOK: query: CREATE TABLE dest2(key int, val1 string, val2 string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest2
+PREHOOK: query: -- The join is followed by a multi-table insert. It should be converted to
+-- a sort-merge join
+explain
+from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+insert overwrite table dest1 select key, val1
+insert overwrite table dest2 select key, val1, val2
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The join is followed by a multi-table insert. It should be converted to
+-- a sort-merge join
+explain
+from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+insert overwrite table dest1 select key, val1
+insert overwrite table dest2 select key, val1, val2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6
+ Stage-5
+ Stage-0 depends on stages: Stage-5, Stage-4, Stage-7
+ Stage-3 depends on stages: Stage-0
+ Stage-4
+ Stage-6
+ Stage-7 depends on stages: Stage-6
+ Stage-14 depends on stages: Stage-2 , consists of Stage-11, Stage-10, Stage-12
+ Stage-11
+ Stage-1 depends on stages: Stage-11, Stage-10, Stage-13
+ Stage-9 depends on stages: Stage-1
+ Stage-10
+ Stage-12
+ Stage-13 depends on stages: Stage-12
+
+STAGE PLANS:
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ outputColumnNames: _col0, _col1, _col6
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest2
+ Execution mode: vectorized
+
+ Stage: Stage-8
+ Conditional Operator
+
+ Stage: Stage-5
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+
+ Stage: Stage-6
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+
+ Stage: Stage-7
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-14
+ Conditional Operator
+
+ Stage: Stage-11
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-1
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest2
+
+ Stage: Stage-9
+ Stats-Aggr Operator
+
+ Stage: Stage-10
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest2
+
+ Stage: Stage-12
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest2
+
+ Stage: Stage-13
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+PREHOOK: query: from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+insert overwrite table dest1 select key, val1
+insert overwrite table dest2 select key, val1, val2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+PREHOOK: Output: default@dest1
+PREHOOK: Output: default@dest2
+POSTHOOK: query: from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+insert overwrite table dest1 select key, val1
+insert overwrite table dest2 select key, val1, val2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+POSTHOOK: Output: default@dest1
+POSTHOOK: Output: default@dest2
+POSTHOOK: Lineage: dest1.key SIMPLE [(tbl1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dest1.value SIMPLE [(tbl1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: dest2.key SIMPLE [(tbl1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dest2.val1 SIMPLE [(tbl1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: dest2.val2 SIMPLE [(tbl2)b.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select * from dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from dest1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+2 val_2
+4 val_4
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+8 val_8
+9 val_9
+PREHOOK: query: select * from dest2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from dest2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest2
+#### A masked pattern was here ####
+0 val_0 val_0
+0 val_0 val_0
+0 val_0 val_0
+0 val_0 val_0
+0 val_0 val_0
+0 val_0 val_0
+0 val_0 val_0
+0 val_0 val_0
+0 val_0 val_0
+2 val_2 val_2
+4 val_4 val_4
+5 val_5 val_5
+5 val_5 val_5
+5 val_5 val_5
+5 val_5 val_5
+5 val_5 val_5
+5 val_5 val_5
+5 val_5 val_5
+5 val_5 val_5
+5 val_5 val_5
+8 val_8 val_8
+9 val_9 val_9
+PREHOOK: query: DROP TABLE dest2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@dest2
+PREHOOK: Output: default@dest2
+POSTHOOK: query: DROP TABLE dest2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@dest2
+POSTHOOK: Output: default@dest2
+PREHOOK: query: CREATE TABLE dest2(key int, cnt int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest2
+POSTHOOK: query: CREATE TABLE dest2(key int, cnt int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest2
+PREHOOK: query: -- The join is followed by a multi-table insert, and one of the inserts involves a reducer.
+-- It should be converted to a sort-merge join
+explain
+from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+insert overwrite table dest1 select key, val1
+insert overwrite table dest2 select key, count(*) group by key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The join is followed by a multi-table insert, and one of the inserts involves a reducer.
+-- It should be converted to a sort-merge join
+explain
+from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+insert overwrite table dest1 select key, val1
+insert overwrite table dest2 select key, count(*) group by key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6
+ Stage-5
+ Stage-0 depends on stages: Stage-5, Stage-4, Stage-7
+ Stage-3 depends on stages: Stage-0
+ Stage-4
+ Stage-6
+ Stage-7 depends on stages: Stage-6
+ Stage-1 depends on stages: Stage-2
+ Stage-9 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: _col0
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: _col0 (type: int), UDFToInteger(_col1) (type: int)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest2
+
+ Stage: Stage-8
+ Conditional Operator
+
+ Stage: Stage-5
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+
+ Stage: Stage-6
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+
+ Stage: Stage-7
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-1
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest2
+
+ Stage: Stage-9
+ Stats-Aggr Operator
+
+PREHOOK: query: from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+insert overwrite table dest1 select key, val1
+insert overwrite table dest2 select key, count(*) group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+PREHOOK: Output: default@dest1
+PREHOOK: Output: default@dest2
+POSTHOOK: query: from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+insert overwrite table dest1 select key, val1
+insert overwrite table dest2 select key, count(*) group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+POSTHOOK: Output: default@dest1
+POSTHOOK: Output: default@dest2
+POSTHOOK: Lineage: dest1.key SIMPLE [(tbl1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dest1.value SIMPLE [(tbl1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: dest2.cnt EXPRESSION [(tbl1)a.null, (tbl2)b.null, ]
+POSTHOOK: Lineage: dest2.key SIMPLE [(tbl1)a.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: select * from dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from dest1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+2 val_2
+4 val_4
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+8 val_8
+9 val_9
+PREHOOK: query: select * from dest2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from dest2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest2
+#### A masked pattern was here ####
+0 9
+2 1
+4 1
+5 9
+8 1
+9 1
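For reference, the Sorted Merge Bucket Map Join conversions exercised above
depend on both join inputs being bucketed and sorted on the join key, with the
relevant optimizer flags enabled. A minimal sketch of such a setup follows;
the bucket counts and exact settings here are illustrative, not copied from
the test file:

  set hive.enforce.bucketing=true;
  set hive.enforce.sorting=true;
  set hive.auto.convert.sortmerge.join=true;
  set hive.optimize.bucketmapjoin=true;
  set hive.optimize.bucketmapjoin.sortedmerge=true;

  -- Both tables bucketed and sorted on the join key, with equal bucket counts.
  CREATE TABLE tbl1 (key int, value string)
  CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
  CREATE TABLE tbl2 (key int, value string)
  CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;

  -- An equi-join on the bucketing/sort key can then be planned as a
  -- Sorted Merge Bucket Map Join, as in the EXPLAIN plans above.
  select count(*) from tbl1 a join tbl2 b on a.key = b.key;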
[08/50] [abbrv] hive git commit: HIVE-11825 : get_json_object(col,
'$.a') is null in where clause didn't work (Cazen Lee via Ashutosh
Chauhan)
Posted by xu...@apache.org.
HIVE-11825 : get_json_object(col,'$.a') is null in where clause didn't work (Cazen Lee via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c7de9b9f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c7de9b9f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c7de9b9f
Branch: refs/heads/beeline-cli
Commit: c7de9b9f5f627c3f3eef25ca783e88ccd7fa3ff6
Parents: 2278548
Author: Cazen Lee <Ca...@samsung.com>
Authored: Wed Sep 16 01:04:00 2015 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Fri Sep 18 09:59:31 2015 -0700
----------------------------------------------------------------------
ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java | 2 ++
1 file changed, 2 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/c7de9b9f/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java
index e69ad68..2c42fae 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java
@@ -66,6 +66,8 @@ public class UDFJson extends UDF {
static {
// Allows for unescaped ASCII control characters in JSON values
JSON_FACTORY.enable(Feature.ALLOW_UNQUOTED_CONTROL_CHARS);
+ // Allows backslash escaping of any character in JSON values
+ JSON_FACTORY.enable(Feature.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER);
}
private static final ObjectMapper MAPPER = new ObjectMapper(JSON_FACTORY);
private static final JavaType MAP_TYPE = TypeFactory.fromClass(Map.class);
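The added ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER flag makes the shared Jackson
factory tolerate non-standard backslash escapes, so documents containing them
no longer abort parsing inside the UDF. An illustrative query follows; the
table and column names are hypothetical, not taken from the patch:

  -- Hypothetical table: json_src(id int, json string). A value such as
  -- {"b":"\x"} uses a non-standard escape; before this change the parse
  -- failed for that row, which appears to be why IS NULL filters over
  -- get_json_object misbehaved. With the change, '$.a' is simply missing
  -- and the predicate matches as expected.
  select id
  from json_src
  where get_json_object(json, '$.a') is null;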
[49/50] [abbrv] hive git commit: HIVE-11831 : TXN tables in Oracle
should be created with ROWDEPENDENCIES (Sergey Shelukhin,
reviewed by Alan Gates)
Posted by xu...@apache.org.
HIVE-11831 : TXN tables in Oracle should be created with ROWDEPENDENCIES (Sergey Shelukhin, reviewed by Alan Gates)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/41a12cb2
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/41a12cb2
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/41a12cb2
Branch: refs/heads/beeline-cli
Commit: 41a12cb26789c94be22fa2936fc4ca41b3e675ba
Parents: 7b92f44
Author: Sergey Shelukhin <se...@apache.org>
Authored: Thu Sep 24 18:09:23 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Thu Sep 24 18:09:23 2015 -0700
----------------------------------------------------------------------
.../scripts/upgrade/oracle/hive-schema-0.13.0.oracle.sql | 10 +++++-----
.../scripts/upgrade/oracle/hive-schema-0.14.0.oracle.sql | 10 +++++-----
.../upgrade/oracle/hive-txn-schema-0.13.0.oracle.sql | 10 +++++-----
.../upgrade/oracle/hive-txn-schema-0.14.0.oracle.sql | 10 +++++-----
4 files changed, 20 insertions(+), 20 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/41a12cb2/metastore/scripts/upgrade/oracle/hive-schema-0.13.0.oracle.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/oracle/hive-schema-0.13.0.oracle.sql b/metastore/scripts/upgrade/oracle/hive-schema-0.13.0.oracle.sql
index 6bd8df9..014b7c0 100644
--- a/metastore/scripts/upgrade/oracle/hive-schema-0.13.0.oracle.sql
+++ b/metastore/scripts/upgrade/oracle/hive-schema-0.13.0.oracle.sql
@@ -766,21 +766,21 @@ CREATE TABLE TXNS (
TXN_LAST_HEARTBEAT NUMBER(19) NOT NULL,
TXN_USER varchar(128) NOT NULL,
TXN_HOST varchar(128) NOT NULL
-);
+) ROWDEPENDENCIES;
CREATE TABLE TXN_COMPONENTS (
TC_TXNID NUMBER(19) REFERENCES TXNS (TXN_ID),
TC_DATABASE VARCHAR2(128) NOT NULL,
TC_TABLE VARCHAR2(128),
TC_PARTITION VARCHAR2(767) NULL
-);
+) ROWDEPENDENCIES;
CREATE TABLE COMPLETED_TXN_COMPONENTS (
CTC_TXNID NUMBER(19),
CTC_DATABASE varchar(128) NOT NULL,
CTC_TABLE varchar(128),
CTC_PARTITION varchar(767)
-);
+) ROWDEPENDENCIES;
CREATE TABLE NEXT_TXN_ID (
NTXN_NEXT NUMBER(19) NOT NULL
@@ -801,7 +801,7 @@ CREATE TABLE HIVE_LOCKS (
HL_USER varchar(128) NOT NULL,
HL_HOST varchar(128) NOT NULL,
PRIMARY KEY(HL_LOCK_EXT_ID, HL_LOCK_INT_ID)
-);
+) ROWDEPENDENCIES;
CREATE INDEX HL_TXNID_INDEX ON HIVE_LOCKS (HL_TXNID);
@@ -820,7 +820,7 @@ CREATE TABLE COMPACTION_QUEUE (
CQ_WORKER_ID varchar(128),
CQ_START NUMBER(19),
CQ_RUN_AS varchar(128)
-);
+) ROWDEPENDENCIES;
CREATE TABLE NEXT_COMPACTION_QUEUE_ID (
NCQ_NEXT NUMBER(19) NOT NULL
http://git-wip-us.apache.org/repos/asf/hive/blob/41a12cb2/metastore/scripts/upgrade/oracle/hive-schema-0.14.0.oracle.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/oracle/hive-schema-0.14.0.oracle.sql b/metastore/scripts/upgrade/oracle/hive-schema-0.14.0.oracle.sql
index f1f71ce..ec9abba 100644
--- a/metastore/scripts/upgrade/oracle/hive-schema-0.14.0.oracle.sql
+++ b/metastore/scripts/upgrade/oracle/hive-schema-0.14.0.oracle.sql
@@ -766,21 +766,21 @@ CREATE TABLE TXNS (
TXN_LAST_HEARTBEAT NUMBER(19) NOT NULL,
TXN_USER varchar(128) NOT NULL,
TXN_HOST varchar(128) NOT NULL
-);
+) ROWDEPENDENCIES;
CREATE TABLE TXN_COMPONENTS (
TC_TXNID NUMBER(19) REFERENCES TXNS (TXN_ID),
TC_DATABASE VARCHAR2(128) NOT NULL,
TC_TABLE VARCHAR2(128),
TC_PARTITION VARCHAR2(767) NULL
-);
+) ROWDEPENDENCIES;
CREATE TABLE COMPLETED_TXN_COMPONENTS (
CTC_TXNID NUMBER(19),
CTC_DATABASE varchar(128) NOT NULL,
CTC_TABLE varchar(128),
CTC_PARTITION varchar(767)
-);
+) ROWDEPENDENCIES;
CREATE TABLE NEXT_TXN_ID (
NTXN_NEXT NUMBER(19) NOT NULL
@@ -801,7 +801,7 @@ CREATE TABLE HIVE_LOCKS (
HL_USER varchar(128) NOT NULL,
HL_HOST varchar(128) NOT NULL,
PRIMARY KEY(HL_LOCK_EXT_ID, HL_LOCK_INT_ID)
-);
+) ROWDEPENDENCIES;
CREATE INDEX HL_TXNID_INDEX ON HIVE_LOCKS (HL_TXNID);
@@ -820,7 +820,7 @@ CREATE TABLE COMPACTION_QUEUE (
CQ_WORKER_ID varchar(128),
CQ_START NUMBER(19),
CQ_RUN_AS varchar(128)
-);
+) ROWDEPENDENCIES;
CREATE TABLE NEXT_COMPACTION_QUEUE_ID (
NCQ_NEXT NUMBER(19) NOT NULL
http://git-wip-us.apache.org/repos/asf/hive/blob/41a12cb2/metastore/scripts/upgrade/oracle/hive-txn-schema-0.13.0.oracle.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/oracle/hive-txn-schema-0.13.0.oracle.sql b/metastore/scripts/upgrade/oracle/hive-txn-schema-0.13.0.oracle.sql
index 7435ea8..58e53c4 100644
--- a/metastore/scripts/upgrade/oracle/hive-txn-schema-0.13.0.oracle.sql
+++ b/metastore/scripts/upgrade/oracle/hive-txn-schema-0.13.0.oracle.sql
@@ -24,21 +24,21 @@ CREATE TABLE TXNS (
TXN_LAST_HEARTBEAT NUMBER(19) NOT NULL,
TXN_USER varchar(128) NOT NULL,
TXN_HOST varchar(128) NOT NULL
-);
+) ROWDEPENDENCIES;
CREATE TABLE TXN_COMPONENTS (
TC_TXNID NUMBER(19) REFERENCES TXNS (TXN_ID),
TC_DATABASE VARCHAR2(128) NOT NULL,
TC_TABLE VARCHAR2(128),
TC_PARTITION VARCHAR2(767) NULL
-);
+) ROWDEPENDENCIES;
CREATE TABLE COMPLETED_TXN_COMPONENTS (
CTC_TXNID NUMBER(19),
CTC_DATABASE varchar(128) NOT NULL,
CTC_TABLE varchar(128),
CTC_PARTITION varchar(767)
-);
+) ROWDEPENDENCIES;
CREATE TABLE NEXT_TXN_ID (
NTXN_NEXT NUMBER(19) NOT NULL
@@ -59,7 +59,7 @@ CREATE TABLE HIVE_LOCKS (
HL_USER varchar(128) NOT NULL,
HL_HOST varchar(128) NOT NULL,
PRIMARY KEY(HL_LOCK_EXT_ID, HL_LOCK_INT_ID)
-);
+) ROWDEPENDENCIES;
CREATE INDEX HL_TXNID_INDEX ON HIVE_LOCKS (HL_TXNID);
@@ -78,7 +78,7 @@ CREATE TABLE COMPACTION_QUEUE (
CQ_WORKER_ID varchar(128),
CQ_START NUMBER(19),
CQ_RUN_AS varchar(128)
-);
+) ROWDEPENDENCIES;
CREATE TABLE NEXT_COMPACTION_QUEUE_ID (
NCQ_NEXT NUMBER(19) NOT NULL
http://git-wip-us.apache.org/repos/asf/hive/blob/41a12cb2/metastore/scripts/upgrade/oracle/hive-txn-schema-0.14.0.oracle.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/oracle/hive-txn-schema-0.14.0.oracle.sql b/metastore/scripts/upgrade/oracle/hive-txn-schema-0.14.0.oracle.sql
index 7435ea8..58e53c4 100644
--- a/metastore/scripts/upgrade/oracle/hive-txn-schema-0.14.0.oracle.sql
+++ b/metastore/scripts/upgrade/oracle/hive-txn-schema-0.14.0.oracle.sql
@@ -24,21 +24,21 @@ CREATE TABLE TXNS (
TXN_LAST_HEARTBEAT NUMBER(19) NOT NULL,
TXN_USER varchar(128) NOT NULL,
TXN_HOST varchar(128) NOT NULL
-);
+) ROWDEPENDENCIES;
CREATE TABLE TXN_COMPONENTS (
TC_TXNID NUMBER(19) REFERENCES TXNS (TXN_ID),
TC_DATABASE VARCHAR2(128) NOT NULL,
TC_TABLE VARCHAR2(128),
TC_PARTITION VARCHAR2(767) NULL
-);
+) ROWDEPENDENCIES;
CREATE TABLE COMPLETED_TXN_COMPONENTS (
CTC_TXNID NUMBER(19),
CTC_DATABASE varchar(128) NOT NULL,
CTC_TABLE varchar(128),
CTC_PARTITION varchar(767)
-);
+) ROWDEPENDENCIES;
CREATE TABLE NEXT_TXN_ID (
NTXN_NEXT NUMBER(19) NOT NULL
@@ -59,7 +59,7 @@ CREATE TABLE HIVE_LOCKS (
HL_USER varchar(128) NOT NULL,
HL_HOST varchar(128) NOT NULL,
PRIMARY KEY(HL_LOCK_EXT_ID, HL_LOCK_INT_ID)
-);
+) ROWDEPENDENCIES;
CREATE INDEX HL_TXNID_INDEX ON HIVE_LOCKS (HL_TXNID);
@@ -78,7 +78,7 @@ CREATE TABLE COMPACTION_QUEUE (
CQ_WORKER_ID varchar(128),
CQ_START NUMBER(19),
CQ_RUN_AS varchar(128)
-);
+) ROWDEPENDENCIES;
CREATE TABLE NEXT_COMPACTION_QUEUE_ID (
NCQ_NEXT NUMBER(19) NOT NULL
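
A note on the hunks above: the only change in each of them is the added ROWDEPENDENCIES clause. In Oracle this stores a change SCN with every row, so the ORA_ROWSCN pseudocolumn and transaction dependency tracking work at row rather than block granularity; presumably that is why the busy transaction and lock tables get it here, since it keeps unrelated rows that share a block from appearing to conflict. Below is a minimal, hypothetical JDBC sketch of the row-level ORA_ROWSCN behaviour, not part of the commit; the connection URL and credentials are placeholders, and it assumes the HIVE_LOCKS table from the scripts above:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class RowScnProbe {
  public static void main(String[] args) throws Exception {
    // Placeholder connection details; substitute a real Oracle URL/user/password.
    String url = "jdbc:oracle:thin:@//dbhost:1521/metastore";
    try (Connection conn = DriverManager.getConnection(url, "hive", "secret");
         Statement st = conn.createStatement();
         // ORA_ROWSCN is a built-in Oracle pseudocolumn. With ROWDEPENDENCIES it
         // reflects the SCN of the last change to this particular row; without
         // it, every row in a block reports the block's most recent SCN.
         ResultSet rs = st.executeQuery(
             "SELECT HL_LOCK_EXT_ID, ORA_ROWSCN FROM HIVE_LOCKS")) {
      while (rs.next()) {
        System.out.println("lock " + rs.getLong(1)
            + " last changed at SCN " + rs.getLong(2));
      }
    }
  }
}
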
[47/50] [abbrv] hive git commit: HIVE-11517 Vectorized
auto_smb_mapjoin_14.q produces different results (Matt McCline,
reviewed by Vikram Dixit K)
Posted by xu...@apache.org.
HIVE-11517 Vectorized auto_smb_mapjoin_14.q produces different results (Matt McCline, reviewed by Vikram Dixit K)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/461e38ec
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/461e38ec
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/461e38ec
Branch: refs/heads/beeline-cli
Commit: 461e38ecee8b9fd1d829ff0884f78c1a75013bd3
Parents: 68d6cfd
Author: Matt McCline <mm...@hortonworks.com>
Authored: Thu Sep 24 15:23:50 2015 -0700
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Thu Sep 24 15:23:50 2015 -0700
----------------------------------------------------------------------
.../test/resources/testconfiguration.properties | 1 +
.../exec/vector/VectorSMBMapJoinOperator.java | 15 +-
.../clientpositive/vector_auto_smb_mapjoin_14.q | 297 +++
.../tez/vector_auto_smb_mapjoin_14.q.out | 1576 +++++++++++++++
.../vector_auto_smb_mapjoin_14.q.out | 1792 ++++++++++++++++++
5 files changed, 3679 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/461e38ec/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index b47d1b5..4f7b25f 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -190,6 +190,7 @@ minitez.query.files.shared=alter_merge_2_orc.q,\
update_two_cols.q,\
vector_acid3.q,\
vector_aggregate_9.q,\
+ vector_auto_smb_mapjoin_14.q,\
vector_between_in.q,\
vector_binary_join_groupby.q,\
vector_bucket.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/461e38ec/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java
index a2f8091..804ba17 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java
@@ -38,7 +38,9 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.SMBJoinDesc;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
/**
* VectorSMBJoinOperator.
@@ -123,8 +125,17 @@ public class VectorSMBMapJoinOperator extends SMBMapJoinOperator implements Vect
@Override
protected List<Object> smbJoinComputeKeys(Object row, byte alias) throws HiveException {
if (alias == this.posBigTable) {
- VectorizedRowBatch inBatch = (VectorizedRowBatch) row;
- return keyEvaluator.evaluate(keyValues[batchIndex]);
+
+ // The keyEvaluator reuses storage. That doesn't work with SMB MapJoin because it
+ // holds references to keys as it is merging.
+ List<Object> singletonListAndObjects = keyEvaluator.evaluate(keyValues[batchIndex]);
+ ArrayList<Object> result = new ArrayList<Object>(singletonListAndObjects.size());
+ for (int i = 0; i < singletonListAndObjects.size(); i++) {
+ result.add(ObjectInspectorUtils.copyToStandardObject(singletonListAndObjects.get(i),
+ joinKeysObjectInspectors[alias].get(i),
+ ObjectInspectorCopyOption.WRITABLE));
+ }
+ return result;
} else {
return super.smbJoinComputeKeys(row, alias);
}
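
For context, a minimal standalone sketch of the storage-reuse pitfall the change above guards against; this is not Hive code, and the scratch list and evaluate() are stand-ins for the real keyEvaluator. Because the evaluator hands back the same backing list on every call, any key a caller is still holding gets silently rewritten, which is exactly what corrupts the SMB merge; copying the key first (the commit uses ObjectInspectorUtils.copyToStandardObject with the WRITABLE option, which also copies the mutable Writable elements) breaks the aliasing:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class ReusedKeyDemo {
  // Mimics a vectorized key evaluator: it returns the same backing list on
  // every call, overwriting its contents in place.
  private static final List<Object> SCRATCH =
      new ArrayList<Object>(Arrays.asList((Object) null));

  static List<Object> evaluate(int key) {
    SCRATCH.set(0, key);
    return SCRATCH;        // the caller gets an alias, not a copy
  }

  public static void main(String[] args) {
    List<Object> heldKey = evaluate(1);    // an SMB join holds keys while merging...
    evaluate(2);                           // ...and the next evaluation clobbers them
    System.out.println(heldKey);           // prints [2], not [1]

    // The fix: copy the evaluated key before holding it. Integers are immutable,
    // so a list copy suffices in this sketch; the real fix also deep-copies the
    // elements, because Hive keys are mutable Writables reused the same way.
    List<Object> copiedKey = new ArrayList<Object>(evaluate(3));
    evaluate(4);
    System.out.println(copiedKey);         // prints [3], as intended
  }
}
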
http://git-wip-us.apache.org/repos/asf/hive/blob/461e38ec/ql/src/test/queries/clientpositive/vector_auto_smb_mapjoin_14.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_auto_smb_mapjoin_14.q b/ql/src/test/queries/clientpositive/vector_auto_smb_mapjoin_14.q
new file mode 100644
index 0000000..32be5ee
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_auto_smb_mapjoin_14.q
@@ -0,0 +1,297 @@
+SET hive.vectorized.execution.enabled=true;
+set hive.enforce.bucketing = true;
+set hive.enforce.sorting = true;
+set hive.exec.reducers.max = 1;
+
+-- SORT_QUERY_RESULTS
+
+CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS ORC;
+CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS ORC;
+
+insert overwrite table tbl1
+select * from src where key < 10;
+
+insert overwrite table tbl2
+select * from src where key < 10;
+
+set hive.optimize.bucketmapjoin = true;
+set hive.optimize.bucketmapjoin.sortedmerge = true;
+set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
+
+set hive.auto.convert.sortmerge.join=true;
+
+-- The join is being performed as part of a sub-query. It should be converted to a sort-merge join
+explain
+select count(*) from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1;
+
+select count(*) from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1;
+
+-- The join is being performed as part of more than one sub-query. It should be converted to a sort-merge join
+explain
+select count(*) from
+(
+ select key, count(*) from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1
+ group by key
+) subq2;
+
+select count(*) from
+(
+ select key, count(*) from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1
+ group by key
+) subq2;
+
+-- A join is being performed across different sub-queries, where a join is being performed in each of them.
+-- Each sub-query should be converted to a sort-merge join.
+explain
+select src1.key, src1.cnt1, src2.cnt1 from
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1 group by key
+) src1
+join
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq2 group by key
+) src2
+on src1.key = src2.key;
+
+select src1.key, src1.cnt1, src2.cnt1 from
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1 group by key
+) src1
+join
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq2 group by key
+) src2
+on src1.key = src2.key;
+
+-- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should
+-- be converted to a sort-merge join.
+explain
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on subq1.key = subq2.key;
+
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on subq1.key = subq2.key;
+
+-- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should
+-- be converted to a sort-merge join, although there is more than one level of sub-query
+explain
+select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join tbl2 b
+ on subq2.key = b.key;
+
+select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join tbl2 b
+ on subq2.key = b.key;
+
+-- Both the tables are nested sub-queries, i.e. more than 1 level of sub-query.
+-- The join should be converted to a sort-merge join
+explain
+select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq3
+ where key < 6
+ ) subq4
+ on subq2.key = subq4.key;
+
+select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq3
+ where key < 6
+ ) subq4
+ on subq2.key = subq4.key;
+
+-- The subquery itself is being joined. Since the sub-query only contains selects and filters and the join key
+-- is not getting modified, it should be converted to a sort-merge join. Note that the sub-query modifies one
+-- item, but that is not part of the join key.
+explain
+select count(*) from
+ (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1
+ join
+ (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2
+ on subq1.key = subq2.key;
+
+select count(*) from
+ (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1
+ join
+ (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2
+ on subq1.key = subq2.key;
+
+-- Since the join key is modified by the sub-query, neither sort-merge join nor bucketized map-side
+-- join should be performed
+explain
+select count(*) from
+ (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1
+ join
+ (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2
+ on subq1.key = subq2.key;
+
+select count(*) from
+ (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1
+ join
+ (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2
+ on subq1.key = subq2.key;
+
+-- One of the tables is a sub-query and the other is not.
+-- It should be converted to a sort-merge join.
+explain
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join tbl2 a on subq1.key = a.key;
+
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join tbl2 a on subq1.key = a.key;
+
+-- There are more than 2 inputs to the join, all of them being sub-queries.
+-- It should be converted to a sort-merge join
+explain
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on (subq1.key = subq2.key)
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq3
+ on (subq1.key = subq3.key);
+
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on subq1.key = subq2.key
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq3
+ on (subq1.key = subq3.key);
+
+-- The join is being performed on a nested sub-query, and an aggregation is performed after that.
+-- The join should be converted to a sort-merge join
+explain
+select count(*) from (
+ select subq2.key as key, subq2.value as value1, b.value as value2 from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+join tbl2 b
+on subq2.key = b.key) a;
+
+select count(*) from (
+ select subq2.key as key, subq2.value as value1, b.value as value2 from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+join tbl2 b
+on subq2.key = b.key) a;
+
+CREATE TABLE dest1(key int, value string);
+CREATE TABLE dest2(key int, val1 string, val2 string);
+
+-- The join is followed by a multi-table insert. It should be converted to
+-- a sort-merge join
+explain
+from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+insert overwrite table dest1 select key, val1
+insert overwrite table dest2 select key, val1, val2;
+
+from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+insert overwrite table dest1 select key, val1
+insert overwrite table dest2 select key, val1, val2;
+
+select * from dest1;
+select * from dest2;
+
+DROP TABLE dest2;
+CREATE TABLE dest2(key int, cnt int);
+
+-- The join is followed by a multi-table insert, and one of the inserts involves a reducer.
+-- It should be converted to a sort-merge join
+explain
+from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+insert overwrite table dest1 select key, val1
+insert overwrite table dest2 select key, count(*) group by key;
+
+from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+insert overwrite table dest1 select key, val1
+insert overwrite table dest2 select key, count(*) group by key;
+
+select * from dest1;
+select * from dest2;
http://git-wip-us.apache.org/repos/asf/hive/blob/461e38ec/ql/src/test/results/clientpositive/tez/vector_auto_smb_mapjoin_14.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_auto_smb_mapjoin_14.q.out b/ql/src/test/results/clientpositive/tez/vector_auto_smb_mapjoin_14.q.out
new file mode 100644
index 0000000..480c4e1
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/vector_auto_smb_mapjoin_14.q.out
@@ -0,0 +1,1576 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl1
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl1
+PREHOOK: query: CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl2
+POSTHOOK: query: CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl2
+PREHOOK: query: insert overwrite table tbl1
+select * from src where key < 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@tbl1
+POSTHOOK: query: insert overwrite table tbl1
+select * from src where key < 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@tbl1
+POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table tbl2
+select * from src where key < 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@tbl2
+POSTHOOK: query: insert overwrite table tbl2
+select * from src where key < 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@tbl2
+POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- The join is being performed as part of a sub-query. It should be converted to a sort-merge join
+explain
+select count(*) from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The join is being performed as part of a sub-query. It should be converted to a sort-merge join
+explain
+select count(*) from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+POSTHOOK: type: QUERY
+Plan not optimized by CBO due to missing statistics. Please check log for more details.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Reducer 2
+ File Output Operator [FS_13]
+ compressed:false
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}
+ Group By Operator [OP_20]
+ | aggregations:["count(VALUE._col0)"]
+ | outputColumnNames:["_col0"]
+ | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ |<-Map 1 [SIMPLE_EDGE]
+ Reduce Output Operator [RS_10]
+ sort order:
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions:_col0 (type: bigint)
+ Group By Operator [GBY_9]
+ aggregations:["count()"]
+ outputColumnNames:["_col0"]
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator [MERGEJOIN_18]
+ | condition map:[{"":"Inner Join 0 to 1"}]
+ | keys:{"0":"key (type: int)","1":"key (type: int)"}
+ | Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ |
+ |<-Filter Operator [FIL_17]
+ | predicate:key is not null (type: boolean)
+ | Statistics:Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ | TableScan [TS_1]
+ | alias:b
+ | Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ |<-Filter Operator [FIL_16]
+ predicate:key is not null (type: boolean)
+ Statistics:Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ TableScan [TS_0]
+ alias:a
+ Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+
+PREHOOK: query: select count(*) from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+22
+PREHOOK: query: -- The join is being performed as part of more than one sub-query. It should be converted to a sort-merge join
+explain
+select count(*) from
+(
+ select key, count(*) from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1
+ group by key
+) subq2
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The join is being performed as part of more than one sub-query. It should be converted to a sort-merge join
+explain
+select count(*) from
+(
+ select key, count(*) from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1
+ group by key
+) subq2
+POSTHOOK: type: QUERY
+Plan not optimized by CBO due to missing statistics. Please check log for more details.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Reducer 3
+ File Output Operator [FS_18]
+ compressed:false
+ Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}
+ Group By Operator [OP_28]
+ | aggregations:["count(VALUE._col0)"]
+ | outputColumnNames:["_col0"]
+ | Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ |<-Reducer 2 [SIMPLE_EDGE]
+ Reduce Output Operator [RS_15]
+ sort order:
+ Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions:_col0 (type: bigint)
+ Group By Operator [OP_27]
+ aggregations:["count()"]
+ outputColumnNames:["_col0"]
+ Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Select Operator [OP_26]
+ Statistics:Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator [OP_25]
+ | aggregations:["count(VALUE._col0)"]
+ | keys:KEY._col0 (type: int)
+ | outputColumnNames:["_col0","_col1"]
+ | Statistics:Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: NONE
+ |<-Map 1 [SIMPLE_EDGE]
+ Reduce Output Operator [RS_10]
+ key expressions:_col0 (type: int)
+ Map-reduce partition columns:_col0 (type: int)
+ sort order:+
+ Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ value expressions:_col1 (type: bigint)
+ Group By Operator [GBY_9]
+ aggregations:["count()"]
+ keys:_col0 (type: int)
+ outputColumnNames:["_col0","_col1"]
+ Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator [MERGEJOIN_23]
+ | condition map:[{"":"Inner Join 0 to 1"}]
+ | keys:{"0":"key (type: int)","1":"key (type: int)"}
+ | outputColumnNames:["_col0"]
+ | Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ |
+ |<-Filter Operator [FIL_22]
+ | predicate:key is not null (type: boolean)
+ | Statistics:Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ | TableScan [TS_1]
+ | alias:b
+ | Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ |<-Filter Operator [FIL_21]
+ predicate:key is not null (type: boolean)
+ Statistics:Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ TableScan [TS_0]
+ alias:a
+ Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+
+PREHOOK: query: select count(*) from
+(
+ select key, count(*) from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1
+ group by key
+) subq2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from
+(
+ select key, count(*) from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1
+ group by key
+) subq2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+6
+PREHOOK: query: -- A join is being performed across different sub-queries, where a join is being performed in each of them.
+-- Each sub-query should be converted to a sort-merge join.
+explain
+select src1.key, src1.cnt1, src2.cnt1 from
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1 group by key
+) src1
+join
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq2 group by key
+) src2
+on src1.key = src2.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- A join is being performed across different sub-queries, where a join is being performed in each of them.
+-- Each sub-query should be converted to a sort-merge join.
+explain
+select src1.key, src1.cnt1, src2.cnt1 from
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1 group by key
+) src1
+join
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq2 group by key
+) src2
+on src1.key = src2.key
+POSTHOOK: type: QUERY
+Plan not optimized by CBO due to missing statistics. Please check log for more details.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
+Reducer 6 <- Map 5 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Reducer 3
+ File Output Operator [FS_32]
+ compressed:false
+ Statistics:Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE
+ table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}
+ Select Operator [SEL_31]
+ outputColumnNames:["_col0","_col1","_col2"]
+ Statistics:Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator [MERGEJOIN_49]
+ | condition map:[{"":"Inner Join 0 to 1"}]
+ | keys:{"0":"_col0 (type: int)","1":"_col0 (type: int)"}
+ | outputColumnNames:["_col0","_col1","_col3"]
+ | Statistics:Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE
+ |<-Reducer 2 [SIMPLE_EDGE]
+ | Reduce Output Operator [RS_51]
+ | key expressions:_col0 (type: int)
+ | Map-reduce partition columns:_col0 (type: int)
+ | sort order:+
+ | Statistics:Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: NONE
+ | value expressions:_col1 (type: bigint)
+ | Group By Operator [OP_50]
+ | | aggregations:["count(VALUE._col0)"]
+ | | keys:KEY._col0 (type: int)
+ | | outputColumnNames:["_col0","_col1"]
+ | | Statistics:Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: NONE
+ | |<-Map 1 [SIMPLE_EDGE]
+ | Reduce Output Operator [RS_10]
+ | key expressions:_col0 (type: int)
+ | Map-reduce partition columns:_col0 (type: int)
+ | sort order:+
+ | Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ | value expressions:_col1 (type: bigint)
+ | Group By Operator [GBY_9]
+ | aggregations:["count()"]
+ | keys:_col0 (type: int)
+ | outputColumnNames:["_col0","_col1"]
+ | Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ | Merge Join Operator [MERGEJOIN_45]
+ | | condition map:[{"":"Inner Join 0 to 1"}]
+ | | keys:{"0":"key (type: int)","1":"key (type: int)"}
+ | | outputColumnNames:["_col0"]
+ | | Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ | |
+ | |<-Filter Operator [FIL_42]
+ | | predicate:key is not null (type: boolean)
+ | | Statistics:Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ | | TableScan [TS_1]
+ | | alias:b
+ | | Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ | |<-Filter Operator [FIL_41]
+ | predicate:key is not null (type: boolean)
+ | Statistics:Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ | TableScan [TS_0]
+ | alias:a
+ | Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ |<-Reducer 6 [SIMPLE_EDGE]
+ Reduce Output Operator [RS_53]
+ key expressions:_col0 (type: int)
+ Map-reduce partition columns:_col0 (type: int)
+ sort order:+
+ Statistics:Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: NONE
+ value expressions:_col1 (type: bigint)
+ Group By Operator [OP_52]
+ | aggregations:["count(VALUE._col0)"]
+ | keys:KEY._col0 (type: int)
+ | outputColumnNames:["_col0","_col1"]
+ | Statistics:Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: NONE
+ |<-Map 5 [SIMPLE_EDGE]
+ Reduce Output Operator [RS_23]
+ key expressions:_col0 (type: int)
+ Map-reduce partition columns:_col0 (type: int)
+ sort order:+
+ Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ value expressions:_col1 (type: bigint)
+ Group By Operator [GBY_22]
+ aggregations:["count()"]
+ keys:_col0 (type: int)
+ outputColumnNames:["_col0","_col1"]
+ Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator [MERGEJOIN_47]
+ | condition map:[{"":"Inner Join 0 to 1"}]
+ | keys:{"0":"key (type: int)","1":"key (type: int)"}
+ | outputColumnNames:["_col0"]
+ | Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ |
+ |<-Filter Operator [FIL_44]
+ | predicate:key is not null (type: boolean)
+ | Statistics:Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ | TableScan [TS_14]
+ | alias:b
+ | Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ |<-Filter Operator [FIL_43]
+ predicate:key is not null (type: boolean)
+ Statistics:Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ TableScan [TS_13]
+ alias:a
+ Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+
+PREHOOK: query: select src1.key, src1.cnt1, src2.cnt1 from
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1 group by key
+) src1
+join
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq2 group by key
+) src2
+on src1.key = src2.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select src1.key, src1.cnt1, src2.cnt1 from
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1 group by key
+) src1
+join
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq2 group by key
+) src2
+on src1.key = src2.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+0 9 9
+2 1 1
+4 1 1
+5 9 9
+8 1 1
+9 1 1
+PREHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should
+-- be converted to a sort-merge join.
+explain
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on subq1.key = subq2.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should
+-- be converted to a sort-merge join.
+explain
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on subq1.key = subq2.key
+POSTHOOK: type: QUERY
+Plan not optimized by CBO due to missing statistics. Please check log for more details.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Reducer 2
+ File Output Operator [FS_16]
+ compressed:false
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}
+ Group By Operator [OP_23]
+ | aggregations:["count(VALUE._col0)"]
+ | outputColumnNames:["_col0"]
+ | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ |<-Map 1 [SIMPLE_EDGE]
+ Reduce Output Operator [RS_13]
+ sort order:
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions:_col0 (type: bigint)
+ Group By Operator [GBY_12]
+ aggregations:["count()"]
+ outputColumnNames:["_col0"]
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator [MERGEJOIN_21]
+ | condition map:[{"":"Inner Join 0 to 1"}]
+ | keys:{"0":"_col0 (type: int)","1":"_col0 (type: int)"}
+ | Statistics:Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: NONE
+ |
+ |<-Select Operator [SEL_5]
+ | outputColumnNames:["_col0"]
+ | Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ | Filter Operator [FIL_20]
+ | predicate:(key < 6) (type: boolean)
+ | Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ | TableScan [TS_3]
+ | alias:a
+ | Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ |<-Select Operator [SEL_2]
+ outputColumnNames:["_col0"]
+ Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator [FIL_19]
+ predicate:(key < 6) (type: boolean)
+ Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ TableScan [TS_0]
+ alias:a
+ Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+
+PREHOOK: query: select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on subq1.key = subq2.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on subq1.key = subq2.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+20
+PREHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should
+-- be converted to a sort-merge join, although there is more than one level of sub-query
+explain
+select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join tbl2 b
+ on subq2.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should
+-- be converted to a sort-merge join, although there is more than one level of sub-query
+explain
+select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join tbl2 b
+ on subq2.key = b.key
+POSTHOOK: type: QUERY
+Plan not optimized by CBO due to missing statistics. Please check log for more details.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Reducer 2
+ File Output Operator [FS_16]
+ compressed:false
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}
+ Group By Operator [OP_23]
+ | aggregations:["count(VALUE._col0)"]
+ | outputColumnNames:["_col0"]
+ | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ |<-Map 1 [SIMPLE_EDGE]
+ Reduce Output Operator [RS_13]
+ sort order:
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions:_col0 (type: bigint)
+ Group By Operator [GBY_12]
+ aggregations:["count()"]
+ outputColumnNames:["_col0"]
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator [MERGEJOIN_21]
+ | condition map:[{"":"Inner Join 0 to 1"}]
+ | keys:{"0":"_col0 (type: int)","1":"key (type: int)"}
+ | Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ |
+ |<-Filter Operator [FIL_20]
+ | predicate:key is not null (type: boolean)
+ | Statistics:Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ | TableScan [TS_5]
+ | alias:b
+ | Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ |<-Select Operator [SEL_2]
+ outputColumnNames:["_col0"]
+ Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator [FIL_19]
+ predicate:(((key < 8) and (key < 6)) and key is not null) (type: boolean)
+ Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
+ TableScan [TS_0]
+ alias:a
+ Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+
+PREHOOK: query: select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join tbl2 b
+ on subq2.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join tbl2 b
+ on subq2.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+20
+PREHOOK: query: -- Both the tables are nested sub-queries, i.e. more than 1 level of sub-query.
+-- The join should be converted to a sort-merge join
+explain
+select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq3
+ where key < 6
+ ) subq4
+ on subq2.key = subq4.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Both the tables are nested sub-queries, i.e. more than 1 level of sub-query.
+-- The join should be converted to a sort-merge join
+explain
+select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq3
+ where key < 6
+ ) subq4
+ on subq2.key = subq4.key
+POSTHOOK: type: QUERY
+Plan not optimized by CBO due to missing statistics. Please check log for more details.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Reducer 2
+ File Output Operator [FS_20]
+ compressed:false
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}
+ Group By Operator [OP_27]
+ | aggregations:["count(VALUE._col0)"]
+ | outputColumnNames:["_col0"]
+ | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ |<-Map 1 [SIMPLE_EDGE]
+ Reduce Output Operator [RS_17]
+ sort order:
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions:_col0 (type: bigint)
+ Group By Operator [GBY_16]
+ aggregations:["count()"]
+ outputColumnNames:["_col0"]
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator [MERGEJOIN_25]
+ | condition map:[{"":"Inner Join 0 to 1"}]
+ | keys:{"0":"_col0 (type: int)","1":"_col0 (type: int)"}
+ | Statistics:Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: NONE
+ |
+ |<-Select Operator [SEL_7]
+ | outputColumnNames:["_col0"]
+ | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
+ | Filter Operator [FIL_24]
+ | predicate:(((key < 8) and (key < 6)) and key is not null) (type: boolean)
+ | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
+ | TableScan [TS_5]
+ | alias:a
+ | Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ |<-Select Operator [SEL_2]
+ outputColumnNames:["_col0"]
+ Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator [FIL_23]
+ predicate:(((key < 8) and (key < 6)) and key is not null) (type: boolean)
+ Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
+ TableScan [TS_0]
+ alias:a
+ Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+
+PREHOOK: query: select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq3
+ where key < 6
+ ) subq4
+ on subq2.key = subq4.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq3
+ where key < 6
+ ) subq4
+ on subq2.key = subq4.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+#### A masked pattern was here ####
+20
+PREHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters and the join key
+-- is not getting modified, it should be converted to a sort-merge join. Note that the sub-query modifies one
+-- item, but that is not part of the join key.
+explain
+select count(*) from
+ (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1
+ join
+ (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2
+ on subq1.key = subq2.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters and the join key
+-- is not getting modified, it should be converted to a sort-merge join. Note that the sub-query modifies one
+-- item, but that is not part of the join key.
+explain
+select count(*) from
+ (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1
+ join
+ (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2
+ on subq1.key = subq2.key
+POSTHOOK: type: QUERY
+Plan not optimized by CBO due to missing statistics. Please check log for more details.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Reducer 2
+ File Output Operator [FS_16]
+ compressed:false
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}
+ Group By Operator [OP_23]
+ | aggregations:["count(VALUE._col0)"]
+ | outputColumnNames:["_col0"]
+ | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ |<-Map 1 [SIMPLE_EDGE]
+ Reduce Output Operator [RS_13]
+ sort order:
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions:_col0 (type: bigint)
+ Group By Operator [GBY_12]
+ aggregations:["count()"]
+ outputColumnNames:["_col0"]
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator [MERGEJOIN_21]
+ | condition map:[{"":"Inner Join 0 to 1"}]
+ | keys:{"0":"_col0 (type: int)","1":"_col0 (type: int)"}
+ | Statistics:Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: NONE
+ |
+ |<-Select Operator [SEL_5]
+ | outputColumnNames:["_col0"]
+ | Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ | Filter Operator [FIL_20]
+ | predicate:(key < 8) (type: boolean)
+ | Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ | TableScan [TS_3]
+ | alias:a
+ | Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ |<-Select Operator [SEL_2]
+ outputColumnNames:["_col0"]
+ Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator [FIL_19]
+ predicate:(key < 8) (type: boolean)
+ Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ TableScan [TS_0]
+ alias:a
+ Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+
+PREHOOK: query: select count(*) from
+ (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1
+ join
+ (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2
+ on subq1.key = subq2.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from
+ (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1
+ join
+ (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2
+ on subq1.key = subq2.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+20
+PREHOOK: query: -- Since the join key is modified by the sub-query, neither sort-merge join nor bucketized map-side
+-- join should be performed
+explain
+select count(*) from
+ (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1
+ join
+ (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2
+ on subq1.key = subq2.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Since the join key is modified by the sub-query, neither sort-merge join nor bucketized map-side
+-- join should be performed
+explain
+select count(*) from
+ (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1
+ join
+ (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2
+ on subq1.key = subq2.key
+POSTHOOK: type: QUERY
+Plan not optimized by CBO due to missing statistics. Please check log for more details.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Reducer 3
+ File Output Operator [FS_14]
+ compressed:false
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}
+ Group By Operator [OP_26]
+ | aggregations:["count(VALUE._col0)"]
+ | outputColumnNames:["_col0"]
+ | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ |<-Reducer 2 [SIMPLE_EDGE]
+ Reduce Output Operator [RS_11]
+ sort order:
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions:_col0 (type: bigint)
+ Group By Operator [GBY_10]
+ aggregations:["count()"]
+ outputColumnNames:["_col0"]
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator [MERGEJOIN_19]
+ | condition map:[{"":"Inner Join 0 to 1"}]
+ | keys:{"0":"_col0 (type: int)","1":"_col0 (type: int)"}
+ | Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ |<-Map 1 [SIMPLE_EDGE]
+ | Reduce Output Operator [RS_22]
+ | key expressions:_col0 (type: int)
+ | Map-reduce partition columns:_col0 (type: int)
+ | sort order:+
+ | Statistics:Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ | Filter Operator [FIL_21]
+ | predicate:_col0 is not null (type: boolean)
+ | Statistics:Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ | Select Operator [OP_20]
+ | outputColumnNames:["_col0"]
+ | Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ | TableScan [TS_0]
+ | alias:a
+ | Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ |<-Map 4 [SIMPLE_EDGE]
+ Reduce Output Operator [RS_25]
+ key expressions:_col0 (type: int)
+ Map-reduce partition columns:_col0 (type: int)
+ sort order:+
+ Statistics:Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator [FIL_24]
+ predicate:_col0 is not null (type: boolean)
+ Statistics:Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ Select Operator [OP_23]
+ outputColumnNames:["_col0"]
+ Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ TableScan [TS_2]
+ alias:a
+ Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+
+PREHOOK: query: select count(*) from
+ (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1
+ join
+ (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2
+ on subq1.key = subq2.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from
+ (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1
+ join
+ (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2
+ on subq1.key = subq2.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+22
+PREHOOK: query: -- One of the tables is a sub-query and the other is not.
+-- It should be converted to a sort-merge join.
+explain
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join tbl2 a on subq1.key = a.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- One of the tables is a sub-query and the other is not.
+-- It should be converted to a sort-merge join.
+explain
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join tbl2 a on subq1.key = a.key
+POSTHOOK: type: QUERY
+Plan not optimized by CBO due to missing statistics. Please check log for more details.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Reducer 2
+ File Output Operator [FS_14]
+ compressed:false
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}
+ Group By Operator [OP_21]
+ | aggregations:["count(VALUE._col0)"]
+ | outputColumnNames:["_col0"]
+ | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ |<-Map 1 [SIMPLE_EDGE]
+ Reduce Output Operator [RS_11]
+ sort order:
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions:_col0 (type: bigint)
+ Group By Operator [GBY_10]
+ aggregations:["count()"]
+ outputColumnNames:["_col0"]
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator [MERGEJOIN_19]
+ | condition map:[{"":"Inner Join 0 to 1"}]
+ | keys:{"0":"_col0 (type: int)","1":"key (type: int)"}
+ | Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ |
+ |<-Filter Operator [FIL_18]
+ | predicate:key is not null (type: boolean)
+ | Statistics:Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ | TableScan [TS_3]
+ | alias:a
+ | Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ |<-Select Operator [SEL_2]
+ outputColumnNames:["_col0"]
+ Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator [FIL_17]
+ predicate:(key < 6) (type: boolean)
+ Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ TableScan [TS_0]
+ alias:a
+ Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+
+PREHOOK: query: select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join tbl2 a on subq1.key = a.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join tbl2 a on subq1.key = a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+20
+PREHOOK: query: -- There are more than 2 inputs to the join, all of them being sub-queries.
+-- It should be converted to a sort-merge join
+explain
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on (subq1.key = subq2.key)
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq3
+ on (subq1.key = subq3.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- There are more than 2 inputs to the join, all of them being sub-queries.
+-- It should be converted to a sort-merge join
+explain
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on (subq1.key = subq2.key)
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq3
+ on (subq1.key = subq3.key)
+POSTHOOK: type: QUERY
+Plan not optimized by CBO due to missing statistics. Please check log for more details.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Reducer 2
+ File Output Operator [FS_21]
+ compressed:false
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}
+ Group By Operator [OP_34]
+ | aggregations:["count(VALUE._col0)"]
+ | outputColumnNames:["_col0"]
+ | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ |<-Map 1 [SIMPLE_EDGE]
+ Reduce Output Operator [RS_18]
+ sort order:
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions:_col0 (type: bigint)
+ Group By Operator [GBY_17]
+ aggregations:["count()"]
+ outputColumnNames:["_col0"]
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator [MERGEJOIN_31]
+ | condition map:[{"":"Inner Join 0 to 1"},{"":"Inner Join 0 to 2"}]
+ | keys:{"0":"_col0 (type: int)","1":"_col0 (type: int)","2":"_col0 (type: int)"}
+ | Statistics:Num rows: 6 Data size: 613 Basic stats: COMPLETE Column stats: NONE
+ |
+ |<-Select Operator [SEL_5]
+ | outputColumnNames:["_col0"]
+ | Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ | Filter Operator [FIL_29]
+ | predicate:(key < 6) (type: boolean)
+ | Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ | TableScan [TS_3]
+ | alias:a
+ | Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ |
+ |<-Select Operator [SEL_8]
+ | outputColumnNames:["_col0"]
+ | Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ | Filter Operator [FIL_30]
+ | predicate:(key < 6) (type: boolean)
+ | Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ | TableScan [TS_6]
+ | alias:a
+ | Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ |<-Select Operator [SEL_2]
+ outputColumnNames:["_col0"]
+ Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator [FIL_28]
+ predicate:(key < 6) (type: boolean)
+ Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ TableScan [TS_0]
+ alias:a
+ Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+
+PREHOOK: query: select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on subq1.key = subq2.key
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq3
+ on (subq1.key = subq3.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on subq1.key = subq2.key
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq3
+ on (subq1.key = subq3.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+56
+PREHOOK: query: -- The join is being performed on a nested sub-query, and an aggregation is performed after that.
+-- The join should be converted to a sort-merge join
+explain
+select count(*) from (
+ select subq2.key as key, subq2.value as value1, b.value as value2 from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+join tbl2 b
+on subq2.key = b.key) a
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The join is being performed on a nested sub-query, and an aggregation is performed after that.
+-- The join should be converted to a sort-merge join
+explain
+select count(*) from (
+ select subq2.key as key, subq2.value as value1, b.value as value2 from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+join tbl2 b
+on subq2.key = b.key) a
+POSTHOOK: type: QUERY
+Plan not optimized by CBO due to missing statistics. Please check log for more details.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Reducer 2
+ File Output Operator [FS_17]
+ compressed:false
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}
+ Group By Operator [OP_24]
+ | aggregations:["count(VALUE._col0)"]
+ | outputColumnNames:["_col0"]
+ | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ |<-Map 1 [SIMPLE_EDGE]
+ Reduce Output Operator [RS_14]
+ sort order:
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions:_col0 (type: bigint)
+ Group By Operator [GBY_13]
+ aggregations:["count()"]
+ outputColumnNames:["_col0"]
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator [MERGEJOIN_22]
+ | condition map:[{"":"Inner Join 0 to 1"}]
+ | keys:{"0":"_col0 (type: int)","1":"key (type: int)"}
+ | Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ |
+ |<-Filter Operator [FIL_21]
+ | predicate:key is not null (type: boolean)
+ | Statistics:Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ | TableScan [TS_5]
+ | alias:b
+ | Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ |<-Select Operator [SEL_2]
+ outputColumnNames:["_col0"]
+ Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator [FIL_20]
+ predicate:(((key < 8) and (key < 6)) and key is not null) (type: boolean)
+ Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
+ TableScan [TS_0]
+ alias:a
+ Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+
+PREHOOK: query: select count(*) from (
+ select subq2.key as key, subq2.value as value1, b.value as value2 from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+join tbl2 b
+on subq2.key = b.key) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (
+ select subq2.key as key, subq2.value as value1, b.value as value2 from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+join tbl2 b
+on subq2.key = b.key) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+20
+PREHOOK: query: CREATE TABLE dest1(key int, value string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest1
+POSTHOOK: query: CREATE TABLE dest1(key int, value string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest1
+PREHOOK: query: CREATE TABLE dest2(key int, val1 string, val2 string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest2
+POSTHOOK: query: CREATE TABLE dest2(key int, val1 string, val2 string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest2
+PREHOOK: query: -- The join is followed by a multi-table insert. It should be converted to
+-- a sort-merge join
+explain
+from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+insert overwrite table dest1 select key, val1
+insert overwrite table dest2 select key, val1, val2
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The join is followed by a multi-table insert. It should be converted to
+-- a sort-merge join
+explain
+from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+insert overwrite table dest1 select key, val1
+insert overwrite table dest2 select key, val1, val2
+POSTHOOK: type: QUERY
+Plan not optimized by CBO.
+
+Stage-4
+ Stats-Aggr Operator
+ Stage-0
+ Move Operator
+ table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.dest1"}
+ Stage-3
+ Dependency Collection{}
+ Stage-2
+ Map 1
+ File Output Operator [FS_9]
+ compressed:false
+ Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.dest1"}
+ Select Operator [SEL_8]
+ outputColumnNames:["_col0","_col1"]
+ Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ Select Operator [SEL_7]
+ outputColumnNames:["_col0","_col1","_col2"]
+ Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator [MERGEJOIN_16]
+ | condition map:[{"":"Inner Join 0 to 1"}]
+ | keys:{"0":"key (type: int)","1":"key (type: int)"}
+ | outputColumnNames:["_col0","_col1","_col6"]
+ | Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ |
+ |<-Filter Operator [FIL_15]
+ | predicate:key is not null (type: boolean)
+ | Statistics:Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ | TableScan [TS_1]
+ | alias:b
+ | Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ |<-Filter Operator [FIL_14]
+ predicate:key is not null (type: boolean)
+ Statistics:Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ TableScan [TS_0]
+ alias:a
+ Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator [FS_11]
+ compressed:false
+ Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.dest2"}
+ Please refer to the previous Select Operator [SEL_7]
+Stage-5
+ Stats-Aggr Operator
+ Stage-1
+ Move Operator
+ table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.dest2"}
+ Please refer to the previous Stage-3
+
+PREHOOK: query: from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+insert overwrite table dest1 select key, val1
+insert overwrite table dest2 select key, val1, val2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+PREHOOK: Output: default@dest1
+PREHOOK: Output: default@dest2
+POSTHOOK: query: from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+insert overwrite table dest1 select key, val1
+insert overwrite table dest2 select key, val1, val2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+POSTHOOK: Output: default@dest1
+POSTHOOK: Output: default@dest2
+POSTHOOK: Lineage: dest1.key SIMPLE [(tbl1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dest1.value SIMPLE [(tbl1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: dest2.key SIMPLE [(tbl1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dest2.val1 SIMPLE [(tbl1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: dest2.val2 SIMPLE [(tbl2)b.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select * from dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from dest1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+2 val_2
+4 val_4
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+8 val_8
+9 val_9
+PREHOOK: query: select * from dest2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from dest2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest2
+#### A masked pattern was here ####
+0 val_0 val_0
+0 val_0 val_0
+0 val_0 val_0
+0 val_0 val_0
+0 val_0 val_0
+0 val_0 val_0
+0 val_0 val_0
+0 val_0 val_0
+0 val_0 val_0
+2 val_2 val_2
+4 val_4 val_4
+5 val_5 val_5
+5 val_5 val_5
+5 val_5 val_5
+5 val_5 val_5
+5 val_5 val_5
+5 val_5 val_5
+5 val_5 val_5
+5 val_5 val_5
+5 val_5 val_5
+8 val_8 val_8
+9 val_9 val_9
+PREHOOK: query: DROP TABLE dest2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@dest2
+PREHOOK: Output: default@dest2
+POSTHOOK: query: DROP TABLE dest2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@dest2
+POSTHOOK: Output: default@dest2
+PREHOOK: query: CREATE TABLE dest2(key int, cnt int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest2
+POSTHOOK: query: CREATE TABLE dest2(key int, cnt int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest2
+PREHOOK: query: -- The join is followed by a multi-table insert, and one of the inserts involves a reducer.
+-- It should be converted to a sort-merge join
+explain
+from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+insert overwrite table dest1 select key, val1
+insert overwrite table dest2 select key, count(*) group by key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The join is followed by a multi-table insert, and one of the inserts involves a reducer.
+-- It should be converted to a sort-merge join
+explain
+from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+insert overwrite table dest1 select key, val1
+insert overwrite table dest2 select key, count(*) group by key
+POSTHOOK: type: QUERY
+Plan not optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-4
+ Stats-Aggr Operator
+ Stage-0
+ Move Operator
+ table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.dest1"}
+ Stage-3
+ Dependency Collection{}
+ Stage-2
+ Reducer 2
+ File Output Operator [FS_25]
+ compressed:false
+ Statistics:Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: NONE
+ table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.dest2"}
+ Select Operator [OP_24]
+ outputColumnNames:["_col0","_col1"]
+ Statistics:Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator [OP_23]
+ | aggregations:["count(VALUE._col0)"]
+ | keys:KEY._col0 (type: int)
+ | outputColumnNames:["_col0","_col1"]
+ | Statistics:Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: NONE
+ |<-Map 1 [SIMPLE_EDGE]
+ File Output Operator [FS_9]
+ compressed:false
+ Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.dest1"}
+ Merge Join Operator [MERGEJOIN_21]
+ | condition map:[{"":"Inner Join 0 to 1"}]
+ | keys:{"0":"key (type: int)","1":"key (type: int)"}
+ | outputColumnNames:["_col0","_col1"]
+ | Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ |
+ |<-Filter Operator [FIL_20]
+ | predicate:key is not null (type: boolean)
+ | Statistics:Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ | TableScan [TS_1]
+ | alias:b
+ | Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ |<-Filter Operator [FIL_19]
+ predicate:key is not null (type: boolean)
+ Statistics:Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ TableScan [TS_0]
+ alias:a
+ Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator [RS_12]
+ key expressions:_col0 (type: int)
+ Map-reduce partition columns:_col0 (type: int)
+ sort order:+
+ Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ value expressions:_col1 (type: bigint)
+ Group By Operator [GBY_11]
+ aggregations:["count()"]
+ keys:_col0 (type: int)
+ outputColumnNames:["_col0","_col1"]
+ Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ Select Operator [SEL_10]
+ outputColumnNames:["_col0"]
+ Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ Please refer to the previous Merge Join Operator [MERGEJOIN_21]
+Stage-5
+ Stats-Aggr Operator
+ Stage-1
+ Move Operator
+ table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.dest2"}
+ Please refer to the previous Stage-3
+
+PREHOOK: query: from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+insert overwrite table dest1 select key, val1
+insert overwrite table dest2 select key, count(*) group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+PREHOOK: Output: default@dest1
+PREHOOK: Output: default@dest2
+POSTHOOK: query: from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+insert overwrite table dest1 select key, val1
+insert overwrite table dest2 select key, count(*) group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+POSTHOOK: Output: default@dest1
+POSTHOOK: Output: default@dest2
+POSTHOOK: Lineage: dest1.key SIMPLE [(tbl1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dest1.value SIMPLE [(tbl1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: dest2.cnt EXPRESSION [(tbl1)a.null, (tbl2)b.null, ]
+POSTHOOK: Lineage: dest2.key SIMPLE [(tbl1)a.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: select * from dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from dest1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+2 val_2
+4 val_4
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+8 val_8
+9 val_9
+PREHOOK: query: select * from dest2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from dest2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest2
+#### A masked pattern was here ####
+0 9
+2 1
+4 1
+5 9
+8 1
+9 1
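For readers unfamiliar with the operator these plans keep converging on: a sort-merge join walks two inputs that are already sorted on the join key with two cursors, pairing up equal-key runs without building a hash table. A bare-bones sketch of the inner-join case on two pre-sorted int-keyed lists (a hypothetical illustration, not Hive's operator):

    import java.util.ArrayList;
    import java.util.List;

    final class SortMergeJoinSketch {
        // Inner-joins two key lists that are ALREADY sorted ascending,
        // emitting one "key,key" pair per matching key combination.
        static List<String> join(int[] left, int[] right) {
            List<String> out = new ArrayList<>();
            int i = 0, j = 0;
            while (i < left.length && j < right.length) {
                if (left[i] < right[j]) {
                    i++;
                } else if (left[i] > right[j]) {
                    j++;
                } else {
                    int key = left[i];
                    // Pair up the full run of equal keys on both sides.
                    int jStart = j;
                    while (i < left.length && left[i] == key) {
                        for (j = jStart; j < right.length && right[j] == key; j++) {
                            out.add(key + "," + key);
                        }
                        i++;
                    }
                }
            }
            return out;
        }

        public static void main(String[] args) {
            // Mirrors the duplicate-heavy keys in the test tables above.
            System.out.println(join(new int[] {0, 0, 2, 5, 5},
                                    new int[] {0, 2, 5, 5, 8}).size()); // 2*1 + 1*1 + 2*2 = 7
        }
    }

The duplicate-run handling is why the dest1/dest2 results above contain 9 and 81-style fan-outs for repeated keys: each side's run of equal keys multiplies out.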
[13/50] [abbrv] hive git commit: HIVE-9811 : Hive on Tez leaks
WorkMap objects (Oleg Danilov, reviewed by Sergey Shelukhin)
Posted by xu...@apache.org.
HIVE-9811 : Hive on Tez leaks WorkMap objects (Oleg Danilov, reviewed by Sergey Shelukhin)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3672a279
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3672a279
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3672a279
Branch: refs/heads/beeline-cli
Commit: 3672a279a6fa46fa2a55346ef2257cf52a9900da
Parents: 4c0fb13
Author: Sergey Shelukhin <se...@apache.org>
Authored: Fri Sep 18 15:01:04 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Fri Sep 18 15:01:04 2015 -0700
----------------------------------------------------------------------
.../org/apache/hadoop/hive/ql/exec/tez/TezTask.java | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
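The change promotes workToVertex and workToConf from locals in the DAG-build method to instance fields, so the task's finally block can use workToConf to evict this task's entries from the static gWorkMap even when the DAG fails. A minimal sketch of that eviction pattern, with hypothetical names (TaskSketch, G_WORK_MAP) standing in for the actual Hive classes:

    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;
    import java.util.concurrent.ConcurrentHashMap;

    public class TaskSketch {
        // Static, process-wide cache; entries left behind here are the leak.
        private static final Map<String, Object> G_WORK_MAP = new ConcurrentHashMap<>();

        // A field (not a local) so the cleanup path can still reach the mapping.
        private final Map<String, String> workToConfKey = new HashMap<>();

        public void execute(List<String> allWork) {
            try {
                for (String w : allWork) {
                    String confKey = "conf-" + w;           // stand-in for a per-work JobConf
                    workToConfKey.put(w, confKey);
                    G_WORK_MAP.put(confKey, new Object());  // populate the shared cache
                }
                // ... build and run the DAG ...
            } finally {
                // Evict exactly the entries this task created, success or failure.
                for (String w : allWork) {
                    String confKey = workToConfKey.get(w);
                    if (confKey != null) {
                        G_WORK_MAP.remove(confKey);
                    }
                }
            }
        }
    }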
http://git-wip-us.apache.org/repos/asf/hive/blob/3672a279/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
index 3a6ec1a..4a1a712 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
@@ -83,6 +83,9 @@ public class TezTask extends Task<TezWork> {
private final DagUtils utils;
+ Map<BaseWork, Vertex> workToVertex = new HashMap<BaseWork, Vertex>();
+ Map<BaseWork, JobConf> workToConf = new HashMap<BaseWork, JobConf>();
+
public TezTask() {
this(DagUtils.getInstance());
}
@@ -197,6 +200,15 @@ public class TezTask extends Task<TezWork> {
// rc will be 1 at this point indicating failure.
} finally {
Utilities.clearWork(conf);
+
+ // Clear gWorkMap
+ for (BaseWork w : work.getAllWork()) {
+ JobConf workCfg = workToConf.get(w);
+ if (workCfg != null) {
+ Utilities.clearWorkMapForConf(workCfg);
+ }
+ }
+
if (cleanContext) {
try {
ctx.clear();
@@ -276,8 +288,6 @@ public class TezTask extends Task<TezWork> {
throws Exception {
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_BUILD_DAG);
- Map<BaseWork, Vertex> workToVertex = new HashMap<BaseWork, Vertex>();
- Map<BaseWork, JobConf> workToConf = new HashMap<BaseWork, JobConf>();
// getAllWork returns a topologically sorted list, which we use to make
// sure that vertices are created before they are used in edges.
[38/50] [abbrv] hive git commit: HIVE-6091 : Empty pipeout files are
created for connection create/close (Thiruvel Thirumoolan,
Bing Li via Ashutosh Chauhan)
Posted by xu...@apache.org.
HIVE-6091 : Empty pipeout files are created for connection create/close (Thiruvel Thirumoolan, Bing Li via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6e8eeb74
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6e8eeb74
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6e8eeb74
Branch: refs/heads/beeline-cli
Commit: 6e8eeb7439d44b2e37f70a77f2abc27d59ef8993
Parents: 7cfe374
Author: Thiruvel Thirumoolan <th...@yahoo-inc.com>
Authored: Fri Dec 20 14:09:00 2013 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Wed Sep 23 08:34:21 2015 -0700
----------------------------------------------------------------------
ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java | 3 +++
1 file changed, 3 insertions(+)
----------------------------------------------------------------------
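The three added lines remove the session's temporary output file (the .pipeout file) when the session is dropped, so connections that never ran a query no longer leave empty files behind. As the diff shows, the patch assumes getTmpOutputFile() is non-null at drop time; a more defensive variant of the same cleanup, sketched with a hypothetical helper, might look like:

    import java.io.File;

    final class SessionCleanupSketch {
        // Best-effort removal of a session-scoped temp file on session close.
        static void deleteTmpOutputFile(File tmpOutputFile) {
            // Guard against sessions that never materialized a pipeout file.
            if (tmpOutputFile != null && tmpOutputFile.exists()) {
                if (!tmpOutputFile.delete()) {
                    // Fall back to JVM-exit cleanup if the delete is refused.
                    tmpOutputFile.deleteOnExit();
                }
            }
        }
    }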
http://git-wip-us.apache.org/repos/asf/hive/blob/6e8eeb74/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
index 5f528167..014941e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
@@ -677,6 +677,9 @@ public class SessionState {
if (localSessionPath != null) {
FileSystem.getLocal(conf).delete(localSessionPath, true);
}
+ if (this.getTmpOutputFile().exists()) {
+ this.getTmpOutputFile().delete();
+ }
}
/**
[31/50] [abbrv] hive git commit: HIVE-11762: TestHCatLoaderEncryption
failures when using Hadoop 2.7 (Jason Dere, reviewed by Sergio Pena)
Posted by xu...@apache.org.
HIVE-11762: TestHCatLoaderEncryption failures when using Hadoop 2.7 (Jason Dere, reviewed by Sergio Pena)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1c52a7e7
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1c52a7e7
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1c52a7e7
Branch: refs/heads/beeline-cli
Commit: 1c52a7e72ab9c2a27902592599bc588e9e3d8be8
Parents: 5a5539c
Author: Jason Dere <jd...@hortonworks.com>
Authored: Tue Sep 22 16:31:07 2015 -0700
Committer: Jason Dere <jd...@hortonworks.com>
Committed: Tue Sep 22 16:31:07 2015 -0700
----------------------------------------------------------------------
shims/0.23/pom.xml | 1 -
.../apache/hadoop/hive/shims/Hadoop23Shims.java | 23 +++++++++++++++++++-
2 files changed, 22 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
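DFSClient.setKeyProvider() changed its parameter type in Hadoop 2.7, so the shim probes for the new signature by reflection and falls back to the direct call on older versions. The same probe-and-fallback pattern in isolation, using a hypothetical Greeter API rather than the Hadoop classes:

    import java.lang.reflect.Method;

    final class CompatShimSketch {
        // Invoke target.greet(CharSequence) if that newer overload exists,
        // otherwise fall back to the original greet(String) overload.
        static void greetCompat(Object target, String name) throws Exception {
            Method newApi = null;
            try {
                newApi = target.getClass().getMethod("greet", CharSequence.class);
            } catch (NoSuchMethodException ignored) {
                // Older library version; the original signature is used below.
            }
            if (newApi != null) {
                newApi.invoke(target, name);
            } else {
                target.getClass().getMethod("greet", String.class).invoke(target, name);
            }
        }
    }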
http://git-wip-us.apache.org/repos/asf/hive/blob/1c52a7e7/shims/0.23/pom.xml
----------------------------------------------------------------------
diff --git a/shims/0.23/pom.xml b/shims/0.23/pom.xml
index 2e16956..3b1fb97 100644
--- a/shims/0.23/pom.xml
+++ b/shims/0.23/pom.xml
@@ -61,7 +61,6 @@
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>${hadoop-23.version}</version>
- <optional>true</optional>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
http://git-wip-us.apache.org/repos/asf/hive/blob/1c52a7e7/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
----------------------------------------------------------------------
diff --git a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
index 83369ee..c08e76d 100644
--- a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
+++ b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
@@ -532,13 +532,34 @@ public class Hadoop23Shims extends HadoopShimsSecure {
// else the updates do not get flushed properly
KeyProviderCryptoExtension keyProvider = miniDFSCluster.getNameNode().getNamesystem().getProvider();
if (keyProvider != null) {
- miniDFSCluster.getFileSystem().getClient().setKeyProvider(keyProvider);
+ try {
+ setKeyProvider(miniDFSCluster.getFileSystem().getClient(), keyProvider);
+ } catch (Exception err) {
+ throw new IOException(err);
+ }
}
cluster = new MiniDFSShim(miniDFSCluster);
return cluster;
}
+ private static void setKeyProvider(DFSClient dfsClient, KeyProviderCryptoExtension provider)
+ throws Exception {
+ Method setKeyProviderHadoop27Method = null;
+ try {
+ setKeyProviderHadoop27Method = DFSClient.class.getMethod("setKeyProvider", KeyProvider.class);
+ } catch (NoSuchMethodException err) {
+ // We can just use setKeyProvider() as it is
+ }
+
+ if (setKeyProviderHadoop27Method != null) {
+ // Method signature changed in Hadoop 2.7. Cast provider to KeyProvider
+ setKeyProviderHadoop27Method.invoke(dfsClient, (KeyProvider) provider);
+ } else {
+ dfsClient.setKeyProvider(provider);
+ }
+ }
+
/**
* MiniDFSShim.
*
[22/50] [abbrv] hive git commit: HIVE-11849: NPE in
HiveHBaseTableSnapshotInputFormat in query with just count(*) (Enis Soztutar
via Jason Dere)
Posted by xu...@apache.org.
HIVE-11849: NPE in HiveHBaseTableSnapshotInputFormat in query with just count(*) (Enis Soztutar via Jason Dere)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2a65989a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2a65989a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2a65989a
Branch: refs/heads/beeline-cli
Commit: 2a65989a48e043404b7060296be8da9d3494911e
Parents: 4ff5b25
Author: Jason Dere <jd...@hortonworks.com>
Authored: Mon Sep 21 10:59:21 2015 -0700
Committer: Jason Dere <jd...@hortonworks.com>
Committed: Mon Sep 21 10:59:21 2015 -0700
----------------------------------------------------------------------
.../HiveHBaseTableSnapshotInputFormat.java | 21 ++++++++++++-------
.../queries/positive/hbase_handler_snapshot.q | 4 ++++
.../positive/hbase_handler_snapshot.q.out | 22 ++++++++++++++++++++
3 files changed, 39 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
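The NPE came from rebuilding a column list out of scan.getFamilies(), which can be null when the query projects no columns (the bare count(*) case). The fix instead serializes the whole Scan into the job conf in the form the mapreduce TableInputFormat expects. Condensed from the diff below (HBase 1.x-era client API):

    import java.io.IOException;

    import org.apache.hadoop.hbase.client.Scan;
    import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
    import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
    import org.apache.hadoop.hbase.util.Base64;
    import org.apache.hadoop.mapred.JobConf;

    final class ScanConfSketch {
        // Store the Scan under the mapreduce SCAN key as base64-encoded protobuf,
        // rather than deriving a COLUMN_LIST that can be null for count(*).
        static void setScan(JobConf job, Scan scan) throws IOException {
            ClientProtos.Scan proto = ProtobufUtil.toScan(scan);
            job.set(org.apache.hadoop.hbase.mapreduce.TableInputFormat.SCAN,
                Base64.encodeBytes(proto.toByteArray()));
        }
    }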
http://git-wip-us.apache.org/repos/asf/hive/blob/2a65989a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableSnapshotInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableSnapshotInputFormat.java b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableSnapshotInputFormat.java
index 45e4de9..aa3a02f 100644
--- a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableSnapshotInputFormat.java
+++ b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableSnapshotInputFormat.java
@@ -24,6 +24,9 @@ import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapred.TableInputFormat;
import org.apache.hadoop.hbase.mapred.TableSnapshotInputFormat;
+import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
+import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
+import org.apache.hadoop.hbase.util.Base64;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputFormat;
@@ -41,15 +44,17 @@ public class HiveHBaseTableSnapshotInputFormat
TableSnapshotInputFormat delegate = new TableSnapshotInputFormat();
private static void setColumns(JobConf job) throws IOException {
- // hbase mapred API doesn't support scan at the moment.
Scan scan = HiveHBaseInputFormatUtil.getScan(job);
- byte[][] families = scan.getFamilies();
- StringBuilder sb = new StringBuilder();
- for (int i = 0; i < families.length; i++) {
- if (i > 0) sb.append(" ");
- sb.append(Bytes.toString(families[i]));
- }
- job.set(TableInputFormat.COLUMN_LIST, sb.toString());
+ job.set(org.apache.hadoop.hbase.mapreduce.TableInputFormat.SCAN,
+ convertScanToString(scan));
+ }
+
+ // TODO: Once HBASE-11163 is completed, use that API, or switch to
+ // using the mapreduce version of the APIs rather than mapred.
+ // Copied from HBase's TableMapReduceUtil since it is not a public API
+ static String convertScanToString(Scan scan) throws IOException {
+ ClientProtos.Scan proto = ProtobufUtil.toScan(scan);
+ return Base64.encodeBytes(proto.toByteArray());
}
@Override
http://git-wip-us.apache.org/repos/asf/hive/blob/2a65989a/hbase-handler/src/test/queries/positive/hbase_handler_snapshot.q
----------------------------------------------------------------------
diff --git a/hbase-handler/src/test/queries/positive/hbase_handler_snapshot.q b/hbase-handler/src/test/queries/positive/hbase_handler_snapshot.q
index 11d52fd..ebdc63c 100644
--- a/hbase-handler/src/test/queries/positive/hbase_handler_snapshot.q
+++ b/hbase-handler/src/test/queries/positive/hbase_handler_snapshot.q
@@ -2,3 +2,7 @@ SET hive.hbase.snapshot.name=src_hbase_snapshot;
SET hive.hbase.snapshot.restoredir=/tmp;
SELECT * FROM src_hbase LIMIT 5;
+
+SELECT value FROM src_hbase LIMIT 5;
+
+select count(*) from src_hbase;
http://git-wip-us.apache.org/repos/asf/hive/blob/2a65989a/hbase-handler/src/test/results/positive/hbase_handler_snapshot.q.out
----------------------------------------------------------------------
diff --git a/hbase-handler/src/test/results/positive/hbase_handler_snapshot.q.out b/hbase-handler/src/test/results/positive/hbase_handler_snapshot.q.out
index 1cb18b2..731646c 100644
--- a/hbase-handler/src/test/results/positive/hbase_handler_snapshot.q.out
+++ b/hbase-handler/src/test/results/positive/hbase_handler_snapshot.q.out
@@ -11,3 +11,25 @@ POSTHOOK: Input: default@src_hbase
100 val_100
103 val_103
104 val_104
+PREHOOK: query: SELECT value FROM src_hbase LIMIT 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_hbase
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT value FROM src_hbase LIMIT 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_hbase
+#### A masked pattern was here ####
+val_0
+val_10
+val_100
+val_103
+val_104
+PREHOOK: query: select count(*) from src_hbase
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_hbase
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from src_hbase
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_hbase
+#### A masked pattern was here ####
+309
[16/50] [abbrv] hive git commit: HIVE-11512: Hive LDAP Authenticator
should also support full DN in Authenticate() (Naveen Gangam via Chaoyu Tang)
Posted by xu...@apache.org.
HIVE-11512: Hive LDAP Authenticator should also support full DN in Authenticate() (Naveen Gangam via Chaoyu Tang)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/cc78dd5d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/cc78dd5d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/cc78dd5d
Branch: refs/heads/beeline-cli
Commit: cc78dd5d8ce1e5a77ecf9f65b9bd19cccf158ac3
Parents: ae588db
Author: ctang <ct...@gmail.com>
Authored: Fri Sep 18 22:02:22 2015 -0400
Committer: ctang <ct...@gmail.com>
Committed: Fri Sep 18 22:03:42 2015 -0400
----------------------------------------------------------------------
.../auth/LdapAuthenticationProviderImpl.java | 82 ++++++++++++++++++--
1 file changed, 76 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
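The patch lets Authenticate() accept either a bare user name or a full DN: isDN() spots a DN by the presence of '=', extractName() pulls the leading attribute value out of it, and extractBaseDN() drops the first RDN to get a search base. A quick demonstration of what those string helpers yield for a typical DN (expected outputs inferred from the substring logic in the diff):

    final class DnHelpersDemo {
        public static void main(String[] args) {
            String dn = "uid=alice,ou=people,dc=example,dc=com";
            // isDN: any '=' marks the string as a DN rather than a bare name.
            System.out.println(dn.indexOf("=") > -1);               // true
            // extractName: text between the first '=' and the first ','.
            System.out.println(dn.substring(dn.indexOf("=") + 1,
                dn.indexOf(",")));                                  // alice
            // extractBaseDN: everything after the first ','.
            System.out.println(dn.substring(dn.indexOf(",") + 1));  // ou=people,dc=example,dc=com
        }
    }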
http://git-wip-us.apache.org/repos/asf/hive/blob/cc78dd5d/service/src/java/org/apache/hive/service/auth/LdapAuthenticationProviderImpl.java
----------------------------------------------------------------------
diff --git a/service/src/java/org/apache/hive/service/auth/LdapAuthenticationProviderImpl.java b/service/src/java/org/apache/hive/service/auth/LdapAuthenticationProviderImpl.java
index 0c7cede..b2c4daf 100644
--- a/service/src/java/org/apache/hive/service/auth/LdapAuthenticationProviderImpl.java
+++ b/service/src/java/org/apache/hive/service/auth/LdapAuthenticationProviderImpl.java
@@ -146,15 +146,28 @@ public class LdapAuthenticationProviderImpl implements PasswdAuthenticationProvi
DirContext ctx = null;
String userDN = null;
+ String userName = null;
try {
// Create initial context
ctx = new InitialDirContext(env);
+ if (isDN(user)) {
+ userName = extractName(user);
+ } else {
+ userName = user;
+ }
+
if (userFilter == null && groupFilter == null && customQuery == null) {
- userDN = findUserDNByPattern(ctx, user);
+ if (isDN(user)) {
+ userDN = findUserDNByDN(ctx, user);
+ } else {
+ if (userDN == null) {
+ userDN = findUserDNByPattern(ctx, user);
+ }
- if (userDN == null) {
- userDN = findUserDNByName(ctx, baseDN, user);
+ if (userDN == null) {
+ userDN = findUserDNByName(ctx, baseDN, user);
+ }
}
// This should not be null because we were allowed to bind with this username
@@ -185,7 +198,7 @@ public class LdapAuthenticationProviderImpl implements PasswdAuthenticationProvi
boolean success = false;
for (String filteredUser : userFilter) {
- if (filteredUser.equalsIgnoreCase(user)) {
+ if (filteredUser.equalsIgnoreCase(userName)) {
LOG.debug("User filter partially satisfied");
success = true;
break;
@@ -198,7 +211,7 @@ public class LdapAuthenticationProviderImpl implements PasswdAuthenticationProvi
"of specified list");
}
- userDN = findUserDNByPattern(ctx, user);
+ userDN = findUserDNByPattern(ctx, userName);
if (userDN != null) {
LOG.info("User filter entirely satisfied");
} else {
@@ -214,7 +227,7 @@ public class LdapAuthenticationProviderImpl implements PasswdAuthenticationProvi
// if only groupFilter is configured.
if (userDN == null) {
- userDN = findUserDNByName(ctx, baseDN, user);
+ userDN = findUserDNByName(ctx, baseDN, userName);
}
List<String> userGroups = getGroupsForUser(ctx, userDN);
@@ -395,6 +408,44 @@ public class LdapAuthenticationProviderImpl implements PasswdAuthenticationProvi
return null;
}
+ /**
+ * This helper method verifies that a given full DN actually exists in the directory.
+ * It searches under the baseDN extracted from the DN itself for an entity with
+ * objectClass=person||user whose distinguished name attribute matches the given DN,
+ * and requires exactly one match.
+ * @param ctx DirContext for the LDAP Connection.
+ * @param userDN Full distinguished name of the user to be located in LDAP.
+ * @return the given DN if exactly one matching user is found, null otherwise.
+ */
+ public static String findUserDNByDN(DirContext ctx, String userDN)
+ throws NamingException {
+ if (!isDN(userDN)) {
+ return null;
+ }
+
+ String baseDN = extractBaseDN(userDN);
+ List<String> results = null;
+ String searchFilter = "(&(|(objectClass=person)(objectClass=user))(" + DN_ATTR + "="
+ + userDN + "))";
+
+ results = findDNByName(ctx, baseDN, searchFilter, 2);
+
+ if (results == null) {
+ return null;
+ }
+
+ if (results.size() > 1) {
+ // make sure there is not another item available, there should be only 1 match
+ LOG.info("Matched multiple users for the user: " + userDN + ", returning null");
+ return null;
+ }
+ return userDN;
+ }
+
public static List<String> findDNByName(DirContext ctx, String baseDN,
String searchString, int limit) throws NamingException {
SearchResult searchResult = null;
@@ -507,4 +558,23 @@ public class LdapAuthenticationProviderImpl implements PasswdAuthenticationProvi
}
return list;
}
+
+ public static boolean isDN(String name) {
+ return (name.indexOf("=") > -1);
+ }
+
+ public static String extractName(String dn) {
+ int eq = dn.indexOf("=");
+ if (eq > -1) {
+ int comma = dn.indexOf(",");
+ // A single-RDN DN (e.g. "cn=admin") has no comma; take the rest of the string.
+ return comma > -1 ? dn.substring(eq + 1, comma) : dn.substring(eq + 1);
+ }
+ return dn;
+ }
+
+ public static String extractBaseDN(String dn) {
+ if (dn.indexOf(",") > -1) {
+ return dn.substring(dn.indexOf(",") + 1);
+ }
+ return null;
+ }
+
}
[15/50] [abbrv] hive git commit: HIVE-6758: Beeline doesn't work with
-e option when started in background (Mohit via Xuefu)
Posted by xu...@apache.org.
HIVE-6758: Beeline doesn't work with -e option when started in background (Mohit via Xuefu)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ae588db5
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ae588db5
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ae588db5
Branch: refs/heads/beeline-cli
Commit: ae588db5e999b948dc3fae9171d6bfcb2380560b
Parents: f08a033
Author: Xuefu Zhang <xz...@Cloudera.com>
Authored: Fri Sep 18 15:22:41 2015 -0700
Committer: Xuefu Zhang <xz...@Cloudera.com>
Committed: Fri Sep 18 15:22:41 2015 -0700
----------------------------------------------------------------------
bin/beeline | 5 +++++
1 file changed, 5 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/ae588db5/bin/beeline
----------------------------------------------------------------------
diff --git a/bin/beeline b/bin/beeline
index bceb7b9..2fb05bc 100644
--- a/bin/beeline
+++ b/bin/beeline
@@ -22,4 +22,9 @@ bin=`cd "$bin"; pwd`
# hive lib instead of hadoop lib.
export HADOOP_USER_CLASSPATH_FIRST=true
+# If process is backgrounded, don't change terminal settings
+if [[ ! $(ps -o stat= -p $$) =~ "+" ]]; then
+ export HADOOP_CLIENT_OPTS="$HADOOP_CLIENT_OPTS -Djline.terminal=jline.UnsupportedTerminal"
+fi
+
. "$bin"/hive --service beeline "$@"
[37/50] [abbrv] hive git commit: HIVE-11468: Vectorize Struct IN()
clauses (Matt McCline, via Gopal V)
Posted by xu...@apache.org.
HIVE-11468: Vectorize Struct IN() clauses (Matt McCline, via Gopal V)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7cfe3743
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7cfe3743
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7cfe3743
Branch: refs/heads/beeline-cli
Commit: 7cfe3743ff583386653bdd32c79f2c44ffe734ba
Parents: 2e8324e
Author: Gopal V <go...@apache.org>
Authored: Tue Sep 22 19:39:49 2015 -0700
Committer: Gopal V <go...@apache.org>
Committed: Tue Sep 22 23:24:14 2015 -0700
----------------------------------------------------------------------
.../ql/exec/vector/VectorizationContext.java | 203 +-
.../expressions/FilterStringColumnInList.java | 13 +-
.../expressions/FilterStructColumnInList.java | 178 ++
.../exec/vector/expressions/IStructInExpr.java | 36 +
.../vector/expressions/StringColumnInList.java | 4 +
.../vector/expressions/StructColumnInList.java | 174 ++
.../hive/ql/optimizer/physical/Vectorizer.java | 71 +-
.../ql/optimizer/physical/Vectorizer.java.orig | 1744 ++++++++++++++++++
.../ql/optimizer/physical/Vectorizer.java.rej | 86 +
.../queries/clientpositive/vector_struct_in.q | 247 +++
.../clientpositive/vector_struct_in.q.out | 825 +++++++++
11 files changed, 3566 insertions(+), 15 deletions(-)
----------------------------------------------------------------------
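The core trick: reduce a struct IN() test to the already-vectorized string IN() test. Both the IN-list constants (once, at plan time) and each row's struct fields (per batch, into a scratch bytes column) are serialized with BinarySortableSerializeWrite into a single byte key, and FilterStringColumnInList's hash lookup does the membership test on those keys. A simplified sketch of that reduction, using a plain HashSet instead of the Cuckoo-hashed set and a hypothetical (long, string) struct:

    import java.nio.ByteBuffer;
    import java.nio.charset.StandardCharsets;
    import java.util.HashSet;
    import java.util.Set;

    final class StructInSketch {
        // Serialize one (long, String) struct into a single byte key.
        static ByteBuffer key(long a, String b) {
            byte[] s = b.getBytes(StandardCharsets.UTF_8);
            ByteBuffer buf = ByteBuffer.allocate(Long.BYTES + s.length);
            buf.putLong(a).put(s);
            buf.flip(); // ByteBuffer equality compares remaining content
            return buf;
        }

        public static void main(String[] args) {
            // Plan time: serialize the IN-list constants once.
            Set<ByteBuffer> inList = new HashSet<>();
            inList.add(key(1L, "a"));
            inList.add(key(2L, "b"));

            // Run time: serialize each row's fields and probe the set.
            System.out.println(inList.contains(key(1L, "a"))); // true
            System.out.println(inList.contains(key(1L, "b"))); // false
        }
    }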
http://git-wip-us.apache.org/repos/asf/hive/blob/7cfe3743/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 2483196..46c2a78 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -104,20 +104,30 @@ import org.apache.hadoop.hive.ql.udf.UDFToLong;
import org.apache.hadoop.hive.ql.udf.UDFToShort;
import org.apache.hadoop.hive.ql.udf.UDFToString;
import org.apache.hadoop.hive.ql.udf.generic.*;
+import org.apache.hadoop.hive.serde2.ByteStream.Output;
+import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;
import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.StringUtils;
import org.apache.hive.common.util.DateUtils;
+
/**
* Context class for vectorization execution.
* Main role is to map column names to column indices and serves as a
@@ -1273,17 +1283,208 @@ public class VectorizationContext {
}
}
+ public enum InConstantType {
+ INT_FAMILY,
+ TIMESTAMP,
+ DATE,
+ FLOAT_FAMILY,
+ STRING_FAMILY,
+ DECIMAL
+ }
+
+ public static InConstantType getInConstantTypeFromPrimitiveCategory(PrimitiveCategory primitiveCategory) {
+
+ switch (primitiveCategory) {
+ case BOOLEAN:
+ case BYTE:
+ case SHORT:
+ case INT:
+ case LONG:
+ return InConstantType.INT_FAMILY;
+
+ case DATE:
+ return InConstantType.DATE;
+
+ case TIMESTAMP:
+ return InConstantType.TIMESTAMP;
+
+ case FLOAT:
+ case DOUBLE:
+ return InConstantType.FLOAT_FAMILY;
+
+ case STRING:
+ case CHAR:
+ case VARCHAR:
+ case BINARY:
+ return InConstantType.STRING_FAMILY;
+
+ case DECIMAL:
+ return InConstantType.DECIMAL;
+
+
+ case INTERVAL_YEAR_MONTH:
+ case INTERVAL_DAY_TIME:
+ // UNDONE: Fall through for these... they don't appear to be supported yet.
+ default:
+ throw new RuntimeException("Unexpected primitive type category " + primitiveCategory);
+ }
+ }
+
+ private VectorExpression getStructInExpression(List<ExprNodeDesc> childExpr, ExprNodeDesc colExpr,
+ TypeInfo colTypeInfo, List<ExprNodeDesc> inChildren, Mode mode, TypeInfo returnType)
+ throws HiveException {
+
+ VectorExpression expr = null;
+
+ StructTypeInfo structTypeInfo = (StructTypeInfo) colTypeInfo;
+
+ ArrayList<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
+ final int fieldCount = fieldTypeInfos.size();
+ ColumnVector.Type[] fieldVectorColumnTypes = new ColumnVector.Type[fieldCount];
+ InConstantType[] fieldInConstantTypes = new InConstantType[fieldCount];
+ for (int f = 0; f < fieldCount; f++) {
+ TypeInfo fieldTypeInfo = fieldTypeInfos.get(f);
+ // Only primitive fields are supported for now.
+ if (fieldTypeInfo.getCategory() != Category.PRIMITIVE) {
+ return null;
+ }
+
+ // We are going to serialize using the 4 basic types.
+ ColumnVector.Type fieldVectorColumnType = getColumnVectorTypeFromTypeInfo(fieldTypeInfo);
+ fieldVectorColumnTypes[f] = fieldVectorColumnType;
+
+ // We currently evaluate the IN (..) constants in special ways.
+ PrimitiveCategory fieldPrimitiveCategory =
+ ((PrimitiveTypeInfo) fieldTypeInfo).getPrimitiveCategory();
+ InConstantType inConstantType = getInConstantTypeFromPrimitiveCategory(fieldPrimitiveCategory);
+ fieldInConstantTypes[f] = inConstantType;
+ }
+
+ Output buffer = new Output();
+ BinarySortableSerializeWrite binarySortableSerializeWrite =
+ new BinarySortableSerializeWrite(fieldCount);
+
+ final int inChildrenCount = inChildren.size();
+ byte[][] serializedInChildren = new byte[inChildrenCount][];
+ try {
+ for (int i = 0; i < inChildrenCount; i++) {
+ final ExprNodeDesc node = inChildren.get(i);
+ final Object[] constants;
+
+ if (node instanceof ExprNodeConstantDesc) {
+ ExprNodeConstantDesc constNode = (ExprNodeConstantDesc) node;
+ ConstantObjectInspector output = constNode.getWritableObjectInspector();
+ constants = ((List<?>) output.getWritableConstantValue()).toArray();
+ } else {
+ ExprNodeGenericFuncDesc exprNode = (ExprNodeGenericFuncDesc) node;
+ ExprNodeEvaluator<?> evaluator = ExprNodeEvaluatorFactory
+ .get(exprNode);
+ ObjectInspector output = evaluator.initialize(exprNode
+ .getWritableObjectInspector());
+ constants = (Object[]) evaluator.evaluate(null);
+ }
+
+ binarySortableSerializeWrite.set(buffer);
+ for (int f = 0; f < fieldCount; f++) {
+ Object constant = constants[f];
+ if (constant == null) {
+ binarySortableSerializeWrite.writeNull();
+ } else {
+ InConstantType inConstantType = fieldInConstantTypes[f];
+ switch (inConstantType) {
+ case STRING_FAMILY:
+ {
+ byte[] bytes;
+ if (constant instanceof Text) {
+ Text text = (Text) constant;
+ bytes = text.getBytes();
+ binarySortableSerializeWrite.writeString(bytes, 0, text.getLength());
+ } else {
+ throw new HiveException("Unexpected constant String type " +
+ constant.getClass().getSimpleName());
+ }
+ }
+ break;
+ case INT_FAMILY:
+ {
+ long value;
+ if (constant instanceof IntWritable) {
+ value = ((IntWritable) constant).get();
+ } else if (constant instanceof LongWritable) {
+ value = ((LongWritable) constant).get();
+ } else {
+ throw new HiveException("Unexpected constant Long type " +
+ constant.getClass().getSimpleName());
+ }
+ binarySortableSerializeWrite.writeLong(value);
+ }
+ break;
+
+ case FLOAT_FAMILY:
+ {
+ double value;
+ if (constant instanceof DoubleWritable) {
+ value = ((DoubleWritable) constant).get();
+ } else {
+ throw new HiveException("Unexpected constant Double type " +
+ constant.getClass().getSimpleName());
+ }
+ binarySortableSerializeWrite.writeDouble(value);
+ }
+ break;
+
+ // UNDONE...
+ case DATE:
+ case TIMESTAMP:
+ case DECIMAL:
+ default:
+ throw new RuntimeException("Unexpected IN constant type " + inConstantType.name());
+ }
+ }
+ }
+ serializedInChildren[i] = Arrays.copyOfRange(buffer.getData(), 0, buffer.getLength());
+ }
+ } catch (Exception e) {
+ throw new HiveException(e);
+ }
+
+ // Create a single child representing the scratch column where we will
+ // generate the serialized keys of the batch.
+ int scratchBytesCol = ocm.allocateOutputColumn("string");
+
+ Class<?> cl = (mode == Mode.FILTER ? FilterStructColumnInList.class : StructColumnInList.class);
+
+ expr = createVectorExpression(cl, null, Mode.PROJECTION, returnType);
+
+ ((IStringInExpr) expr).setInListValues(serializedInChildren);
+
+ ((IStructInExpr) expr).setScratchBytesColumn(scratchBytesCol);
+ ((IStructInExpr) expr).setStructColumnExprs(this, colExpr.getChildren(),
+ fieldVectorColumnTypes);
+
+ return expr;
+ }
+
/**
* Create a filter or boolean-valued expression for column IN ( <list-of-constants> )
*/
private VectorExpression getInExpression(List<ExprNodeDesc> childExpr, Mode mode, TypeInfo returnType)
throws HiveException {
ExprNodeDesc colExpr = childExpr.get(0);
+ List<ExprNodeDesc> inChildren = childExpr.subList(1, childExpr.size());
String colType = colExpr.getTypeString();
+ colType = VectorizationContext.mapTypeNameSynonyms(colType);
+ TypeInfo colTypeInfo = TypeInfoUtils.getTypeInfoFromTypeString(colType);
+ Category category = colTypeInfo.getCategory();
+ if (category == Category.STRUCT) {
+ return getStructInExpression(childExpr, colExpr, colTypeInfo, inChildren, mode, returnType);
+ } else if (category != Category.PRIMITIVE) {
+ return null;
+ }
// prepare arguments for createVectorExpression
- List<ExprNodeDesc> childrenForInList = evaluateCastOnConstants(childExpr.subList(1, childExpr.size()));
+ List<ExprNodeDesc> childrenForInList = evaluateCastOnConstants(inChildren);
/* This method assumes that the IN list has no NULL entries. That is enforced elsewhere,
* in the Vectorizer class. If NULL is passed in as a list entry, behavior is not defined.
http://git-wip-us.apache.org/repos/asf/hive/blob/7cfe3743/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColumnInList.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColumnInList.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColumnInList.java
index 2434e90..e34ec75 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColumnInList.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColumnInList.java
@@ -20,16 +20,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.udf.UDFLike;
-import org.apache.hadoop.io.Text;
-
-import java.util.Arrays;
-import java.util.List;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
/**
* Evaluate an IN filter on a batch for a vector of strings.
@@ -165,6 +156,10 @@ public class FilterStringColumnInList extends VectorExpression implements IStrin
return "boolean";
}
+ public void setInputColumn(int inputCol) {
+ this.inputCol = inputCol;
+ }
+
@Override
public int getOutputColumn() {
return -1;
http://git-wip-us.apache.org/repos/asf/hive/blob/7cfe3743/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStructColumnInList.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStructColumnInList.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStructColumnInList.java
new file mode 100644
index 0000000..00f22bb
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStructColumnInList.java
@@ -0,0 +1,178 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.serde2.ByteStream.Output;
+import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;
+
+/**
+ * Evaluate an IN filter on a batch for a vector of structs.
+ * This is optimized so that no objects have to be created in
+ * the inner loop, and there is a hash table implemented
+ * with Cuckoo hashing that has fast lookup to do the IN test.
+ */
+public class FilterStructColumnInList extends FilterStringColumnInList implements IStructInExpr {
+ private static final long serialVersionUID = 1L;
+ private VectorExpression[] structExpressions;
+ private ColumnVector.Type[] fieldVectorColumnTypes;
+ private int[] structColumnMap;
+ private int scratchBytesColumn;
+
+ private transient Output buffer;
+ private transient BinarySortableSerializeWrite binarySortableSerializeWrite;
+
+ /**
+ * After construction you must call setInListValues() to add the values to the IN set
+ * (on the IStringInExpr interface).
+ *
+ * And, call setScratchBytesColumn() and setStructColumnExprs() on the IStructInExpr interface.
+ */
+ public FilterStructColumnInList() {
+ super(-1);
+ }
+
+ @Override
+ public void evaluate(VectorizedRowBatch batch) {
+
+ final int logicalSize = batch.size;
+ if (logicalSize == 0) {
+ return;
+ }
+
+ if (buffer == null) {
+ buffer = new Output();
+ binarySortableSerializeWrite = new BinarySortableSerializeWrite(structColumnMap.length);
+ }
+
+ for (VectorExpression ve : structExpressions) {
+ ve.evaluate(batch);
+ }
+
+ BytesColumnVector scratchBytesColumnVector = (BytesColumnVector) batch.cols[scratchBytesColumn];
+
+ try {
+ boolean selectedInUse = batch.selectedInUse;
+ int[] selected = batch.selected;
+ for (int logical = 0; logical < logicalSize; logical++) {
+ int batchIndex = (selectedInUse ? selected[logical] : logical);
+
+ binarySortableSerializeWrite.set(buffer);
+ for (int f = 0; f < structColumnMap.length; f++) {
+ int fieldColumn = structColumnMap[f];
+ ColumnVector colVec = batch.cols[fieldColumn];
+ int adjustedIndex = (colVec.isRepeating ? 0 : batchIndex);
+ if (colVec.noNulls || !colVec.isNull[adjustedIndex]) {
+ switch (fieldVectorColumnTypes[f]) {
+ case BYTES:
+ {
+ BytesColumnVector bytesColVec = (BytesColumnVector) colVec;
+ byte[] bytes = bytesColVec.vector[adjustedIndex];
+ int start = bytesColVec.start[adjustedIndex];
+ int length = bytesColVec.length[adjustedIndex];
+ binarySortableSerializeWrite.writeString(bytes, start, length);
+ }
+ break;
+
+ case LONG:
+ binarySortableSerializeWrite.writeLong(((LongColumnVector) colVec).vector[adjustedIndex]);
+ break;
+
+ case DOUBLE:
+ binarySortableSerializeWrite.writeDouble(((DoubleColumnVector) colVec).vector[adjustedIndex]);
+ break;
+
+ case DECIMAL:
+ binarySortableSerializeWrite.writeHiveDecimal(
+ ((DecimalColumnVector) colVec).vector[adjustedIndex].getHiveDecimal());
+ break;
+
+ default:
+ throw new RuntimeException("Unexpected vector column type " +
+ fieldVectorColumnTypes[f].name());
+ }
+ } else {
+ binarySortableSerializeWrite.writeNull();
+ }
+ }
+ scratchBytesColumnVector.setVal(batchIndex, buffer.getData(), 0, buffer.getLength());
+ }
+
+ // Now, take the serialized keys we just wrote into our scratch column and look them
+ // up in the IN list.
+ super.evaluate(batch);
+
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+
+ }
+
+
+ @Override
+ public String getOutputType() {
+ return "boolean";
+ }
+
+ @Override
+ public int getOutputColumn() {
+ return -1;
+ }
+
+ @Override
+ public Descriptor getDescriptor() {
+
+ // This VectorExpression (IN) is a special case, so don't return a descriptor.
+ return null;
+ }
+
+ @Override
+ public void setScratchBytesColumn(int scratchBytesColumn) {
+
+ // Tell our super class FilterStringColumnInList it will be evaluating our scratch
+ // BytesColumnVector.
+ super.setInputColumn(scratchBytesColumn);
+ this.scratchBytesColumn = scratchBytesColumn;
+ }
+
+ @Override
+ public void setStructColumnExprs(VectorizationContext vContext,
+ List<ExprNodeDesc> structColumnExprs, ColumnVector.Type[] fieldVectorColumnTypes)
+ throws HiveException {
+
+ structExpressions = vContext.getVectorExpressions(structColumnExprs);
+ structColumnMap = new int[structExpressions.length];
+ for (int i = 0; i < structColumnMap.length; i++) {
+ VectorExpression ve = structExpressions[i];
+ structColumnMap[i] = ve.getOutputColumn();
+ }
+ this.fieldVectorColumnTypes = fieldVectorColumnTypes;
+ }
+}
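[Editor's aside, not part of the commit] The core trick in FilterStructColumnInList above is to serialize each row's struct fields into one flat byte key in a scratch column, then reuse the single-column string IN machinery (with its Cuckoo-hash set) for the membership test. Below is a minimal standalone sketch of that idea under stated assumptions: all names are hypothetical, DataOutputStream stands in for BinarySortableSerializeWrite, and a plain HashSet stands in for the Cuckoo-hash set.

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.HashSet;
import java.util.Set;

public class StructInSketch {

  // Flatten one (long, String) struct into a single byte key, mirroring how
  // the patch writes each field into a scratch BytesColumnVector. (This toy
  // encoding is simpler and not binary-sortable.)
  static ByteBuffer key(long a, String b) throws IOException {
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(bytes);
    out.writeLong(a);
    out.writeUTF(b);
    return ByteBuffer.wrap(bytes.toByteArray());
  }

  public static void main(String[] args) throws IOException {
    // The IN-list constants, serialized once up front.
    Set<ByteBuffer> inList = new HashSet<>();
    inList.add(key(1L, "x"));
    inList.add(key(2L, "y"));

    // Per-row test: serialize the fields and do a single hash lookup.
    // (The real patch reuses one Output buffer across rows to avoid
    // per-row allocation; this sketch does not.)
    System.out.println(inList.contains(key(1L, "x")));   // true
    System.out.println(inList.contains(key(3L, "z")));   // false
  }
}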
http://git-wip-us.apache.org/repos/asf/hive/blob/7cfe3743/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IStructInExpr.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IStructInExpr.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IStructInExpr.java
new file mode 100644
index 0000000..3b25255
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IStructInExpr.java
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+
+/**
+ * Interface used for both filter and non-filter versions of IN to simplify
+ * VectorizationContext code.
+ */
+public interface IStructInExpr {
+ void setScratchBytesColumn(int scratchBytesColumn);
+ void setStructColumnExprs(VectorizationContext vContext, List<ExprNodeDesc> structColumnExprs,
+ ColumnVector.Type[] fieldVectorColumnTypes) throws HiveException;
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/7cfe3743/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java
index 03833a2..b90e3c0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java
@@ -140,6 +140,10 @@ public class StringColumnInList extends VectorExpression implements IStringInExp
return "boolean";
}
+ public void setInputColumn(int inputCol) {
+ this.inputCol = inputCol;
+ }
+
@Override
public int getOutputColumn() {
return this.outputColumn;
http://git-wip-us.apache.org/repos/asf/hive/blob/7cfe3743/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StructColumnInList.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StructColumnInList.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StructColumnInList.java
new file mode 100644
index 0000000..724497a
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StructColumnInList.java
@@ -0,0 +1,174 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.serde2.ByteStream.Output;
+import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;
+
+/**
+ * Evaluate an IN boolean expression (not a filter) on a batch for a vector of structs.
+ * This is optimized so that no objects have to be created in
+ * the inner loop, and the IN test is a fast lookup in a hash
+ * table implemented with Cuckoo hashing.
+ */
+public class StructColumnInList extends StringColumnInList implements IStructInExpr {
+ private static final long serialVersionUID = 1L;
+ private VectorExpression[] structExpressions;
+ private ColumnVector.Type[] fieldVectorColumnTypes;
+ private int[] structColumnMap;
+ private int scratchBytesColumn;
+
+ private transient Output buffer;
+ private transient BinarySortableSerializeWrite binarySortableSerializeWrite;
+
+ public StructColumnInList() {
+ super();
+ }
+
+ /**
+ * After construction you must call setInListValues() to add the values to the IN set.
+ */
+ public StructColumnInList(int outputColumn) {
+ super(-1, outputColumn);
+ }
+
+ @Override
+ public void evaluate(VectorizedRowBatch batch) {
+
+ final int logicalSize = batch.size;
+ if (logicalSize == 0) {
+ return;
+ }
+
+ if (buffer == null) {
+ buffer = new Output();
+ binarySortableSerializeWrite = new BinarySortableSerializeWrite(structColumnMap.length);
+ }
+
+ for (VectorExpression ve : structExpressions) {
+ ve.evaluate(batch);
+ }
+
+ BytesColumnVector scratchBytesColumnVector = (BytesColumnVector) batch.cols[scratchBytesColumn];
+
+ try {
+ boolean selectedInUse = batch.selectedInUse;
+ int[] selected = batch.selected;
+ for (int logical = 0; logical < logicalSize; logical++) {
+ int batchIndex = (selectedInUse ? selected[logical] : logical);
+
+ binarySortableSerializeWrite.set(buffer);
+ for (int f = 0; f < structColumnMap.length; f++) {
+ int fieldColumn = structColumnMap[f];
+ ColumnVector colVec = batch.cols[fieldColumn];
+ int adjustedIndex = (colVec.isRepeating ? 0 : batchIndex);
+ if (colVec.noNulls || !colVec.isNull[adjustedIndex]) {
+ switch (fieldVectorColumnTypes[f]) {
+ case BYTES:
+ {
+ BytesColumnVector bytesColVec = (BytesColumnVector) colVec;
+ byte[] bytes = bytesColVec.vector[adjustedIndex];
+ int start = bytesColVec.start[adjustedIndex];
+ int length = bytesColVec.length[adjustedIndex];
+ binarySortableSerializeWrite.writeString(bytes, start, length);
+ }
+ break;
+
+ case LONG:
+ binarySortableSerializeWrite.writeLong(((LongColumnVector) colVec).vector[adjustedIndex]);
+ break;
+
+ case DOUBLE:
+ binarySortableSerializeWrite.writeDouble(((DoubleColumnVector) colVec).vector[adjustedIndex]);
+ break;
+
+ case DECIMAL:
+ binarySortableSerializeWrite.writeHiveDecimal(
+ ((DecimalColumnVector) colVec).vector[adjustedIndex].getHiveDecimal());
+ break;
+
+ default:
+ throw new RuntimeException("Unexpected vector column type " +
+ fieldVectorColumnTypes[f].name());
+ }
+ } else {
+ binarySortableSerializeWrite.writeNull();
+ }
+ }
+ scratchBytesColumnVector.setVal(batchIndex, buffer.getData(), 0, buffer.getLength());
+ }
+
+ // Now, take the serialized keys we just wrote into our scratch column and look them
+ // up in the IN list.
+ super.evaluate(batch);
+
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+
+ @Override
+ public String getOutputType() {
+ return "boolean";
+ }
+
+ @Override
+ public Descriptor getDescriptor() {
+
+ // This VectorExpression (IN) is a special case, so don't return a descriptor.
+ return null;
+ }
+
+
+ @Override
+ public void setScratchBytesColumn(int scratchBytesColumn) {
+
+ // Tell our super class StringColumnInList it will be evaluating our scratch
+ // BytesColumnVector.
+ super.setInputColumn(scratchBytesColumn);
+ this.scratchBytesColumn = scratchBytesColumn;
+ }
+
+ @Override
+ public void setStructColumnExprs(VectorizationContext vContext,
+ List<ExprNodeDesc> structColumnExprs, ColumnVector.Type[] fieldVectorColumnTypes)
+ throws HiveException {
+
+ structExpressions = vContext.getVectorExpressions(structColumnExprs);
+ structColumnMap = new int[structExpressions.length];
+ for (int i = 0; i < structColumnMap.length; i++) {
+ VectorExpression ve = structExpressions[i];
+ structColumnMap[i] = ve.getOutputColumn();
+ }
+ this.fieldVectorColumnTypes = fieldVectorColumnTypes;
+ }
+}
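[Editor's aside, not part of the commit] Both struct variants follow the same pattern: compute a derived key into a scratch column, repoint the parent's input column at that scratch column through the new setInputColumn() hook, and let the inherited evaluate() do the membership test unchanged. FilterStructColumnInList filters rows in place (its getOutputColumn() returns -1), while StructColumnInList projects a boolean into outputColumn. A toy model of the repointing pattern follows; all names are hypothetical and String columns stand in for BytesColumnVector.

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

// Base class: tests membership of whatever column index inputCol points at,
// as StringColumnInList does for its input column.
class BaseInList {
  protected int inputCol;

  BaseInList(int inputCol) {
    this.inputCol = inputCol;
  }

  void setInputColumn(int inputCol) {
    this.inputCol = inputCol;
  }

  boolean[] evaluate(String[][] cols, Set<String> inList) {
    String[] values = cols[inputCol];
    boolean[] out = new boolean[values.length];
    for (int i = 0; i < values.length; i++) {
      out[i] = inList.contains(values[i]);
    }
    return out;
  }
}

// Subclass: writes a derived key into a scratch column, repoints the base
// class at that column, then reuses the inherited membership test unchanged.
class ToyStructInList extends BaseInList {
  private final int scratchCol;

  ToyStructInList(int scratchCol) {
    super(-1);                   // real input unknown at construction, as in super(-1) above
    this.scratchCol = scratchCol;
    setInputColumn(scratchCol);  // the setScratchBytesColumn() move
  }

  boolean[] evaluate(String[][] cols, Set<String> inList, int aCol, int bCol) {
    for (int i = 0; i < cols[aCol].length; i++) {
      cols[scratchCol][i] = cols[aCol][i] + "|" + cols[bCol][i]; // derived key
    }
    return super.evaluate(cols, inList);
  }

  public static void main(String[] args) {
    String[][] cols = { {"1", "3"}, {"x", "z"}, new String[2] }; // a, b, scratch
    Set<String> in = new HashSet<>(Arrays.asList("1|x"));
    boolean[] r = new ToyStructInList(2).evaluate(cols, in, 0, 1);
    System.out.println(Arrays.toString(r)); // [true, false]
  }
}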
http://git-wip-us.apache.org/repos/asf/hive/blob/7cfe3743/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 0d4c1d8..da1d9eb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -53,10 +53,12 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLeftSemiString
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterLongOperator;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterMultiKeyOperator;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterStringOperator;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator;
import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator;
import org.apache.hadoop.hive.ql.exec.vector.VectorSMBMapJoinOperator;
import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.InConstantType;
import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
@@ -139,8 +141,11 @@ import org.apache.hadoop.hive.ql.udf.UDFYear;
import org.apache.hadoop.hive.ql.udf.generic.*;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
@@ -575,7 +580,12 @@ public class Vectorizer implements PhysicalPlanResolver {
if (nonVectorizableChildOfGroupBy(op)) {
return new Boolean(true);
}
- boolean ret = validateMapWorkOperator(op, mapWork, isTez);
+ boolean ret;
+ try {
+ ret = validateMapWorkOperator(op, mapWork, isTez);
+ } catch (Exception e) {
+ throw new SemanticException(e);
+ }
if (!ret) {
LOG.info("MapWork Operator: " + op.getName() + " could not be vectorized.");
return new Boolean(false);
@@ -1260,6 +1270,7 @@ public class Vectorizer implements PhysicalPlanResolver {
LOG.info("Cannot vectorize " + desc.toString() + " of type " + typeName);
return false;
}
+ boolean isInExpression = false;
if (desc instanceof ExprNodeGenericFuncDesc) {
ExprNodeGenericFuncDesc d = (ExprNodeGenericFuncDesc) desc;
boolean r = validateGenericUdf(d);
@@ -1267,12 +1278,62 @@ public class Vectorizer implements PhysicalPlanResolver {
LOG.info("Cannot vectorize UDF " + d);
return false;
}
+ GenericUDF genericUDF = d.getGenericUDF();
+ isInExpression = (genericUDF instanceof GenericUDFIn);
}
if (desc.getChildren() != null) {
- for (ExprNodeDesc d: desc.getChildren()) {
- // Don't restrict child expressions for projection. Always use looser FILTER mode.
- boolean r = validateExprNodeDescRecursive(d, VectorExpressionDescriptor.Mode.FILTER);
- if (!r) {
+ if (isInExpression
+ && desc.getChildren().get(0).getTypeInfo().getCategory() == Category.STRUCT) {
+ // Don't restrict child expressions for projection.
+ // Always use loose FILTER mode.
+ if (!validateStructInExpression(desc, VectorExpressionDescriptor.Mode.FILTER)) {
+ return false;
+ }
+ } else {
+ for (ExprNodeDesc d : desc.getChildren()) {
+ // Don't restrict child expressions for projection.
+ // Always use loose FILTER mode.
+ if (!validateExprNodeDescRecursive(d, VectorExpressionDescriptor.Mode.FILTER)) {
+ return false;
+ }
+ }
+ }
+ }
+ return true;
+ }
+
+ private boolean validateStructInExpression(ExprNodeDesc desc,
+ VectorExpressionDescriptor.Mode mode) {
+ for (ExprNodeDesc d : desc.getChildren()) {
+ TypeInfo typeInfo = d.getTypeInfo();
+ if (typeInfo.getCategory() != Category.STRUCT) {
+ return false;
+ }
+ StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
+
+ ArrayList<TypeInfo> fieldTypeInfos = structTypeInfo
+ .getAllStructFieldTypeInfos();
+ ArrayList<String> fieldNames = structTypeInfo.getAllStructFieldNames();
+ final int fieldCount = fieldTypeInfos.size();
+ for (int f = 0; f < fieldCount; f++) {
+ TypeInfo fieldTypeInfo = fieldTypeInfos.get(f);
+ Category category = fieldTypeInfo.getCategory();
+ if (category != Category.PRIMITIVE) {
+ LOG.info("Cannot vectorize struct field " + fieldNames.get(f)
+ + " of type " + fieldTypeInfo.getTypeName());
+ return false;
+ }
+ PrimitiveTypeInfo fieldPrimitiveTypeInfo = (PrimitiveTypeInfo) fieldTypeInfo;
+ InConstantType inConstantType = VectorizationContext
+ .getInConstantTypeFromPrimitiveCategory(fieldPrimitiveTypeInfo
+ .getPrimitiveCategory());
+
+ // For now, limit the data types we support for Vectorized Struct IN().
+ if (inConstantType != InConstantType.INT_FAMILY
+ && inConstantType != InConstantType.FLOAT_FAMILY
+ && inConstantType != InConstantType.STRING_FAMILY) {
+ LOG.info("Cannot vectorize struct field " + fieldNames.get(f)
+ + " of type " + fieldTypeInfo.getTypeName());
return false;
}
}
[44/50] [abbrv] hive git commit: HIVE-10785 : Support aggregate push
down through joins (Ashutosh Chauhan via Jesus Camacho Rodriguez)
Posted by xu...@apache.org.
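[Editor's aside, not part of the commit] A quick worked check of the rewrite these plans encode: for an inner join on a key followed by GROUP BY that key, each side can be pre-aggregated to (key, partial count) and the partial counts multiplied, because a key k occurring m times in f and n times in g produces exactly m * n joined rows:

    COUNT(*) for key k  =  m * n  =  count_f(k) * count_g(k)

For example, m = 3 and n = 2 give 3 * 2 = 6 joined rows, which is what the (_col1 * _col3) Select Operator expressions in the Stage-2 plans below compute from the per-side Group By Operators.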
http://git-wip-us.apache.org/repos/asf/hive/blob/68d6cfda/ql/src/test/results/clientpositive/groupby_join_pushdown.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby_join_pushdown.q.out b/ql/src/test/results/clientpositive/groupby_join_pushdown.q.out
new file mode 100644
index 0000000..17df98f
--- /dev/null
+++ b/ql/src/test/results/clientpositive/groupby_join_pushdown.q.out
@@ -0,0 +1,1522 @@
+PREHOOK: query: EXPLAIN
+SELECT f.key, g.key, count(g.key)
+FROM src f JOIN src g ON(f.key = g.key)
+GROUP BY f.key, g.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT f.key, g.key, count(g.key)
+FROM src f JOIN src g ON(f.key = g.key)
+GROUP BY f.key, g.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-3
+ Stage-3 is a root stage
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: string), (_col1 * _col3) (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col0)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT f.key, g.key
+FROM src f JOIN src g ON(f.key = g.key)
+GROUP BY f.key, g.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT f.key, g.key
+FROM src f JOIN src g ON(f.key = g.key)
+GROUP BY f.key, g.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-3
+ Stage-3 is a root stage
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT DISTINCT f.value, g.value
+FROM src f JOIN src g ON(f.value = g.value)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT DISTINCT f.value, g.value
+FROM src f JOIN src g ON(f.value = g.value)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-3
+ Stage-3 is a root stage
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: value is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: value (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: value is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: value (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT f.key, g.key, COUNT(*)
+FROM src f JOIN src g ON(f.key = g.key)
+GROUP BY f.key, g.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT f.key, g.key, COUNT(*)
+FROM src f JOIN src g ON(f.key = g.key)
+GROUP BY f.key, g.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-3
+ Stage-3 is a root stage
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: string), (_col1 * _col3) (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT f.ctinyint, g.ctinyint, SUM(f.cbigint)
+FROM alltypesorc f JOIN alltypesorc g ON(f.cint = g.cint)
+GROUP BY f.ctinyint, g.ctinyint
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT f.ctinyint, g.ctinyint, SUM(f.cbigint)
+FROM alltypesorc f JOIN alltypesorc g ON(f.cint = g.cint)
+GROUP BY f.ctinyint, g.ctinyint
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-4
+ Stage-3 depends on stages: Stage-2
+ Stage-4 is a root stage
+ Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cint is not null (type: boolean)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint), cint (type: int), cbigint (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2)
+ keys: _col0 (type: tinyint), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: int)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: tinyint), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: tinyint), _col2 (type: bigint)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: tinyint), _col2 (type: bigint)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col1 (type: int)
+ outputColumnNames: _col0, _col2, _col3, _col5
+ Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: tinyint), _col3 (type: tinyint), (_col2 * _col5) (type: bigint)
+ outputColumnNames: _col0, _col3, _col6
+ Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col6)
+ keys: _col0 (type: tinyint), _col3 (type: tinyint)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint), _col1 (type: tinyint)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: tinyint)
+ Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: tinyint), KEY._col1 (type: tinyint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1689 Data size: 363162 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1689 Data size: 363162 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cint is not null (type: boolean)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint), cint (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: tinyint), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: int)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: tinyint), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT f.cbigint, g.cbigint, MAX(f.cint)
+FROM alltypesorc f JOIN alltypesorc g ON(f.cbigint = g.cbigint)
+GROUP BY f.cbigint, g.cbigint
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT f.cbigint, g.cbigint, MAX(f.cint)
+FROM alltypesorc f JOIN alltypesorc g ON(f.cbigint = g.cbigint)
+GROUP BY f.cbigint, g.cbigint
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-3
+ Stage-3 is a root stage
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cbigint is not null (type: boolean)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cint (type: int), cbigint (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: max(_col0)
+ keys: _col1 (type: bigint)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: max(VALUE._col0)
+ keys: KEY._col0 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: bigint)
+ 1 _col0 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: bigint), _col2 (type: bigint), _col1 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cbigint is not null (type: boolean)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cbigint (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: bigint)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+SELECT f.ctinyint, g.ctinyint, MIN(f.ctinyint)
+FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint)
+GROUP BY f.ctinyint, g.ctinyint
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+SELECT f.ctinyint, g.ctinyint, MIN(f.ctinyint)
+FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint)
+GROUP BY f.ctinyint, g.ctinyint
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-3
+ Stage-3 is a root stage
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ctinyint is not null (type: boolean)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(_col0)
+ keys: _col0 (type: tinyint)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: tinyint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0)
+ keys: KEY._col0 (type: tinyint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: tinyint)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: tinyint)
+ 1 _col0 (type: tinyint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: tinyint), _col2 (type: tinyint), _col1 (type: tinyint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ctinyint is not null (type: boolean)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: tinyint)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: tinyint)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+SELECT MIN(f.cint)
+FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint)
+GROUP BY f.ctinyint, g.ctinyint
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+SELECT MIN(f.cint)
+FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint)
+GROUP BY f.ctinyint, g.ctinyint
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-3
+ Stage-3 is a root stage
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ctinyint is not null (type: boolean)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint), cint (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(_col1)
+ keys: _col0 (type: tinyint)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0)
+ keys: KEY._col0 (type: tinyint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: tinyint)
+ 1 _col0 (type: tinyint)
+ outputColumnNames: _col1
+ Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ctinyint is not null (type: boolean)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: tinyint)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: tinyint)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+SELECT count(f.ctinyint)
+FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint)
+GROUP BY f.ctinyint, g.ctinyint
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+SELECT count(f.ctinyint)
+FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint)
+GROUP BY f.ctinyint, g.ctinyint
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-3
+ Stage-3 is a root stage
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ctinyint is not null (type: boolean)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col0)
+ keys: _col0 (type: tinyint)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: tinyint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: tinyint)
+ 1 _col0 (type: tinyint)
+ outputColumnNames: _col1, _col3
+ Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: (_col1 * _col3) (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ctinyint is not null (type: boolean)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: tinyint)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: tinyint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+SELECT count(f.cint), f.ctinyint
+FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint)
+GROUP BY f.ctinyint, g.ctinyint
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+SELECT count(f.cint), f.ctinyint
+FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint)
+GROUP BY f.ctinyint, g.ctinyint
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-3
+ Stage-3 is a root stage
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ctinyint is not null (type: boolean)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint), cint (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col1)
+ keys: _col0 (type: tinyint)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: tinyint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: tinyint)
+ 1 _col0 (type: tinyint)
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: (_col1 * _col3) (type: bigint), _col0 (type: tinyint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ctinyint is not null (type: boolean)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: tinyint)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: tinyint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+SELECT sum(f.cint), f.ctinyint
+FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint)
+GROUP BY f.ctinyint, g.ctinyint
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+SELECT sum(f.cint), f.ctinyint
+FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint)
+GROUP BY f.ctinyint, g.ctinyint
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-3
+ Stage-3 is a root stage
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ctinyint is not null (type: boolean)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint), cint (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col1)
+ keys: _col0 (type: tinyint)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: tinyint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: tinyint)
+ 1 _col0 (type: tinyint)
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: (_col1 * _col3) (type: bigint), _col0 (type: tinyint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ctinyint is not null (type: boolean)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: tinyint)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: tinyint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
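
The count and sum plans above all share one rewrite: the per-key aggregate is computed on each join input separately (Stage-1 and Stage-3), and the join stage only multiplies the partial results, which is the (_col1 * _col3) Select Operator in Stage-2. A minimal sketch of the identity behind that rewrite, assuming an inner equi-join (the method and variable names here are illustrative, not Hive APIs):

    // For an inner join on key k, every aggregated row from side f pairs with
    // each of the n_g(k) matching rows from side g, so per key k:
    //   count(f.cint) after the join = count_f(k) * n_g(k)
    //   sum(f.cint)   after the join = sum_f(k)   * n_g(k)
    // This is exactly what the (_col1 * _col3) expression in Stage-2 computes.
    static long aggregateAfterJoin(long partialAggLeft, long matchingRowsRight) {
        return partialAggLeft * matchingRowsRight;
    }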
http://git-wip-us.apache.org/repos/asf/hive/blob/68d6cfda/ql/src/test/results/clientpositive/show_functions.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/show_functions.q.out b/ql/src/test/results/clientpositive/show_functions.q.out
index fbcd86a..789bedf 100644
--- a/ql/src/test/results/clientpositive/show_functions.q.out
+++ b/ql/src/test/results/clientpositive/show_functions.q.out
@@ -4,6 +4,7 @@ POSTHOOK: query: SHOW FUNCTIONS
POSTHOOK: type: SHOWFUNCTIONS
!
!=
+$sum0
%
&
*
[33/50] [abbrv] hive git commit: HIVE-11794 : GBY vectorization
appears to process COMPLETE reduce-side GBY incorrectly (Sergey Shelukhin,
reviewed by Matt McCline)
Posted by xu...@apache.org.
HIVE-11794 : GBY vectorization appears to process COMPLETE reduce-side GBY incorrectly (Sergey Shelukhin, reviewed by Matt McCline)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/072c5a0b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/072c5a0b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/072c5a0b
Branch: refs/heads/beeline-cli
Commit: 072c5a0bce78cde0124d98a1243392cdee2f2f3e
Parents: e82bf25
Author: Sergey Shelukhin <se...@apache.org>
Authored: Tue Sep 22 18:13:15 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Tue Sep 22 18:13:15 2015 -0700
----------------------------------------------------------------------
.../ql/exec/vector/VectorGroupByOperator.java | 5 +-
.../hive/ql/optimizer/physical/Vectorizer.java | 16 +-
.../hive/ql/plan/ExprNodeGenericFuncDesc.java | 10 +-
.../hadoop/hive/ql/plan/VectorGroupByDesc.java | 10 +
.../clientpositive/vector_groupby_reduce.q | 62 +-
.../clientpositive/vectorization_limit.q | 4 +-
.../tez/vector_groupby_reduce.q.out | 1452 ++++++++++++++++-
.../tez/vectorization_limit.q.out | 8 +-
.../clientpositive/vector_groupby_reduce.q.out | 1466 +++++++++++++++++-
.../clientpositive/vectorization_limit.q.out | 8 +-
10 files changed, 2958 insertions(+), 83 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/072c5a0b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
index 917f406..7a552b8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
@@ -625,8 +625,7 @@ public class VectorGroupByOperator extends Operator<GroupByDesc> implements
rowsToFlush[flushMark] = currentStreamingAggregators;
if (keysToFlush[flushMark] == null) {
keysToFlush[flushMark] = (VectorHashKeyWrapper) streamingKey.copyKey();
- }
- else {
+ } else {
streamingKey.duplicateTo(keysToFlush[flushMark]);
}
@@ -836,6 +835,8 @@ public class VectorGroupByOperator extends Operator<GroupByDesc> implements
} else if (conf.getVectorDesc().isReduceMergePartial()) {
// Sorted GroupBy of vector batches where an individual batch has the same group key (e.g. reduce).
processingMode = this.new ProcessingModeReduceMergePartialKeys();
+ } else if (conf.getVectorDesc().isReduceStreaming()) {
+ processingMode = this.new ProcessingModeUnsortedStreaming();
} else {
// We start in hash mode and may dynamically switch to unsorted stream mode.
processingMode = this.new ProcessingModeHashAggregate();
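
The hunk above adds a third branch to the operator's mode dispatch: a COMPLETE reduce-side GROUP BY is handled by the pre-existing ProcessingModeUnsortedStreaming rather than the merge-partial path, since its input may come from another operator instead of straight from reduce-shuffle, and the one-key-per-batch assumption then no longer holds. Condensed, the dispatch after this patch reads (names taken from the diff; the real method has further branches before these):

    if (conf.getVectorDesc().isReduceMergePartial()) {
      // Sorted reduce input: an individual batch holds a single group key.
      processingMode = this.new ProcessingModeReduceMergePartialKeys();
    } else if (conf.getVectorDesc().isReduceStreaming()) {
      // COMPLETE reduce-side GBY: keys are not guaranteed to be batch-aligned.
      processingMode = this.new ProcessingModeUnsortedStreaming();
    } else {
      // Hash mode, possibly switching to unsorted streaming dynamically.
      processingMode = this.new ProcessingModeHashAggregate();
    }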
http://git-wip-us.apache.org/repos/asf/hive/blob/072c5a0b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 2e3bd76..0d4c1d8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -1139,8 +1139,6 @@ public class Vectorizer implements PhysicalPlanResolver {
return false;
}
- boolean isMergePartial = (desc.getMode() != GroupByDesc.Mode.HASH);
-
if (!isReduce) {
// MapWork
@@ -1153,12 +1151,15 @@ public class Vectorizer implements PhysicalPlanResolver {
// ReduceWork
- if (isMergePartial) {
+ boolean isComplete = desc.getMode() == GroupByDesc.Mode.COMPLETE;
+ if (desc.getMode() != GroupByDesc.Mode.HASH) {
// Reduce Merge-Partial GROUP BY.
// A merge-partial GROUP BY is fed by grouping by keys from reduce-shuffle. It is the
// first (or root) operator for its reduce task.
+ // TODO: Technically, we should also handle FINAL, PARTIAL1, PARTIAL2 and PARTIALS
+ // that are not hash or complete, but aren't merge-partial, somehow.
if (desc.isDistinct()) {
LOG.info("Vectorized Reduce MergePartial GROUP BY does not support DISTINCT");
@@ -1174,7 +1175,7 @@ public class Vectorizer implements PhysicalPlanResolver {
}
if (hasKeys) {
- if (op.getParentOperators().size() > 0) {
+ if (op.getParentOperators().size() > 0 && !isComplete) {
LOG.info("Vectorized Reduce MergePartial GROUP BY keys can only handle a key group when it is fed by reduce-shuffle");
return false;
}
@@ -1187,7 +1188,11 @@ public class Vectorizer implements PhysicalPlanResolver {
} else {
LOG.info("Vectorized Reduce MergePartial GROUP BY will do global aggregation");
}
- vectorDesc.setIsReduceMergePartial(true);
+ if (!isComplete) {
+ vectorDesc.setIsReduceMergePartial(true);
+ } else {
+ vectorDesc.setIsReduceStreaming(true);
+ }
} else {
// Reduce Hash GROUP BY or global aggregation.
@@ -1259,6 +1264,7 @@ public class Vectorizer implements PhysicalPlanResolver {
ExprNodeGenericFuncDesc d = (ExprNodeGenericFuncDesc) desc;
boolean r = validateGenericUdf(d);
if (!r) {
+ LOG.info("Cannot vectorize UDF " + d);
return false;
}
}
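
Read together, the Vectorizer hunks replace the old isMergePartial shortcut with an explicit COMPLETE check: only non-COMPLETE, non-HASH group-bys keep the merge-partial key treatment, while COMPLETE ones are marked for the streaming mode shown above. A condensed sketch of the decision (the validation and logging of the full method are omitted):

    boolean isComplete = desc.getMode() == GroupByDesc.Mode.COMPLETE;
    if (desc.getMode() != GroupByDesc.Mode.HASH) {
      // Merge-partial keys require being fed directly by reduce-shuffle;
      // a COMPLETE GBY may sit below another operator, so it streams instead.
      if (!isComplete) {
        vectorDesc.setIsReduceMergePartial(true);
      } else {
        vectorDesc.setIsReduceStreaming(true);
      }
    }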
http://git-wip-us.apache.org/repos/asf/hive/blob/072c5a0b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java
index 4b2c1ad..b5d2ddf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java
@@ -136,11 +136,13 @@ public class ExprNodeGenericFuncDesc extends ExprNodeDesc implements
StringBuilder sb = new StringBuilder();
sb.append(genericUDF.getClass().getSimpleName());
sb.append("(");
- for (int i = 0; i < chidren.size(); i++) {
- if (i > 0) {
- sb.append(", ");
+ if (chidren != null) {
+ for (int i = 0; i < chidren.size(); i++) {
+ if (i > 0) {
+ sb.append(", ");
+ }
+ sb.append(chidren.get(i));
}
- sb.append(chidren.get(i).toString());
}
sb.append(")");
return sb.toString();
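
The toString() change guards against a null child list (the field really is spelled chidren in this class), presumably so that the LOG.info("Cannot vectorize UDF " + d) call added in Vectorizer above cannot throw on a partially built descriptor. A self-contained fragment illustrating the guarded pattern (the class and argument names are made up for the example):

    // Builds "GenericUDFAbs(a, b)" normally, and "GenericUDFAbs()" when the
    // argument list was never set, instead of a NullPointerException.
    java.util.List<String> args = null;
    StringBuilder sb = new StringBuilder("GenericUDFAbs(");
    if (args != null) {
      for (int i = 0; i < args.size(); i++) {
        if (i > 0) {
          sb.append(", ");
        }
        sb.append(args.get(i));
      }
    }
    sb.append(")");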
http://git-wip-us.apache.org/repos/asf/hive/blob/072c5a0b/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java
index 7e791f2..e613a4e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java
@@ -34,6 +34,8 @@ public class VectorGroupByDesc extends AbstractVectorDesc {
private boolean isVectorOutput;
+ private boolean isReduceStreaming;
+
public VectorGroupByDesc() {
this.isReduceMergePartial = false;
this.isVectorOutput = false;
@@ -54,4 +56,12 @@ public class VectorGroupByDesc extends AbstractVectorDesc {
public void setVectorOutput(boolean isVectorOutput) {
this.isVectorOutput = isVectorOutput;
}
+
+ public void setIsReduceStreaming(boolean isReduceStreaming) {
+ this.isReduceStreaming = isReduceStreaming;
+ }
+
+ public boolean isReduceStreaming() {
+ return isReduceStreaming;
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/072c5a0b/ql/src/test/queries/clientpositive/vector_groupby_reduce.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_groupby_reduce.q b/ql/src/test/queries/clientpositive/vector_groupby_reduce.q
index 1438c29..8fe6b7e 100644
--- a/ql/src/test/queries/clientpositive/vector_groupby_reduce.q
+++ b/ql/src/test/queries/clientpositive/vector_groupby_reduce.q
@@ -105,12 +105,11 @@ from
group by ss_ticket_number
limit 20;
--- The Reduce task has 2 MergePartial GROUP BY operators in a row. Currently,
--- we don't issue startGroup with keys out of the 1st vectorized GROUP BY, so we can't
--- vectorize the 2nd GROUP BY...
+
+
explain
select
- min(ss_ticket_number)
+ min(ss_ticket_number) m
from
(select
ss_ticket_number
@@ -118,10 +117,10 @@ from
store_sales
group by ss_ticket_number) a
group by ss_ticket_number
-limit 20;
+order by m;
select
- min(ss_ticket_number)
+ min(ss_ticket_number) m
from
(select
ss_ticket_number
@@ -129,5 +128,54 @@ from
store_sales
group by ss_ticket_number) a
group by ss_ticket_number
-limit 20;
+order by m;
+
+
+
+explain
+select
+ ss_ticket_number, sum(ss_item_sk), sum(q)
+from
+ (select
+ ss_ticket_number, ss_item_sk, min(ss_quantity) q
+ from
+ store_sales
+ group by ss_ticket_number, ss_item_sk) a
+group by ss_ticket_number
+order by ss_ticket_number;
+
+select
+ ss_ticket_number, sum(ss_item_sk), sum(q)
+from
+ (select
+ ss_ticket_number, ss_item_sk, min(ss_quantity) q
+ from
+ store_sales
+ group by ss_ticket_number, ss_item_sk) a
+group by ss_ticket_number
+order by ss_ticket_number;
+
+
+explain
+select
+ ss_ticket_number, ss_item_sk, sum(q)
+from
+ (select
+ ss_ticket_number, ss_item_sk, min(ss_quantity) q
+ from
+ store_sales
+ group by ss_ticket_number, ss_item_sk) a
+group by ss_ticket_number, ss_item_sk
+order by ss_ticket_number, ss_item_sk;
+
+select
+ ss_ticket_number, ss_item_sk, sum(q)
+from
+ (select
+ ss_ticket_number, ss_item_sk, min(ss_quantity) q
+ from
+ store_sales
+ group by ss_ticket_number, ss_item_sk) a
+group by ss_ticket_number, ss_item_sk
+order by ss_ticket_number, ss_item_sk;
http://git-wip-us.apache.org/repos/asf/hive/blob/072c5a0b/ql/src/test/queries/clientpositive/vectorization_limit.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vectorization_limit.q b/ql/src/test/queries/clientpositive/vectorization_limit.q
index 8799087..f261a36 100644
--- a/ql/src/test/queries/clientpositive/vectorization_limit.q
+++ b/ql/src/test/queries/clientpositive/vectorization_limit.q
@@ -23,8 +23,8 @@ select distinct(ctinyint) from alltypesorc limit 20;
select distinct(ctinyint) from alltypesorc limit 20;
explain
-select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint limit 20;
-select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint limit 20;
+select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20;
+select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20;
-- limit zero
explain
http://git-wip-us.apache.org/repos/asf/hive/blob/072c5a0b/ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out b/ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out
index 814ee39..fe7e829 100644
--- a/ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out
@@ -321,12 +321,9 @@ POSTHOOK: Input: default@store_sales
18
19
20
-PREHOOK: query: -- The Reduce task has 2 MergePartial GROUP BY operators in a row. Currently,
--- we don't issue startGroup with keys out of the 1st vectorized GROUP BY, so we can't
--- vectorize the 2nd GROUP BY...
-explain
+PREHOOK: query: explain
select
- min(ss_ticket_number)
+ min(ss_ticket_number) m
from
(select
ss_ticket_number
@@ -334,14 +331,11 @@ from
store_sales
group by ss_ticket_number) a
group by ss_ticket_number
-limit 20
+order by m
PREHOOK: type: QUERY
-POSTHOOK: query: -- The Reduce task has 2 MergePartial GROUP BY operators in a row. Currently,
--- we don't issue startGroup with keys out of the 1st vectorized GROUP BY, so we can't
--- vectorize the 2nd GROUP BY...
-explain
+POSTHOOK: query: explain
select
- min(ss_ticket_number)
+ min(ss_ticket_number) m
from
(select
ss_ticket_number
@@ -349,7 +343,7 @@ from
store_sales
group by ss_ticket_number) a
group by ss_ticket_number
-limit 20
+order by m
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -360,6 +354,7 @@ STAGE PLANS:
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -399,25 +394,34 @@ STAGE PLANS:
expressions: _col1 (type: int)
outputColumnNames: _col0
Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
- Limit
- Number of rows: 20
- Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Reducer 3
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
Stage: Stage-0
Fetch Operator
- limit: 20
+ limit: -1
Processor Tree:
ListSink
PREHOOK: query: select
- min(ss_ticket_number)
+ min(ss_ticket_number) m
from
(select
ss_ticket_number
@@ -425,12 +429,12 @@ from
store_sales
group by ss_ticket_number) a
group by ss_ticket_number
-limit 20
+order by m
PREHOOK: type: QUERY
PREHOOK: Input: default@store_sales
#### A masked pattern was here ####
POSTHOOK: query: select
- min(ss_ticket_number)
+ min(ss_ticket_number) m
from
(select
ss_ticket_number
@@ -438,7 +442,7 @@ from
store_sales
group by ss_ticket_number) a
group by ss_ticket_number
-limit 20
+order by m
POSTHOOK: type: QUERY
POSTHOOK: Input: default@store_sales
#### A masked pattern was here ####
@@ -462,3 +466,1397 @@ POSTHOOK: Input: default@store_sales
18
19
20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
+33
+34
+35
+36
+37
+38
+39
+40
+41
+42
+43
+44
+45
+46
+47
+48
+49
+50
+51
+52
+53
+54
+55
+56
+57
+58
+59
+60
+61
+62
+63
+64
+65
+66
+67
+68
+69
+70
+71
+72
+73
+74
+75
+76
+77
+78
+79
+80
+81
+82
+PREHOOK: query: explain
+select
+ ss_ticket_number, sum(ss_item_sk), sum(q)
+from
+ (select
+ ss_ticket_number, ss_item_sk, min(ss_quantity) q
+ from
+ store_sales
+ group by ss_ticket_number, ss_item_sk) a
+group by ss_ticket_number
+order by ss_ticket_number
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+ ss_ticket_number, sum(ss_item_sk), sum(q)
+from
+ (select
+ ss_ticket_number, ss_item_sk, min(ss_quantity) q
+ from
+ store_sales
+ group by ss_ticket_number, ss_item_sk) a
+group by ss_ticket_number
+order by ss_ticket_number
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_ticket_number (type: int), ss_item_sk (type: int), ss_quantity (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(_col2)
+ keys: _col0 (type: int), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int)
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col1), sum(_col2)
+ keys: _col0 (type: int)
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
+ Execution mode: vectorized
+ Reducer 3
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select
+ ss_ticket_number, sum(ss_item_sk), sum(q)
+from
+ (select
+ ss_ticket_number, ss_item_sk, min(ss_quantity) q
+ from
+ store_sales
+ group by ss_ticket_number, ss_item_sk) a
+group by ss_ticket_number
+order by ss_ticket_number
+PREHOOK: type: QUERY
+PREHOOK: Input: default@store_sales
+#### A masked pattern was here ####
+POSTHOOK: query: select
+ ss_ticket_number, sum(ss_item_sk), sum(q)
+from
+ (select
+ ss_ticket_number, ss_item_sk, min(ss_quantity) q
+ from
+ store_sales
+ group by ss_ticket_number, ss_item_sk) a
+group by ss_ticket_number
+order by ss_ticket_number
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@store_sales
+#### A masked pattern was here ####
+1 85411 816
+2 157365 812
+3 147948 710
+4 69545 411
+5 163232 840
+6 86307 627
+7 114874 563
+8 117953 662
+9 173250 690
+10 60338 602
+11 138545 657
+12 97181 586
+13 109484 555
+14 137333 442
+15 176829 652
+16 115004 654
+17 105008 460
+18 165135 738
+19 128252 831
+20 104789 374
+21 72771 469
+22 128153 449
+23 110253 603
+24 100662 1029
+25 118714 760
+26 81596 502
+27 164068 871
+28 58632 409
+29 133777 417
+30 130451 772
+31 114967 586
+32 142021 592
+33 151818 691
+34 112559 662
+35 137027 780
+36 118285 538
+37 94528 401
+38 81368 521
+39 101064 937
+40 84435 480
+41 112444 688
+42 95731 840
+43 57298 410
+44 159880 839
+45 68919 474
+46 111212 374
+47 78210 416
+48 94459 445
+49 90879 589
+50 37821 407
+51 124927 612
+52 98099 489
+53 138706 609
+54 87478 354
+55 90290 406
+56 78812 372
+57 101175 597
+58 88044 202
+59 104582 753
+60 99218 900
+61 66514 392
+62 126713 527
+63 98778 648
+64 131659 380
+65 86990 494
+66 108808 492
+67 75250 711
+68 91671 548
+69 92821 405
+70 75021 319
+71 124484 748
+72 161470 744
+73 104358 621
+74 88609 688
+75 92940 649
+76 75853 580
+77 124755 873
+78 98285 573
+79 160595 581
+80 151471 704
+81 105109 429
+82 55611 254
+PREHOOK: query: explain
+select
+ ss_ticket_number, ss_item_sk, sum(q)
+from
+ (select
+ ss_ticket_number, ss_item_sk, min(ss_quantity) q
+ from
+ store_sales
+ group by ss_ticket_number, ss_item_sk) a
+group by ss_ticket_number, ss_item_sk
+order by ss_ticket_number, ss_item_sk
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+ ss_ticket_number, ss_item_sk, sum(q)
+from
+ (select
+ ss_ticket_number, ss_item_sk, min(ss_quantity) q
+ from
+ store_sales
+ group by ss_ticket_number, ss_item_sk) a
+group by ss_ticket_number, ss_item_sk
+order by ss_ticket_number, ss_item_sk
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_ticket_number (type: int), ss_item_sk (type: int), ss_quantity (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(_col2)
+ keys: _col0 (type: int), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int)
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2)
+ keys: _col0 (type: int), _col1 (type: int)
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: bigint)
+ Execution mode: vectorized
+ Reducer 3
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select
+ ss_ticket_number, ss_item_sk, sum(q)
+from
+ (select
+ ss_ticket_number, ss_item_sk, min(ss_quantity) q
+ from
+ store_sales
+ group by ss_ticket_number, ss_item_sk) a
+group by ss_ticket_number, ss_item_sk
+order by ss_ticket_number, ss_item_sk
+PREHOOK: type: QUERY
+PREHOOK: Input: default@store_sales
+#### A masked pattern was here ####
+POSTHOOK: query: select
+ ss_ticket_number, ss_item_sk, sum(q)
+from
+ (select
+ ss_ticket_number, ss_item_sk, min(ss_quantity) q
+ from
+ store_sales
+ group by ss_ticket_number, ss_item_sk) a
+group by ss_ticket_number, ss_item_sk
+order by ss_ticket_number, ss_item_sk
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@store_sales
+#### A masked pattern was here ####
+1 49 5
+1 173 65
+1 1553 50
+1 3248 58
+1 3617 79
+1 4553 100
+1 4583 72
+1 4682 44
+1 5527 88
+1 5981 14
+1 10993 91
+1 13283 37
+1 13538 14
+1 13631 99
+2 1363 4
+2 2930 36
+2 3740 49
+2 6928 65
+2 7654 25
+2 9436 79
+2 10768 30
+2 12068 74
+2 12223 78
+2 13340 71
+2 13927 93
+2 14701 58
+2 15085 88
+2 15782 62
+2 17420 NULL
+3 246 96
+3 1531 NULL
+3 3525 42
+3 4698 98
+3 5355 53
+3 10693 27
+3 12447 82
+3 13021 64
+3 14100 79
+3 14443 4
+3 15786 56
+3 16869 4
+3 17263 17
+3 17971 88
+4 163 17
+4 1576 74
+4 5350 86
+4 5515 23
+4 6988 23
+4 7990 56
+4 8452 27
+4 9685 21
+4 11036 41
+4 12790 43
+5 1808 NULL
+5 1940 60
+5 5842 50
+5 6068 76
+5 6466 36
+5 11324 52
+5 11590 15
+5 12650 66
+5 13562 64
+5 13958 60
+5 14599 83
+5 14686 91
+5 15752 66
+5 16195 50
+5 16792 71
+6 2549 62
+6 2647 100
+6 3049 31
+6 3291 100
+6 6437 72
+6 8621 NULL
+6 10355 94
+6 10895 1
+6 11705 61
+6 13245 64
+6 13513 42
+7 4627 9
+7 4795 73
+7 4833 88
+7 5183 51
+7 5905 69
+7 8955 54
+7 9751 4
+7 10487 52
+7 12571 82
+7 15179 12
+7 15333 NULL
+7 17255 69
+8 665 31
+8 4183 90
+8 5929 83
+8 7115 54
+8 11365 7
+8 11893 95
+8 12041 95
+8 13427 87
+8 16671 20
+8 17119 51
+8 17545 49
+9 69 11
+9 889 6
+9 1185 62
+9 4623 34
+9 7945 83
+9 8334 71
+9 12027 27
+9 12969 59
+9 13483 NULL
+9 13717 53
+9 15133 15
+9 16083 32
+9 16363 54
+9 16461 66
+9 16659 84
+9 17310 33
+10 755 74
+10 1425 92
+10 1511 76
+10 3433 83
+10 3933 52
+10 4357 17
+10 5863 47
+10 9811 28
+10 13803 66
+10 15447 67
+11 157 84
+11 1315 70
+11 7519 68
+11 7608 66
+11 9901 57
+11 10699 33
+11 11490 NULL
+11 11991 38
+11 12438 16
+11 15157 96
+11 15649 33
+11 17226 11
+11 17395 85
+12 373 57
+12 1591 82
+12 4888 56
+12 6148 36
+12 6248 36
+12 9616 66
+12 9788 73
+12 13399 46
+12 14746 26
+12 14944 9
+12 15440 99
+13 868 NULL
+13 1760 12
+13 1898 NULL
+13 2108 9
+13 2191 NULL
+13 4430 73
+13 5971 80
+13 6085 58
+13 6140 15
+13 6682 80
+13 7640 48
+13 7723 27
+13 10096 12
+13 11758 34
+13 16894 87
+13 17240 20
+14 177 41
+14 769 20
+14 4507 4
+14 10175 19
+14 11549 6
+14 11653 60
+14 11817 81
+14 12587 NULL
+14 13069 77
+14 13515 57
+14 13845 17
+14 16741 46
+14 16929 14
+15 4241 21
+15 4505 59
+15 4777 28
+15 7391 98
+15 8336 15
+15 8353 NULL
+15 8690 32
+15 8707 21
+15 10361 39
+15 11659 80
+15 13172 25
+15 16619 81
+15 17267 7
+15 17330 82
+15 17564 26
+15 17857 38
+16 457 60
+16 1888 4
+16 4144 94
+16 6008 59
+16 7504 51
+16 8887 35
+16 9769 42
+16 9790 17
+16 9997 94
+16 11168 86
+16 11920 29
+16 16226 13
+16 17246 70
+17 2092 37
+17 4678 34
+17 6811 70
+17 9214 57
+17 10543 54
+17 11203 21
+17 13177 45
+17 13826 32
+17 15781 76
+17 17683 34
+18 2440 40
+18 5251 41
+18 7378 94
+18 8779 9
+18 8884 18
+18 9886 62
+18 11584 76
+18 11890 7
+18 12602 81
+18 12826 93
+18 12860 18
+18 14011 95
+18 14372 76
+18 14377 15
+18 17995 13
+19 1094 48
+19 3133 96
+19 3376 84
+19 4882 84
+19 6772 97
+19 7087 1
+19 7814 29
+19 8662 97
+19 9094 49
+19 9346 39
+19 10558 82
+19 10651 46
+19 11914 59
+19 16330 NULL
+19 17539 20
+20 1451 89
+20 2618 4
+20 5312 9
+20 5425 15
+20 5483 8
+20 6026 21
+20 7207 90
+20 8714 NULL
+20 9086 4
+20 9800 32
+20 13601 17
+20 14935 NULL
+20 15131 85
+21 230 48
+21 1810 59
+21 2870 50
+21 5170 45
+21 5998 51
+21 6476 49
+21 9187 14
+21 12266 47
+21 14368 18
+21 14396 88
+22 9985 70
+22 10474 31
+22 11599 66
+22 12415 10
+22 15310 15
+22 16396 85
+22 16922 88
+22 17392 14
+22 17660 70
+23 319 86
+23 7242 37
+23 8181 13
+23 8413 1
+23 9093 38
+23 9097 81
+23 11220 91
+23 11257 64
+23 12397 80
+23 15403 96
+23 17631 16
+24 407 53
+24 1389 72
+24 1795 21
+24 2497 85
+24 3103 73
+24 4425 57
+24 4749 28
+24 4873 41
+24 5653 92
+24 6043 1
+24 6751 82
+24 7375 97
+24 10265 93
+24 11551 48
+24 13303 97
+24 16483 89
+25 1333 55
+25 2150 100
+25 2608 76
+25 3454 100
+25 4880 29
+25 5954 34
+25 6955 40
+25 7874 65
+25 9472 48
+25 10159 24
+25 14488 26
+25 14635 68
+25 17000 40
+25 17752 55
+26 1989 26
+26 5053 4
+26 5385 97
+26 5721 81
+26 6647 64
+26 7337 45
+26 9679 18
+26 11895 77
+26 12851 56
+26 15039 34
+27 1305 44
+27 2137 96
+27 2671 92
+27 5831 61
+27 7139 59
+27 8167 28
+27 10757 15
+27 11441 15
+27 11509 65
+27 12237 89
+27 12749 31
+27 13885 66
+27 15025 26
+27 16029 59
+27 16419 65
+27 16767 60
+28 1807 98
+28 2817 8
+28 2967 29
+28 4483 78
+28 5437 15
+28 6411 3
+28 7965 93
+28 8043 58
+28 8407 14
+28 10295 13
+29 20 18
+29 1363 75
+29 2930 23
+29 3740 5
+29 7654 20
+29 9458 33
+29 10795 33
+29 12068 37
+29 12223 59
+29 13340 21
+29 13693 NULL
+29 15085 40
+29 15626 NULL
+29 15782 53
+30 217 91
+30 1951 59
+30 3238 16
+30 3506 15
+30 3928 87
+30 5431 77
+30 6752 69
+30 7870 7
+30 8666 21
+30 12572 33
+30 12670 20
+30 13579 75
+30 14848 62
+30 17348 62
+30 17875 78
+31 913 54
+31 4963 67
+31 6617 11
+31 6917 4
+31 7513 82
+31 11739 95
+31 14575 97
+31 14727 41
+31 15341 31
+31 15411 53
+31 16251 51
+32 1115 61
+32 2095 34
+32 2887 8
+32 4339 6
+32 4537 22
+32 4808 NULL
+32 5798 87
+32 7547 24
+32 9683 26
+32 11005 46
+32 11348 41
+32 12134 21
+32 15001 57
+32 15644 34
+32 16421 74
+32 17659 51
+33 4798 27
+33 7300 3
+33 9649 36
+33 10376 21
+33 11119 92
+33 11756 26
+33 12643 89
+33 12760 54
+33 12964 80
+33 14125 66
+33 14158 82
+33 14692 93
+33 15478 22
+34 1526 91
+34 1717 53
+34 2312 6
+34 4118 88
+34 5197 63
+34 5449 9
+34 6193 61
+34 9325 3
+34 9766 83
+34 12016 42
+34 12290 53
+34 12512 60
+34 13814 20
+34 16324 30
+35 411 51
+35 2377 52
+35 3667 97
+35 4325 56
+35 5179 83
+35 11635 87
+35 11661 81
+35 14239 55
+35 15619 45
+35 15757 9
+35 17341 92
+35 17365 65
+35 17451 7
+36 1115 80
+36 2095 43
+36 2887 31
+36 7547 46
+36 11005 49
+36 11349 80
+36 15001 54
+36 15645 23
+36 16421 25
+36 17561 16
+36 17659 91
+37 2997 94
+37 7283 87
+37 10715 52
+37 10929 88
+37 13171 6
+37 15337 62
+37 16971 12
+37 17125 NULL
+38 757 2
+38 2164 17
+38 3439 84
+38 4154 35
+38 5113 73
+38 6220 98
+38 7018 15
+38 7784 56
+38 8870 15
+38 9710 7
+38 10441 62
+38 15698 57
+39 386 89
+39 1598 64
+39 3476 73
+39 3943 64
+39 4190 86
+39 4957 24
+39 5393 98
+39 7097 78
+39 7118 67
+39 7604 49
+39 7697 24
+39 8078 54
+39 8411 96
+39 15491 54
+39 15625 17
+40 2854 71
+40 3490 65
+40 3985 63
+40 5098 35
+40 5318 87
+40 10094 80
+40 10912 23
+40 12050 NULL
+40 13658 53
+40 16976 3
+41 10 50
+41 64 29
+41 3380 88
+41 5566 11
+41 6310 90
+41 7402 69
+41 7603 94
+41 9322 8
+41 10915 81
+41 14788 15
+41 15242 87
+41 15328 46
+41 16514 20
+42 619 69
+42 976 100
+42 1436 94
+42 2314 74
+42 2392 14
+42 2602 30
+42 3346 74
+42 3613 30
+42 6058 30
+42 6134 92
+42 8462 23
+42 9740 52
+42 10016 57
+42 10471 19
+42 12550 41
+42 15002 41
+43 2923 16
+43 3344 22
+43 3911 26
+43 4364 77
+43 4691 41
+43 5773 85
+43 5852 16
+43 11771 30
+43 14669 97
+44 2351 56
+44 2623 18
+44 7303 14
+44 7527 67
+44 9059 68
+44 11707 83
+44 12341 20
+44 13331 98
+44 13449 45
+44 14149 80
+44 15803 81
+44 16491 56
+44 16837 92
+44 16909 61
+45 811 62
+45 1479 49
+45 3265 98
+45 5309 18
+45 7363 87
+45 10115 68
+45 11095 40
+45 13133 46
+45 16349 6
+46 1960 12
+46 3010 67
+46 7040 33
+46 8065 NULL
+46 11426 72
+46 13042 58
+46 15595 32
+46 16540 30
+46 17150 57
+46 17384 13
+47 254 NULL
+47 481 30
+47 1132 66
+47 1916 71
+47 3085 51
+47 3202 7
+47 3878 NULL
+47 4774 11
+47 5008 82
+47 5305 NULL
+47 5468 7
+47 7214 1
+47 9770 33
+47 13246 47
+47 13477 10
+48 1761 22
+48 2820 4
+48 2829 65
+48 4431 39
+48 5971 29
+48 6085 1
+48 6684 44
+48 9199 88
+48 11259 NULL
+48 12468 62
+48 13153 74
+48 17799 17
+49 749 60
+49 2135 4
+49 5342 69
+49 5852 47
+49 6805 40
+49 7141 94
+49 9049 68
+49 9553 71
+49 12737 48
+49 15155 84
+49 16361 4
+50 1280 69
+50 1312 30
+50 1909 53
+50 1984 40
+50 3097 64
+50 5023 NULL
+50 7135 69
+50 16081 82
+51 422 21
+51 3091 28
+51 4687 6
+51 5029 12
+51 5059 51
+51 6565 33
+51 8384 79
+51 9311 90
+51 10133 54
+51 11234 NULL
+51 12625 53
+51 13199 97
+51 17483 22
+51 17705 66
+52 2420 90
+52 3334 73
+52 6098 NULL
+52 7606 45
+52 11488 76
+52 15649 29
+52 16646 48
+52 17402 91
+52 17456 37
+53 1114 40
+53 2095 62
+53 2786 70
+53 2887 39
+53 7546 58
+53 11348 38
+53 13220 76
+53 13795 38
+53 15991 37
+53 16420 14
+53 16648 79
+53 17296 43
+53 17560 15
+54 702 40
+54 825 50
+54 1165 62
+54 3861 NULL
+54 6517 40
+54 9159 75
+54 14737 38
+54 16059 15
+54 16974 NULL
+54 17479 34
+55 1339 16
+55 3001 7
+55 5137 33
+55 9703 44
+55 12170 92
+55 12205 90
+55 14135 36
+55 14923 71
+55 17677 17
+56 4242 2
+56 4506 57
+56 8353 35
+56 8691 59
+56 8707 68
+56 10362 54
+56 16620 23
+56 17331 74
+57 3253 71
+57 4028 88
+57 4933 22
+57 12596 91
+57 12721 62
+57 12740 52
+57 15182 86
+57 17729 26
+57 17993 99
+58 1829 52
+58 3848 6
+58 5117 2
+58 7649 19
+58 9743 62
+58 10802 14
+58 15635 6
+58 16472 6
+58 16949 35
+59 3133 92
+59 3546 22
+59 5772 70
+59 7087 80
+59 8010 46
+59 8335 36
+59 9348 62
+59 9397 92
+59 10651 100
+59 11916 19
+59 12858 90
+59 14529 44
+60 97 50
+60 555 62
+60 633 71
+60 999 43
+60 1117 78
+60 1573 90
+60 4041 25
+60 4235 28
+60 4513 72
+60 4937 22
+60 7231 95
+60 10277 62
+60 10393 75
+60 13975 14
+60 16887 25
+60 17755 88
+61 1106 4
+61 2264 36
+61 3362 48
+61 4567 26
+61 5528 78
+61 6380 77
+61 7591 78
+61 8924 11
+61 10330 8
+61 16462 26
+62 4093 94
+62 6403 NULL
+62 8457 37
+62 10149 75
+62 12163 29
+62 12199 5
+62 12407 NULL
+62 13559 80
+62 15399 74
+62 15733 40
+62 16151 93
+63 4488 73
+63 5079 79
+63 5217 66
+63 5658 99
+63 9319 80
+63 11370 38
+63 11946 85
+63 13339 19
+63 15793 40
+63 16569 69
+64 1213 NULL
+64 3090 87
+64 3963 NULL
+64 11835 82
+64 13224 NULL
+64 14407 8
+64 15867 59
+64 15936 30
+64 16921 19
+64 17586 78
+64 17617 17
+65 2287 100
+65 4227 42
+65 9625 51
+65 9847 54
+65 13897 40
+65 14905 85
+65 15177 55
+65 17025 67
+66 6507 76
+66 7033 65
+66 7227 66
+66 8197 41
+66 9237 29
+66 10019 10
+66 11419 66
+66 15629 20
+66 16745 91
+66 16795 28
+67 757 77
+67 2133 74
+67 3439 73
+67 4155 87
+67 5113 NULL
+67 7020 79
+67 7507 77
+67 8469 59
+67 8871 71
+67 12087 70
+67 15699 44
+68 1387 74
+68 1603 57
+68 1820 54
+68 2035 22
+68 2296 52
+68 2564 83
+68 5162 23
+68 6763 77
+68 7765 NULL
+68 12526 3
+68 12724 88
+68 17426 2
+68 17600 13
+69 322 45
+69 337 34
+69 4208 9
+69 4267 10
+69 6136 7
+69 7264 67
+69 7822 30
+69 8599 53
+69 11137 68
+69 13489 66
+69 13792 NULL
+69 15448 16
+70 1592 53
+70 2462 NULL
+70 3296 48
+70 3947 NULL
+70 6185 82
+70 6425 NULL
+70 8893 17
+70 9857 20
+70 14549 4
+70 17815 95
+71 457 75
+71 1888 4
+71 2098 51
+71 4144 49
+71 5858 NULL
+71 6008 54
+71 7504 3
+71 8887 10
+71 9274 36
+71 9769 79
+71 9790 96
+71 9997 26
+71 10108 66
+71 10288 30
+71 11168 79
+71 17246 90
+72 1535 9
+72 5917 85
+72 6113 45
+72 6671 13
+72 9860 26
+72 10427 66
+72 10753 16
+72 11741 62
+72 12788 29
+72 12901 57
+72 13085 94
+72 13423 62
+72 13904 37
+72 15587 87
+72 16765 56
+73 247 53
+73 1063 37
+73 3205 82
+73 4946 54
+73 6862 58
+73 10051 49
+73 12502 75
+73 15109 38
+73 16519 97
+73 16585 38
+73 17269 40
+74 326 29
+74 3104 78
+74 3175 23
+74 3278 NULL
+74 3542 96
+74 3754 26
+74 5492 54
+74 7694 17
+74 8653 12
+74 9620 95
+74 10069 99
+74 13208 87
+74 16694 72
+75 607 20
+75 2948 25
+75 4625 73
+75 6938 89
+75 6953 71
+75 8726 6
+75 9905 54
+75 10217 85
+75 11039 70
+75 14186 63
+75 16796 93
+76 257 5
+76 465 2
+76 1107 16
+76 1503 97
+76 2265 98
+76 2869 32
+76 3363 25
+76 4237 48
+76 4567 40
+76 5529 78
+76 6381 50
+76 7591 27
+76 8925 6
+76 10331 3
+76 16463 53
+77 992 62
+77 1399 34
+77 2713 85
+77 3868 89
+77 6289 30
+77 7339 88
+77 7448 95
+77 7486 49
+77 8686 38
+77 9220 90
+77 11918 36
+77 12439 95
+77 13456 48
+77 14815 18
+77 16687 16
+78 901 3
+78 3304 50
+78 3856 27
+78 5965 78
+78 6044 59
+78 6110 43
+78 6500 76
+78 7576 87
+78 8611 79
+78 10507 6
+78 11209 7
+78 12706 19
+78 14996 39
+79 247 NULL
+79 1063 85
+79 3205 48
+79 4947 35
+79 6864 1
+79 10051 10
+79 10524 36
+79 12504 81
+79 14322 41
+79 15109 NULL
+79 15498 3
+79 15888 58
+79 16519 9
+79 16585 93
+79 17269 81
+80 998 93
+80 1519 25
+80 1573 40
+80 4040 66
+80 4513 NULL
+80 4622 1
+80 7231 49
+80 7610 37
+80 10393 5
+80 12968 NULL
+80 13717 91
+80 13975 13
+80 16363 84
+80 16886 77
+80 17308 29
+80 17755 94
+81 4486 31
+81 5078 75
+81 5216 64
+81 5656 24
+81 7166 7
+81 7663 79
+81 8918 37
+81 9319 36
+81 11107 36
+81 11368 26
+81 13339 6
+81 15793 8
+82 2572 53
+82 7862 75
+82 13138 59
+82 14998 49
+82 17041 18
http://git-wip-us.apache.org/repos/asf/hive/blob/072c5a0b/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out b/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out
index 33f7ed9..fec2d2c 100644
--- a/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out
+++ b/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out
@@ -319,10 +319,10 @@ NULL
-47
-46
PREHOOK: query: explain
-select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint limit 20
+select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20
PREHOOK: type: QUERY
POSTHOOK: query: explain
-select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint limit 20
+select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -385,11 +385,11 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint limit 20
+PREHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20
PREHOOK: type: QUERY
PREHOOK: Input: default@alltypesorc
#### A masked pattern was here ####
-POSTHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint limit 20
+POSTHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20
POSTHOOK: type: QUERY
POSTHOOK: Input: default@alltypesorc
#### A masked pattern was here ####
http://git-wip-us.apache.org/repos/asf/hive/blob/072c5a0b/ql/src/test/results/clientpositive/vector_groupby_reduce.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_groupby_reduce.q.out b/ql/src/test/results/clientpositive/vector_groupby_reduce.q.out
index 331ba4f..fc1997c 100644
--- a/ql/src/test/results/clientpositive/vector_groupby_reduce.q.out
+++ b/ql/src/test/results/clientpositive/vector_groupby_reduce.q.out
@@ -314,12 +314,9 @@ POSTHOOK: Input: default@store_sales
18
19
20
-PREHOOK: query: -- The Reduce task has 2 MergePartial GROUP BY operators in a row. Currently,
--- we don't issue startGroup with keys out of the 1st vectorized GROUP BY, so we can't
--- vectorize the 2nd GROUP BY...
-explain
+PREHOOK: query: explain
select
- min(ss_ticket_number)
+ min(ss_ticket_number) m
from
(select
ss_ticket_number
@@ -327,14 +324,11 @@ from
store_sales
group by ss_ticket_number) a
group by ss_ticket_number
-limit 20
+order by m
PREHOOK: type: QUERY
-POSTHOOK: query: -- The Reduce task has 2 MergePartial GROUP BY operators in a row. Currently,
--- we don't issue startGroup with keys out of the 1st vectorized GROUP BY, so we can't
--- vectorize the 2nd GROUP BY...
-explain
+POSTHOOK: query: explain
select
- min(ss_ticket_number)
+ min(ss_ticket_number) m
from
(select
ss_ticket_number
@@ -342,11 +336,12 @@ from
store_sales
group by ss_ticket_number) a
group by ss_ticket_number
-limit 20
+order by m
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
STAGE PLANS:
Stage: Stage-1
@@ -386,25 +381,42 @@ STAGE PLANS:
expressions: _col1 (type: int)
outputColumnNames: _col0
Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
- Limit
- Number of rows: 20
- Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
- limit: 20
+ limit: -1
Processor Tree:
ListSink
PREHOOK: query: select
- min(ss_ticket_number)
+ min(ss_ticket_number) m
from
(select
ss_ticket_number
@@ -412,12 +424,12 @@ from
store_sales
group by ss_ticket_number) a
group by ss_ticket_number
-limit 20
+order by m
PREHOOK: type: QUERY
PREHOOK: Input: default@store_sales
#### A masked pattern was here ####
POSTHOOK: query: select
- min(ss_ticket_number)
+ min(ss_ticket_number) m
from
(select
ss_ticket_number
@@ -425,7 +437,7 @@ from
store_sales
group by ss_ticket_number) a
group by ss_ticket_number
-limit 20
+order by m
POSTHOOK: type: QUERY
POSTHOOK: Input: default@store_sales
#### A masked pattern was here ####
@@ -449,3 +461,1401 @@ POSTHOOK: Input: default@store_sales
18
19
20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
+33
+34
+35
+36
+37
+38
+39
+40
+41
+42
+43
+44
+45
+46
+47
+48
+49
+50
+51
+52
+53
+54
+55
+56
+57
+58
+59
+60
+61
+62
+63
+64
+65
+66
+67
+68
+69
+70
+71
+72
+73
+74
+75
+76
+77
+78
+79
+80
+81
+82
+PREHOOK: query: explain
+select
+ ss_ticket_number, sum(ss_item_sk), sum(q)
+from
+ (select
+ ss_ticket_number, ss_item_sk, min(ss_quantity) q
+ from
+ store_sales
+ group by ss_ticket_number, ss_item_sk) a
+group by ss_ticket_number
+order by ss_ticket_number
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+ ss_ticket_number, sum(ss_item_sk), sum(q)
+from
+ (select
+ ss_ticket_number, ss_item_sk, min(ss_quantity) q
+ from
+ store_sales
+ group by ss_ticket_number, ss_item_sk) a
+group by ss_ticket_number
+order by ss_ticket_number
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_ticket_number (type: int), ss_item_sk (type: int), ss_quantity (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(_col2)
+ keys: _col0 (type: int), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col1), sum(_col2)
+ keys: _col0 (type: int)
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select
+ ss_ticket_number, sum(ss_item_sk), sum(q)
+from
+ (select
+ ss_ticket_number, ss_item_sk, min(ss_quantity) q
+ from
+ store_sales
+ group by ss_ticket_number, ss_item_sk) a
+group by ss_ticket_number
+order by ss_ticket_number
+PREHOOK: type: QUERY
+PREHOOK: Input: default@store_sales
+#### A masked pattern was here ####
+POSTHOOK: query: select
+ ss_ticket_number, sum(ss_item_sk), sum(q)
+from
+ (select
+ ss_ticket_number, ss_item_sk, min(ss_quantity) q
+ from
+ store_sales
+ group by ss_ticket_number, ss_item_sk) a
+group by ss_ticket_number
+order by ss_ticket_number
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@store_sales
+#### A masked pattern was here ####
+1 85411 816
+2 157365 812
+3 147948 710
+4 69545 411
+5 163232 840
+6 86307 627
+7 114874 563
+8 117953 662
+9 173250 690
+10 60338 602
+11 138545 657
+12 97181 586
+13 109484 555
+14 137333 442
+15 176829 652
+16 115004 654
+17 105008 460
+18 165135 738
+19 128252 831
+20 104789 374
+21 72771 469
+22 128153 449
+23 110253 603
+24 100662 1029
+25 118714 760
+26 81596 502
+27 164068 871
+28 58632 409
+29 133777 417
+30 130451 772
+31 114967 586
+32 142021 592
+33 151818 691
+34 112559 662
+35 137027 780
+36 118285 538
+37 94528 401
+38 81368 521
+39 101064 937
+40 84435 480
+41 112444 688
+42 95731 840
+43 57298 410
+44 159880 839
+45 68919 474
+46 111212 374
+47 78210 416
+48 94459 445
+49 90879 589
+50 37821 407
+51 124927 612
+52 98099 489
+53 138706 609
+54 87478 354
+55 90290 406
+56 78812 372
+57 101175 597
+58 88044 202
+59 104582 753
+60 99218 900
+61 66514 392
+62 126713 527
+63 98778 648
+64 131659 380
+65 86990 494
+66 108808 492
+67 75250 711
+68 91671 548
+69 92821 405
+70 75021 319
+71 124484 748
+72 161470 744
+73 104358 621
+74 88609 688
+75 92940 649
+76 75853 580
+77 124755 873
+78 98285 573
+79 160595 581
+80 151471 704
+81 105109 429
+82 55611 254
+PREHOOK: query: explain
+select
+ ss_ticket_number, ss_item_sk, sum(q)
+from
+ (select
+ ss_ticket_number, ss_item_sk, min(ss_quantity) q
+ from
+ store_sales
+ group by ss_ticket_number, ss_item_sk) a
+group by ss_ticket_number, ss_item_sk
+order by ss_ticket_number, ss_item_sk
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+ ss_ticket_number, ss_item_sk, sum(q)
+from
+ (select
+ ss_ticket_number, ss_item_sk, min(ss_quantity) q
+ from
+ store_sales
+ group by ss_ticket_number, ss_item_sk) a
+group by ss_ticket_number, ss_item_sk
+order by ss_ticket_number, ss_item_sk
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_ticket_number (type: int), ss_item_sk (type: int), ss_quantity (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(_col2)
+ keys: _col0 (type: int), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2)
+ keys: _col0 (type: int), _col1 (type: int)
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: bigint)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select
+ ss_ticket_number, ss_item_sk, sum(q)
+from
+ (select
+ ss_ticket_number, ss_item_sk, min(ss_quantity) q
+ from
+ store_sales
+ group by ss_ticket_number, ss_item_sk) a
+group by ss_ticket_number, ss_item_sk
+order by ss_ticket_number, ss_item_sk
+PREHOOK: type: QUERY
+PREHOOK: Input: default@store_sales
+#### A masked pattern was here ####
+POSTHOOK: query: select
+ ss_ticket_number, ss_item_sk, sum(q)
+from
+ (select
+ ss_ticket_number, ss_item_sk, min(ss_quantity) q
+ from
+ store_sales
+ group by ss_ticket_number, ss_item_sk) a
+group by ss_ticket_number, ss_item_sk
+order by ss_ticket_number, ss_item_sk
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@store_sales
+#### A masked pattern was here ####
+1 49 5
+1 173 65
+1 1553 50
+1 3248 58
+1 3617 79
+1 4553 100
+1 4583 72
+1 4682 44
+1 5527 88
+1 5981 14
+1 10993 91
+1 13283 37
+1 13538 14
+1 13631 99
+2 1363 4
+2 2930 36
+2 3740 49
+2 6928 65
+2 7654 25
+2 9436 79
+2 10768 30
+2 12068 74
+2 12223 78
+2 13340 71
+2 13927 93
+2 14701 58
+2 15085 88
+2 15782 62
+2 17420 NULL
+3 246 96
+3 1531 NULL
+3 3525 42
+3 4698 98
+3 5355 53
+3 10693 27
+3 12447 82
+3 13021 64
+3 14100 79
+3 14443 4
+3 15786 56
+3 16869 4
+3 17263 17
+3 17971 88
+4 163 17
+4 1576 74
+4 5350 86
+4 5515 23
+4 6988 23
+4 7990 56
+4 8452 27
+4 9685 21
+4 11036 41
+4 12790 43
+5 1808 NULL
+5 1940 60
+5 5842 50
+5 6068 76
+5 6466 36
+5 11324 52
+5 11590 15
+5 12650 66
+5 13562 64
+5 13958 60
+5 14599 83
+5 14686 91
+5 15752 66
+5 16195 50
+5 16792 71
+6 2549 62
+6 2647 100
+6 3049 31
+6 3291 100
+6 6437 72
+6 8621 NULL
+6 10355 94
+6 10895 1
+6 11705 61
+6 13245 64
+6 13513 42
+7 4627 9
+7 4795 73
+7 4833 88
+7 5183 51
+7 5905 69
+7 8955 54
+7 9751 4
+7 10487 52
+7 12571 82
+7 15179 12
+7 15333 NULL
+7 17255 69
+8 665 31
+8 4183 90
+8 5929 83
+8 7115 54
+8 11365 7
+8 11893 95
+8 12041 95
+8 13427 87
+8 16671 20
+8 17119 51
+8 17545 49
+9 69 11
+9 889 6
+9 1185 62
+9 4623 34
+9 7945 83
+9 8334 71
+9 12027 27
+9 12969 59
+9 13483 NULL
+9 13717 53
+9 15133 15
+9 16083 32
+9 16363 54
+9 16461 66
+9 16659 84
+9 17310 33
+10 755 74
+10 1425 92
+10 1511 76
+10 3433 83
+10 3933 52
+10 4357 17
+10 5863 47
+10 9811 28
+10 13803 66
+10 15447 67
+11 157 84
+11 1315 70
+11 7519 68
+11 7608 66
+11 9901 57
+11 10699 33
+11 11490 NULL
+11 11991 38
+11 12438 16
+11 15157 96
+11 15649 33
+11 17226 11
+11 17395 85
+12 373 57
+12 1591 82
+12 4888 56
+12 6148 36
+12 6248 36
+12 9616 66
+12 9788 73
+12 13399 46
+12 14746 26
+12 14944 9
+12 15440 99
+13 868 NULL
+13 1760 12
+13 1898 NULL
+13 2108 9
+13 2191 NULL
+13 4430 73
+13 5971 80
+13 6085 58
+13 6140 15
+13 6682 80
+13 7640 48
+13 7723 27
+13 10096 12
+13 11758 34
+13 16894 87
+13 17240 20
+14 177 41
+14 769 20
+14 4507 4
+14 10175 19
+14 11549 6
+14 11653 60
+14 11817 81
+14 12587 NULL
+14 13069 77
+14 13515 57
+14 13845 17
+14 16741 46
+14 16929 14
+15 4241 21
+15 4505 59
+15 4777 28
+15 7391 98
+15 8336 15
+15 8353 NULL
+15 8690 32
+15 8707 21
+15 10361 39
+15 11659 80
+15 13172 25
+15 16619 81
+15 17267 7
+15 17330 82
+15 17564 26
+15 17857 38
+16 457 60
+16 1888 4
+16 4144 94
+16 6008 59
+16 7504 51
+16 8887 35
+16 9769 42
+16 9790 17
+16 9997 94
+16 11168 86
+16 11920 29
+16 16226 13
+16 17246 70
+17 2092 37
+17 4678 34
+17 6811 70
+17 9214 57
+17 10543 54
+17 11203 21
+17 13177 45
+17 13826 32
+17 15781 76
+17 17683 34
+18 2440 40
+18 5251 41
+18 7378 94
+18 8779 9
+18 8884 18
+18 9886 62
+18 11584 76
+18 11890 7
+18 12602 81
+18 12826 93
+18 12860 18
+18 14011 95
+18 14372 76
+18 14377 15
+18 17995 13
+19 1094 48
+19 3133 96
+19 3376 84
+19 4882 84
+19 6772 97
+19 7087 1
+19 7814 29
+19 8662 97
+19 9094 49
+19 9346 39
+19 10558 82
+19 10651 46
+19 11914 59
+19 16330 NULL
+19 17539 20
+20 1451 89
+20 2618 4
+20 5312 9
+20 5425 15
+20 5483 8
+20 6026 21
+20 7207 90
+20 8714 NULL
+20 9086 4
+20 9800 32
+20 13601 17
+20 14935 NULL
+20 15131 85
+21 230 48
+21 1810 59
+21 2870 50
+21 5170 45
+21 5998 51
+21 6476 49
+21 9187 14
+21 12266 47
+21 14368 18
+21 14396 88
+22 9985 70
+22 10474 31
+22 11599 66
+22 12415 10
+22 15310 15
+22 16396 85
+22 16922 88
+22 17392 14
+22 17660 70
+23 319 86
+23 7242 37
+23 8181 13
+23 8413 1
+23 9093 38
+23 9097 81
+23 11220 91
+23 11257 64
+23 12397 80
+23 15403 96
+23 17631 16
+24 407 53
+24 1389 72
+24 1795 21
+24 2497 85
+24 3103 73
+24 4425 57
+24 4749 28
+24 4873 41
+24 5653 92
+24 6043 1
+24 6751 82
+24 7375 97
+24 10265 93
+24 11551 48
+24 13303 97
+24 16483 89
+25 1333 55
+25 2150 100
+25 2608 76
+25 3454 100
+25 4880 29
+25 5954 34
+25 6955 40
+25 7874 65
+25 9472 48
+25 10159 24
+25 14488 26
+25 14635 68
+25 17000 40
+25 17752 55
+26 1989 26
+26 5053 4
+26 5385 97
+26 5721 81
+26 6647 64
+26 7337 45
+26 9679 18
+26 11895 77
+26 12851 56
+26 15039 34
+27 1305 44
+27 2137 96
+27 2671 92
+27 5831 61
+27 7139 59
+27 8167 28
+27 10757 15
+27 11441 15
+27 11509 65
+27 12237 89
+27 12749 31
+27 13885 66
+27 15025 26
+27 16029 59
+27 16419 65
+27 16767 60
+28 1807 98
+28 2817 8
+28 2967 29
+28 4483 78
+28 5437 15
+28 6411 3
+28 7965 93
+28 8043 58
+28 8407 14
+28 10295 13
+29 20 18
+29 1363 75
+29 2930 23
+29 3740 5
+29 7654 20
+29 9458 33
+29 10795 33
+29 12068 37
+29 12223 59
+29 13340 21
+29 13693 NULL
+29 15085 40
+29 15626 NULL
+29 15782 53
+30 217 91
+30 1951 59
+30 3238 16
+30 3506 15
+30 3928 87
+30 5431 77
+30 6752 69
+30 7870 7
+30 8666 21
+30 12572 33
+30 12670 20
+30 13579 75
+30 14848 62
+30 17348 62
+30 17875 78
+31 913 54
+31 4963 67
+31 6617 11
+31 6917 4
+31 7513 82
+31 11739 95
+31 14575 97
+31 14727 41
+31 15341 31
+31 15411 53
+31 16251 51
+32 1115 61
+32 2095 34
+32 2887 8
+32 4339 6
+32 4537 22
+32 4808 NULL
+32 5798 87
+32 7547 24
+32 9683 26
+32 11005 46
+32 11348 41
+32 12134 21
+32 15001 57
+32 15644 34
+32 16421 74
+32 17659 51
+33 4798 27
+33 7300 3
+33 9649 36
+33 10376 21
+33 11119 92
+33 11756 26
+33 12643 89
+33 12760 54
+33 12964 80
+33 14125 66
+33 14158 82
+33 14692 93
+33 15478 22
+34 1526 91
+34 1717 53
+34 2312 6
+34 4118 88
+34 5197 63
+34 5449 9
+34 6193 61
+34 9325 3
+34 9766 83
+34 12016 42
+34 12290 53
+34 12512 60
+34 13814 20
+34 16324 30
+35 411 51
+35 2377 52
+35 3667 97
+35 4325 56
+35 5179 83
+35 11635 87
+35 11661 81
+35 14239 55
+35 15619 45
+35 15757 9
+35 17341 92
+35 17365 65
+35 17451 7
+36 1115 80
+36 2095 43
+36 2887 31
+36 7547 46
+36 11005 49
+36 11349 80
+36 15001 54
+36 15645 23
+36 16421 25
+36 17561 16
+36 17659 91
+37 2997 94
+37 7283 87
+37 10715 52
+37 10929 88
+37 13171 6
+37 15337 62
+37 16971 12
+37 17125 NULL
+38 757 2
+38 2164 17
+38 3439 84
+38 4154 35
+38 5113 73
+38 6220 98
+38 7018 15
+38 7784 56
+38 8870 15
+38 9710 7
+38 10441 62
+38 15698 57
+39 386 89
+39 1598 64
+39 3476 73
+39 3943 64
+39 4190 86
+39 4957 24
+39 5393 98
+39 7097 78
+39 7118 67
+39 7604 49
+39 7697 24
+39 8078 54
+39 8411 96
+39 15491 54
+39 15625 17
+40 2854 71
+40 3490 65
+40 3985 63
+40 5098 35
+40 5318 87
+40 10094 80
+40 10912 23
+40 12050 NULL
+40 13658 53
+40 16976 3
+41 10 50
+41 64 29
+41 3380 88
+41 5566 11
+41 6310 90
+41 7402 69
+41 7603 94
+41 9322 8
+41 10915 81
+41 14788 15
+41 15242 87
+41 15328 46
+41 16514 20
+42 619 69
+42 976 100
+42 1436 94
+42 2314 74
+42 2392 14
+42 2602 30
+42 3346 74
+42 3613 30
+42 6058 30
+42 6134 92
+42 8462 23
+42 9740 52
+42 10016 57
+42 10471 19
+42 12550 41
+42 15002 41
+43 2923 16
+43 3344 22
+43 3911 26
+43 4364 77
+43 4691 41
+43 5773 85
+43 5852 16
+43 11771 30
+43 14669 97
+44 2351 56
+44 2623 18
+44 7303 14
+44 7527 67
+44 9059 68
+44 11707 83
+44 12341 20
+44 13331 98
+44 13449 45
+44 14149 80
+44 15803 81
+44 16491 56
+44 16837 92
+44 16909 61
+45 811 62
+45 1479 49
+45 3265 98
+45 5309 18
+45 7363 87
+45 10115 68
+45 11095 40
+45 13133 46
+45 16349 6
+46 1960 12
+46 3010 67
+46 7040 33
+46 8065 NULL
+46 11426 72
+46 13042 58
+46 15595 32
+46 16540 30
+46 17150 57
+46 17384 13
+47 254 NULL
+47 481 30
+47 1132 66
+47 1916 71
+47 3085 51
+47 3202 7
+47 3878 NULL
+47 4774 11
+47 5008 82
+47 5305 NULL
+47 5468 7
+47 7214 1
+47 9770 33
+47 13246 47
+47 13477 10
+48 1761 22
+48 2820 4
+48 2829 65
+48 4431 39
+48 5971 29
+48 6085 1
+48 6684 44
+48 9199 88
+48 11259 NULL
+48 12468 62
+48 13153 74
+48 17799 17
+49 749 60
+49 2135 4
+49 5342 69
+49 5852 47
+49 6805 40
+49 7141 94
+49 9049 68
+49 9553 71
+49 12737 48
+49 15155 84
+49 16361 4
+50 1280 69
+50 1312 30
+50 1909 53
+50 1984 40
+50 3097 64
+50 5023 NULL
+50 7135 69
+50 16081 82
+51 422 21
+51 3091 28
+51 4687 6
+51 5029 12
+51 5059 51
+51 6565 33
+51 8384 79
+51 9311 90
+51 10133 54
+51 11234 NULL
+51 12625 53
+51 13199 97
+51 17483 22
+51 17705 66
+52 2420 90
+52 3334 73
+52 6098 NULL
+52 7606 45
+52 11488 76
+52 15649 29
+52 16646 48
+52 17402 91
+52 17456 37
+53 1114 40
+53 2095 62
+53 2786 70
+53 2887 39
+53 7546 58
+53 11348 38
+53 13220 76
+53 13795 38
+53 15991 37
+53 16420 14
+53 16648 79
+53 17296 43
+53 17560 15
+54 702 40
+54 825 50
+54 1165 62
+54 3861 NULL
+54 6517 40
+54 9159 75
+54 14737 38
+54 16059 15
+54 16974 NULL
+54 17479 34
+55 1339 16
+55 3001 7
+55 5137 33
+55 9703 44
+55 12170 92
+55 12205 90
+55 14135 36
+55 14923 71
+55 17677 17
+56 4242 2
+56 4506 57
+56 8353 35
+56 8691 59
+56 8707 68
+56 10362 54
+56 16620 23
+56 17331 74
+57 3253 71
+57 4028 88
+57 4933 22
+57 12596 91
+57 12721 62
+57 12740 52
+57 15182 86
+57 17729 26
+57 17993 99
+58 1829 52
+58 3848 6
+58 5117 2
+58 7649 19
+58 9743 62
+58 10802 14
+58 15635 6
+58 16472 6
+58 16949 35
+59 3133 92
+59 3546 22
+59 5772 70
+59 7087 80
+59 8010 46
+59 8335 36
+59 9348 62
+59 9397 92
+59 10651 100
+59 11916 19
+59 12858 90
+59 14529 44
+60 97 50
+60 555 62
+60 633 71
+60 999 43
+60 1117 78
+60 1573 90
+60 4041 25
+60 4235 28
+60 4513 72
+60 4937 22
+60 7231 95
+60 10277 62
+60 10393 75
+60 13975 14
+60 16887 25
+60 17755 88
+61 1106 4
+61 2264 36
+61 3362 48
+61 4567 26
+61 5528 78
+61 6380 77
+61 7591 78
+61 8924 11
+61 10330 8
+61 16462 26
+62 4093 94
+62 6403 NULL
+62 8457 37
+62 10149 75
+62 12163 29
+62 12199 5
+62 12407 NULL
+62 13559 80
+62 15399 74
+62 15733 40
+62 16151 93
+63 4488 73
+63 5079 79
+63 5217 66
+63 5658 99
+63 9319 80
+63 11370 38
+63 11946 85
+63 13339 19
+63 15793 40
+63 16569 69
+64 1213 NULL
+64 3090 87
+64 3963 NULL
+64 11835 82
+64 13224 NULL
+64 14407 8
+64 15867 59
+64 15936 30
+64 16921 19
+64 17586 78
+64 17617 17
+65 2287 100
+65 4227 42
+65 9625 51
+65 9847 54
+65 13897 40
+65 14905 85
+65 15177 55
+65 17025 67
+66 6507 76
+66 7033 65
+66 7227 66
+66 8197 41
+66 9237 29
+66 10019 10
+66 11419 66
+66 15629 20
+66 16745 91
+66 16795 28
+67 757 77
+67 2133 74
+67 3439 73
+67 4155 87
+67 5113 NULL
+67 7020 79
+67 7507 77
+67 8469 59
+67 8871 71
+67 12087 70
+67 15699 44
+68 1387 74
+68 1603 57
+68 1820 54
+68 2035 22
+68 2296 52
+68 2564 83
+68 5162 23
+68 6763 77
+68 7765 NULL
+68 12526 3
+68 12724 88
+68 17426 2
+68 17600 13
+69 322 45
+69 337 34
+69 4208 9
+69 4267 10
+69 6136 7
+69 7264 67
+69 7822 30
+69 8599 53
+69 11137 68
+69 13489 66
+69 13792 NULL
+69 15448 16
+70 1592 53
+70 2462 NULL
+70 3296 48
+70 3947 NULL
+70 6185 82
+70 6425 NULL
+70 8893 17
+70 9857 20
+70 14549 4
+70 17815 95
+71 457 75
+71 1888 4
+71 2098 51
+71 4144 49
+71 5858 NULL
+71 6008 54
+71 7504 3
+71 8887 10
+71 9274 36
+71 9769 79
+71 9790 96
+71 9997 26
+71 10108 66
+71 10288 30
+71 11168 79
+71 17246 90
+72 1535 9
+72 5917 85
+72 6113 45
+72 6671 13
+72 9860 26
+72 10427 66
+72 10753 16
+72 11741 62
+72 12788 29
+72 12901 57
+72 13085 94
+72 13423 62
+72 13904 37
+72 15587 87
+72 16765 56
+73 247 53
+73 1063 37
+73 3205 82
+73 4946 54
+73 6862 58
+73 10051 49
+73 12502 75
+73 15109 38
+73 16519 97
+73 16585 38
+73 17269 40
+74 326 29
+74 3104 78
+74 3175 23
+74 3278 NULL
+74 3542 96
+74 3754 26
+74 5492 54
+74 7694 17
+74 8653 12
+74 9620 95
+74 10069 99
+74 13208 87
+74 16694 72
+75 607 20
+75 2948 25
+75 4625 73
+75 6938 89
+75 6953 71
+75 8726 6
+75 9905 54
+75 10217 85
+75 11039 70
+75 14186 63
+75 16796 93
+76 257 5
+76 465 2
+76 1107 16
+76 1503 97
+76 2265 98
+76 2869 32
+76 3363 25
+76 4237 48
+76 4567 40
+76 5529 78
+76 6381 50
+76 7591 27
+76 8925 6
+76 10331 3
+76 16463 53
+77 992 62
+77 1399 34
+77 2713 85
+77 3868 89
+77 6289 30
+77 7339 88
+77 7448 95
+77 7486 49
+77 8686 38
+77 9220 90
+77 11918 36
+77 12439 95
+77 13456 48
+77 14815 18
+77 16687 16
+78 901 3
+78 3304 50
+78 3856 27
+78 5965 78
+78 6044 59
+78 6110 43
+78 6500 76
+78 7576 87
+78 8611 79
+78 10507 6
+78 11209 7
+78 12706 19
+78 14996 39
+79 247 NULL
+79 1063 85
+79 3205 48
+79 4947 35
+79 6864 1
+79 10051 10
+79 10524 36
+79 12504 81
+79 14322 41
+79 15109 NULL
+79 15498 3
+79 15888 58
+79 16519 9
+79 16585 93
+79 17269 81
+80 998 93
+80 1519 25
+80 1573 40
+80 4040 66
+80 4513 NULL
+80 4622 1
+80 7231 49
+80 7610 37
+80 10393 5
+80 12968 NULL
+80 13717 91
+80 13975 13
+80 16363 84
+80 16886 77
+80 17308 29
+80 17755 94
+81 4486 31
+81 5078 75
+81 5216 64
+81 5656 24
+81 7166 7
+81 7663 79
+81 8918 37
+81 9319 36
+81 11107 36
+81 11368 26
+81 13339 6
+81 15793 8
+82 2572 53
+82 7862 75
+82 13138 59
+82 14998 49
+82 17041 18
http://git-wip-us.apache.org/repos/asf/hive/blob/072c5a0b/ql/src/test/results/clientpositive/vectorization_limit.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vectorization_limit.q.out b/ql/src/test/results/clientpositive/vectorization_limit.q.out
index 9ff888c..2400baa 100644
--- a/ql/src/test/results/clientpositive/vectorization_limit.q.out
+++ b/ql/src/test/results/clientpositive/vectorization_limit.q.out
@@ -316,10 +316,10 @@ NULL
-47
-46
PREHOOK: query: explain
-select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint limit 20
+select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20
PREHOOK: type: QUERY
POSTHOOK: query: explain
-select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint limit 20
+select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -373,11 +373,11 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint limit 20
+PREHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20
PREHOOK: type: QUERY
PREHOOK: Input: default@alltypesorc
#### A masked pattern was here ####
-POSTHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint limit 20
+POSTHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20
POSTHOOK: type: QUERY
POSTHOOK: Input: default@alltypesorc
#### A masked pattern was here ####
[10/50] [abbrv] hive git commit: HIVE-11846: CliDriver shutdown tries
to drop index table again which was already dropped when dropping the
original table (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Posted by xu...@apache.org.
HIVE-11846: CliDriver shutdown tries to drop index table again which was already dropped when dropping the original table (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/68c0e999
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/68c0e999
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/68c0e999
Branch: refs/heads/beeline-cli
Commit: 68c0e9993c6aee85d50ce1dc8974916b1e073f67
Parents: b934a80
Author: Pengcheng Xiong <px...@apache.org>
Authored: Fri Sep 18 10:29:52 2015 -0700
Committer: Pengcheng Xiong <px...@apache.org>
Committed: Fri Sep 18 10:29:52 2015 -0700
----------------------------------------------------------------------
.../org/apache/hadoop/hive/ql/QTestUtil.java | 9 +-
.../clientpositive/drop_table_with_index.q | 35 +++++
.../clientpositive/drop_table_with_index.q.out | 152 +++++++++++++++++++
3 files changed, 195 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
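The fix below relies on one Hive behavior: dropping a base table implicitly drops its index tables, so a name returned by getAllTables() can already be gone by the time the cleanup loop reaches it. A minimal sketch of the resulting pattern, with the loop body after the try/catch assumed for illustration (the actual change is in QTestUtil.java below):

    for (String tblName : db.getAllTables()) {
      Table tblObj;
      try {
        tblObj = db.getTable(tblName);
      } catch (InvalidTableException e) {
        // The index table vanished when its base table was dropped; skip it.
        continue;
      }
      // ... drop tblObj as before ...
    }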
http://git-wip-us.apache.org/repos/asf/hive/blob/68c0e999/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java
----------------------------------------------------------------------
diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java b/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java
index 3fae0ba..f23bf2b 100644
--- a/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java
+++ b/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java
@@ -84,6 +84,7 @@ import org.apache.hadoop.hive.ql.exec.spark.session.SparkSessionManagerImpl;
import org.apache.hadoop.hive.ql.lockmgr.zookeeper.CuratorFrameworkSingleton;
import org.apache.hadoop.hive.ql.lockmgr.zookeeper.ZooKeeperHiveLockManager;
import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.InvalidTableException;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
@@ -670,7 +671,13 @@ public class QTestUtil {
SessionState.get().setCurrentDatabase(dbName);
for (String tblName : db.getAllTables()) {
if (!DEFAULT_DATABASE_NAME.equals(dbName) || !srcTables.contains(tblName)) {
- Table tblObj = db.getTable(tblName);
+ Table tblObj = null;
+ try {
+ tblObj = db.getTable(tblName);
+ } catch (InvalidTableException e) {
+ LOG.warn("Trying to drop table " + e.getTableName() + ". But it does not exist.");
+ continue;
+ }
// An index table cannot be dropped directly. Dropping the base
// table will automatically drop all of its index tables.
if(tblObj.isIndexTable()) {
http://git-wip-us.apache.org/repos/asf/hive/blob/68c0e999/ql/src/test/queries/clientpositive/drop_table_with_index.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/drop_table_with_index.q b/ql/src/test/queries/clientpositive/drop_table_with_index.q
new file mode 100644
index 0000000..1790664
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/drop_table_with_index.q
@@ -0,0 +1,35 @@
+set hive.stats.dbclass=fs;
+set hive.stats.autogather=true;
+set hive.cbo.enable=true;
+
+DROP TABLE IF EXISTS aa;
+CREATE TABLE aa (L_ORDERKEY INT,
+ L_PARTKEY INT,
+ L_SUPPKEY INT,
+ L_LINENUMBER INT,
+ L_QUANTITY DOUBLE,
+ L_EXTENDEDPRICE DOUBLE,
+ L_DISCOUNT DOUBLE,
+ L_TAX DOUBLE,
+ L_RETURNFLAG STRING,
+ L_LINESTATUS STRING,
+ l_shipdate STRING,
+ L_COMMITDATE STRING,
+ L_RECEIPTDATE STRING,
+ L_SHIPINSTRUCT STRING,
+ L_SHIPMODE STRING,
+ L_COMMENT STRING)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|';
+
+LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE aa;
+
+CREATE INDEX aa_lshipdate_idx ON TABLE aa(l_shipdate) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(l_shipdate)");
+ALTER INDEX aa_lshipdate_idx ON aa REBUILD;
+
+show tables;
+
+explain select l_shipdate, count(l_shipdate)
+from aa
+group by l_shipdate;
+
http://git-wip-us.apache.org/repos/asf/hive/blob/68c0e999/ql/src/test/results/clientpositive/drop_table_with_index.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/drop_table_with_index.q.out b/ql/src/test/results/clientpositive/drop_table_with_index.q.out
new file mode 100644
index 0000000..d1b0d6d
--- /dev/null
+++ b/ql/src/test/results/clientpositive/drop_table_with_index.q.out
@@ -0,0 +1,152 @@
+PREHOOK: query: DROP TABLE IF EXISTS aa
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS aa
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE aa (L_ORDERKEY INT,
+ L_PARTKEY INT,
+ L_SUPPKEY INT,
+ L_LINENUMBER INT,
+ L_QUANTITY DOUBLE,
+ L_EXTENDEDPRICE DOUBLE,
+ L_DISCOUNT DOUBLE,
+ L_TAX DOUBLE,
+ L_RETURNFLAG STRING,
+ L_LINESTATUS STRING,
+ l_shipdate STRING,
+ L_COMMITDATE STRING,
+ L_RECEIPTDATE STRING,
+ L_SHIPINSTRUCT STRING,
+ L_SHIPMODE STRING,
+ L_COMMENT STRING)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@aa
+POSTHOOK: query: CREATE TABLE aa (L_ORDERKEY INT,
+ L_PARTKEY INT,
+ L_SUPPKEY INT,
+ L_LINENUMBER INT,
+ L_QUANTITY DOUBLE,
+ L_EXTENDEDPRICE DOUBLE,
+ L_DISCOUNT DOUBLE,
+ L_TAX DOUBLE,
+ L_RETURNFLAG STRING,
+ L_LINESTATUS STRING,
+ l_shipdate STRING,
+ L_COMMITDATE STRING,
+ L_RECEIPTDATE STRING,
+ L_SHIPINSTRUCT STRING,
+ L_SHIPMODE STRING,
+ L_COMMENT STRING)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@aa
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE aa
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@aa
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE aa
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@aa
+PREHOOK: query: CREATE INDEX aa_lshipdate_idx ON TABLE aa(l_shipdate) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(l_shipdate)")
+PREHOOK: type: CREATEINDEX
+PREHOOK: Input: default@aa
+POSTHOOK: query: CREATE INDEX aa_lshipdate_idx ON TABLE aa(l_shipdate) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(l_shipdate)")
+POSTHOOK: type: CREATEINDEX
+POSTHOOK: Input: default@aa
+POSTHOOK: Output: default@default__aa_aa_lshipdate_idx__
+PREHOOK: query: ALTER INDEX aa_lshipdate_idx ON aa REBUILD
+PREHOOK: type: ALTERINDEX_REBUILD
+PREHOOK: Input: default@aa
+PREHOOK: Output: default@default__aa_aa_lshipdate_idx__
+POSTHOOK: query: ALTER INDEX aa_lshipdate_idx ON aa REBUILD
+POSTHOOK: type: ALTERINDEX_REBUILD
+POSTHOOK: Input: default@aa
+POSTHOOK: Output: default@default__aa_aa_lshipdate_idx__
+POSTHOOK: Lineage: default__aa_aa_lshipdate_idx__._bucketname SIMPLE [(aa)aa.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__aa_aa_lshipdate_idx__._count_of_l_shipdate EXPRESSION [(aa)aa.FieldSchema(name:l_shipdate, type:string, comment:null), ]
+POSTHOOK: Lineage: default__aa_aa_lshipdate_idx__._offsets EXPRESSION [(aa)aa.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__aa_aa_lshipdate_idx__.l_shipdate SIMPLE [(aa)aa.FieldSchema(name:l_shipdate, type:string, comment:null), ]
+PREHOOK: query: show tables
+PREHOOK: type: SHOWTABLES
+PREHOOK: Input: database:default
+POSTHOOK: query: show tables
+POSTHOOK: type: SHOWTABLES
+POSTHOOK: Input: database:default
+aa
+alltypesorc
+cbo_t1
+cbo_t2
+cbo_t3
+default__aa_aa_lshipdate_idx__
+lineitem
+part
+src
+src1
+src_cbo
+src_json
+src_sequencefile
+src_thrift
+srcbucket
+srcbucket2
+srcpart
+PREHOOK: query: explain select l_shipdate, count(l_shipdate)
+from aa
+group by l_shipdate
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select l_shipdate, count(l_shipdate)
+from aa
+group by l_shipdate
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: aa
+ Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: l_shipdate (type: string)
+ outputColumnNames: l_shipdate
+ Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(l_shipdate)
+ keys: l_shipdate (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 60 Data size: 6049 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 60 Data size: 6049 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
[39/50] [abbrv] hive git commit: HIVE-10328 : Loop optimization for
SIMD in IfExprColumnColumn.txt (Teddy Choi via Ashutosh Chauhan)
Posted by xu...@apache.org.
HIVE-10328 : Loop optimization for SIMD in IfExprColumnColumn.txt (Teddy Choi via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b98a60df
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b98a60df
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b98a60df
Branch: refs/heads/beeline-cli
Commit: b98a60df2f3779acf82d94965d11ed951b618fad
Parents: 6e8eeb7
Author: Teddy Choi <tc...@hortonworks.com>
Authored: Tue Aug 11 16:26:00 2015 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Wed Sep 23 10:45:02 2015 -0700
----------------------------------------------------------------------
.../apache/hadoop/hive/ant/GenVectorCode.java | 31 ----
.../vectorization/VectorizationBench.java | 32 +++-
.../ExpressionTemplates/IfExprColumnColumn.txt | 186 -------------------
.../IfExprDoubleColumnDoubleColumn.java | 167 +++++++++++++++++
.../expressions/IfExprLongColumnLongColumn.java | 166 +++++++++++++++++
.../hive/ql/udf/generic/GenericUDFIf.java | 4 +-
.../exec/vector/TestVectorizationContext.java | 4 +-
.../TestVectorConditionalExpressions.java | 3 +-
8 files changed, 369 insertions(+), 224 deletions(-)
----------------------------------------------------------------------
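The heart of this change is in the noNulls loops of the two new classes below: the branching ternary from the old template is replaced with a bitwise select that HotSpot's auto-vectorizer can turn into SIMD instructions. A minimal standalone sketch of the trick (class and method names here are illustrative, not part of the commit): for a boolean stored as a long, (b - 1) is all zero bits when b == 1 and all one bits when b == 0, so a single AND/OR pair picks exactly one operand with no branch.

public class BranchFreeSelect {
  // Branch-free equivalent of (b == 1 ? ifTrue : ifFalse) for b in {0, 1}.
  static long select(long b, long ifTrue, long ifFalse) {
    return (~(b - 1) & ifTrue) | ((b - 1) & ifFalse);
  }

  public static void main(String[] args) {
    System.out.println(select(1L, 10L, 20L)); // prints 10
    System.out.println(select(0L, 10L, 20L)); // prints 20
  }
}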
http://git-wip-us.apache.org/repos/asf/hive/blob/b98a60df/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java
----------------------------------------------------------------------
diff --git a/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java b/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java
index fede273..ba7648c 100644
--- a/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java
+++ b/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java
@@ -788,8 +788,6 @@ public class GenVectorCode extends Task {
// IF conditional expression
// fileHeader, resultType, arg2Type, arg3Type
- {"IfExprColumnColumn", "long"},
- {"IfExprColumnColumn", "double"},
{"IfExprColumnScalar", "long", "long"},
{"IfExprColumnScalar", "double", "long"},
{"IfExprColumnScalar", "long", "double"},
@@ -1051,8 +1049,6 @@ public class GenVectorCode extends Task {
generateFilterStringGroupColumnCompareStringGroupColumn(tdesc);
} else if (tdesc[0].equals("StringGroupColumnCompareStringGroupColumn")) {
generateStringGroupColumnCompareStringGroupColumn(tdesc);
- } else if (tdesc[0].equals("IfExprColumnColumn")) {
- generateIfExprColumnColumn(tdesc);
} else if (tdesc[0].equals("IfExprColumnScalar")) {
generateIfExprColumnScalar(tdesc);
} else if (tdesc[0].equals("IfExprScalarColumn")) {
@@ -1644,33 +1640,6 @@ public class GenVectorCode extends Task {
className, templateString);
}
- private void generateIfExprColumnColumn(String[] tdesc) throws Exception {
- String operandType = tdesc[1];
- String inputColumnVectorType = this.getColumnVectorType(operandType);
- String outputColumnVectorType = inputColumnVectorType;
- String returnType = operandType;
- String className = "IfExpr" + getCamelCaseType(operandType) + "Column"
- + getCamelCaseType(operandType) + "Column";
- String outputFile = joinPath(this.expressionOutputDirectory, className + ".java");
- File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt"));
- String templateString = readFile(templateFile);
- // Expand, and write result
- templateString = templateString.replaceAll("<ClassName>", className);
- templateString = templateString.replaceAll("<InputColumnVectorType>", inputColumnVectorType);
- templateString = templateString.replaceAll("<OperandType>", operandType);
- String vectorExprArgType = operandType;
-
- // Toss in timestamp and date.
- if (operandType.equals("long")) {
- // Let comparisons occur for DATE and TIMESTAMP, too.
- vectorExprArgType = "int_datetime_interval_family";
- }
- templateString = templateString.replaceAll("<VectorExprArgType>", vectorExprArgType);
-
- writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory,
- className, templateString);
- }
-
private void generateIfExprColumnScalar(String[] tdesc) throws Exception {
String operandType2 = tdesc[1];
String operandType3 = tdesc[2];
http://git-wip-us.apache.org/repos/asf/hive/blob/b98a60df/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizationBench.java
----------------------------------------------------------------------
diff --git a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizationBench.java b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizationBench.java
index 0e880c6..dcd9501 100644
--- a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizationBench.java
+++ b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizationBench.java
@@ -17,6 +17,8 @@ import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprLongColumnLongColumn;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleColumnDoubleColumn;
import org.apache.hadoop.hive.ql.exec.vector.expressions.ColAndCol;
import org.apache.hadoop.hive.ql.exec.vector.expressions.ColOrCol;
import org.apache.hadoop.hive.ql.exec.vector.expressions.LongColDivideLongColumn;
@@ -40,6 +42,7 @@ import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;
+import java.lang.Override;
import java.util.Random;
import java.util.concurrent.TimeUnit;
@@ -302,9 +305,36 @@ public class VectorizationBench {
}
}
+ public static class IfExprLongColumnLongColumnBench extends AbstractExpression {
+ @Override
+ public void setup() {
+ rowBatch = buildRowBatch(new LongColumnVector(), 3, getBooleanLongColumnVector(),
+ getLongColumnVector(), getLongColumnVector());
+ expression = new IfExprLongColumnLongColumn(0, 1, 2, 3);
+ }
+ }
+
+ public static class IfExprRepeatingLongColumnLongColumnBench extends AbstractExpression {
+ @Override
+ public void setup() {
+ rowBatch = buildRowBatch(new LongColumnVector(), 3, getBooleanLongColumnVector(),
+ getRepeatingLongColumnVector(), getLongColumnVector());
+ expression = new IfExprLongColumnLongColumn(0, 1, 2, 3);
+ }
+ }
+
+ public static class IfExprLongColumnRepeatingLongColumnBench extends AbstractExpression {
+ @Override
+ public void setup() {
+ rowBatch = buildRowBatch(new LongColumnVector(), 3, getBooleanLongColumnVector(),
+ getLongColumnVector(), getRepeatingLongColumnVector());
+ expression = new IfExprLongColumnLongColumn(0, 1, 2, 3);
+ }
+ }
+
public static void main(String[] args) throws RunnerException {
Options opt = new OptionsBuilder().include(".*" + VectorizationBench.class.getSimpleName() +
".*").build();
new Runner(opt).run();
}
-}
\ No newline at end of file
+}
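The three benchmark classes added above reuse the existing AbstractExpression setup. As a usage sketch, they could be run in isolation through the standard JMH runner; the class name and include pattern here are assumptions, and only the JMH API calls mirror VectorizationBench.main above:

import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;

public class IfExprBenchRunner {
  public static void main(String[] args) throws RunnerException {
    // Match only the three IF-expression benchmarks added in this commit.
    Options opt = new OptionsBuilder()
        .include(".*IfExpr.*LongColumnBench.*")
        .build();
    new Runner(opt).run();
  }
}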
http://git-wip-us.apache.org/repos/asf/hive/blob/b98a60df/ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnColumn.txt
----------------------------------------------------------------------
diff --git a/ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnColumn.txt b/ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnColumn.txt
deleted file mode 100644
index 27d769c..0000000
--- a/ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnColumn.txt
+++ /dev/null
@@ -1,186 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
-
-import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
-
-/**
- * Compute IF(expr1, expr2, expr3) for 3 input column expressions.
- * The first is always a boolean (LongColumnVector).
- * The second and third are long columns or long expression results.
- */
-public class <ClassName> extends VectorExpression {
-
- private static final long serialVersionUID = 1L;
-
- private int arg1Column, arg2Column, arg3Column;
- private int outputColumn;
-
- public <ClassName>(int arg1Column, int arg2Column, int arg3Column, int outputColumn) {
- this.arg1Column = arg1Column;
- this.arg2Column = arg2Column;
- this.arg3Column = arg3Column;
- this.outputColumn = outputColumn;
- }
-
- public <ClassName>() {
- }
-
- @Override
- public void evaluate(VectorizedRowBatch batch) {
-
- if (childExpressions != null) {
- super.evaluateChildren(batch);
- }
-
- LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
- <InputColumnVectorType> arg2ColVector = (<InputColumnVectorType>) batch.cols[arg2Column];
- <InputColumnVectorType> arg3ColVector = (<InputColumnVectorType>) batch.cols[arg3Column];
- <InputColumnVectorType> outputColVector = (<InputColumnVectorType>) batch.cols[outputColumn];
- int[] sel = batch.selected;
- boolean[] outputIsNull = outputColVector.isNull;
- outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls;
- outputColVector.isRepeating = false; // may override later
- int n = batch.size;
- long[] vector1 = arg1ColVector.vector;
- <OperandType>[] vector2 = arg2ColVector.vector;
- <OperandType>[] vector3 = arg3ColVector.vector;
- <OperandType>[] outputVector = outputColVector.vector;
-
- // return immediately if batch is empty
- if (n == 0) {
- return;
- }
-
- /* All the code paths below propagate nulls even if neither arg2 nor arg3
- * have nulls. This is to reduce the number of code paths and shorten the
- * code, at the expense of maybe doing unnecessary work if neither input
- * has nulls. This could be improved in the future by expanding the number
- * of code paths.
- */
- if (arg1ColVector.isRepeating) {
- if (vector1[0] == 1) {
- arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
- } else {
- arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
- }
- return;
- }
-
- // extend any repeating values and noNulls indicator in the inputs
- arg2ColVector.flatten(batch.selectedInUse, sel, n);
- arg3ColVector.flatten(batch.selectedInUse, sel, n);
-
- if (arg1ColVector.noNulls) {
- if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- outputVector[i] = (vector1[i] == 1 ? vector2[i] : vector3[i]);
- outputIsNull[i] = (vector1[i] == 1 ?
- arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
- }
- } else {
- for(int i = 0; i != n; i++) {
- outputVector[i] = (vector1[i] == 1 ? vector2[i] : vector3[i]);
- outputIsNull[i] = (vector1[i] == 1 ?
- arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
- }
- }
- } else /* there are nulls */ {
- if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
- vector2[i] : vector3[i]);
- outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
- arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
- }
- } else {
- for(int i = 0; i != n; i++) {
- outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
- vector2[i] : vector3[i]);
- outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
- arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
- }
- }
- }
-
- // restore repeating and no nulls indicators
- arg2ColVector.unFlatten();
- arg3ColVector.unFlatten();
- }
-
- @Override
- public int getOutputColumn() {
- return outputColumn;
- }
-
- @Override
- public String getOutputType() {
- return "<OperandType>";
- }
-
- public int getArg1Column() {
- return arg1Column;
- }
-
- public void setArg1Column(int colNum) {
- this.arg1Column = colNum;
- }
-
- public int getArg2Column() {
- return arg2Column;
- }
-
- public void setArg2Column(int colNum) {
- this.arg2Column = colNum;
- }
-
- public int getArg3Column() {
- return arg3Column;
- }
-
- public void setArg3Column(int colNum) {
- this.arg3Column = colNum;
- }
-
- public void setOutputColumn(int outputColumn) {
- this.outputColumn = outputColumn;
- }
-
- @Override
- public VectorExpressionDescriptor.Descriptor getDescriptor() {
- return (new VectorExpressionDescriptor.Builder())
- .setMode(
- VectorExpressionDescriptor.Mode.PROJECTION)
- .setNumArguments(3)
- .setArgumentTypes(
- VectorExpressionDescriptor.ArgumentType.getType("long"),
- VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType>"),
- VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType>"))
- .setInputExpressionTypes(
- VectorExpressionDescriptor.InputExpressionType.COLUMN,
- VectorExpressionDescriptor.InputExpressionType.COLUMN,
- VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
- }
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/b98a60df/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java
new file mode 100644
index 0000000..71c99f6
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java
@@ -0,0 +1,167 @@
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * Compute IF(expr1, expr2, expr3) for 3 input column expressions.
+ * The first is always a boolean (LongColumnVector).
+ * The second and third are double columns or double expression results.
+ */
+public class IfExprDoubleColumnDoubleColumn extends VectorExpression {
+
+ private static final long serialVersionUID = 1L;
+
+ private int arg1Column, arg2Column, arg3Column;
+ private int outputColumn;
+
+ public IfExprDoubleColumnDoubleColumn(int arg1Column, int arg2Column, int arg3Column, int outputColumn) {
+ this.arg1Column = arg1Column;
+ this.arg2Column = arg2Column;
+ this.arg3Column = arg3Column;
+ this.outputColumn = outputColumn;
+ }
+
+ public IfExprDoubleColumnDoubleColumn() {
+ }
+
+ @Override
+ public void evaluate(VectorizedRowBatch batch) {
+
+ if (childExpressions != null) {
+ super.evaluateChildren(batch);
+ }
+
+ LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
+ DoubleColumnVector arg2ColVector = (DoubleColumnVector) batch.cols[arg2Column];
+ DoubleColumnVector arg3ColVector = (DoubleColumnVector) batch.cols[arg3Column];
+ DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumn];
+ int[] sel = batch.selected;
+ boolean[] outputIsNull = outputColVector.isNull;
+ outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls;
+ outputColVector.isRepeating = false; // may override later
+ int n = batch.size;
+ long[] vector1 = arg1ColVector.vector;
+ double[] vector2 = arg2ColVector.vector;
+ double[] vector3 = arg3ColVector.vector;
+ double[] outputVector = outputColVector.vector;
+
+ // return immediately if batch is empty
+ if (n == 0) {
+ return;
+ }
+
+ /* All the code paths below propagate nulls even if neither arg2 nor arg3
+ * have nulls. This is to reduce the number of code paths and shorten the
+ * code, at the expense of maybe doing unnecessary work if neither input
+ * has nulls. This could be improved in the future by expanding the number
+ * of code paths.
+ */
+ if (arg1ColVector.isRepeating) {
+ if (vector1[0] == 1) {
+ arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
+ } else {
+ arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
+ }
+ return;
+ }
+
+ // extend any repeating values and noNulls indicator in the inputs
+ arg2ColVector.flatten(batch.selectedInUse, sel, n);
+ arg3ColVector.flatten(batch.selectedInUse, sel, n);
+
+ if (arg1ColVector.noNulls) {
+ if (batch.selectedInUse) {
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ outputVector[i] = (vector1[i] == 1 ? vector2[i] : vector3[i]);
+ outputIsNull[i] = (vector1[i] == 1 ?
+ arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
+ }
+ } else {
+ for(int i = 0; i != n; i++) {
+ outputVector[i] = (vector1[i] == 1 ? vector2[i] : vector3[i]);
+ outputIsNull[i] = (vector1[i] == 1 ?
+ arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
+ }
+ }
+ } else /* there are nulls */ {
+ if (batch.selectedInUse) {
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
+ vector2[i] : vector3[i]);
+ outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
+ arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
+ }
+ } else {
+ for(int i = 0; i != n; i++) {
+ outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
+ vector2[i] : vector3[i]);
+ outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
+ arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
+ }
+ }
+ }
+
+ // restore repeating and no nulls indicators
+ arg2ColVector.unFlatten();
+ arg3ColVector.unFlatten();
+ }
+
+ @Override
+ public int getOutputColumn() {
+ return outputColumn;
+ }
+
+ @Override
+ public String getOutputType() {
+ return "double";
+ }
+
+ public int getArg1Column() {
+ return arg1Column;
+ }
+
+ public void setArg1Column(int colNum) {
+ this.arg1Column = colNum;
+ }
+
+ public int getArg2Column() {
+ return arg2Column;
+ }
+
+ public void setArg2Column(int colNum) {
+ this.arg2Column = colNum;
+ }
+
+ public int getArg3Column() {
+ return arg3Column;
+ }
+
+ public void setArg3Column(int colNum) {
+ this.arg3Column = colNum;
+ }
+
+ public void setOutputColumn(int outputColumn) {
+ this.outputColumn = outputColumn;
+ }
+
+ @Override
+ public VectorExpressionDescriptor.Descriptor getDescriptor() {
+ return (new VectorExpressionDescriptor.Builder())
+ .setMode(
+ VectorExpressionDescriptor.Mode.PROJECTION)
+ .setNumArguments(3)
+ .setArgumentTypes(
+ VectorExpressionDescriptor.ArgumentType.getType("long"),
+ VectorExpressionDescriptor.ArgumentType.getType("double"),
+ VectorExpressionDescriptor.ArgumentType.getType("double"))
+ .setInputExpressionTypes(
+ VectorExpressionDescriptor.InputExpressionType.COLUMN,
+ VectorExpressionDescriptor.InputExpressionType.COLUMN,
+ VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/b98a60df/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java
new file mode 100644
index 0000000..00485a2
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java
@@ -0,0 +1,166 @@
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * Compute IF(expr1, expr2, expr3) for 3 input column expressions.
+ * The first is always a boolean (LongColumnVector).
+ * The second and third are long columns or long expression results.
+ */
+public class IfExprLongColumnLongColumn extends VectorExpression {
+
+ private static final long serialVersionUID = 1L;
+
+ private int arg1Column, arg2Column, arg3Column;
+ private int outputColumn;
+
+ public IfExprLongColumnLongColumn(int arg1Column, int arg2Column, int arg3Column, int outputColumn) {
+ this.arg1Column = arg1Column;
+ this.arg2Column = arg2Column;
+ this.arg3Column = arg3Column;
+ this.outputColumn = outputColumn;
+ }
+
+ public IfExprLongColumnLongColumn() {
+ }
+
+ @Override
+ public void evaluate(VectorizedRowBatch batch) {
+
+ if (childExpressions != null) {
+ super.evaluateChildren(batch);
+ }
+
+ LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
+ LongColumnVector arg2ColVector = (LongColumnVector) batch.cols[arg2Column];
+ LongColumnVector arg3ColVector = (LongColumnVector) batch.cols[arg3Column];
+ LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn];
+ int[] sel = batch.selected;
+ boolean[] outputIsNull = outputColVector.isNull;
+ outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls;
+ outputColVector.isRepeating = false; // may override later
+ int n = batch.size;
+ long[] vector1 = arg1ColVector.vector;
+ long[] vector2 = arg2ColVector.vector;
+ long[] vector3 = arg3ColVector.vector;
+ long[] outputVector = outputColVector.vector;
+
+ // return immediately if batch is empty
+ if (n == 0) {
+ return;
+ }
+
+ /* All the code paths below propagate nulls even if neither arg2 nor arg3
+ * have nulls. This is to reduce the number of code paths and shorten the
+ * code, at the expense of maybe doing unnecessary work if neither input
+ * has nulls. This could be improved in the future by expanding the number
+ * of code paths.
+ */
+ if (arg1ColVector.isRepeating) {
+ if (vector1[0] == 1) {
+ arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
+ } else {
+ arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
+ }
+ return;
+ }
+
+ // extend any repeating values and noNulls indicator in the inputs
+ arg2ColVector.flatten(batch.selectedInUse, sel, n);
+ arg3ColVector.flatten(batch.selectedInUse, sel, n);
+
+ if (arg1ColVector.noNulls) {
+ if (batch.selectedInUse) {
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ outputVector[i] = (~(vector1[i] - 1) & vector2[i]) | ((vector1[i] - 1) & vector3[i]);
+ outputIsNull[i] = (vector1[i] == 1 ?
+ arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
+ }
+ } else {
+ for(int i = 0; i != n; i++) {
+ outputVector[i] = (~(vector1[i] - 1) & vector2[i]) | ((vector1[i] - 1) & vector3[i]);
+ outputIsNull[i] = (vector1[i] == 1 ?
+ arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
+ }
+ }
+ } else /* there are nulls */ {
+ if (batch.selectedInUse) {
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
+ vector2[i] : vector3[i]);
+ outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
+ arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
+ }
+ } else {
+ for(int i = 0; i != n; i++) {
+ outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
+ vector2[i] : vector3[i]);
+ outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
+ arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
+ }
+ }
+ }
+
+ // restore repeating and no nulls indicators
+ arg2ColVector.unFlatten();
+ arg3ColVector.unFlatten();
+ }
+
+ @Override
+ public int getOutputColumn() {
+ return outputColumn;
+ }
+
+ @Override
+ public String getOutputType() {
+ return "long";
+ }
+
+ public int getArg1Column() {
+ return arg1Column;
+ }
+
+ public void setArg1Column(int colNum) {
+ this.arg1Column = colNum;
+ }
+
+ public int getArg2Column() {
+ return arg2Column;
+ }
+
+ public void setArg2Column(int colNum) {
+ this.arg2Column = colNum;
+ }
+
+ public int getArg3Column() {
+ return arg3Column;
+ }
+
+ public void setArg3Column(int colNum) {
+ this.arg3Column = colNum;
+ }
+
+ public void setOutputColumn(int outputColumn) {
+ this.outputColumn = outputColumn;
+ }
+
+ @Override
+ public VectorExpressionDescriptor.Descriptor getDescriptor() {
+ return (new VectorExpressionDescriptor.Builder())
+ .setMode(
+ VectorExpressionDescriptor.Mode.PROJECTION)
+ .setNumArguments(3)
+ .setArgumentTypes(
+ VectorExpressionDescriptor.ArgumentType.getType("long"),
+ VectorExpressionDescriptor.ArgumentType.getType("int_datetime_interval_family"),
+ VectorExpressionDescriptor.ArgumentType.getType("int_datetime_interval_family"))
+ .setInputExpressionTypes(
+ VectorExpressionDescriptor.InputExpressionType.COLUMN,
+ VectorExpressionDescriptor.InputExpressionType.COLUMN,
+ VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
+ }
+}
\ No newline at end of file
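The no-null inner loops in evaluate() above use a branchless select: because the boolean input is constrained to 0 or 1, (vector1[i] - 1) is an all-zeros or all-ones bit mask, so masking picks the THEN or ELSE operand without a conditional jump per row. A self-contained sketch of the trick (hypothetical class, not part of the commit; assumes the flag is exactly 0 or 1):

    public class BranchlessSelectDemo {
      // Returns b when flag == 1 and c when flag == 0, with no branch.
      static long select(long flag, long b, long c) {
        long mask = flag - 1L; // 1 -> 0x0000...0000, 0 -> 0xFFFF...FFFF
        return (~mask & b) | (mask & c);
      }

      public static void main(String[] args) {
        System.out.println(select(1L, 10L, 20L)); // prints 10
        System.out.println(select(0L, 10L, 20L)); // prints 20
      }
    }

The null-aware path falls back to ternaries because a NULL boolean must route to the ELSE value regardless of whatever stale bits sit in vector1.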
http://git-wip-us.apache.org/repos/asf/hive/blob/b98a60df/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java
index 568fd46..b5e2837 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java
@@ -28,8 +28,6 @@ import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongColumnLongColumn;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleColumnDoubleColumn;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongColumnLongScalar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleColumnDoubleScalar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleColumnLongScalar;
@@ -42,6 +40,8 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongScalarLon
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleScalarDoubleScalar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleScalarLongScalar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongScalarDoubleScalar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleColumnDoubleColumn;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprLongColumnLongColumn;
import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumnStringGroupColumn;
import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumnStringScalar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumnCharScalar;
http://git-wip-us.apache.org/repos/asf/hive/blob/b98a60df/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
index 8470c47..704c654 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
@@ -42,6 +42,8 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.FuncLogWithBaseDoubleTo
import org.apache.hadoop.hive.ql.exec.vector.expressions.FuncLogWithBaseLongToDouble;
import org.apache.hadoop.hive.ql.exec.vector.expressions.FuncPowerDoubleToDouble;
import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprCharScalarStringGroupColumn;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleColumnDoubleColumn;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprLongColumnLongColumn;
import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumnCharScalar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumnStringGroupColumn;
import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumnStringScalar;
@@ -68,11 +70,9 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFYearLong;
import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterStringColumnInList;
import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterLongColumnInList;
import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterDoubleColumnInList;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongColumnLongColumn;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongColumnLongScalar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongScalarLongScalar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongScalarLongColumn;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleColumnDoubleColumn;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleColumnDoubleScalar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleScalarDoubleColumn;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleScalarDoubleScalar;
http://git-wip-us.apache.org/repos/asf/hive/blob/b98a60df/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorConditionalExpressions.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorConditionalExpressions.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorConditionalExpressions.java
index a711b55..47ebe57 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorConditionalExpressions.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorConditionalExpressions.java
@@ -24,8 +24,7 @@ import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongColumnLongColumn;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleColumnDoubleColumn;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleColumnDoubleColumn;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongColumnLongScalar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongScalarLongColumn;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongScalarLongScalar;
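The import swaps in GenericUDFIf.java and the two test classes track a package move: IfExprLongColumnLongColumn and IfExprDoubleColumnDoubleColumn are now hand-written classes under ...vector.expressions rather than templated ones under ...vector.expressions.gen. GenericUDFIf binds to its vectorized implementations through Hive's @VectorizedExpressions annotation, which is why only the imports feeding that class list change. A hedged sketch of the binding pattern (hypothetical UDF with an abridged candidate list; not the commit's code):

    import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
    import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
    import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleColumnDoubleColumn;
    import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprLongColumnLongColumn;
    import org.apache.hadoop.hive.ql.metadata.HiveException;
    import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;

    // The annotation lists candidate vector expressions; during planning the
    // candidate whose getDescriptor() matches the argument types is selected.
    @VectorizedExpressions({IfExprLongColumnLongColumn.class,
        IfExprDoubleColumnDoubleColumn.class})
    public class MyIfSketch extends GenericUDF {
      @Override
      public ObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException {
        return args[1]; // placeholder: report the THEN branch's inspector
      }
      @Override
      public Object evaluate(DeferredObject[] args) throws HiveException {
        return null; // placeholder row-mode body; the vector path bypasses it
      }
      @Override
      public String getDisplayString(String[] children) {
        return "my_if_sketch";
      }
    }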
[21/50] [abbrv] hive git commit: HIVE-11889: Add unit test for
HIVE-11449 (Wei Zheng via Jason Dere)
Posted by xu...@apache.org.
HIVE-11889: Add unit test for HIVE-11449 (Wei Zheng via Jason Dere)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4ff5b258
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4ff5b258
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4ff5b258
Branch: refs/heads/beeline-cli
Commit: 4ff5b258c93a1996f320cab19c58465a2aab38bc
Parents: 262bae6
Author: Jason Dere <jd...@hortonworks.com>
Authored: Mon Sep 21 10:45:37 2015 -0700
Committer: Jason Dere <jd...@hortonworks.com>
Committed: Mon Sep 21 10:45:37 2015 -0700
----------------------------------------------------------------------
.../persistence/TestBytesBytesMultiHashMap.java | 3 ++
.../ql/exec/persistence/TestHashPartition.java | 29 ++++++++++++++++++++
2 files changed, 32 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/4ff5b258/ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestBytesBytesMultiHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestBytesBytesMultiHashMap.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestBytesBytesMultiHashMap.java
index c417b6f..aed9214 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestBytesBytesMultiHashMap.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestBytesBytesMultiHashMap.java
@@ -43,6 +43,9 @@ public class TestBytesBytesMultiHashMap {
assertEquals(CAPACITY, map.getCapacity());
map = new BytesBytesMultiHashMap(9, LOAD_FACTOR, WB_SIZE);
assertEquals(16, map.getCapacity());
+
+ // Verify that construction does not fail when maxProbeSize is a very small value
+ BytesBytesMultiHashMap map1 = new BytesBytesMultiHashMap(1024, (float) 0.75, 524288, 1);
}
@Test
http://git-wip-us.apache.org/repos/asf/hive/blob/4ff5b258/ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestHashPartition.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestHashPartition.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestHashPartition.java
new file mode 100644
index 0000000..a6e52bd
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestHashPartition.java
@@ -0,0 +1,29 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.persistence;
+
+import org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer.HashPartition;
+import org.junit.Test;
+
+public class TestHashPartition {
+
+ @Test
+ public void testHashPartition() throws Exception {
+ HashPartition hashPartition = new HashPartition(1024, (float) 0.75, 524288, 1, true);
+ }
+}
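Both additions follow the same guard pattern: constructing the structure with the final argument, maxProbeSize, set to a tiny value (1) used to fail before HIVE-11449, so merely reaching the end of the test is the assertion. A variant that surfaces the guarantee explicitly, assuming the four-argument constructor shown above (hypothetical test class, not part of the commit):

    package org.apache.hadoop.hive.ql.exec.persistence;

    import static org.junit.Assert.assertNotNull;

    import org.junit.Test;

    public class TestSmallMaxProbeSize {

      @Test
      public void testTinyMaxProbeSizeDoesNotThrow() throws Exception {
        // A maxProbeSize far below the capacity used to break hash table setup.
        BytesBytesMultiHashMap map =
            new BytesBytesMultiHashMap(1024, (float) 0.75, 524288, 1);
        assertNotNull(map);
      }
    }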
[36/50] [abbrv] hive git commit: HIVE-11468: Vectorize Struct IN()
clauses (Matt McCline, via Gopal V)
Posted by xu...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/7cfe3743/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java.orig
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java.orig b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java.orig
new file mode 100644
index 0000000..0d4c1d8
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java.orig
@@ -0,0 +1,1744 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.physical;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.Stack;
+import java.util.regex.Pattern;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
+import org.apache.hadoop.hive.ql.exec.*;
+import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
+import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey;
+import org.apache.hadoop.hive.ql.exec.spark.SparkTask;
+import org.apache.hadoop.hive.ql.exec.tez.TezTask;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerBigOnlyLongOperator;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerBigOnlyMultiKeyOperator;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerBigOnlyStringOperator;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerLongOperator;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerMultiKeyOperator;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerStringOperator;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLeftSemiLongOperator;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLeftSemiMultiKeyOperator;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLeftSemiStringOperator;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterLongOperator;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterMultiKeyOperator;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterStringOperator;
+import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator;
+import org.apache.hadoop.hive.ql.exec.vector.VectorSMBMapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
+import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
+import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
+import org.apache.hadoop.hive.ql.lib.Dispatcher;
+import org.apache.hadoop.hive.ql.lib.GraphWalker;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.lib.PreOrderOnceWalker;
+import org.apache.hadoop.hive.ql.lib.PreOrderWalker;
+import org.apache.hadoop.hive.ql.lib.Rule;
+import org.apache.hadoop.hive.ql.lib.RuleRegExp;
+import org.apache.hadoop.hive.ql.lib.TaskGraphWalker;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.AbstractOperatorDesc;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.BaseWork;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.GroupByDesc;
+import org.apache.hadoop.hive.ql.plan.JoinDesc;
+import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
+import org.apache.hadoop.hive.ql.plan.MapWork;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+import org.apache.hadoop.hive.ql.plan.ReduceWork;
+import org.apache.hadoop.hive.ql.plan.SMBJoinDesc;
+import org.apache.hadoop.hive.ql.plan.SparkHashTableSinkDesc;
+import org.apache.hadoop.hive.ql.plan.SparkWork;
+import org.apache.hadoop.hive.ql.plan.TableScanDesc;
+import org.apache.hadoop.hive.ql.plan.TezWork;
+import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc;
+import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc;
+import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType;
+import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType;
+import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind;
+import org.apache.hadoop.hive.ql.plan.api.OperatorType;
+import org.apache.hadoop.hive.ql.udf.UDFAcos;
+import org.apache.hadoop.hive.ql.udf.UDFAsin;
+import org.apache.hadoop.hive.ql.udf.UDFAtan;
+import org.apache.hadoop.hive.ql.udf.UDFBin;
+import org.apache.hadoop.hive.ql.udf.UDFConv;
+import org.apache.hadoop.hive.ql.udf.UDFCos;
+import org.apache.hadoop.hive.ql.udf.UDFDayOfMonth;
+import org.apache.hadoop.hive.ql.udf.UDFDegrees;
+import org.apache.hadoop.hive.ql.udf.UDFExp;
+import org.apache.hadoop.hive.ql.udf.UDFHex;
+import org.apache.hadoop.hive.ql.udf.UDFHour;
+import org.apache.hadoop.hive.ql.udf.UDFLength;
+import org.apache.hadoop.hive.ql.udf.UDFLike;
+import org.apache.hadoop.hive.ql.udf.UDFLn;
+import org.apache.hadoop.hive.ql.udf.UDFLog;
+import org.apache.hadoop.hive.ql.udf.UDFLog10;
+import org.apache.hadoop.hive.ql.udf.UDFLog2;
+import org.apache.hadoop.hive.ql.udf.UDFMinute;
+import org.apache.hadoop.hive.ql.udf.UDFMonth;
+import org.apache.hadoop.hive.ql.udf.UDFRadians;
+import org.apache.hadoop.hive.ql.udf.UDFRand;
+import org.apache.hadoop.hive.ql.udf.UDFSecond;
+import org.apache.hadoop.hive.ql.udf.UDFSign;
+import org.apache.hadoop.hive.ql.udf.UDFSin;
+import org.apache.hadoop.hive.ql.udf.UDFSqrt;
+import org.apache.hadoop.hive.ql.udf.UDFSubstr;
+import org.apache.hadoop.hive.ql.udf.UDFTan;
+import org.apache.hadoop.hive.ql.udf.UDFToBoolean;
+import org.apache.hadoop.hive.ql.udf.UDFToByte;
+import org.apache.hadoop.hive.ql.udf.UDFToDouble;
+import org.apache.hadoop.hive.ql.udf.UDFToFloat;
+import org.apache.hadoop.hive.ql.udf.UDFToInteger;
+import org.apache.hadoop.hive.ql.udf.UDFToLong;
+import org.apache.hadoop.hive.ql.udf.UDFToShort;
+import org.apache.hadoop.hive.ql.udf.UDFToString;
+import org.apache.hadoop.hive.ql.udf.UDFWeekOfYear;
+import org.apache.hadoop.hive.ql.udf.UDFYear;
+import org.apache.hadoop.hive.ql.udf.generic.*;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+
+public class Vectorizer implements PhysicalPlanResolver {
+
+ protected static transient final Log LOG = LogFactory.getLog(Vectorizer.class);
+
+ Pattern supportedDataTypesPattern;
+ List<Task<? extends Serializable>> vectorizableTasks =
+ new ArrayList<Task<? extends Serializable>>();
+ Set<Class<?>> supportedGenericUDFs = new HashSet<Class<?>>();
+
+ Set<String> supportedAggregationUdfs = new HashSet<String>();
+
+ private HiveConf hiveConf;
+
+ public Vectorizer() {
+
+ StringBuilder patternBuilder = new StringBuilder();
+ patternBuilder.append("int");
+ patternBuilder.append("|smallint");
+ patternBuilder.append("|tinyint");
+ patternBuilder.append("|bigint");
+ patternBuilder.append("|integer");
+ patternBuilder.append("|long");
+ patternBuilder.append("|short");
+ patternBuilder.append("|timestamp");
+ patternBuilder.append("|" + serdeConstants.INTERVAL_YEAR_MONTH_TYPE_NAME);
+ patternBuilder.append("|" + serdeConstants.INTERVAL_DAY_TIME_TYPE_NAME);
+ patternBuilder.append("|boolean");
+ patternBuilder.append("|binary");
+ patternBuilder.append("|string");
+ patternBuilder.append("|byte");
+ patternBuilder.append("|float");
+ patternBuilder.append("|double");
+ patternBuilder.append("|date");
+ patternBuilder.append("|void");
+
+ // Decimal types can be specified with different precision and scales e.g. decimal(10,5),
+ // as opposed to other data types which can be represented by constant strings.
+ // The regex matches only the "decimal" prefix of the type.
+ patternBuilder.append("|decimal.*");
+
+ // CHAR and VARCHAR types can be specified with maximum length.
+ patternBuilder.append("|char.*");
+ patternBuilder.append("|varchar.*");
+
+ supportedDataTypesPattern = Pattern.compile(patternBuilder.toString());
+
+ supportedGenericUDFs.add(GenericUDFOPPlus.class);
+ supportedGenericUDFs.add(GenericUDFOPMinus.class);
+ supportedGenericUDFs.add(GenericUDFOPMultiply.class);
+ supportedGenericUDFs.add(GenericUDFOPDivide.class);
+ supportedGenericUDFs.add(GenericUDFOPMod.class);
+ supportedGenericUDFs.add(GenericUDFOPNegative.class);
+ supportedGenericUDFs.add(GenericUDFOPPositive.class);
+
+ supportedGenericUDFs.add(GenericUDFOPEqualOrLessThan.class);
+ supportedGenericUDFs.add(GenericUDFOPEqualOrGreaterThan.class);
+ supportedGenericUDFs.add(GenericUDFOPGreaterThan.class);
+ supportedGenericUDFs.add(GenericUDFOPLessThan.class);
+ supportedGenericUDFs.add(GenericUDFOPNot.class);
+ supportedGenericUDFs.add(GenericUDFOPNotEqual.class);
+ supportedGenericUDFs.add(GenericUDFOPNotNull.class);
+ supportedGenericUDFs.add(GenericUDFOPNull.class);
+ supportedGenericUDFs.add(GenericUDFOPOr.class);
+ supportedGenericUDFs.add(GenericUDFOPAnd.class);
+ supportedGenericUDFs.add(GenericUDFOPEqual.class);
+ supportedGenericUDFs.add(UDFLength.class);
+
+ supportedGenericUDFs.add(UDFYear.class);
+ supportedGenericUDFs.add(UDFMonth.class);
+ supportedGenericUDFs.add(UDFDayOfMonth.class);
+ supportedGenericUDFs.add(UDFHour.class);
+ supportedGenericUDFs.add(UDFMinute.class);
+ supportedGenericUDFs.add(UDFSecond.class);
+ supportedGenericUDFs.add(UDFWeekOfYear.class);
+ supportedGenericUDFs.add(GenericUDFToUnixTimeStamp.class);
+
+ supportedGenericUDFs.add(GenericUDFDateAdd.class);
+ supportedGenericUDFs.add(GenericUDFDateSub.class);
+ supportedGenericUDFs.add(GenericUDFDate.class);
+ supportedGenericUDFs.add(GenericUDFDateDiff.class);
+
+ supportedGenericUDFs.add(UDFLike.class);
+ supportedGenericUDFs.add(GenericUDFRegExp.class);
+ supportedGenericUDFs.add(UDFSubstr.class);
+ supportedGenericUDFs.add(GenericUDFLTrim.class);
+ supportedGenericUDFs.add(GenericUDFRTrim.class);
+ supportedGenericUDFs.add(GenericUDFTrim.class);
+
+ supportedGenericUDFs.add(UDFSin.class);
+ supportedGenericUDFs.add(UDFCos.class);
+ supportedGenericUDFs.add(UDFTan.class);
+ supportedGenericUDFs.add(UDFAsin.class);
+ supportedGenericUDFs.add(UDFAcos.class);
+ supportedGenericUDFs.add(UDFAtan.class);
+ supportedGenericUDFs.add(UDFDegrees.class);
+ supportedGenericUDFs.add(UDFRadians.class);
+ supportedGenericUDFs.add(GenericUDFFloor.class);
+ supportedGenericUDFs.add(GenericUDFCeil.class);
+ supportedGenericUDFs.add(UDFExp.class);
+ supportedGenericUDFs.add(UDFLn.class);
+ supportedGenericUDFs.add(UDFLog2.class);
+ supportedGenericUDFs.add(UDFLog10.class);
+ supportedGenericUDFs.add(UDFLog.class);
+ supportedGenericUDFs.add(GenericUDFPower.class);
+ supportedGenericUDFs.add(GenericUDFRound.class);
+ supportedGenericUDFs.add(GenericUDFBRound.class);
+ supportedGenericUDFs.add(GenericUDFPosMod.class);
+ supportedGenericUDFs.add(UDFSqrt.class);
+ supportedGenericUDFs.add(UDFSign.class);
+ supportedGenericUDFs.add(UDFRand.class);
+ supportedGenericUDFs.add(UDFBin.class);
+ supportedGenericUDFs.add(UDFHex.class);
+ supportedGenericUDFs.add(UDFConv.class);
+
+ supportedGenericUDFs.add(GenericUDFLower.class);
+ supportedGenericUDFs.add(GenericUDFUpper.class);
+ supportedGenericUDFs.add(GenericUDFConcat.class);
+ supportedGenericUDFs.add(GenericUDFAbs.class);
+ supportedGenericUDFs.add(GenericUDFBetween.class);
+ supportedGenericUDFs.add(GenericUDFIn.class);
+ supportedGenericUDFs.add(GenericUDFCase.class);
+ supportedGenericUDFs.add(GenericUDFWhen.class);
+ supportedGenericUDFs.add(GenericUDFCoalesce.class);
+ supportedGenericUDFs.add(GenericUDFElt.class);
+ supportedGenericUDFs.add(GenericUDFInitCap.class);
+
+ // For type casts
+ supportedGenericUDFs.add(UDFToLong.class);
+ supportedGenericUDFs.add(UDFToInteger.class);
+ supportedGenericUDFs.add(UDFToShort.class);
+ supportedGenericUDFs.add(UDFToByte.class);
+ supportedGenericUDFs.add(UDFToBoolean.class);
+ supportedGenericUDFs.add(UDFToFloat.class);
+ supportedGenericUDFs.add(UDFToDouble.class);
+ supportedGenericUDFs.add(UDFToString.class);
+ supportedGenericUDFs.add(GenericUDFTimestamp.class);
+ supportedGenericUDFs.add(GenericUDFToDecimal.class);
+ supportedGenericUDFs.add(GenericUDFToDate.class);
+ supportedGenericUDFs.add(GenericUDFToChar.class);
+ supportedGenericUDFs.add(GenericUDFToVarchar.class);
+ supportedGenericUDFs.add(GenericUDFToIntervalYearMonth.class);
+ supportedGenericUDFs.add(GenericUDFToIntervalDayTime.class);
+
+ // For conditional expressions
+ supportedGenericUDFs.add(GenericUDFIf.class);
+
+ supportedAggregationUdfs.add("min");
+ supportedAggregationUdfs.add("max");
+ supportedAggregationUdfs.add("count");
+ supportedAggregationUdfs.add("sum");
+ supportedAggregationUdfs.add("avg");
+ supportedAggregationUdfs.add("variance");
+ supportedAggregationUdfs.add("var_pop");
+ supportedAggregationUdfs.add("var_samp");
+ supportedAggregationUdfs.add("std");
+ supportedAggregationUdfs.add("stddev");
+ supportedAggregationUdfs.add("stddev_pop");
+ supportedAggregationUdfs.add("stddev_samp");
+ }
+
+ class VectorizationDispatcher implements Dispatcher {
+
+ private List<String> reduceColumnNames;
+ private List<TypeInfo> reduceTypeInfos;
+
+ public VectorizationDispatcher(PhysicalContext physicalContext) {
+ reduceColumnNames = null;
+ reduceTypeInfos = null;
+ }
+
+ @Override
+ public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs)
+ throws SemanticException {
+ Task<? extends Serializable> currTask = (Task<? extends Serializable>) nd;
+ if (currTask instanceof MapRedTask) {
+ convertMapWork(((MapRedTask) currTask).getWork().getMapWork(), false);
+ } else if (currTask instanceof TezTask) {
+ TezWork work = ((TezTask) currTask).getWork();
+ for (BaseWork w: work.getAllWork()) {
+ if (w instanceof MapWork) {
+ convertMapWork((MapWork) w, true);
+ } else if (w instanceof ReduceWork) {
+ // We are only vectorizing Reduce under Tez.
+ if (HiveConf.getBoolVar(hiveConf,
+ HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_ENABLED)) {
+ convertReduceWork((ReduceWork) w, true);
+ }
+ }
+ }
+ } else if (currTask instanceof SparkTask) {
+ SparkWork sparkWork = (SparkWork) currTask.getWork();
+ for (BaseWork baseWork : sparkWork.getAllWork()) {
+ if (baseWork instanceof MapWork) {
+ convertMapWork((MapWork) baseWork, false);
+ } else if (baseWork instanceof ReduceWork
+ && HiveConf.getBoolVar(hiveConf,
+ HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_ENABLED)) {
+ convertReduceWork((ReduceWork) baseWork, false);
+ }
+ }
+ }
+ return null;
+ }
+
+ private void convertMapWork(MapWork mapWork, boolean isTez) throws SemanticException {
+ boolean ret = validateMapWork(mapWork, isTez);
+ if (ret) {
+ vectorizeMapWork(mapWork, isTez);
+ }
+ }
+
+ private void addMapWorkRules(Map<Rule, NodeProcessor> opRules, NodeProcessor np) {
+ opRules.put(new RuleRegExp("R1", TableScanOperator.getOperatorName() + ".*"
+ + FileSinkOperator.getOperatorName()), np);
+ opRules.put(new RuleRegExp("R2", TableScanOperator.getOperatorName() + ".*"
+ + ReduceSinkOperator.getOperatorName()), np);
+ }
+
+ private boolean validateMapWork(MapWork mapWork, boolean isTez) throws SemanticException {
+ LOG.info("Validating MapWork...");
+
+ // Eliminate MR plans with more than one TableScanOperator.
+ LinkedHashMap<String, Operator<? extends OperatorDesc>> aliasToWork = mapWork.getAliasToWork();
+ if ((aliasToWork == null) || (aliasToWork.size() == 0)) {
+ return false;
+ }
+ int tableScanCount = 0;
+ for (Operator<?> op : aliasToWork.values()) {
+ if (op == null) {
+ LOG.warn("Map work has invalid aliases to work with. Fail validation!");
+ return false;
+ }
+ if (op instanceof TableScanOperator) {
+ tableScanCount++;
+ }
+ }
+ if (tableScanCount > 1) {
+ LOG.warn("Map work has more than 1 TableScanOperator aliases to work with. Fail validation!");
+ return false;
+ }
+
+ // Validate the input format
+ for (String path : mapWork.getPathToPartitionInfo().keySet()) {
+ PartitionDesc pd = mapWork.getPathToPartitionInfo().get(path);
+ List<Class<?>> interfaceList =
+ Arrays.asList(pd.getInputFileFormatClass().getInterfaces());
+ if (!interfaceList.contains(VectorizedInputFormatInterface.class)) {
+ LOG.info("Input format: " + pd.getInputFileFormatClassName()
+ + ", doesn't provide vectorized input");
+ return false;
+ }
+ }
+ Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
+ MapWorkValidationNodeProcessor vnp = new MapWorkValidationNodeProcessor(mapWork, isTez);
+ addMapWorkRules(opRules, vnp);
+ Dispatcher disp = new DefaultRuleDispatcher(vnp, opRules, null);
+ GraphWalker ogw = new DefaultGraphWalker(disp);
+
+ // iterate over the mapper operator tree
+ ArrayList<Node> topNodes = new ArrayList<Node>();
+ topNodes.addAll(mapWork.getAliasToWork().values());
+ HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
+ ogw.startWalking(topNodes, nodeOutput);
+ for (Node n : nodeOutput.keySet()) {
+ if (nodeOutput.get(n) != null) {
+ if (!((Boolean)nodeOutput.get(n)).booleanValue()) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
+ private void vectorizeMapWork(MapWork mapWork, boolean isTez) throws SemanticException {
+ LOG.info("Vectorizing MapWork...");
+ mapWork.setVectorMode(true);
+ Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
+ MapWorkVectorizationNodeProcessor vnp = new MapWorkVectorizationNodeProcessor(mapWork, isTez);
+ addMapWorkRules(opRules, vnp);
+ Dispatcher disp = new DefaultRuleDispatcher(vnp, opRules, null);
+ GraphWalker ogw = new PreOrderOnceWalker(disp);
+ // iterate over the mapper operator tree
+ ArrayList<Node> topNodes = new ArrayList<Node>();
+ topNodes.addAll(mapWork.getAliasToWork().values());
+ HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
+ ogw.startWalking(topNodes, nodeOutput);
+
+ mapWork.setVectorColumnNameMap(vnp.getVectorColumnNameMap());
+ mapWork.setVectorColumnTypeMap(vnp.getVectorColumnTypeMap());
+ mapWork.setVectorScratchColumnTypeMap(vnp.getVectorScratchColumnTypeMap());
+
+ if (LOG.isDebugEnabled()) {
+ debugDisplayAllMaps(mapWork);
+ }
+
+ return;
+ }
+
+ private void convertReduceWork(ReduceWork reduceWork, boolean isTez) throws SemanticException {
+ boolean ret = validateReduceWork(reduceWork);
+ if (ret) {
+ vectorizeReduceWork(reduceWork, isTez);
+ }
+ }
+
+ private boolean getOnlyStructObjectInspectors(ReduceWork reduceWork) throws SemanticException {
+ try {
+ // Check key ObjectInspector.
+ ObjectInspector keyObjectInspector = reduceWork.getKeyObjectInspector();
+ if (keyObjectInspector == null || !(keyObjectInspector instanceof StructObjectInspector)) {
+ return false;
+ }
+ StructObjectInspector keyStructObjectInspector = (StructObjectInspector)keyObjectInspector;
+ List<? extends StructField> keyFields = keyStructObjectInspector.getAllStructFieldRefs();
+
+ // Tez doesn't use tagging...
+ if (reduceWork.getNeedsTagging()) {
+ return false;
+ }
+
+ // Check value ObjectInspector.
+ ObjectInspector valueObjectInspector = reduceWork.getValueObjectInspector();
+ if (valueObjectInspector == null ||
+ !(valueObjectInspector instanceof StructObjectInspector)) {
+ return false;
+ }
+ StructObjectInspector valueStructObjectInspector = (StructObjectInspector)valueObjectInspector;
+ List<? extends StructField> valueFields = valueStructObjectInspector.getAllStructFieldRefs();
+
+ reduceColumnNames = new ArrayList<String>();
+ reduceTypeInfos = new ArrayList<TypeInfo>();
+
+ for (StructField field: keyFields) {
+ reduceColumnNames.add(Utilities.ReduceField.KEY.toString() + "." + field.getFieldName());
+ reduceTypeInfos.add(TypeInfoUtils.getTypeInfoFromTypeString(field.getFieldObjectInspector().getTypeName()));
+ }
+ for (StructField field: valueFields) {
+ reduceColumnNames.add(Utilities.ReduceField.VALUE.toString() + "." + field.getFieldName());
+ reduceTypeInfos.add(TypeInfoUtils.getTypeInfoFromTypeString(field.getFieldObjectInspector().getTypeName()));
+ }
+ } catch (Exception e) {
+ throw new SemanticException(e);
+ }
+ return true;
+ }
+
+ private void addReduceWorkRules(Map<Rule, NodeProcessor> opRules, NodeProcessor np) {
+ opRules.put(new RuleRegExp("R1", GroupByOperator.getOperatorName() + ".*"), np);
+ opRules.put(new RuleRegExp("R2", SelectOperator.getOperatorName() + ".*"), np);
+ }
+
+ private boolean validateReduceWork(ReduceWork reduceWork) throws SemanticException {
+ LOG.info("Validating ReduceWork...");
+
+ // Validate input to ReduceWork.
+ if (!getOnlyStructObjectInspectors(reduceWork)) {
+ return false;
+ }
+ // Now check the reduce operator tree.
+ Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
+ ReduceWorkValidationNodeProcessor vnp = new ReduceWorkValidationNodeProcessor();
+ addReduceWorkRules(opRules, vnp);
+ Dispatcher disp = new DefaultRuleDispatcher(vnp, opRules, null);
+ GraphWalker ogw = new DefaultGraphWalker(disp);
+ // iterate over the reduce operator tree
+ ArrayList<Node> topNodes = new ArrayList<Node>();
+ topNodes.add(reduceWork.getReducer());
+ HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
+ ogw.startWalking(topNodes, nodeOutput);
+ for (Node n : nodeOutput.keySet()) {
+ if (nodeOutput.get(n) != null) {
+ if (!((Boolean)nodeOutput.get(n)).booleanValue()) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
+ private void vectorizeReduceWork(ReduceWork reduceWork, boolean isTez) throws SemanticException {
+ LOG.info("Vectorizing ReduceWork...");
+ reduceWork.setVectorMode(true);
+
+ // For some reason, the DefaultGraphWalker does not descend down from the reducer Operator as
+ // expected. We need to descend down, otherwise it breaks our algorithm that determines
+ // VectorizationContext, so we use PreOrderWalker instead of DefaultGraphWalker.
+ Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
+ ReduceWorkVectorizationNodeProcessor vnp =
+ new ReduceWorkVectorizationNodeProcessor(reduceColumnNames, reduceTypeInfos, isTez);
+ addReduceWorkRules(opRules, vnp);
+ Dispatcher disp = new DefaultRuleDispatcher(vnp, opRules, null);
+ GraphWalker ogw = new PreOrderWalker(disp);
+ // iterate over the reduce operator tree
+ ArrayList<Node> topNodes = new ArrayList<Node>();
+ topNodes.add(reduceWork.getReducer());
+ LOG.info("vectorizeReduceWork reducer Operator: " +
+ reduceWork.getReducer().getName() + "...");
+ HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
+ ogw.startWalking(topNodes, nodeOutput);
+
+ // Necessary since we are vectorizing the root operator in reduce.
+ reduceWork.setReducer(vnp.getRootVectorOp());
+
+ reduceWork.setVectorColumnNameMap(vnp.getVectorColumnNameMap());
+ reduceWork.setVectorColumnTypeMap(vnp.getVectorColumnTypeMap());
+ reduceWork.setVectorScratchColumnTypeMap(vnp.getVectorScratchColumnTypeMap());
+
+ if (LOG.isDebugEnabled()) {
+ debugDisplayAllMaps(reduceWork);
+ }
+ }
+ }
+
+ class MapWorkValidationNodeProcessor implements NodeProcessor {
+
+ private final MapWork mapWork;
+ private final boolean isTez;
+
+ public MapWorkValidationNodeProcessor(MapWork mapWork, boolean isTez) {
+ this.mapWork = mapWork;
+ this.isTez = isTez;
+ }
+
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+ for (Node n : stack) {
+ Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) n;
+ if (nonVectorizableChildOfGroupBy(op)) {
+ return new Boolean(true);
+ }
+ boolean ret = validateMapWorkOperator(op, mapWork, isTez);
+ if (!ret) {
+ LOG.info("MapWork Operator: " + op.getName() + " could not be vectorized.");
+ return new Boolean(false);
+ }
+ }
+ return new Boolean(true);
+ }
+ }
+
+ class ReduceWorkValidationNodeProcessor implements NodeProcessor {
+
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+ for (Node n : stack) {
+ Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) n;
+ if (nonVectorizableChildOfGroupBy(op)) {
+ return new Boolean(true);
+ }
+ boolean ret = validateReduceWorkOperator(op);
+ if (!ret) {
+ LOG.info("ReduceWork Operator: " + op.getName() + " could not be vectorized.");
+ return new Boolean(false);
+ }
+ }
+ return new Boolean(true);
+ }
+ }
+
+ // This class has common code used by both MapWorkVectorizationNodeProcessor and
+ // ReduceWorkVectorizationNodeProcessor.
+ class VectorizationNodeProcessor implements NodeProcessor {
+
+ // The vectorization context for the Map or Reduce task.
+ protected VectorizationContext taskVectorizationContext;
+
+ // The input projection column type name map for the Map or Reduce task.
+ protected Map<Integer, String> taskColumnTypeNameMap;
+
+ VectorizationNodeProcessor() {
+ taskColumnTypeNameMap = new HashMap<Integer, String>();
+ }
+
+ public Map<String, Integer> getVectorColumnNameMap() {
+ return taskVectorizationContext.getProjectionColumnMap();
+ }
+
+ public Map<Integer, String> getVectorColumnTypeMap() {
+ return taskColumnTypeNameMap;
+ }
+
+ public Map<Integer, String> getVectorScratchColumnTypeMap() {
+ return taskVectorizationContext.getScratchColumnTypeMap();
+ }
+
+ protected final Set<Operator<? extends OperatorDesc>> opsDone =
+ new HashSet<Operator<? extends OperatorDesc>>();
+
+ protected final Map<Operator<? extends OperatorDesc>, Operator<? extends OperatorDesc>> opToVectorOpMap =
+ new HashMap<Operator<? extends OperatorDesc>, Operator<? extends OperatorDesc>>();
+
+ public VectorizationContext walkStackToFindVectorizationContext(Stack<Node> stack,
+ Operator<? extends OperatorDesc> op) throws SemanticException {
+ VectorizationContext vContext = null;
+ if (stack.size() <= 1) {
+ throw new SemanticException(
+ String.format("Expected operator stack for operator %s to have at least 2 operators",
+ op.getName()));
+ }
+ // Walk down the stack of operators until we find one willing to give us a context.
+ // At the bottom will be the root operator, guaranteed to have a context
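+ // stack.get(stack.size() - 1) is op itself, so start the walk at its immediate parent.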
+ int i = stack.size() - 2;
+ while (vContext == null) {
+ if (i < 0) {
+ return null;
+ }
+ Operator<? extends OperatorDesc> opParent = (Operator<? extends OperatorDesc>) stack.get(i);
+ Operator<? extends OperatorDesc> vectorOpParent = opToVectorOpMap.get(opParent);
+ if (vectorOpParent != null) {
+ if (vectorOpParent instanceof VectorizationContextRegion) {
+ VectorizationContextRegion vcRegion = (VectorizationContextRegion) vectorOpParent;
+ vContext = vcRegion.getOuputVectorizationContext();
+ LOG.info("walkStackToFindVectorizationContext " + vectorOpParent.getName() + " has new vectorization context " + vContext.toString());
+ } else {
+ LOG.info("walkStackToFindVectorizationContext " + vectorOpParent.getName() + " does not have new vectorization context");
+ }
+ } else {
+ LOG.info("walkStackToFindVectorizationContext " + opParent.getName() + " is not vectorized");
+ }
+ --i;
+ }
+ return vContext;
+ }
+
+ public Operator<? extends OperatorDesc> doVectorize(Operator<? extends OperatorDesc> op,
+ VectorizationContext vContext, boolean isTez) throws SemanticException {
+ Operator<? extends OperatorDesc> vectorOp = op;
+ try {
+ if (!opsDone.contains(op)) {
+ vectorOp = vectorizeOperator(op, vContext, isTez);
+ opsDone.add(op);
+ if (vectorOp != op) {
+ opToVectorOpMap.put(op, vectorOp);
+ opsDone.add(vectorOp);
+ }
+ }
+ } catch (HiveException e) {
+ throw new SemanticException(e);
+ }
+ return vectorOp;
+ }
+
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+ throw new SemanticException("Must be overridden");
+ }
+ }
+
+ class MapWorkVectorizationNodeProcessor extends VectorizationNodeProcessor {
+
+ private final boolean isTez;
+
+ public MapWorkVectorizationNodeProcessor(MapWork mWork, boolean isTez) {
+ super();
+ this.isTez = isTez;
+ }
+
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+
+ Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) nd;
+
+ VectorizationContext vContext = null;
+
+ if (op instanceof TableScanOperator) {
+ if (taskVectorizationContext == null) {
+ taskVectorizationContext = getVectorizationContext(op.getSchema(), op.getName(),
+ taskColumnTypeNameMap);
+ }
+ vContext = taskVectorizationContext;
+ } else {
+ LOG.info("MapWorkVectorizationNodeProcessor process going to walk the operator stack to get vectorization context for " + op.getName());
+ vContext = walkStackToFindVectorizationContext(stack, op);
+ if (vContext == null) {
+ // No operator has "pushed" a new context -- so use the task vectorization context.
+ vContext = taskVectorizationContext;
+ }
+ }
+
+ assert vContext != null;
+ LOG.info("MapWorkVectorizationNodeProcessor process operator " + op.getName() + " using vectorization context" + vContext.toString());
+
+ // When Vectorized GROUPBY outputs rows instead of vectorized row batches, we don't
+ // vectorize the operators below it.
+ if (nonVectorizableChildOfGroupBy(op)) {
+ // No need to vectorize
+ if (!opsDone.contains(op)) {
+ opsDone.add(op);
+ }
+ return null;
+ }
+
+ Operator<? extends OperatorDesc> vectorOp = doVectorize(op, vContext, isTez);
+
+ if (LOG.isDebugEnabled()) {
+ if (vectorOp instanceof VectorizationContextRegion) {
+ VectorizationContextRegion vcRegion = (VectorizationContextRegion) vectorOp;
+ VectorizationContext vNewContext = vcRegion.getOuputVectorizationContext();
+ LOG.debug("Vectorized MapWork operator " + vectorOp.getName() + " added vectorization context " + vNewContext.toString());
+ }
+ }
+
+ return null;
+ }
+ }
+
+ class ReduceWorkVectorizationNodeProcessor extends VectorizationNodeProcessor {
+
+ private final List<String> reduceColumnNames;
+ private final List<TypeInfo> reduceTypeInfos;
+
+ private final boolean isTez;
+
+ private Operator<? extends OperatorDesc> rootVectorOp;
+
+ public Operator<? extends OperatorDesc> getRootVectorOp() {
+ return rootVectorOp;
+ }
+
+ public ReduceWorkVectorizationNodeProcessor(List<String> reduceColumnNames,
+ List<TypeInfo> reduceTypeInfos, boolean isTez) {
+ super();
+ this.reduceColumnNames = reduceColumnNames;
+ this.reduceTypeInfos = reduceTypeInfos;
+ rootVectorOp = null;
+ this.isTez = isTez;
+ }
+
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+
+ Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) nd;
+
+ VectorizationContext vContext = null;
+
+ boolean saveRootVectorOp = false;
+
+ if (op.getParentOperators().size() == 0) {
+ LOG.info("ReduceWorkVectorizationNodeProcessor process reduceColumnNames " + reduceColumnNames.toString());
+
+ vContext = new VectorizationContext("__Reduce_Shuffle__", reduceColumnNames);
+ taskVectorizationContext = vContext;
+ int i = 0;
+ for (TypeInfo typeInfo : reduceTypeInfos) {
+ taskColumnTypeNameMap.put(i, typeInfo.getTypeName());
+ i++;
+ }
+ saveRootVectorOp = true;
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Vectorized ReduceWork reduce shuffle vectorization context " + vContext.toString());
+ }
+ } else {
+ LOG.info("ReduceWorkVectorizationNodeProcessor process going to walk the operator stack to get vectorization context for " + op.getName());
+ vContext = walkStackToFindVectorizationContext(stack, op);
+ if (vContext == null) {
+ // If we didn't find a context among the operators, assume the top -- reduce shuffle's
+ // vectorization context.
+ vContext = taskVectorizationContext;
+ }
+ }
+
+ assert vContext != null;
+ LOG.info("ReduceWorkVectorizationNodeProcessor process operator " + op.getName() + " using vectorization context" + vContext.toString());
+
+ // When Vectorized GROUPBY outputs rows instead of vectorized row batches, we don't
+ // vectorize the operators below it.
+ if (nonVectorizableChildOfGroupBy(op)) {
+ // No need to vectorize
+ if (!opsDone.contains(op)) {
+ opsDone.add(op);
+ }
+ return null;
+ }
+
+ Operator<? extends OperatorDesc> vectorOp = doVectorize(op, vContext, isTez);
+
+ if (LOG.isDebugEnabled()) {
+ if (vectorOp instanceof VectorizationContextRegion) {
+ VectorizationContextRegion vcRegion = (VectorizationContextRegion) vectorOp;
+ VectorizationContext vNewContext = vcRegion.getOuputVectorizationContext();
+ LOG.debug("Vectorized ReduceWork operator " + vectorOp.getName() + " added vectorization context " + vNewContext.toString());
+ }
+ }
+ if (saveRootVectorOp && op != vectorOp) {
+ rootVectorOp = vectorOp;
+ }
+
+ return null;
+ }
+ }
+
+ private static class ValidatorVectorizationContext extends VectorizationContext {
+ private ValidatorVectorizationContext() {
+ super("No Name");
+ }
+
+ @Override
+ protected int getInputColumnIndex(String name) {
+ return 0;
+ }
+
+ @Override
+ protected int getInputColumnIndex(ExprNodeColumnDesc colExpr) {
+ return 0;
+ }
+ }
+
+ @Override
+ public PhysicalContext resolve(PhysicalContext physicalContext) throws SemanticException {
+ hiveConf = physicalContext.getConf();
+
+ boolean vectorPath = HiveConf.getBoolVar(hiveConf,
+ HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED);
+ if (!vectorPath) {
+ LOG.info("Vectorization is disabled");
+ return physicalContext;
+ }
+ // create dispatcher and graph walker
+ Dispatcher disp = new VectorizationDispatcher(physicalContext);
+ TaskGraphWalker ogw = new TaskGraphWalker(disp);
+
+ // get all the tasks nodes from root task
+ ArrayList<Node> topNodes = new ArrayList<Node>();
+ topNodes.addAll(physicalContext.getRootTasks());
+
+ // begin to walk through the task tree.
+ ogw.startWalking(topNodes, null);
+ return physicalContext;
+ }
+
+ boolean validateMapWorkOperator(Operator<? extends OperatorDesc> op, MapWork mWork, boolean isTez) {
+ boolean ret = false;
+ switch (op.getType()) {
+ case MAPJOIN:
+ if (op instanceof MapJoinOperator) {
+ ret = validateMapJoinOperator((MapJoinOperator) op);
+ } else if (op instanceof SMBMapJoinOperator) {
+ ret = validateSMBMapJoinOperator((SMBMapJoinOperator) op);
+ }
+ break;
+ case GROUPBY:
+ ret = validateGroupByOperator((GroupByOperator) op, false, isTez);
+ break;
+ case FILTER:
+ ret = validateFilterOperator((FilterOperator) op);
+ break;
+ case SELECT:
+ ret = validateSelectOperator((SelectOperator) op);
+ break;
+ case REDUCESINK:
+ ret = validateReduceSinkOperator((ReduceSinkOperator) op);
+ break;
+ case TABLESCAN:
+ ret = validateTableScanOperator((TableScanOperator) op, mWork);
+ break;
+ case FILESINK:
+ case LIMIT:
+ case EVENT:
+ case SPARKPRUNINGSINK:
+ ret = true;
+ break;
+ case HASHTABLESINK:
+ ret = op instanceof SparkHashTableSinkOperator &&
+ validateSparkHashTableSinkOperator((SparkHashTableSinkOperator) op);
+ break;
+ default:
+ ret = false;
+ break;
+ }
+ return ret;
+ }
+
+ boolean validateReduceWorkOperator(Operator<? extends OperatorDesc> op) {
+ boolean ret = false;
+ switch (op.getType()) {
+ case MAPJOIN:
+ // Does MAPJOIN actually get planned in Reduce?
+ if (op instanceof MapJoinOperator) {
+ ret = validateMapJoinOperator((MapJoinOperator) op);
+ } else if (op instanceof SMBMapJoinOperator) {
+ ret = validateSMBMapJoinOperator((SMBMapJoinOperator) op);
+ }
+ break;
+ case GROUPBY:
+ if (HiveConf.getBoolVar(hiveConf,
+ HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_GROUPBY_ENABLED)) {
+ ret = validateGroupByOperator((GroupByOperator) op, true, true);
+ } else {
+ ret = false;
+ }
+ break;
+ case FILTER:
+ ret = validateFilterOperator((FilterOperator) op);
+ break;
+ case SELECT:
+ ret = validateSelectOperator((SelectOperator) op);
+ break;
+ case REDUCESINK:
+ ret = validateReduceSinkOperator((ReduceSinkOperator) op);
+ break;
+ case FILESINK:
+ ret = validateFileSinkOperator((FileSinkOperator) op);
+ break;
+ case LIMIT:
+ case EVENT:
+ case SPARKPRUNINGSINK:
+ ret = true;
+ break;
+ case HASHTABLESINK:
+ ret = op instanceof SparkHashTableSinkOperator &&
+ validateSparkHashTableSinkOperator((SparkHashTableSinkOperator) op);
+ break;
+ default:
+ ret = false;
+ break;
+ }
+ return ret;
+ }
+
+ public Boolean nonVectorizableChildOfGroupBy(Operator<? extends OperatorDesc> op) {
+ Operator<? extends OperatorDesc> currentOp = op;
+ while (currentOp.getParentOperators().size() > 0) {
+ currentOp = currentOp.getParentOperators().get(0);
+ if (currentOp.getType().equals(OperatorType.GROUPBY)) {
+ GroupByDesc desc = (GroupByDesc)currentOp.getConf();
+ boolean isVectorOutput = desc.getVectorDesc().isVectorOutput();
+ if (isVectorOutput) {
+ // This GROUP BY does vectorize its output.
+ return false;
+ }
+ return true;
+ }
+ }
+ return false;
+ }
+
+ private boolean validateSMBMapJoinOperator(SMBMapJoinOperator op) {
+ SMBJoinDesc desc = op.getConf();
+ // Validation is the same as for map join, since the 'small' tables are not vectorized
+ return validateMapJoinDesc(desc);
+ }
+
+ private boolean validateTableScanOperator(TableScanOperator op, MapWork mWork) {
+ TableScanDesc desc = op.getConf();
+ if (desc.isGatherStats()) {
+ return false;
+ }
+
+ String columns = "";
+ String types = "";
+ String partitionColumns = "";
+ String partitionTypes = "";
+ boolean haveInfo = false;
+
+ // This over-reaches slightly, since we can have more than one table-scan per map-work.
+ // To be accurate, it would need to map path to partition and path to alias, and then
+ // check that the alias belongs to the same table-scan. That said, this is a TODO item
+ // to be fixed when we support more than one TableScan per vectorized pipeline later.
+ LinkedHashMap<String, PartitionDesc> partitionDescs = mWork.getPathToPartitionInfo();
+
+ // For vectorization, compare each partition's information against the others.
+ // We assume the table information comes from one of the partitions, so it is
+ // sufficient to focus on the partition information and not compare against the
+ // TableScanOperator columns (in the VectorizationContext).
+ for (Map.Entry<String, PartitionDesc> entry : partitionDescs.entrySet()) {
+ PartitionDesc partDesc = entry.getValue();
+ if (partDesc.getPartSpec() == null || partDesc.getPartSpec().isEmpty()) {
+ // No partition information -- we match because we would default to using the table description.
+ continue;
+ }
+ Properties partProps = partDesc.getProperties();
+ if (!haveInfo) {
+ columns = partProps.getProperty(hive_metastoreConstants.META_TABLE_COLUMNS);
+ types = partProps.getProperty(hive_metastoreConstants.META_TABLE_COLUMN_TYPES);
+ partitionColumns = partProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS);
+ partitionTypes = partProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES);
+ haveInfo = true;
+ } else {
+ String nextColumns = partProps.getProperty(hive_metastoreConstants.META_TABLE_COLUMNS);
+ String nextTypes = partProps.getProperty(hive_metastoreConstants.META_TABLE_COLUMN_TYPES);
+ String nextPartitionColumns = partProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS);
+ String nextPartitionTypes = partProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES);
+ if (!columns.equalsIgnoreCase(nextColumns)) {
+ LOG.info(
+ String.format("Could not vectorize partition %s. Its column names %s do not match the other column names %s",
+ entry.getKey(), nextColumns, columns));
+ return false;
+ }
+ if (!types.equalsIgnoreCase(nextTypes)) {
+ LOG.info(
+ String.format("Could not vectorize partition %s. Its column types %s do not match the other column types %s",
+ entry.getKey(), nextTypes, types));
+ return false;
+ }
+ if (!partitionColumns.equalsIgnoreCase(nextPartitionColumns)) {
+ LOG.info(
+ String.format("Could not vectorize partition %s. Its partition column names %s do not match the other partition column names %s",
+ entry.getKey(), nextPartitionColumns, partitionColumns));
+ return false;
+ }
+ if (!partitionTypes.equalsIgnoreCase(nextPartitionTypes)) {
+ LOG.info(
+ String.format("Could not vectorize partition %s. Its partition column types %s do not match the other partition column types %s",
+ entry.getKey(), nextPartitionTypes, partitionTypes));
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
+ private boolean validateMapJoinOperator(MapJoinOperator op) {
+ MapJoinDesc desc = op.getConf();
+ return validateMapJoinDesc(desc);
+ }
+
+ private boolean validateMapJoinDesc(MapJoinDesc desc) {
+ byte posBigTable = (byte) desc.getPosBigTable();
+ List<ExprNodeDesc> filterExprs = desc.getFilters().get(posBigTable);
+ if (!validateExprNodeDesc(filterExprs, VectorExpressionDescriptor.Mode.FILTER)) {
+ LOG.info("Cannot vectorize map work filter expression");
+ return false;
+ }
+ List<ExprNodeDesc> keyExprs = desc.getKeys().get(posBigTable);
+ if (!validateExprNodeDesc(keyExprs)) {
+ LOG.info("Cannot vectorize map work key expression");
+ return false;
+ }
+ List<ExprNodeDesc> valueExprs = desc.getExprs().get(posBigTable);
+ if (!validateExprNodeDesc(valueExprs)) {
+ LOG.info("Cannot vectorize map work value expression");
+ return false;
+ }
+ return true;
+ }
+
+ private boolean validateSparkHashTableSinkOperator(SparkHashTableSinkOperator op) {
+ SparkHashTableSinkDesc desc = op.getConf();
+ byte tag = desc.getTag();
+ // it's essentially a MapJoinDesc
+ List<ExprNodeDesc> filterExprs = desc.getFilters().get(tag);
+ List<ExprNodeDesc> keyExprs = desc.getKeys().get(tag);
+ List<ExprNodeDesc> valueExprs = desc.getExprs().get(tag);
+ return validateExprNodeDesc(filterExprs, VectorExpressionDescriptor.Mode.FILTER) &&
+ validateExprNodeDesc(keyExprs) && validateExprNodeDesc(valueExprs);
+ }
+
+ private boolean validateReduceSinkOperator(ReduceSinkOperator op) {
+ List<ExprNodeDesc> keyDescs = op.getConf().getKeyCols();
+ List<ExprNodeDesc> partitionDescs = op.getConf().getPartitionCols();
+ List<ExprNodeDesc> valueDesc = op.getConf().getValueCols();
+ return validateExprNodeDesc(keyDescs) && validateExprNodeDesc(partitionDescs) &&
+ validateExprNodeDesc(valueDesc);
+ }
+
+ private boolean validateSelectOperator(SelectOperator op) {
+ List<ExprNodeDesc> descList = op.getConf().getColList();
+ for (ExprNodeDesc desc : descList) {
+ boolean ret = validateExprNodeDesc(desc);
+ if (!ret) {
+ LOG.info("Cannot vectorize select expression: " + desc.toString());
+ return false;
+ }
+ }
+ return true;
+ }
+
+ private boolean validateFilterOperator(FilterOperator op) {
+ ExprNodeDesc desc = op.getConf().getPredicate();
+ return validateExprNodeDesc(desc, VectorExpressionDescriptor.Mode.FILTER);
+ }
+
+ private boolean validateGroupByOperator(GroupByOperator op, boolean isReduce, boolean isTez) {
+ GroupByDesc desc = op.getConf();
+ VectorGroupByDesc vectorDesc = desc.getVectorDesc();
+
+ if (desc.isGroupingSetsPresent()) {
+ LOG.info("Grouping sets not supported in vector mode");
+ return false;
+ }
+ if (desc.pruneGroupingSetId()) {
+ LOG.info("Pruning grouping set id not supported in vector mode");
+ return false;
+ }
+ boolean ret = validateExprNodeDesc(desc.getKeys());
+ if (!ret) {
+ LOG.info("Cannot vectorize groupby key expression");
+ return false;
+ }
+
+ if (!isReduce) {
+
+ // MapWork
+
+ ret = validateHashAggregationDesc(desc.getAggregators());
+ if (!ret) {
+ return false;
+ }
+ } else {
+
+ // ReduceWork
+
+ boolean isComplete = desc.getMode() == GroupByDesc.Mode.COMPLETE;
+ if (desc.getMode() != GroupByDesc.Mode.HASH) {
+
+ // Reduce Merge-Partial GROUP BY.
+
+ // A merge-partial GROUP BY is fed by grouping by keys from reduce-shuffle. It is the
+ // first (or root) operator for its reduce task.
+ // TODO: Technically, we should also handle FINAL, PARTIAL1, PARTIAL2 and PARTIALS
+ // that are not hash or complete, but aren't merge-partial, somehow.
+
+ if (desc.isDistinct()) {
+ LOG.info("Vectorized Reduce MergePartial GROUP BY does not support DISTINCT");
+ return false;
+ }
+
+ boolean hasKeys = (desc.getKeys().size() > 0);
+
+ // Do we support merge-partial aggregation AND the output is primitive?
+ ret = validateReduceMergePartialAggregationDesc(desc.getAggregators(), hasKeys);
+ if (!ret) {
+ return false;
+ }
+
+ if (hasKeys) {
+ if (op.getParentOperators().size() > 0 && !isComplete) {
+ LOG.info("Vectorized Reduce MergePartial GROUP BY keys can only handle a key group when it is fed by reduce-shuffle");
+ return false;
+ }
+
+ LOG.info("Vectorized Reduce MergePartial GROUP BY will process key groups");
+
+ // Primitive output validation above means we can output VectorizedRowBatch to the
+ // children operators.
+ vectorDesc.setVectorOutput(true);
+ } else {
+ LOG.info("Vectorized Reduce MergePartial GROUP BY will do global aggregation");
+ }
+ if (!isComplete) {
+ vectorDesc.setIsReduceMergePartial(true);
+ } else {
+ vectorDesc.setIsReduceStreaming(true);
+ }
+ } else {
+
+ // Reduce Hash GROUP BY or global aggregation.
+
+ ret = validateHashAggregationDesc(desc.getAggregators());
+ if (!ret) {
+ return false;
+ }
+ }
+ }
+
+ return true;
+ }
+
+ private boolean validateFileSinkOperator(FileSinkOperator op) {
+ return true;
+ }
+
+ private boolean validateExprNodeDesc(List<ExprNodeDesc> descs) {
+ return validateExprNodeDesc(descs, VectorExpressionDescriptor.Mode.PROJECTION);
+ }
+
+ private boolean validateExprNodeDesc(List<ExprNodeDesc> descs,
+ VectorExpressionDescriptor.Mode mode) {
+ for (ExprNodeDesc d : descs) {
+ boolean ret = validateExprNodeDesc(d, mode);
+ if (!ret) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+
+ private boolean validateHashAggregationDesc(List<AggregationDesc> descs) {
+ return validateAggregationDesc(descs, /* isReduceMergePartial */ false, false);
+ }
+
+ private boolean validateReduceMergePartialAggregationDesc(List<AggregationDesc> descs, boolean hasKeys) {
+ return validateAggregationDesc(descs, /* isReduceMergePartial */ true, hasKeys);
+ }
+
+ private boolean validateAggregationDesc(List<AggregationDesc> descs, boolean isReduceMergePartial, boolean hasKeys) {
+ for (AggregationDesc d : descs) {
+ boolean ret = validateAggregationDesc(d, isReduceMergePartial, hasKeys);
+ if (!ret) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ private boolean validateExprNodeDescRecursive(ExprNodeDesc desc, VectorExpressionDescriptor.Mode mode) {
+ if (desc instanceof ExprNodeColumnDesc) {
+ ExprNodeColumnDesc c = (ExprNodeColumnDesc) desc;
+ // Currently, we do not support vectorized virtual columns (see HIVE-5570).
+ if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(c.getColumn())) {
+ LOG.info("Cannot vectorize virtual column " + c.getColumn());
+ return false;
+ }
+ }
+ String typeName = desc.getTypeInfo().getTypeName();
+ boolean ret = validateDataType(typeName, mode);
+ if (!ret) {
+ LOG.info("Cannot vectorize " + desc.toString() + " of type " + typeName);
+ return false;
+ }
+ if (desc instanceof ExprNodeGenericFuncDesc) {
+ ExprNodeGenericFuncDesc d = (ExprNodeGenericFuncDesc) desc;
+ boolean r = validateGenericUdf(d);
+ if (!r) {
+ LOG.info("Cannot vectorize UDF " + d);
+ return false;
+ }
+ }
+ if (desc.getChildren() != null) {
+ for (ExprNodeDesc d: desc.getChildren()) {
+ // Don't restrict child expressions for projection. Always use looser FILTER mode.
+ boolean r = validateExprNodeDescRecursive(d, VectorExpressionDescriptor.Mode.FILTER);
+ if (!r) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
+ private boolean validateExprNodeDesc(ExprNodeDesc desc) {
+ return validateExprNodeDesc(desc, VectorExpressionDescriptor.Mode.PROJECTION);
+ }
+
+ boolean validateExprNodeDesc(ExprNodeDesc desc, VectorExpressionDescriptor.Mode mode) {
+ if (!validateExprNodeDescRecursive(desc, mode)) {
+ return false;
+ }
+ try {
+ VectorizationContext vc = new ValidatorVectorizationContext();
+ if (vc.getVectorExpression(desc, mode) == null) {
+ // TODO: this cannot happen - VectorizationContext throws in such cases.
+ LOG.info("getVectorExpression returned null");
+ return false;
+ }
+ } catch (Exception e) {
+ LOG.info("Failed to vectorize", e);
+ return false;
+ }
+ return true;
+ }
+
+ private boolean validateGenericUdf(ExprNodeGenericFuncDesc genericUDFExpr) {
+ if (VectorizationContext.isCustomUDF(genericUDFExpr)) {
+ return true;
+ }
+ GenericUDF genericUDF = genericUDFExpr.getGenericUDF();
+ if (genericUDF instanceof GenericUDFBridge) {
+ Class<? extends UDF> udf = ((GenericUDFBridge) genericUDF).getUdfClass();
+ return supportedGenericUDFs.contains(udf);
+ } else {
+ return supportedGenericUDFs.contains(genericUDF.getClass());
+ }
+ }
+
+ private boolean validateAggregationIsPrimitive(VectorAggregateExpression vectorAggrExpr) {
+ ObjectInspector outputObjInspector = vectorAggrExpr.getOutputObjectInspector();
+ return (outputObjInspector.getCategory() == ObjectInspector.Category.PRIMITIVE);
+ }
+
+ private boolean validateAggregationDesc(AggregationDesc aggDesc, boolean isReduceMergePartial,
+ boolean hasKeys) {
+
+ String udfName = aggDesc.getGenericUDAFName().toLowerCase();
+ if (!supportedAggregationUdfs.contains(udfName)) {
+ LOG.info("Cannot vectorize groupby aggregate expression: UDF " + udfName + " not supported");
+ return false;
+ }
+ if (aggDesc.getParameters() != null && !validateExprNodeDesc(aggDesc.getParameters())) {
+ LOG.info("Cannot vectorize groupby aggregate expression: UDF parameters not supported");
+ return false;
+ }
+
+ // See if we can vectorize the aggregation.
+ VectorizationContext vc = new ValidatorVectorizationContext();
+ VectorAggregateExpression vectorAggrExpr;
+ try {
+ vectorAggrExpr = vc.getAggregatorExpression(aggDesc, isReduceMergePartial);
+ } catch (Exception e) {
+      // This is the vectorization attempt itself, so a failure means we cannot vectorize.
+      LOG.info("Failed to vectorize aggregation", e);
+ return false;
+ }
+
+ if (isReduceMergePartial && hasKeys && !validateAggregationIsPrimitive(vectorAggrExpr)) {
+ LOG.info("Vectorized Reduce MergePartial GROUP BY keys can only handle aggregate outputs that are primitive types");
+ return false;
+ }
+
+ return true;
+ }
+
+ private boolean validateDataType(String type, VectorExpressionDescriptor.Mode mode) {
+ type = type.toLowerCase();
+ boolean result = supportedDataTypesPattern.matcher(type).matches();
+ if (result && mode == VectorExpressionDescriptor.Mode.PROJECTION && type.equals("void")) {
+ return false;
+ }
+ return result;
+ }
+
+ private VectorizationContext getVectorizationContext(RowSchema rowSchema, String contextName,
+ Map<Integer, String> typeNameMap) {
+
+ VectorizationContext vContext = new VectorizationContext(contextName);
+
+ // Add all non-virtual columns to make a vectorization context for
+ // the TableScan operator.
+ int i = 0;
+ for (ColumnInfo c : rowSchema.getSignature()) {
+ // Earlier, validation code should have eliminated virtual columns usage (HIVE-5560).
+ if (!isVirtualColumn(c)) {
+ vContext.addInitialColumn(c.getInternalName());
+ typeNameMap.put(i, c.getTypeName());
+ i++;
+ }
+ }
+ vContext.finishedAddingInitialColumns();
+
+ return vContext;
+ }
+
+ private void fixupParentChildOperators(Operator<? extends OperatorDesc> op,
+ Operator<? extends OperatorDesc> vectorOp) {
+ if (op.getParentOperators() != null) {
+ vectorOp.setParentOperators(op.getParentOperators());
+ for (Operator<? extends OperatorDesc> p : op.getParentOperators()) {
+ p.replaceChild(op, vectorOp);
+ }
+ }
+ if (op.getChildOperators() != null) {
+ vectorOp.setChildOperators(op.getChildOperators());
+ for (Operator<? extends OperatorDesc> c : op.getChildOperators()) {
+ c.replaceParent(op, vectorOp);
+ }
+ }
+ }
+
+ private boolean isBigTableOnlyResults(MapJoinDesc desc) {
+ Byte[] order = desc.getTagOrder();
+ byte posBigTable = (byte) desc.getPosBigTable();
+ Byte posSingleVectorMapJoinSmallTable = (order[0] == posBigTable ? order[1] : order[0]);
+
+ int[] smallTableIndices;
+ int smallTableIndicesSize;
+ if (desc.getValueIndices() != null && desc.getValueIndices().get(posSingleVectorMapJoinSmallTable) != null) {
+ smallTableIndices = desc.getValueIndices().get(posSingleVectorMapJoinSmallTable);
+ LOG.info("Vectorizer isBigTableOnlyResults smallTableIndices " + Arrays.toString(smallTableIndices));
+ smallTableIndicesSize = smallTableIndices.length;
+ } else {
+ smallTableIndices = null;
+ LOG.info("Vectorizer isBigTableOnlyResults smallTableIndices EMPTY");
+ smallTableIndicesSize = 0;
+ }
+
+ List<Integer> smallTableRetainList = desc.getRetainList().get(posSingleVectorMapJoinSmallTable);
+ LOG.info("Vectorizer isBigTableOnlyResults smallTableRetainList " + smallTableRetainList);
+ int smallTableRetainSize = smallTableRetainList.size();
+
+ if (smallTableIndicesSize > 0) {
+      // Small table indices take precedence over the retain list.
+ for (int i = 0; i < smallTableIndicesSize; i++) {
+ if (smallTableIndices[i] < 0) {
+          // Negative numbers indicate a column to be (deserialized and) read from the
+          // small table's LazyBinary value row.
+ LOG.info("Vectorizer isBigTableOnlyResults smallTableIndices[i] < 0 returning false");
+ return false;
+ }
+ }
+ } else if (smallTableRetainSize > 0) {
+ LOG.info("Vectorizer isBigTableOnlyResults smallTableRetainSize > 0 returning false");
+ return false;
+ }
+
+ LOG.info("Vectorizer isBigTableOnlyResults returning true");
+ return true;
+ }
+
+ Operator<? extends OperatorDesc> specializeMapJoinOperator(Operator<? extends OperatorDesc> op,
+ VectorizationContext vContext, MapJoinDesc desc) throws HiveException {
+ Operator<? extends OperatorDesc> vectorOp = null;
+ Class<? extends Operator<?>> opClass = null;
+
+ VectorMapJoinDesc.HashTableImplementationType hashTableImplementationType = HashTableImplementationType.NONE;
+ VectorMapJoinDesc.HashTableKind hashTableKind = HashTableKind.NONE;
+ VectorMapJoinDesc.HashTableKeyType hashTableKeyType = HashTableKeyType.NONE;
+
+ if (HiveConf.getBoolVar(hiveConf,
+ HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_FAST_HASHTABLE_ENABLED)) {
+ hashTableImplementationType = HashTableImplementationType.FAST;
+ } else {
+ // Restrict to using BytesBytesMultiHashMap via MapJoinBytesTableContainer or
+ // HybridHashTableContainer.
+ hashTableImplementationType = HashTableImplementationType.OPTIMIZED;
+ }
+
+ int joinType = desc.getConds()[0].getType();
+
+ boolean isInnerBigOnly = false;
+ if (joinType == JoinDesc.INNER_JOIN && isBigTableOnlyResults(desc)) {
+ isInnerBigOnly = true;
+ }
+
+ // By default, we can always use the multi-key class.
+ hashTableKeyType = HashTableKeyType.MULTI_KEY;
+
+ if (!HiveConf.getBoolVar(hiveConf,
+ HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_MULTIKEY_ONLY_ENABLED)) {
+
+ // Look for single column optimization.
+ byte posBigTable = (byte) desc.getPosBigTable();
+ Map<Byte, List<ExprNodeDesc>> keyExprs = desc.getKeys();
+ List<ExprNodeDesc> bigTableKeyExprs = keyExprs.get(posBigTable);
+ if (bigTableKeyExprs.size() == 1) {
+ String typeName = bigTableKeyExprs.get(0).getTypeString();
+ LOG.info("Vectorizer vectorizeOperator map join typeName " + typeName);
+ if (typeName.equals("boolean")) {
+ hashTableKeyType = HashTableKeyType.BOOLEAN;
+ } else if (typeName.equals("tinyint")) {
+ hashTableKeyType = HashTableKeyType.BYTE;
+ } else if (typeName.equals("smallint")) {
+ hashTableKeyType = HashTableKeyType.SHORT;
+ } else if (typeName.equals("int")) {
+ hashTableKeyType = HashTableKeyType.INT;
+ } else if (typeName.equals("bigint") || typeName.equals("long")) {
+ hashTableKeyType = HashTableKeyType.LONG;
+ } else if (VectorizationContext.isStringFamily(typeName)) {
+ hashTableKeyType = HashTableKeyType.STRING;
+ }
+ }
+ }
+
+ switch (joinType) {
+ case JoinDesc.INNER_JOIN:
+ if (!isInnerBigOnly) {
+ hashTableKind = HashTableKind.HASH_MAP;
+ } else {
+ hashTableKind = HashTableKind.HASH_MULTISET;
+ }
+ break;
+ case JoinDesc.LEFT_OUTER_JOIN:
+ case JoinDesc.RIGHT_OUTER_JOIN:
+ hashTableKind = HashTableKind.HASH_MAP;
+ break;
+ case JoinDesc.LEFT_SEMI_JOIN:
+ hashTableKind = HashTableKind.HASH_SET;
+ break;
+ default:
+ throw new HiveException("Unknown join type " + joinType);
+ }
+
+ LOG.info("Vectorizer vectorizeOperator map join hashTableKind " + hashTableKind.name() + " hashTableKeyType " + hashTableKeyType.name());
+
+ switch (hashTableKeyType) {
+ case BOOLEAN:
+ case BYTE:
+ case SHORT:
+ case INT:
+ case LONG:
+ switch (joinType) {
+ case JoinDesc.INNER_JOIN:
+ if (!isInnerBigOnly) {
+ opClass = VectorMapJoinInnerLongOperator.class;
+ } else {
+ opClass = VectorMapJoinInnerBigOnlyLongOperator.class;
+ }
+ break;
+ case JoinDesc.LEFT_OUTER_JOIN:
+ case JoinDesc.RIGHT_OUTER_JOIN:
+ opClass = VectorMapJoinOuterLongOperator.class;
+ break;
+ case JoinDesc.LEFT_SEMI_JOIN:
+ opClass = VectorMapJoinLeftSemiLongOperator.class;
+ break;
+ default:
+ throw new HiveException("Unknown join type " + joinType);
+ }
+ break;
+ case STRING:
+ switch (joinType) {
+ case JoinDesc.INNER_JOIN:
+ if (!isInnerBigOnly) {
+ opClass = VectorMapJoinInnerStringOperator.class;
+ } else {
+ opClass = VectorMapJoinInnerBigOnlyStringOperator.class;
+ }
+ break;
+ case JoinDesc.LEFT_OUTER_JOIN:
+ case JoinDesc.RIGHT_OUTER_JOIN:
+ opClass = VectorMapJoinOuterStringOperator.class;
+ break;
+ case JoinDesc.LEFT_SEMI_JOIN:
+ opClass = VectorMapJoinLeftSemiStringOperator.class;
+ break;
+ default:
+ throw new HiveException("Unknown join type " + joinType);
+ }
+ break;
+ case MULTI_KEY:
+ switch (joinType) {
+ case JoinDesc.INNER_JOIN:
+ if (!isInnerBigOnly) {
+ opClass = VectorMapJoinInnerMultiKeyOperator.class;
+ } else {
+ opClass = VectorMapJoinInnerBigOnlyMultiKeyOperator.class;
+ }
+ break;
+ case JoinDesc.LEFT_OUTER_JOIN:
+ case JoinDesc.RIGHT_OUTER_JOIN:
+ opClass = VectorMapJoinOuterMultiKeyOperator.class;
+ break;
+ case JoinDesc.LEFT_SEMI_JOIN:
+ opClass = VectorMapJoinLeftSemiMultiKeyOperator.class;
+ break;
+ default:
+ throw new HiveException("Unknown join type " + joinType);
+ }
+ break;
+ }
+
+ vectorOp = OperatorFactory.getVectorOperator(opClass, op.getConf(), vContext);
+ LOG.info("Vectorizer vectorizeOperator map join class " + vectorOp.getClass().getSimpleName());
+
+ boolean minMaxEnabled = HiveConf.getBoolVar(hiveConf,
+ HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_MINMAX_ENABLED);
+
+ VectorMapJoinDesc vectorDesc = desc.getVectorDesc();
+ vectorDesc.setHashTableImplementationType(hashTableImplementationType);
+ vectorDesc.setHashTableKind(hashTableKind);
+ vectorDesc.setHashTableKeyType(hashTableKeyType);
+ vectorDesc.setMinMaxEnabled(minMaxEnabled);
+ return vectorOp;
+ }
+
+ private boolean onExpressionHasNullSafes(MapJoinDesc desc) {
+ boolean[] nullSafes = desc.getNullSafes();
+ for (boolean nullSafe : nullSafes) {
+ if (nullSafe) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ private boolean canSpecializeMapJoin(Operator<? extends OperatorDesc> op, MapJoinDesc desc,
+ boolean isTez) {
+
+ boolean specialize = false;
+
+ if (op instanceof MapJoinOperator &&
+ HiveConf.getBoolVar(hiveConf,
+ HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_ENABLED)) {
+
+ // Currently, only under Tez and non-N-way joins.
+ if (isTez && desc.getConds().length == 1 && !onExpressionHasNullSafes(desc)) {
+
+ // Ok, all basic restrictions satisfied so far...
+ specialize = true;
+
+ if (!HiveConf.getBoolVar(hiveConf,
+ HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_FAST_HASHTABLE_ENABLED)) {
+
+        // When using the optimized hash table, there are further restrictions
+        // (the optimized-table flag must be on and the key types must be supported).
+
+ if (!HiveConf.getBoolVar(hiveConf,
+ HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE)) {
+ specialize = false;
+ } else {
+ byte posBigTable = (byte) desc.getPosBigTable();
+ Map<Byte, List<ExprNodeDesc>> keyExprs = desc.getKeys();
+ List<ExprNodeDesc> bigTableKeyExprs = keyExprs.get(posBigTable);
+ for (ExprNodeDesc exprNodeDesc : bigTableKeyExprs) {
+ String typeName = exprNodeDesc.getTypeString();
+ if (!MapJoinKey.isSupportedField(typeName)) {
+ specialize = false;
+ break;
+ }
+ }
+ }
+ } else {
+
+ // With the fast hash table implementation, we currently do not support
+ // Hybrid Grace Hash Join.
+
+ if (HiveConf.getBoolVar(hiveConf,
+ HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN)) {
+ specialize = false;
+ }
+ }
+ }
+ }
+ return specialize;
+ }
+
+ Operator<? extends OperatorDesc> vectorizeOperator(Operator<? extends OperatorDesc> op,
+ VectorizationContext vContext, boolean isTez) throws HiveException {
+ Operator<? extends OperatorDesc> vectorOp = null;
+
+ switch (op.getType()) {
+ case MAPJOIN:
+ {
+ MapJoinDesc desc = (MapJoinDesc) op.getConf();
+ boolean specialize = canSpecializeMapJoin(op, desc, isTez);
+
+ if (!specialize) {
+
+ Class<? extends Operator<?>> opClass = null;
+ if (op instanceof MapJoinOperator) {
+
+          // *NON-NATIVE* vector map join: filtered OUTER joins need a different class...
+
+ List<ExprNodeDesc> bigTableFilters = desc.getFilters().get((byte) desc.getPosBigTable());
+ boolean isOuterAndFiltered = (!desc.isNoOuterJoin() && bigTableFilters.size() > 0);
+ if (!isOuterAndFiltered) {
+ opClass = VectorMapJoinOperator.class;
+ } else {
+ opClass = VectorMapJoinOuterFilteredOperator.class;
+ }
+ } else if (op instanceof SMBMapJoinOperator) {
+ opClass = VectorSMBMapJoinOperator.class;
+ }
+
+ vectorOp = OperatorFactory.getVectorOperator(opClass, op.getConf(), vContext);
+
+ } else {
+
+ // TEMPORARY Until Native Vector Map Join with Hybrid passes tests...
+ // HiveConf.setBoolVar(physicalContext.getConf(),
+ // HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN, false);
+
+ vectorOp = specializeMapJoinOperator(op, vContext, desc);
+ }
+ }
+ break;
+ case GROUPBY:
+ case FILTER:
+ case SELECT:
+ case FILESINK:
+ case REDUCESINK:
+ case LIMIT:
+ case EXTRACT:
+ case EVENT:
+ case HASHTABLESINK:
+ vectorOp = OperatorFactory.getVectorOperator(op.getConf(), vContext);
+ break;
+ default:
+ vectorOp = op;
+ break;
+ }
+
+ LOG.info("vectorizeOperator " + (vectorOp == null ? "NULL" : vectorOp.getClass().getName()));
+ LOG.info("vectorizeOperator " + (vectorOp == null || vectorOp.getConf() == null ? "NULL" : vectorOp.getConf().getClass().getName()));
+
+ if (vectorOp != op) {
+ fixupParentChildOperators(op, vectorOp);
+ ((AbstractOperatorDesc) vectorOp.getConf()).setVectorMode(true);
+ }
+ return vectorOp;
+ }
+
+ private boolean isVirtualColumn(ColumnInfo column) {
+
+ // Not using method column.getIsVirtualCol() because partitioning columns are also
+ // treated as virtual columns in ColumnInfo.
+ if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(column.getInternalName())) {
+ return true;
+ }
+ return false;
+ }
+
+ public void debugDisplayAllMaps(BaseWork work) {
+
+ Map<String, Integer> columnNameMap = work.getVectorColumnNameMap();
+ Map<Integer, String> columnTypeMap = work.getVectorColumnTypeMap();
+ Map<Integer, String> scratchColumnTypeMap = work.getVectorScratchColumnTypeMap();
+
+ LOG.debug("debugDisplayAllMaps columnNameMap " + columnNameMap.toString());
+ LOG.debug("debugDisplayAllMaps columnTypeMap " + columnTypeMap.toString());
+ LOG.debug("debugDisplayAllMaps scratchColumnTypeMap " + scratchColumnTypeMap.toString());
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/7cfe3743/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java.rej
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java.rej b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java.rej
new file mode 100644
index 0000000..5a10b58
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java.rej
@@ -0,0 +1,86 @@
+***************
+*** 1255,1272 ****
+ LOG.info("Cannot vectorize " + desc.toString() + " of type " + typeName);
+ return false;
+ }
+ if (desc instanceof ExprNodeGenericFuncDesc) {
+ ExprNodeGenericFuncDesc d = (ExprNodeGenericFuncDesc) desc;
+ boolean r = validateGenericUdf(d);
+ if (!r) {
+ return false;
+ }
+ }
+ if (desc.getChildren() != null) {
+- for (ExprNodeDesc d: desc.getChildren()) {
+- // Don't restrict child expressions for projection. Always use looser FILTER mode.
+- boolean r = validateExprNodeDescRecursive(d, VectorExpressionDescriptor.Mode.FILTER);
+- if (!r) {
+ return false;
+ }
+ }
+--- 1265,1329 ----
+ LOG.info("Cannot vectorize " + desc.toString() + " of type " + typeName);
+ return false;
+ }
++ boolean isInExpression = false;
+ if (desc instanceof ExprNodeGenericFuncDesc) {
+ ExprNodeGenericFuncDesc d = (ExprNodeGenericFuncDesc) desc;
+ boolean r = validateGenericUdf(d);
+ if (!r) {
+ return false;
+ }
++ GenericUDF genericUDF = d.getGenericUDF();
++ isInExpression = (genericUDF instanceof GenericUDFIn);
+ }
+ if (desc.getChildren() != null) {
++ if (isInExpression &&
++ desc.getChildren().get(0).getTypeInfo().getCategory() == Category.STRUCT) {
++          if (!validateStructInExpression(desc, VectorExpressionDescriptor.Mode.FILTER)) {
++            return false;
++          }
++ } else {
++ for (ExprNodeDesc d: desc.getChildren()) {
++ // Don't restrict child expressions for projection. Always use looser FILTER mode.
++ boolean r = validateExprNodeDescRecursive(d, VectorExpressionDescriptor.Mode.FILTER);
++ if (!r) {
++ return false;
++ }
++ }
++ }
++ }
++ return true;
++ }
++
++ private boolean validateStructInExpression(ExprNodeDesc desc,
++ VectorExpressionDescriptor.Mode mode) {
++
++ for (ExprNodeDesc d: desc.getChildren()) {
++ TypeInfo typeInfo = d.getTypeInfo();
++ if (typeInfo.getCategory() != Category.STRUCT){
++ return false;
++ }
++ StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
++
++ ArrayList<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
++ ArrayList<String> fieldNames = structTypeInfo.getAllStructFieldNames();
++ final int fieldCount = fieldTypeInfos.size();
++ for (int f = 0; f < fieldCount; f++) {
++ TypeInfo fieldTypeInfo = fieldTypeInfos.get(f);
++ Category category = fieldTypeInfo.getCategory();
++ if (category != Category.PRIMITIVE){
++ LOG.info("Cannot vectorize struct field " + fieldNames.get(f) +
++ " of type " + fieldTypeInfo.getTypeName());
++ return false;
++ }
++ PrimitiveTypeInfo fieldPrimitiveTypeInfo = (PrimitiveTypeInfo) fieldTypeInfo;
++ InConstantType inConstantType =
++ VectorizationContext.getInConstantTypeFromPrimitiveCategory(
++ fieldPrimitiveTypeInfo.getPrimitiveCategory());
++
++ // For now, limit the data types we support for Vectorized Struct IN().
++ if (inConstantType != InConstantType.INT_FAMILY &&
++ inConstantType != InConstantType.FLOAT_FAMILY &&
++ inConstantType != InConstantType.STRING_FAMILY) {
++ LOG.info("Cannot vectorize struct field " + fieldNames.get(f) +
++ " of type " + fieldTypeInfo.getTypeName());
+ return false;
+ }
+ }
http://git-wip-us.apache.org/repos/asf/hive/blob/7cfe3743/ql/src/test/queries/clientpositive/vector_struct_in.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_struct_in.q b/ql/src/test/queries/clientpositive/vector_struct_in.q
new file mode 100644
index 0000000..0e3a4ca
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_struct_in.q
@@ -0,0 +1,247 @@
+set hive.cbo.enable=false;
+set hive.tez.dynamic.partition.pruning=false;
+set hive.vectorized.execution.enabled=true;
+SET hive.auto.convert.join=true;
+
+-- SORT_QUERY_RESULTS
+
+-- 2 Strings
+create table test_1 (`id` string, `lineid` string) stored as orc;
+
+insert into table test_1 values ('one','1'), ('seven','1');
+
+explain
+select * from test_1 where struct(`id`, `lineid`)
+IN (
+struct('two','3'),
+struct('three','1'),
+struct('one','1'),
+struct('five','2'),
+struct('six','1'),
+struct('eight','1'),
+struct('seven','1'),
+struct('nine','1'),
+struct('ten','1')
+);
+
+select * from test_1 where struct(`id`, `lineid`)
+IN (
+struct('two','3'),
+struct('three','1'),
+struct('one','1'),
+struct('five','2'),
+struct('six','1'),
+struct('eight','1'),
+struct('seven','1'),
+struct('nine','1'),
+struct('ten','1')
+);
+
+explain
+select `id`, `lineid`, struct(`id`, `lineid`)
+IN (
+struct('two','3'),
+struct('three','1'),
+struct('one','1'),
+struct('five','2'),
+struct('six','1'),
+struct('eight','1'),
+struct('seven','1'),
+struct('nine','1'),
+struct('ten','1')
+) as b from test_1 ;
+
+select `id`, `lineid`, struct(`id`, `lineid`)
+IN (
+struct('two','3'),
+struct('three','1'),
+struct('one','1'),
+struct('five','2'),
+struct('six','1'),
+struct('eight','1'),
+struct('seven','1'),
+struct('nine','1'),
+struct('ten','1')
+) as b from test_1 ;
+
+
+-- 2 Integers
+create table test_2 (`id` int, `lineid` int) stored as orc;
+
+insert into table test_2 values (1,1), (7,1);
+
+explain
+select * from test_2 where struct(`id`, `lineid`)
+IN (
+struct(2,3),
+struct(3,1),
+struct(1,1),
+struct(5,2),
+struct(6,1),
+struct(8,1),
+struct(7,1),
+struct(9,1),
+struct(10,1)
+);
+
+select * from test_2 where struct(`id`, `lineid`)
+IN (
+struct(2,3),
+struct(3,1),
+struct(1,1),
+struct(5,2),
+struct(6,1),
+struct(8,1),
+struct(7,1),
+struct(9,1),
+struct(10,1)
+);
+
+explain
+select `id`, `lineid`, struct(`id`, `lineid`)
+IN (
+struct(2,3),
+struct(3,1),
+struct(1,1),
+struct(5,2),
+struct(6,1),
+struct(8,1),
+struct(7,1),
+struct(9,1),
+struct(10,1)
+) as b from test_2;
+
+select `id`, `lineid`, struct(`id`, `lineid`)
+IN (
+struct(2,3),
+struct(3,1),
+struct(1,1),
+struct(5,2),
+struct(6,1),
+struct(8,1),
+struct(7,1),
+struct(9,1),
+struct(10,1)
+) as b from test_2;
+
+-- 1 String and 1 Integer
+create table test_3 (`id` string, `lineid` int) stored as orc;
+
+insert into table test_3 values ('one',1), ('seven',1);
+
+explain
+select * from test_3 where struct(`id`, `lineid`)
+IN (
+struct('two',3),
+struct('three',1),
+struct('one',1),
+struct('five',2),
+struct('six',1),
+struct('eight',1),
+struct('seven',1),
+struct('nine',1),
+struct('ten',1)
+);
+
+select * from test_3 where struct(`id`, `lineid`)
+IN (
+struct('two',3),
+struct('three',1),
+struct('one',1),
+struct('five',2),
+struct('six',1),
+struct('eight',1),
+struct('seven',1),
+struct('nine',1),
+struct('ten',1)
+);
+
+explain
+select `id`, `lineid`, struct(`id`, `lineid`)
+IN (
+struct('two',3),
+struct('three',1),
+struct('one',1),
+struct('five',2),
+struct('six',1),
+struct('eight',1),
+struct('seven',1),
+struct('nine',1),
+struct('ten',1)
+) as b from test_3;
+
+select `id`, `lineid`, struct(`id`, `lineid`)
+IN (
+struct('two',3),
+struct('three',1),
+struct('one',1),
+struct('five',2),
+struct('six',1),
+struct('eight',1),
+struct('seven',1),
+struct('nine',1),
+struct('ten',1)
+) as b from test_3;
+
+-- 1 Integer and 1 String and 1 Double
+create table test_4 (`my_bigint` bigint, `my_string` string, `my_double` double) stored as orc;
+
+insert into table test_4 values (1, "b", 1.5), (1, "a", 0.5), (2, "b", 1.5);
+
+explain
+select * from test_4 where struct(`my_bigint`, `my_string`, `my_double`)
+IN (
+struct(1L, "a", 1.5),
+struct(1L, "b", -0.5),
+struct(3L, "b", 1.5),
+struct(1L, "d", 1.5),
+struct(1L, "c", 1.5),
+struct(1L, "b", 2.5),
+struct(1L, "b", 0.5),
+struct(5L, "b", 1.5),
+struct(1L, "a", 0.5),
+struct(3L, "b", 1.5)
+);
+
+select * from test_4 where struct(`my_bigint`, `my_string`, `my_double`)
+IN (
+struct(1L, "a", 1.5),
+struct(1L, "b", -0.5),
+struct(3L, "b", 1.5),
+struct(1L, "d", 1.5),
+struct(1L, "c", 1.5),
+struct(1L, "b", 2.5),
+struct(1L, "b", 0.5),
+struct(5L, "b", 1.5),
+struct(1L, "a", 0.5),
+struct(3L, "b", 1.5)
+);
+
+explain
+select `my_bigint`, `my_string`, `my_double`, struct(`my_bigint`, `my_string`, `my_double`)
+IN (
+struct(1L, "a", 1.5),
+struct(1L, "b", -0.5),
+struct(3L, "b", 1.5),
+struct(1L, "d", 1.5),
+struct(1L, "c", 1.5),
+struct(1L, "b", 2.5),
+struct(1L, "b", 0.5),
+struct(5L, "b", 1.5),
+struct(1L, "a", 0.5),
+struct(3L, "b", 1.5)
+) as b from test_4;
+
+select `my_bigint`, `my_string`, `my_double`, struct(`my_bigint`, `my_string`, `my_double`)
+IN (
+struct(1L, "a", 1.5),
+struct(1L, "b", -0.5),
+struct(3L, "b", 1.5),
+struct(1L, "d", 1.5),
+struct(1L, "c", 1.5),
+struct(1L, "b", 2.5),
+struct(1L, "b", 0.5),
+struct(5L, "b", 1.5),
+struct(1L, "a", 0.5),
+struct(3L, "b", 1.5)
+) as b from test_4;
\ No newline at end of file
[26/50] [abbrv] hive git commit: HIVE-11783: Extending HPL/SQL parser
(Dmitry Tolpeko reviewed by Alan Gates)
Posted by xu...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/06790789/hplsql/src/test/results/offline/select_db2.out.txt
----------------------------------------------------------------------
diff --git a/hplsql/src/test/results/offline/select_db2.out.txt b/hplsql/src/test/results/offline/select_db2.out.txt
new file mode 100644
index 0000000..1d64e8a
--- /dev/null
+++ b/hplsql/src/test/results/offline/select_db2.out.txt
@@ -0,0 +1,6 @@
+Ln:1 SELECT
+Ln:1 select coalesce(max(info_id) + 1, 0) from sproc_info
+Ln:1 Not executed - offline mode set
+Ln:3 SELECT
+Ln:3 select cd, cd + inc days, cd - inc days + coalesce(inc, 0) days from (select date '2015-09-02' as cd, 3 as inc from sysibm.sysdummy1)
+Ln:3 Not executed - offline mode set
\ No newline at end of file
[40/50] [abbrv] hive git commit: HIVE-11748: HivePreparedStatement's
setTimestamp() does not quote value as required (Angus Smithson,
reviewed by Sergio Pena)
Posted by xu...@apache.org.
HIVE-11748: HivePreparedStatement's setTimestamp() does not quote value as required (Angus Smithson, reviewed by Sergio Pena)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/cdc65dc7
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/cdc65dc7
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/cdc65dc7
Branch: refs/heads/beeline-cli
Commit: cdc65dc7c6a0c725054839269a9c04ba02da0f5f
Parents: b98a60d
Author: Sergio Pena <se...@cloudera.com>
Authored: Wed Sep 23 14:18:16 2015 -0500
Committer: Sergio Pena <se...@cloudera.com>
Committed: Wed Sep 23 14:18:16 2015 -0500
----------------------------------------------------------------------
NOTICE | 3 +
.../org/apache/hive/jdbc/TestJdbcDriver2.java | 80 +++++++++++---------
.../apache/hive/jdbc/HivePreparedStatement.java | 4 +-
3 files changed, 48 insertions(+), 39 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/cdc65dc7/NOTICE
----------------------------------------------------------------------
diff --git a/NOTICE b/NOTICE
index 32d89b1..5c862df 100644
--- a/NOTICE
+++ b/NOTICE
@@ -9,3 +9,6 @@ Copyright (c) 2010-2014 Oracle and/or its affiliates.
This project includes software copyrighted by Microsoft Corporation and
licensed under the Apache License, Version 2.0.
+
+This project includes software copyrighted by Dell SecureWorks and
+licensed under the Apache License, Version 2.0.
http://git-wip-us.apache.org/repos/asf/hive/blob/cdc65dc7/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java
----------------------------------------------------------------------
diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java
index b2dd2ab..3aa6bce 100644
--- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java
+++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java
@@ -37,7 +37,9 @@ import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;
+import java.sql.Timestamp;
import java.sql.Types;
+import java.text.ParseException;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
@@ -412,29 +414,28 @@ public class TestJdbcDriver2 {
@Test
public void testPrepareStatement() {
-
- String sql = "from (select count(1) from "
+ String sql = "FROM (SELECT 1 FROM "
+ tableName
+ " where 'not?param?not?param' <> 'not_param??not_param' and ?=? "
+ " and 1=? and 2=? and 3.0=? and 4.0=? and 'test\\'string\"'=? and 5=? and ?=? "
+ " and date '2012-01-01' = date ?"
- + " ) t select '2011-03-25' ddate,'China',true bv, 10 num limit 10";
+ + " and timestamp '2012-04-22 09:00:00.123456789' = timestamp ?"
+ + " ) t SELECT '2011-03-25' ddate,'China',true bv, 10 num LIMIT 1";
///////////////////////////////////////////////
//////////////////// correct testcase
//////////////////// executed twice: once with the typed ps setters, once with the generic setObject
//////////////////////////////////////////////
try {
- PreparedStatement ps = createPreapredStatementUsingSetXXX(sql);
- ResultSet res = ps.executeQuery();
- assertPreparedStatementResultAsExpected(res);
- ps.close();
-
- ps = createPreapredStatementUsingSetObject(sql);
- res = ps.executeQuery();
- assertPreparedStatementResultAsExpected(res);
- ps.close();
+ try (PreparedStatement ps = createPreapredStatementUsingSetXXX(sql);
+ ResultSet res = ps.executeQuery()) {
+ assertPreparedStatementResultAsExpected(res);
+ }
+ try (PreparedStatement ps = createPreapredStatementUsingSetObject(sql);
+ ResultSet res = ps.executeQuery()) {
+ assertPreparedStatementResultAsExpected(res);
+ }
} catch (Exception e) {
e.printStackTrace();
fail(e.toString());
@@ -445,9 +446,8 @@ public class TestJdbcDriver2 {
//////////////////////////////////////////////
// set nothing for prepared sql
Exception expectedException = null;
- try {
- PreparedStatement ps = con.prepareStatement(sql);
- ps.executeQuery();
+ try (PreparedStatement ps = con.prepareStatement(sql);
+ ResultSet ignored = ps.executeQuery()) {
} catch (Exception e) {
expectedException = e;
}
@@ -457,11 +457,10 @@ public class TestJdbcDriver2 {
// set some of parameters for prepared sql, not all of them.
expectedException = null;
- try {
- PreparedStatement ps = con.prepareStatement(sql);
+ try (PreparedStatement ps = con.prepareStatement(sql)) {
ps.setBoolean(1, true);
ps.setBoolean(2, true);
- ps.executeQuery();
+ try (ResultSet ignored = ps.executeQuery()) {}
} catch (Exception e) {
expectedException = e;
}
@@ -471,16 +470,11 @@ public class TestJdbcDriver2 {
// set the wrong type parameters for prepared sql.
expectedException = null;
- try {
- PreparedStatement ps = con.prepareStatement(sql);
-
+ try (PreparedStatement ps = con.prepareStatement(sql)) {
// wrong type here
ps.setString(1, "wrong");
-
- assertTrue(true);
- ResultSet res = ps.executeQuery();
- if (!res.next()) {
- throw new Exception("there must be a empty result set");
+ try (ResultSet res = ps.executeQuery()) {
+ assertFalse("ResultSet was not empty", res.next());
}
} catch (Exception e) {
expectedException = e;
@@ -491,17 +485,15 @@ public class TestJdbcDriver2 {
// setObject to the yet unknown type java.util.Date
expectedException = null;
- try {
- PreparedStatement ps = con.prepareStatement(sql);
+ try (PreparedStatement ps = con.prepareStatement(sql)) {
ps.setObject(1, new Date());
- ps.executeQuery();
+ try (ResultSet ignored = ps.executeQuery()) {}
} catch (Exception e) {
expectedException = e;
}
assertNotNull(
"Setting to an unknown type should throw an exception",
expectedException);
-
}
private PreparedStatement createPreapredStatementUsingSetObject(String sql) throws SQLException {
@@ -509,7 +501,6 @@ public class TestJdbcDriver2 {
ps.setObject(1, true); //setBoolean
ps.setObject(2, true); //setBoolean
-
ps.setObject(3, Short.valueOf("1")); //setShort
ps.setObject(4, 2); //setInt
ps.setObject(5, 3f); //setFloat
@@ -519,6 +510,7 @@ public class TestJdbcDriver2 {
ps.setObject(9, (byte) 1); //setByte
ps.setObject(10, (byte) 1); //setByte
ps.setString(11, "2012-01-01"); //setString
+ ps.setObject(12, Timestamp.valueOf("2012-04-22 09:00:00.123456789")); //setTimestamp
ps.setMaxRows(2);
return ps;
@@ -529,7 +521,6 @@ public class TestJdbcDriver2 {
ps.setBoolean(1, true); //setBoolean
ps.setBoolean(2, true); //setBoolean
-
ps.setShort(3, Short.valueOf("1")); //setShort
ps.setInt(4, 2); //setInt
ps.setFloat(5, 3f); //setFloat
@@ -539,15 +530,17 @@ public class TestJdbcDriver2 {
ps.setByte(9, (byte) 1); //setByte
ps.setByte(10, (byte) 1); //setByte
ps.setString(11, "2012-01-01"); //setString
+ ps.setTimestamp(12, Timestamp.valueOf("2012-04-22 09:00:00.123456789")); //setTimestamp
ps.setMaxRows(2);
return ps;
}
- private void assertPreparedStatementResultAsExpected(ResultSet res ) throws SQLException {
+ private void assertPreparedStatementResultAsExpected(ResultSet res) throws SQLException {
assertNotNull(res);
+ assertTrue("ResultSet contained no rows", res.next());
- while (res.next()) {
+ do {
assertEquals("2011-03-25", res.getString("ddate"));
assertEquals("10", res.getString("num"));
assertEquals((byte) 10, res.getByte("num"));
@@ -561,9 +554,7 @@ public class TestJdbcDriver2 {
assertNotNull(o);
o = res.getObject("num");
assertNotNull(o);
- }
- res.close();
- assertTrue(true);
+ } while (res.next());
}
/**
@@ -2382,4 +2373,19 @@ public void testParseUrlHttpMode() throws SQLException, JdbcUriParseException,
fail(e.toString());
}
}
+
+ @Test
+ public void testPrepareSetTimestamp() throws SQLException, ParseException {
+ String sql = String.format("SELECT * FROM %s WHERE c17 = ?", dataTypeTableName);
+ try (PreparedStatement ps = con.prepareStatement(sql)) {
+ Timestamp timestamp = Timestamp.valueOf("2012-04-22 09:00:00.123456789");
+ ps.setTimestamp(1, timestamp);
+ // Ensure we find the single row which matches our timestamp (where field 1 has value 1)
+ try (ResultSet resultSet = ps.executeQuery()) {
+ assertTrue(resultSet.next());
+ assertEquals(1, resultSet.getInt(1));
+ assertFalse(resultSet.next());
+ }
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/cdc65dc7/jdbc/src/java/org/apache/hive/jdbc/HivePreparedStatement.java
----------------------------------------------------------------------
diff --git a/jdbc/src/java/org/apache/hive/jdbc/HivePreparedStatement.java b/jdbc/src/java/org/apache/hive/jdbc/HivePreparedStatement.java
index 7687537..c28b7d6 100644
--- a/jdbc/src/java/org/apache/hive/jdbc/HivePreparedStatement.java
+++ b/jdbc/src/java/org/apache/hive/jdbc/HivePreparedStatement.java
@@ -607,7 +607,7 @@ public class HivePreparedStatement extends HiveStatement implements PreparedStat
} else if (x instanceof Character) {
setString(parameterIndex, x.toString());
} else if (x instanceof Timestamp) {
- setString(parameterIndex, x.toString());
+ setTimestamp(parameterIndex, (Timestamp) x);
} else if (x instanceof BigDecimal) {
setString(parameterIndex, x.toString());
} else {
@@ -728,7 +728,7 @@ public class HivePreparedStatement extends HiveStatement implements PreparedStat
*/
public void setTimestamp(int parameterIndex, Timestamp x) throws SQLException {
- this.parameters.put(parameterIndex, x.toString());
+ this.parameters.put(parameterIndex, "'" + x.toString() + "'");
}
/*
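
For context, a minimal JDBC sketch of the behavior this patch corrects. The connection URL is illustrative, and the datatypes table with its c17 timestamp column echoes the testPrepareSetTimestamp test above:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.Timestamp;

public class TimestampParamDemo {
  public static void main(String[] args) throws Exception {
    try (Connection con =
             DriverManager.getConnection("jdbc:hive2://localhost:10000/default");
         PreparedStatement ps =
             con.prepareStatement("SELECT c1 FROM datatypes WHERE c17 = ?")) {
      // HivePreparedStatement substitutes parameters into the SQL text. Before this
      // patch a Timestamp was rendered unquoted:
      //   ... WHERE c17 = 2012-04-22 09:00:00.123456789     (fails to parse)
      // After the patch it becomes a quoted literal:
      //   ... WHERE c17 = '2012-04-22 09:00:00.123456789'
      ps.setTimestamp(1, Timestamp.valueOf("2012-04-22 09:00:00.123456789"));
      try (ResultSet rs = ps.executeQuery()) {
        while (rs.next()) {
          System.out.println(rs.getInt(1));
        }
      }
    }
  }
}
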
[17/50] [abbrv] hive git commit: HIVE-11820 : export tables with size
of >32MB throws java.lang.IllegalArgumentException: Skip CRC is valid only
with update options (Takahiko Saito via Ashutosh Chauhan)
Posted by xu...@apache.org.
HIVE-11820 : export tables with size of >32MB throws java.lang.IllegalArgumentException: Skip CRC is valid only with update options (Takahiko Saito via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/93a66276
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/93a66276
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/93a66276
Branch: refs/heads/beeline-cli
Commit: 93a6627606cb2dad0e04c3a885f71c1be405f51d
Parents: cc78dd5
Author: Takahiko Saito <ty...@gmail.com>
Authored: Wed Sep 16 15:34:00 2015 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Sat Sep 19 23:50:49 2015 -0700
----------------------------------------------------------------------
.../main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java | 5 ++++-
.../main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java | 4 +++-
2 files changed, 7 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/93a66276/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java
----------------------------------------------------------------------
diff --git a/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java b/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java
index 6c6ccbc..93dcbd3 100644
--- a/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java
+++ b/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java
@@ -73,6 +73,8 @@ import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.tools.distcp2.DistCp;
import org.apache.hadoop.tools.distcp2.DistCpOptions;
+import org.apache.hadoop.tools.distcp2.DistCpOptions.FileAttribute;
+
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.VersionInfo;
@@ -672,8 +674,9 @@ public class Hadoop20SShims extends HadoopShimsSecure {
public boolean runDistCp(Path src, Path dst, Configuration conf) throws IOException {
DistCpOptions options = new DistCpOptions(Collections.singletonList(src), dst);
- options.setSkipCRC(true);
options.setSyncFolder(true);
+ options.setSkipCRC(true);
+ options.preserve(FileAttribute.BLOCKSIZE);
try {
DistCp distcp = new DistCp(conf, options);
distcp.execute();
http://git-wip-us.apache.org/repos/asf/hive/blob/93a66276/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
----------------------------------------------------------------------
diff --git a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
index 3292cb3..83369ee 100644
--- a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
+++ b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
@@ -97,6 +97,7 @@ import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.tools.DistCp;
import org.apache.hadoop.tools.DistCpOptions;
+import org.apache.hadoop.tools.DistCpOptions.FileAttribute;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.tez.test.MiniTezCluster;
@@ -1213,8 +1214,9 @@ public class Hadoop23Shims extends HadoopShimsSecure {
public boolean runDistCp(Path src, Path dst, Configuration conf) throws IOException {
DistCpOptions options = new DistCpOptions(Collections.singletonList(src), dst);
- options.setSkipCRC(true);
options.setSyncFolder(true);
+ options.setSkipCRC(true);
+ options.preserve(FileAttribute.BLOCKSIZE);
try {
DistCp distcp = new DistCp(conf, options);
distcp.execute();
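
The reordering matters because DistCpOptions re-validates the option set inside each setter: enabling skip-CRC while sync-folder is still false is what raised the IllegalArgumentException in the bug title. A minimal sketch against the Hadoop 2.x org.apache.hadoop.tools API, with illustrative paths:

import java.util.Collections;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.tools.DistCpOptions;
import org.apache.hadoop.tools.DistCpOptions.FileAttribute;

public class DistCpOptionsOrdering {
  public static void main(String[] args) {
    DistCpOptions options = new DistCpOptions(
        Collections.singletonList(new Path("/tmp/export_src")),
        new Path("/tmp/export_dst"));
    // Calling setSkipCRC(true) first would throw
    //   java.lang.IllegalArgumentException: Skip CRC is valid only with update options
    // so sync-folder must be enabled before skip-CRC, as the patch does.
    options.setSyncFolder(true);
    options.setSkipCRC(true);
    // Preserving the block size keeps checksum comparisons consistent for the copies.
    options.preserve(FileAttribute.BLOCKSIZE);
  }
}
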
[11/50] [abbrv] hive git commit: HIVE-11841 : KeyValuesInputMerger
creates huge logs (Rajesh Balamohan, reviewed by Gopal V)
Posted by xu...@apache.org.
HIVE-11841 : KeyValuesInputMerger creates huge logs (Rajesh Balamohan, reviewed by Gopal V)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e9c8d7c9
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e9c8d7c9
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e9c8d7c9
Branch: refs/heads/beeline-cli
Commit: e9c8d7c94f10d19ce5dc3b564f6ad1c53d4480b5
Parents: 68c0e99
Author: Sergey Shelukhin <se...@apache.org>
Authored: Fri Sep 18 11:24:44 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Fri Sep 18 11:24:44 2015 -0700
----------------------------------------------------------------------
.../apache/hadoop/hive/ql/exec/tez/tools/KeyValuesInputMerger.java | 1 -
1 file changed, 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/e9c8d7c9/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/tools/KeyValuesInputMerger.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/tools/KeyValuesInputMerger.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/tools/KeyValuesInputMerger.java
index 9bc6418..2db2f98 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/tools/KeyValuesInputMerger.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/tools/KeyValuesInputMerger.java
@@ -104,7 +104,6 @@ public class KeyValuesInputMerger extends KeyValuesReader {
@Override
public Object next() {
- l4j.info("next called on " + currentIterator);
return currentIterator.next();
}
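
The removed statement logged once per record inside the merge path, which is what produced the huge logs. Where per-record diagnostics are still wanted, the usual remedy is a guarded debug-level call rather than an unconditional info-level one; a small sketch with a hypothetical class, not part of this patch:

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

public class GuardedLogDemo {
  private static final Log LOG = LogFactory.getLog(GuardedLogDemo.class);

  static void processRecord(Object record) {
    // The guard skips both the string concatenation and the log write on hot
    // paths unless debug logging is explicitly enabled.
    if (LOG.isDebugEnabled()) {
      LOG.debug("processing " + record);
    }
  }

  public static void main(String[] args) {
    processRecord("row-1");
  }
}
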
[45/50] [abbrv] hive git commit: HIVE-10785 : Support aggregate push
down through joins (Ashutosh Chauhan via Jesus Camacho Rodriguez)
Posted by xu...@apache.org.
HIVE-10785 : Support aggregate push down through joins (Ashutosh Chauhan via Jesus Camacho Rodriguez)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/68d6cfda
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/68d6cfda
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/68d6cfda
Branch: refs/heads/beeline-cli
Commit: 68d6cfda78b3ec6b42cf0d42df62aa1f2716d414
Parents: 1528135
Author: Ashutosh Chauhan <ha...@apache.org>
Authored: Thu Sep 17 21:49:00 2015 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Thu Sep 24 13:58:50 2015 -0700
----------------------------------------------------------------------
.../org/apache/hadoop/hive/conf/HiveConf.java | 2 +-
.../hadoop/hive/ql/exec/FunctionRegistry.java | 3 +-
.../functions/HiveSqlCountAggFunction.java | 72 +
.../functions/HiveSqlMinMaxAggFunction.java | 49 +
.../functions/HiveSqlSumAggFunction.java | 125 ++
.../rules/HiveAggregateJoinTransposeRule.java | 372 +++++
.../translator/SqlFunctionConverter.java | 40 +-
.../hadoop/hive/ql/parse/CalcitePlanner.java | 5 +
.../hive/ql/udf/generic/GenericUDAFSum.java | 2 +-
.../udf/generic/GenericUDAFSumEmptyIsZero.java | 63 +
.../clientpositive/groupby_join_pushdown.q | 55 +
.../clientpositive/groupby_join_pushdown.q.out | 1522 ++++++++++++++++++
.../results/clientpositive/show_functions.q.out | 1 +
13 files changed, 2297 insertions(+), 14 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/68d6cfda/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index f3e2168..dffdb5c 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -814,7 +814,7 @@ public class HiveConf extends Configuration {
+ " expressed as multiple of Local FS write cost"),
HIVE_CBO_COST_MODEL_HDFS_READ("hive.cbo.costmodel.hdfs.read", "1.5", "Default cost of reading a byte from HDFS;"
+ " expressed as multiple of Local FS read cost"),
-
+ AGGR_JOIN_TRANSPOSE("hive.transpose.aggr.join", false, "push aggregates through join"),
// hive.mapjoin.bucket.cache.size has been replaced by hive.smbjoin.cache.row,
// need to remove by hive .13. Also, do not change default (see SMB operator)
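
For reference, the new flag surfaces to sessions as hive.transpose.aggr.join and defaults to false; in HiveQL it is enabled with "set hive.transpose.aggr.join=true;". A minimal programmatic sketch, assuming only the ConfVars entry added above:

import org.apache.hadoop.hive.conf.HiveConf;

public class EnableAggrJoinTranspose {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    // Off by default; enabling it lets the CBO apply HiveAggregateJoinTransposeRule
    // to push aggregates below joins.
    conf.setBoolVar(HiveConf.ConfVars.AGGR_JOIN_TRANSPOSE, true);
    System.out.println(conf.getBoolVar(HiveConf.ConfVars.AGGR_JOIN_TRANSPOSE)); // true
  }
}
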
http://git-wip-us.apache.org/repos/asf/hive/blob/68d6cfda/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index f1fe30d..218b2df 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -370,6 +370,7 @@ public final class FunctionRegistry {
system.registerGenericUDAF("min", new GenericUDAFMin());
system.registerGenericUDAF("sum", new GenericUDAFSum());
+ system.registerGenericUDAF("$SUM0", new GenericUDAFSumEmptyIsZero());
system.registerGenericUDAF("count", new GenericUDAFCount());
system.registerGenericUDAF("avg", new GenericUDAFAverage());
system.registerGenericUDAF("std", new GenericUDAFStd());
@@ -960,7 +961,7 @@ public final class FunctionRegistry {
GenericUDAFParameterInfo paramInfo =
new SimpleGenericUDAFParameterInfo(
args, isDistinct, isAllColumns);
-
+
GenericUDAFEvaluator udafEvaluator;
if (udafResolver instanceof GenericUDAFResolver2) {
udafEvaluator =
http://git-wip-us.apache.org/repos/asf/hive/blob/68d6cfda/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlCountAggFunction.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlCountAggFunction.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlCountAggFunction.java
new file mode 100644
index 0000000..7937040
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlCountAggFunction.java
@@ -0,0 +1,72 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.functions;
+
+import org.apache.calcite.rel.core.AggregateCall;
+import org.apache.calcite.rel.type.RelDataTypeFactory;
+import org.apache.calcite.sql.SqlAggFunction;
+import org.apache.calcite.sql.SqlFunctionCategory;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.SqlSplittableAggFunction;
+import org.apache.calcite.sql.SqlSplittableAggFunction.CountSplitter;
+import org.apache.calcite.sql.type.SqlOperandTypeChecker;
+import org.apache.calcite.sql.type.SqlOperandTypeInference;
+import org.apache.calcite.sql.type.SqlReturnTypeInference;
+import org.apache.calcite.sql.type.SqlTypeName;
+import org.apache.calcite.util.ImmutableIntList;
+
+public class HiveSqlCountAggFunction extends SqlAggFunction {
+
+ final SqlReturnTypeInference returnTypeInference;
+ final SqlOperandTypeInference operandTypeInference;
+ final SqlOperandTypeChecker operandTypeChecker;
+
+ public HiveSqlCountAggFunction(SqlReturnTypeInference returnTypeInference,
+ SqlOperandTypeInference operandTypeInference, SqlOperandTypeChecker operandTypeChecker) {
+ super(
+ "count",
+ SqlKind.OTHER_FUNCTION,
+ returnTypeInference,
+ operandTypeInference,
+ operandTypeChecker,
+ SqlFunctionCategory.NUMERIC);
+ this.returnTypeInference = returnTypeInference;
+ this.operandTypeChecker = operandTypeChecker;
+ this.operandTypeInference = operandTypeInference;
+ }
+
+ @Override
+ public <T> T unwrap(Class<T> clazz) {
+ if (clazz == SqlSplittableAggFunction.class) {
+ return clazz.cast(new HiveCountSplitter());
+ }
+ return super.unwrap(clazz);
+ }
+
+ class HiveCountSplitter extends CountSplitter {
+
+ @Override
+ public AggregateCall other(RelDataTypeFactory typeFactory, AggregateCall e) {
+
+ return AggregateCall.create(
+ new HiveSqlCountAggFunction(returnTypeInference, operandTypeInference, operandTypeChecker),
+ false, ImmutableIntList.of(), -1,
+ typeFactory.createTypeWithNullability(typeFactory.createSqlType(SqlTypeName.BIGINT), true), "count");
+ }
+ }
+}
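
The HiveCountSplitter above supplies the count() call for the join side that carries no aggregate of its own; the top aggregate then scales the other side's partial sums by that count. A self-contained toy illustration of the underlying identity, not Calcite code:

public class AggPushdownIdentity {
  public static void main(String[] args) {
    // R(k, x): two rows with k = 1.  S(k): three rows with k = 1.
    long[] rX = {10, 20};
    int sRows = 3;

    // Direct evaluation: SUM(x) over R JOIN S ON k.
    long direct = 0;
    for (long x : rX) {
      for (int i = 0; i < sRows; i++) {
        direct += x;
      }
    }

    // Pushed-down evaluation: pre-aggregate each side below the join, then
    // recombine: SUM(x) over the join == sum_R(x) * count_S.
    long sumR = 0;
    for (long x : rX) {
      sumR += x;
    }
    long pushed = sumR * sRows;

    System.out.println(direct + " == " + pushed); // 90 == 90
  }
}
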
http://git-wip-us.apache.org/repos/asf/hive/blob/68d6cfda/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlMinMaxAggFunction.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlMinMaxAggFunction.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlMinMaxAggFunction.java
new file mode 100644
index 0000000..77dca1f
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlMinMaxAggFunction.java
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.functions;
+
+import org.apache.calcite.sql.SqlAggFunction;
+import org.apache.calcite.sql.SqlFunctionCategory;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.SqlSplittableAggFunction;
+import org.apache.calcite.sql.SqlSplittableAggFunction.SelfSplitter;
+import org.apache.calcite.sql.type.SqlOperandTypeChecker;
+import org.apache.calcite.sql.type.SqlOperandTypeInference;
+import org.apache.calcite.sql.type.SqlReturnTypeInference;
+
+public class HiveSqlMinMaxAggFunction extends SqlAggFunction {
+
+ public HiveSqlMinMaxAggFunction(SqlReturnTypeInference returnTypeInference,
+ SqlOperandTypeInference operandTypeInference, SqlOperandTypeChecker operandTypeChecker, boolean isMin) {
+ super(
+ isMin ? "min" : "max",
+ SqlKind.OTHER_FUNCTION,
+ returnTypeInference,
+ operandTypeInference,
+ operandTypeChecker,
+ SqlFunctionCategory.NUMERIC);
+ }
+
+ @Override
+ public <T> T unwrap(Class<T> clazz) {
+ if (clazz == SqlSplittableAggFunction.class) {
+ return clazz.cast(SelfSplitter.INSTANCE);
+ }
+ return super.unwrap(clazz);
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/68d6cfda/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlSumAggFunction.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlSumAggFunction.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlSumAggFunction.java
new file mode 100644
index 0000000..8f62970
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlSumAggFunction.java
@@ -0,0 +1,125 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.functions;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.calcite.rel.core.AggregateCall;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeFactory;
+import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.sql.SqlAggFunction;
+import org.apache.calcite.sql.SqlFunctionCategory;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.SqlSplittableAggFunction;
+import org.apache.calcite.sql.SqlSplittableAggFunction.SumSplitter;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.calcite.sql.type.ReturnTypes;
+import org.apache.calcite.sql.type.SqlOperandTypeChecker;
+import org.apache.calcite.sql.type.SqlOperandTypeInference;
+import org.apache.calcite.sql.type.SqlReturnTypeInference;
+import org.apache.calcite.sql.type.SqlTypeName;
+import org.apache.calcite.util.ImmutableIntList;
+
+import com.google.common.collect.ImmutableList;
+
+/**
+ * <code>Sum</code> is an aggregator which returns the sum of the values which
+ * go into it. It has precisely one argument of numeric type (<code>int</code>,
+ * <code>long</code>, <code>float</code>, <code>double</code>), and the result
+ * is the same type.
+ */
+public class HiveSqlSumAggFunction extends SqlAggFunction {
+
+ final SqlReturnTypeInference returnTypeInference;
+ final SqlOperandTypeInference operandTypeInference;
+ final SqlOperandTypeChecker operandTypeChecker;
+
+ //~ Constructors -----------------------------------------------------------
+
+ public HiveSqlSumAggFunction(SqlReturnTypeInference returnTypeInference,
+ SqlOperandTypeInference operandTypeInference, SqlOperandTypeChecker operandTypeChecker) {
+ super(
+ "sum",
+ SqlKind.OTHER_FUNCTION,
+ returnTypeInference,
+ operandTypeInference,
+ operandTypeChecker,
+ SqlFunctionCategory.NUMERIC);
+ this.returnTypeInference = returnTypeInference;
+ this.operandTypeChecker = operandTypeChecker;
+ this.operandTypeInference = operandTypeInference;
+ }
+
+ //~ Methods ----------------------------------------------------------------
+
+
+ @Override
+ public <T> T unwrap(Class<T> clazz) {
+ if (clazz == SqlSplittableAggFunction.class) {
+ return clazz.cast(new HiveSumSplitter());
+ }
+ return super.unwrap(clazz);
+ }
+
+ class HiveSumSplitter extends SumSplitter {
+
+ @Override
+ public AggregateCall other(RelDataTypeFactory typeFactory, AggregateCall e) {
+ RelDataType countRetType = typeFactory.createTypeWithNullability(typeFactory.createSqlType(SqlTypeName.BIGINT), true);
+ return AggregateCall.create(
+ new HiveSqlCountAggFunction(ReturnTypes.explicit(countRetType), operandTypeInference, operandTypeChecker),
+ false, ImmutableIntList.of(), -1, countRetType, "count");
+ }
+
+ @Override
+ public AggregateCall topSplit(RexBuilder rexBuilder,
+ Registry<RexNode> extra, int offset, RelDataType inputRowType,
+ AggregateCall aggregateCall, int leftSubTotal, int rightSubTotal) {
+ final List<RexNode> merges = new ArrayList<>();
+ final List<RelDataTypeField> fieldList = inputRowType.getFieldList();
+ if (leftSubTotal >= 0) {
+ final RelDataType type = fieldList.get(leftSubTotal).getType();
+ merges.add(rexBuilder.makeInputRef(type, leftSubTotal));
+ }
+ if (rightSubTotal >= 0) {
+ final RelDataType type = fieldList.get(rightSubTotal).getType();
+ merges.add(rexBuilder.makeInputRef(type, rightSubTotal));
+ }
+ RexNode node;
+ switch (merges.size()) {
+ case 1:
+ node = merges.get(0);
+ break;
+ case 2:
+ node = rexBuilder.makeCall(SqlStdOperatorTable.MULTIPLY, merges);
+ node = rexBuilder.makeAbstractCast(aggregateCall.type, node);
+ break;
+ default:
+ throw new AssertionError("unexpected count " + merges);
+ }
+ int ordinal = extra.register(node);
+ return AggregateCall.create(new HiveSqlSumAggFunction(returnTypeInference, operandTypeInference, operandTypeChecker),
+ false, ImmutableList.of(ordinal), -1, aggregateCall.type, aggregateCall.name);
+ }
+ }
+}
+
+// End HiveSqlSumAggFunction.java
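
HiveSumSplitter captures the algebra of pushing SUM below a join: the side that holds the argument produces a partial SUM, the opposite side produces a COUNT (other()), and topSplit() multiplies the two sub-totals before the final SUM. A worked example with hypothetical values:

    // Left input, grouped on key = 1:  sum_f = SUM(x) = 10   (x rows: 4, 6)
    // Right input, grouped on key = 1: cnt_g = COUNT(*) = 3
    // topSplit projects CAST(sum_f * cnt_g AS <sum type>) = 30, and the top
    // aggregate computes SUM(30) = 30, identical to summing x over the
    // original 2 x 3 = 6 joined rows: (4 + 6) * 3 = 30.
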
http://git-wip-us.apache.org/repos/asf/hive/blob/68d6cfda/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java
new file mode 100644
index 0000000..211b6fa
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java
@@ -0,0 +1,372 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.linq4j.Ord;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Aggregate;
+import org.apache.calcite.rel.core.AggregateCall;
+import org.apache.calcite.rel.core.Join;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.core.RelFactories;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rel.rules.AggregateJoinTransposeRule;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.sql.SqlAggFunction;
+import org.apache.calcite.sql.SqlSplittableAggFunction;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.mapping.Mapping;
+import org.apache.calcite.util.mapping.Mappings;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
+
+import com.google.common.base.Function;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
+
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+/**
+ * Planner rule that pushes an
+ * {@link org.apache.calcite.rel.core.Aggregate}
+ * past a {@link org.apache.calcite.rel.core.Join}.
+ */
+public class HiveAggregateJoinTransposeRule extends AggregateJoinTransposeRule {
+
+ /** Extended instance of the rule that can push down aggregate functions. */
+ public static final HiveAggregateJoinTransposeRule INSTANCE =
+ new HiveAggregateJoinTransposeRule(HiveAggregate.class, HiveAggregate.HIVE_AGGR_REL_FACTORY,
+ HiveJoin.class, HiveJoin.HIVE_JOIN_FACTORY, HiveProject.DEFAULT_PROJECT_FACTORY, true);
+
+ private final RelFactories.AggregateFactory aggregateFactory;
+
+ private final RelFactories.JoinFactory joinFactory;
+
+ private final RelFactories.ProjectFactory projectFactory;
+
+ private final boolean allowFunctions;
+
+ /** Creates an AggregateJoinTransposeRule that may push down functions. */
+ private HiveAggregateJoinTransposeRule(Class<? extends Aggregate> aggregateClass,
+ RelFactories.AggregateFactory aggregateFactory,
+ Class<? extends Join> joinClass,
+ RelFactories.JoinFactory joinFactory,
+ RelFactories.ProjectFactory projectFactory,
+ boolean allowFunctions) {
+ super(aggregateClass, aggregateFactory, joinClass, joinFactory, projectFactory, true);
+ this.aggregateFactory = aggregateFactory;
+ this.joinFactory = joinFactory;
+ this.projectFactory = projectFactory;
+ this.allowFunctions = allowFunctions;
+ }
+
+ @Override
+ public void onMatch(RelOptRuleCall call) {
+ final Aggregate aggregate = call.rel(0);
+ final Join join = call.rel(1);
+ final RexBuilder rexBuilder = aggregate.getCluster().getRexBuilder();
+
+ // If any aggregate functions do not support splitting, bail out
+ // If any aggregate call has a filter, bail out
+ for (AggregateCall aggregateCall : aggregate.getAggCallList()) {
+ if (aggregateCall.getAggregation().unwrap(SqlSplittableAggFunction.class)
+ == null) {
+ return;
+ }
+ if (aggregateCall.filterArg >= 0) {
+ return;
+ }
+ }
+
+ // If it is not an inner join, we do not push the
+ // aggregate operator
+ if (join.getJoinType() != JoinRelType.INNER) {
+ return;
+ }
+
+ if (!allowFunctions && !aggregate.getAggCallList().isEmpty()) {
+ return;
+ }
+
+ // Do the columns used by the join appear in the output of the aggregate?
+ final ImmutableBitSet aggregateColumns = aggregate.getGroupSet();
+ final ImmutableBitSet keyColumns = keyColumns(aggregateColumns,
+ RelMetadataQuery.getPulledUpPredicates(join).pulledUpPredicates);
+ final ImmutableBitSet joinColumns =
+ RelOptUtil.InputFinder.bits(join.getCondition());
+ final boolean allColumnsInAggregate =
+ keyColumns.contains(joinColumns);
+ final ImmutableBitSet belowAggregateColumns =
+ aggregateColumns.union(joinColumns);
+
+ // Split join condition
+ final List<Integer> leftKeys = Lists.newArrayList();
+ final List<Integer> rightKeys = Lists.newArrayList();
+ RexNode nonEquiConj =
+ RelOptUtil.splitJoinCondition(join.getLeft(), join.getRight(),
+ join.getCondition(), leftKeys, rightKeys);
+ // If it contains non-equi join conditions, we bail out
+ if (!nonEquiConj.isAlwaysTrue()) {
+ return;
+ }
+
+ // Push each aggregate function down to each side that contains all of its
+ // arguments. Note that COUNT(*), because it has no arguments, can go to
+ // both sides.
+ final Map<Integer, Integer> map = new HashMap<>();
+ final List<Side> sides = new ArrayList<>();
+ int uniqueCount = 0;
+ int offset = 0;
+ int belowOffset = 0;
+ for (int s = 0; s < 2; s++) {
+ final Side side = new Side();
+ final RelNode joinInput = join.getInput(s);
+ int fieldCount = joinInput.getRowType().getFieldCount();
+ final ImmutableBitSet fieldSet =
+ ImmutableBitSet.range(offset, offset + fieldCount);
+ final ImmutableBitSet belowAggregateKeyNotShifted =
+ belowAggregateColumns.intersect(fieldSet);
+ for (Ord<Integer> c : Ord.zip(belowAggregateKeyNotShifted)) {
+ map.put(c.e, belowOffset + c.i);
+ }
+ final ImmutableBitSet belowAggregateKey =
+ belowAggregateKeyNotShifted.shift(-offset);
+ final boolean unique;
+ if (!allowFunctions) {
+ assert aggregate.getAggCallList().isEmpty();
+ // If there are no functions, it doesn't matter as much whether we
+ // aggregate the inputs before the join, because there will not be
+ // any functions experiencing a cartesian product effect.
+ //
+ // But finding out whether the input is already unique requires a call
+ // to areColumnsUnique that currently (until [CALCITE-794] "Detect
+ // cycles when computing statistics" is fixed) places a heavy load on
+ // the metadata system.
+ //
+ // So we choose to imagine the input is already unique, which is
+ // untrue but harmless.
+ //
+ unique = true;
+ } else {
+ final Boolean unique0 =
+ RelMetadataQuery.areColumnsUnique(joinInput, belowAggregateKey);
+ unique = unique0 != null && unique0;
+ }
+ if (unique) {
+ ++uniqueCount;
+ side.newInput = joinInput;
+ } else {
+ List<AggregateCall> belowAggCalls = new ArrayList<>();
+ final SqlSplittableAggFunction.Registry<AggregateCall>
+ belowAggCallRegistry = registry(belowAggCalls);
+ final Mappings.TargetMapping mapping =
+ s == 0
+ ? Mappings.createIdentity(fieldCount)
+ : Mappings.createShiftMapping(fieldCount + offset, 0, offset,
+ fieldCount);
+ for (Ord<AggregateCall> aggCall : Ord.zip(aggregate.getAggCallList())) {
+ final SqlAggFunction aggregation = aggCall.e.getAggregation();
+ final SqlSplittableAggFunction splitter =
+ Preconditions.checkNotNull(
+ aggregation.unwrap(SqlSplittableAggFunction.class));
+ final AggregateCall call1;
+ if (fieldSet.contains(ImmutableBitSet.of(aggCall.e.getArgList()))) {
+ call1 = splitter.split(aggCall.e, mapping);
+ } else {
+ call1 = splitter.other(rexBuilder.getTypeFactory(), aggCall.e);
+ }
+ if (call1 != null) {
+ side.split.put(aggCall.i,
+ belowAggregateKey.cardinality()
+ + belowAggCallRegistry.register(call1));
+ }
+ }
+ side.newInput = aggregateFactory.createAggregate(joinInput, false,
+ belowAggregateKey, null, belowAggCalls);
+ }
+ offset += fieldCount;
+ belowOffset += side.newInput.getRowType().getFieldCount();
+ sides.add(side);
+ }
+
+ if (uniqueCount == 2) {
+ // Both inputs to the join are unique. There is nothing to be gained by
+ // this rule. In fact, this aggregate+join may be the result of a previous
+ // invocation of this rule; if we continue we might loop forever.
+ return;
+ }
+
+ // Update condition
+ final Mapping mapping = (Mapping) Mappings.target(
+ new Function<Integer, Integer>() {
+ @Override
+ public Integer apply(Integer a0) {
+ return map.get(a0);
+ }
+ },
+ join.getRowType().getFieldCount(),
+ belowOffset);
+ final RexNode newCondition =
+ RexUtil.apply(mapping, join.getCondition());
+
+ // Create new join
+ RelNode newJoin = joinFactory.createJoin(sides.get(0).newInput,
+ sides.get(1).newInput, newCondition, join.getJoinType(),
+ join.getVariablesStopped(), join.isSemiJoinDone());
+
+ // Aggregate above to sum up the sub-totals
+ final List<AggregateCall> newAggCalls = new ArrayList<>();
+ final int groupIndicatorCount =
+ aggregate.getGroupCount() + aggregate.getIndicatorCount();
+ final int newLeftWidth = sides.get(0).newInput.getRowType().getFieldCount();
+ final List<RexNode> projects =
+ new ArrayList<>(rexBuilder.identityProjects(newJoin.getRowType()));
+ for (Ord<AggregateCall> aggCall : Ord.zip(aggregate.getAggCallList())) {
+ final SqlAggFunction aggregation = aggCall.e.getAggregation();
+ final SqlSplittableAggFunction splitter =
+ Preconditions.checkNotNull(
+ aggregation.unwrap(SqlSplittableAggFunction.class));
+ final Integer leftSubTotal = sides.get(0).split.get(aggCall.i);
+ final Integer rightSubTotal = sides.get(1).split.get(aggCall.i);
+ newAggCalls.add(
+ splitter.topSplit(rexBuilder, registry(projects),
+ groupIndicatorCount, newJoin.getRowType(), aggCall.e,
+ leftSubTotal == null ? -1 : leftSubTotal,
+ rightSubTotal == null ? -1 : rightSubTotal + newLeftWidth));
+ }
+ RelNode r = newJoin;
+ b:
+ if (allColumnsInAggregate && newAggCalls.isEmpty() &&
+ RelOptUtil.areRowTypesEqual(r.getRowType(), aggregate.getRowType(), false)) {
+ // no need to aggregate
+ } else {
+ r = RelOptUtil.createProject(r, projects, null, true, projectFactory);
+ if (allColumnsInAggregate) {
+ // let's see if we can convert
+ List<RexNode> projects2 = new ArrayList<>();
+ for (int key : Mappings.apply(mapping, aggregate.getGroupSet())) {
+ projects2.add(rexBuilder.makeInputRef(r, key));
+ }
+ for (AggregateCall newAggCall : newAggCalls) {
+ final SqlSplittableAggFunction splitter =
+ newAggCall.getAggregation()
+ .unwrap(SqlSplittableAggFunction.class);
+ if (splitter != null) {
+ projects2.add(
+ splitter.singleton(rexBuilder, r.getRowType(), newAggCall));
+ }
+ }
+ if (projects2.size()
+ == aggregate.getGroupSet().cardinality() + newAggCalls.size()) {
+ // We successfully converted agg calls into projects.
+ r = RelOptUtil.createProject(r, projects2, null, true, projectFactory);
+ break b;
+ }
+ }
+ r = aggregateFactory.createAggregate(r, aggregate.indicator,
+ Mappings.apply(mapping, aggregate.getGroupSet()),
+ Mappings.apply2(mapping, aggregate.getGroupSets()), newAggCalls);
+ }
+ call.transformTo(r);
+ }
+
+ /** Computes the closure of a set of columns according to a given list of
+ * constraints. Each 'x = y' constraint causes bit y to be set if bit x is
+ * set, and vice versa. */
+ private static ImmutableBitSet keyColumns(ImmutableBitSet aggregateColumns,
+ ImmutableList<RexNode> predicates) {
+ SortedMap<Integer, BitSet> equivalence = new TreeMap<>();
+ for (RexNode pred : predicates) {
+ populateEquivalences(equivalence, pred);
+ }
+ ImmutableBitSet keyColumns = aggregateColumns;
+ for (Integer aggregateColumn : aggregateColumns) {
+ final BitSet bitSet = equivalence.get(aggregateColumn);
+ if (bitSet != null) {
+ keyColumns = keyColumns.union(bitSet);
+ }
+ }
+ return keyColumns;
+ }
+
+ private static void populateEquivalences(Map<Integer, BitSet> equivalence,
+ RexNode predicate) {
+ switch (predicate.getKind()) {
+ case EQUALS:
+ RexCall call = (RexCall) predicate;
+ final List<RexNode> operands = call.getOperands();
+ if (operands.get(0) instanceof RexInputRef) {
+ final RexInputRef ref0 = (RexInputRef) operands.get(0);
+ if (operands.get(1) instanceof RexInputRef) {
+ final RexInputRef ref1 = (RexInputRef) operands.get(1);
+ populateEquivalence(equivalence, ref0.getIndex(), ref1.getIndex());
+ populateEquivalence(equivalence, ref1.getIndex(), ref0.getIndex());
+ }
+ }
+ }
+ }
+
+ private static void populateEquivalence(Map<Integer, BitSet> equivalence,
+ int i0, int i1) {
+ BitSet bitSet = equivalence.get(i0);
+ if (bitSet == null) {
+ bitSet = new BitSet();
+ equivalence.put(i0, bitSet);
+ }
+ bitSet.set(i1);
+ }
+
+ /** Creates a {@link org.apache.calcite.sql.SqlSplittableAggFunction.Registry}
+ * that is a view of a list. */
+ private static <E> SqlSplittableAggFunction.Registry<E>
+ registry(final List<E> list) {
+ return new SqlSplittableAggFunction.Registry<E>() {
+ @Override
+ public int register(E e) {
+ int i = list.indexOf(e);
+ if (i < 0) {
+ i = list.size();
+ list.add(e);
+ }
+ return i;
+ }
+ };
+ }
+
+ /** Work space for an input to a join. */
+ private static class Side {
+ final Map<Integer, Integer> split = new HashMap<>();
+ RelNode newInput;
+ }
+}
+
+// End HiveAggregateJoinTransposeRule.java
+
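
Taken together, the rule reshapes the plan roughly as follows (a hand sketch of the shape, not actual optimizer output):

    // Before:
    //   Aggregate(group = f.key, SUM(f.cint))
    //     Join(f.key = g.key)
    //       Scan f
    //       Scan g
    //
    // After, when neither join input is already unique on its keys:
    //   Aggregate(group = f.key, SUM(p))                     <- topSplit()
    //     Project(..., p = sum_f * cnt_g)
    //       Join(f.key = g.key)
    //         Aggregate(group = f.key, sum_f = SUM(f.cint))  <- split()
    //         Aggregate(group = g.key, cnt_g = COUNT(*))     <- other()
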
http://git-wip-us.apache.org/repos/asf/hive/blob/68d6cfda/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
index fd78824..d59c6bb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
@@ -45,6 +45,9 @@ import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature;
+import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlCountAggFunction;
+import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlMinMaxAggFunction;
+import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlSumAggFunction;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveBetween;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIn;
import org.apache.hadoop.hive.ql.parse.ASTNode;
@@ -310,6 +313,7 @@ public class SqlFunctionConverter {
registerFunction("in", HiveIn.INSTANCE, hToken(HiveParser.Identifier, "in"));
registerFunction("between", HiveBetween.INSTANCE, hToken(HiveParser.Identifier, "between"));
registerFunction("struct", SqlStdOperatorTable.ROW, hToken(HiveParser.Identifier, "struct"));
+
}
private void registerFunction(String name, SqlOperator calciteFn, HiveToken hiveToken) {
@@ -339,8 +343,7 @@ public class SqlFunctionConverter {
// UDAF is assumed to be deterministic
public static class CalciteUDAF extends SqlAggFunction {
public CalciteUDAF(String opName, SqlReturnTypeInference returnTypeInference,
- SqlOperandTypeInference operandTypeInference, SqlOperandTypeChecker operandTypeChecker,
- ImmutableList<RelDataType> argTypes, RelDataType retType) {
+ SqlOperandTypeInference operandTypeInference, SqlOperandTypeChecker operandTypeChecker) {
super(opName, SqlKind.OTHER_FUNCTION, returnTypeInference, operandTypeInference,
operandTypeChecker, SqlFunctionCategory.USER_DEFINED_FUNCTION);
}
@@ -367,8 +370,6 @@ public class SqlFunctionConverter {
private SqlReturnTypeInference returnTypeInference;
private SqlOperandTypeInference operandTypeInference;
private SqlOperandTypeChecker operandTypeChecker;
- private ImmutableList<RelDataType> argTypes;
- private RelDataType retType;
}
private static CalciteUDFInfo getUDFInfo(String hiveUdfName,
@@ -382,10 +383,6 @@ public class SqlFunctionConverter {
typeFamilyBuilder.add(Util.first(at.getSqlTypeName().getFamily(), SqlTypeFamily.ANY));
}
udfInfo.operandTypeChecker = OperandTypes.family(typeFamilyBuilder.build());
-
- udfInfo.argTypes = ImmutableList.<RelDataType> copyOf(calciteArgTypes);
- udfInfo.retType = calciteRetType;
-
return udfInfo;
}
@@ -413,13 +410,34 @@ public class SqlFunctionConverter {
public static SqlAggFunction getCalciteAggFn(String hiveUdfName,
ImmutableList<RelDataType> calciteArgTypes, RelDataType calciteRetType) {
SqlAggFunction calciteAggFn = (SqlAggFunction) hiveToCalcite.get(hiveUdfName);
+
if (calciteAggFn == null) {
CalciteUDFInfo uInf = getUDFInfo(hiveUdfName, calciteArgTypes, calciteRetType);
- calciteAggFn = new CalciteUDAF(uInf.udfName, uInf.returnTypeInference,
- uInf.operandTypeInference, uInf.operandTypeChecker, uInf.argTypes, uInf.retType);
- }
+ switch (hiveUdfName.toLowerCase()) {
+ case "sum":
+ calciteAggFn = new HiveSqlSumAggFunction(uInf.returnTypeInference,
+ uInf.operandTypeInference, uInf.operandTypeChecker);
+ break;
+ case "count":
+ calciteAggFn = new HiveSqlCountAggFunction(uInf.returnTypeInference,
+ uInf.operandTypeInference, uInf.operandTypeChecker);
+ break;
+ case "min":
+ calciteAggFn = new HiveSqlMinMaxAggFunction(uInf.returnTypeInference,
+ uInf.operandTypeInference, uInf.operandTypeChecker, true);
+ break;
+ case "max":
+ calciteAggFn = new HiveSqlMinMaxAggFunction(uInf.returnTypeInference,
+ uInf.operandTypeInference, uInf.operandTypeChecker, false);
+ break;
+ default:
+ calciteAggFn = new CalciteUDAF(uInf.udfName, uInf.returnTypeInference,
+ uInf.operandTypeInference, uInf.operandTypeChecker);
+ break;
+ }
+ }
return calciteAggFn;
}
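
With this change only sum, count, min and max map to splittable aggregate functions; any other UDAF falls back to CalciteUDAF, whose unwrap(SqlSplittableAggFunction.class) returns null, which is exactly the condition on which HiveAggregateJoinTransposeRule bails out. An illustrative fragment, where argTypes and retType stand in for the resolved Calcite types:

    // "avg" is not handled by the switch above, so (assuming it is not in
    // the static hiveToCalcite map) the returned function is a CalciteUDAF:
    SqlAggFunction avg = SqlFunctionConverter.getCalciteAggFn("avg", argTypes, retType);
    // avg.unwrap(SqlSplittableAggFunction.class) == null -> the rule bails out.
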
http://git-wip-us.apache.org/repos/asf/hive/blob/68d6cfda/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 0a7ce3a..9c731b8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -63,6 +63,7 @@ import org.apache.calcite.rel.core.Sort;
import org.apache.calcite.rel.metadata.CachingRelMetadataProvider;
import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider;
import org.apache.calcite.rel.metadata.RelMetadataProvider;
+import org.apache.calcite.rel.rules.AggregateJoinTransposeRule;
import org.apache.calcite.rel.rules.FilterAggregateTransposeRule;
import org.apache.calcite.rel.rules.FilterProjectTransposeRule;
import org.apache.calcite.rel.rules.JoinToMultiJoinRule;
@@ -134,6 +135,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateJoinTransposeRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateProjectMergeRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveExpandDistinctAggregatesRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterJoinRule;
@@ -885,6 +887,9 @@ public class CalcitePlanner extends SemanticAnalyzer {
hepPgmBldr.addRuleInstance(UnionMergeRule.INSTANCE);
hepPgmBldr.addRuleInstance(new ProjectMergeRule(false, HiveProject.DEFAULT_PROJECT_FACTORY));
hepPgmBldr.addRuleInstance(HiveAggregateProjectMergeRule.INSTANCE);
+ if (conf.getBoolVar(ConfVars.AGGR_JOIN_TRANSPOSE)) {
+ hepPgmBldr.addRuleInstance(HiveAggregateJoinTransposeRule.INSTANCE);
+ }
hepPgm = hepPgmBldr.build();
HepPlanner hepPlanner = new HepPlanner(hepPgm);
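
ConfVars.AGGR_JOIN_TRANSPOSE is surfaced as hive.transpose.aggr.join (see the set statement at the top of the new q-file below); the rule joins the HEP program only when the flag is on. A minimal sketch of enabling it programmatically, assuming the standard HiveConf accessors:

    HiveConf conf = new HiveConf();
    // Equivalent to "set hive.transpose.aggr.join=true;" in a session.
    conf.setBoolVar(HiveConf.ConfVars.AGGR_JOIN_TRANSPOSE, true);
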
http://git-wip-us.apache.org/repos/asf/hive/blob/68d6cfda/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java
index 5a5846e..c6ffbec 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java
@@ -356,7 +356,7 @@ public class GenericUDAFSum extends AbstractGenericUDAFResolver {
*/
public static class GenericUDAFSumLong extends GenericUDAFEvaluator {
private PrimitiveObjectInspector inputOI;
- private LongWritable result;
+ protected LongWritable result;
@Override
public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
http://git-wip-us.apache.org/repos/asf/hive/blob/68d6cfda/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSumEmptyIsZero.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSumEmptyIsZero.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSumEmptyIsZero.java
new file mode 100644
index 0000000..ab7ab04
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSumEmptyIsZero.java
@@ -0,0 +1,63 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+@Description(name = "$SUM0", value = "_FUNC_(x) - Returns the sum of a set of numbers, zero if empty")
+public class GenericUDAFSumEmptyIsZero extends GenericUDAFSum {
+
+ @Override
+ public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters)
+ throws SemanticException {
+ if (parameters.length != 1) {
+ throw new UDFArgumentTypeException(parameters.length - 1,
+ "Exactly one argument is expected.");
+ }
+
+ if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
+ throw new UDFArgumentTypeException(0,
+ "Only primitive type arguments are accepted but "
+ + parameters[0].getTypeName() + " is passed.");
+ }
+ switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) {
+ case LONG:
+ return new SumZeroIfEmpty();
+ default:
+ throw new UDFArgumentTypeException(0,
+ "Only bigint type arguments are accepted but "
+ + parameters[0].getTypeName() + " is passed.");
+ }
+ }
+
+ public static class SumZeroIfEmpty extends GenericUDAFSumLong {
+
+ @Override
+ public Object terminate(AggregationBuffer agg) throws HiveException {
+ SumLongAgg myagg = (SumLongAgg) agg;
+ result.set(myagg.sum);
+ return result;
+ }
+ }
+}
+
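
This override is also why result in GenericUDAFSumLong was widened from private to protected in the hunk above: SumZeroIfEmpty changes only terminate(), and its version never consults the empty flag, so an empty group yields 0 where plain SUM yields NULL:

    // SUM(x)   over zero rows -> NULL (the parent terminate returns null when empty)
    // $SUM0(x) over zero rows -> 0    (result is set from sum, which starts at 0)
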
http://git-wip-us.apache.org/repos/asf/hive/blob/68d6cfda/ql/src/test/queries/clientpositive/groupby_join_pushdown.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_join_pushdown.q b/ql/src/test/queries/clientpositive/groupby_join_pushdown.q
new file mode 100644
index 0000000..bf1ae4b
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/groupby_join_pushdown.q
@@ -0,0 +1,55 @@
+set hive.transpose.aggr.join=true;
+EXPLAIN
+SELECT f.key, g.key, count(g.key)
+FROM src f JOIN src g ON(f.key = g.key)
+GROUP BY f.key, g.key;
+
+EXPLAIN
+SELECT f.key, g.key
+FROM src f JOIN src g ON(f.key = g.key)
+GROUP BY f.key, g.key;
+
+EXPLAIN
+SELECT DISTINCT f.value, g.value
+FROM src f JOIN src g ON(f.value = g.value);
+
+EXPLAIN
+SELECT f.key, g.key, COUNT(*)
+FROM src f JOIN src g ON(f.key = g.key)
+GROUP BY f.key, g.key;
+
+EXPLAIN
+SELECT f.ctinyint, g.ctinyint, SUM(f.cbigint)
+FROM alltypesorc f JOIN alltypesorc g ON(f.cint = g.cint)
+GROUP BY f.ctinyint, g.ctinyint ;
+
+EXPLAIN
+SELECT f.cbigint, g.cbigint, MAX(f.cint)
+FROM alltypesorc f JOIN alltypesorc g ON(f.cbigint = g.cbigint)
+GROUP BY f.cbigint, g.cbigint ;
+
+explain
+SELECT f.ctinyint, g.ctinyint, MIN(f.ctinyint)
+FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint)
+GROUP BY f.ctinyint, g.ctinyint;
+
+explain
+SELECT MIN(f.cint)
+FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint)
+GROUP BY f.ctinyint, g.ctinyint;
+
+explain
+SELECT count(f.ctinyint)
+FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint)
+GROUP BY f.ctinyint, g.ctinyint;
+
+explain
+SELECT count(f.cint), f.ctinyint
+FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint)
+GROUP BY f.ctinyint, g.ctinyint;
+
+explain
+SELECT sum(f.cint), f.ctinyint
+FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint)
+GROUP BY f.ctinyint, g.ctinyint;
+
[43/50] [abbrv] hive git commit: HIVE-11926: Stats annotation might
not extract stats for varchar/decimal columns (Chaoyu Tang,
reviewed by Xuefu Zhang)
Posted by xu...@apache.org.
HIVE-11926: Stats annotation might not extract stats for varchar/decimal columns (Chaoyu Tang, reviewed by Xuefu Zhang)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/15281351
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/15281351
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/15281351
Branch: refs/heads/beeline-cli
Commit: 1528135176df0bb30351471eb05b919d706669b8
Parents: 648f2c6
Author: ctang <ct...@gmail.com>
Authored: Thu Sep 24 12:20:42 2015 -0400
Committer: ctang <ct...@gmail.com>
Committed: Thu Sep 24 14:43:32 2015 -0400
----------------------------------------------------------------------
.../apache/hadoop/hive/ql/stats/StatsUtils.java | 182 ++++++++++---------
1 file changed, 94 insertions(+), 88 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/15281351/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index 2c970bd..cc8c9e8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -429,10 +429,11 @@ public class StatsUtils {
String colType, String defaultPartName) {
Range range = null;
String partVal;
- if (colType.equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.SMALLINT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.INT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)) {
+ String colTypeLowerCase = colType.toLowerCase();
+ if (colTypeLowerCase.equals(serdeConstants.TINYINT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.INT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.BIGINT_TYPE_NAME)) {
long min = Long.MAX_VALUE;
long max = Long.MIN_VALUE;
for (Partition partition : partitions) {
@@ -447,8 +448,8 @@ public class StatsUtils {
}
}
range = new Range(min, max);
- } else if (colType.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.FLOAT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.DOUBLE_TYPE_NAME)) {
double min = Double.MAX_VALUE;
double max = Double.MIN_VALUE;
for (Partition partition : partitions) {
@@ -463,7 +464,7 @@ public class StatsUtils {
}
}
range = new Range(min, max);
- } else if (colType.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
+ } else if (colTypeLowerCase.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
double min = Double.MAX_VALUE;
double max = Double.MIN_VALUE;
for (Partition partition : partitions) {
@@ -515,18 +516,18 @@ public class StatsUtils {
continue;
}
ObjectInspector oi = ci.getObjectInspector();
- String colType = ci.getTypeName();
- if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)
- || colType.startsWith(serdeConstants.VARCHAR_TYPE_NAME)
- || colType.startsWith(serdeConstants.CHAR_TYPE_NAME)
- || colType.startsWith(serdeConstants.LIST_TYPE_NAME)
- || colType.startsWith(serdeConstants.MAP_TYPE_NAME)
- || colType.startsWith(serdeConstants.STRUCT_TYPE_NAME)
- || colType.startsWith(serdeConstants.UNION_TYPE_NAME)) {
- avgRowSize += getAvgColLenOfVariableLengthTypes(conf, oi, colType);
+ String colTypeLowerCase = ci.getTypeName().toLowerCase();
+ if (colTypeLowerCase.equals(serdeConstants.STRING_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.BINARY_TYPE_NAME)
+ || colTypeLowerCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME)
+ || colTypeLowerCase.startsWith(serdeConstants.CHAR_TYPE_NAME)
+ || colTypeLowerCase.startsWith(serdeConstants.LIST_TYPE_NAME)
+ || colTypeLowerCase.startsWith(serdeConstants.MAP_TYPE_NAME)
+ || colTypeLowerCase.startsWith(serdeConstants.STRUCT_TYPE_NAME)
+ || colTypeLowerCase.startsWith(serdeConstants.UNION_TYPE_NAME)) {
+ avgRowSize += getAvgColLenOfVariableLengthTypes(conf, oi, colTypeLowerCase);
} else {
- avgRowSize += getAvgColLenOfFixedLengthTypes(colType);
+ avgRowSize += getAvgColLenOfFixedLengthTypes(colTypeLowerCase);
}
}
return avgRowSize;
@@ -640,38 +641,38 @@ public class StatsUtils {
*/
public static ColStatistics getColStatistics(ColumnStatisticsObj cso, String tabName,
String colName) {
- ColStatistics cs = new ColStatistics(colName, cso.getColType());
- String colType = cso.getColType();
+ String colTypeLowerCase = cso.getColType().toLowerCase();
+ ColStatistics cs = new ColStatistics(colName, colTypeLowerCase);
ColumnStatisticsData csd = cso.getStatsData();
- if (colType.equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.SMALLINT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.INT_TYPE_NAME)) {
+ if (colTypeLowerCase.equals(serdeConstants.TINYINT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.INT_TYPE_NAME)) {
cs.setCountDistint(csd.getLongStats().getNumDVs());
cs.setNumNulls(csd.getLongStats().getNumNulls());
cs.setAvgColLen(JavaDataModel.get().primitive1());
cs.setRange(csd.getLongStats().getLowValue(), csd.getLongStats().getHighValue());
- } else if (colType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.BIGINT_TYPE_NAME)) {
cs.setCountDistint(csd.getLongStats().getNumDVs());
cs.setNumNulls(csd.getLongStats().getNumNulls());
cs.setAvgColLen(JavaDataModel.get().primitive2());
cs.setRange(csd.getLongStats().getLowValue(), csd.getLongStats().getHighValue());
- } else if (colType.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.FLOAT_TYPE_NAME)) {
cs.setCountDistint(csd.getDoubleStats().getNumDVs());
cs.setNumNulls(csd.getDoubleStats().getNumNulls());
cs.setAvgColLen(JavaDataModel.get().primitive1());
cs.setRange(csd.getDoubleStats().getLowValue(), csd.getDoubleStats().getHighValue());
- } else if (colType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.DOUBLE_TYPE_NAME)) {
cs.setCountDistint(csd.getDoubleStats().getNumDVs());
cs.setNumNulls(csd.getDoubleStats().getNumNulls());
cs.setAvgColLen(JavaDataModel.get().primitive2());
cs.setRange(csd.getDoubleStats().getLowValue(), csd.getDoubleStats().getHighValue());
- } else if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)
- || colType.startsWith(serdeConstants.CHAR_TYPE_NAME)
- || colType.startsWith(serdeConstants.VARCHAR_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.STRING_TYPE_NAME)
+ || colTypeLowerCase.startsWith(serdeConstants.CHAR_TYPE_NAME)
+ || colTypeLowerCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME)) {
cs.setCountDistint(csd.getStringStats().getNumDVs());
cs.setNumNulls(csd.getStringStats().getNumNulls());
cs.setAvgColLen(csd.getStringStats().getAvgColLen());
- } else if (colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.BOOLEAN_TYPE_NAME)) {
if (csd.getBooleanStats().getNumFalses() > 0 && csd.getBooleanStats().getNumTrues() > 0) {
cs.setCountDistint(2);
} else {
@@ -681,12 +682,12 @@ public class StatsUtils {
cs.setNumFalses(csd.getBooleanStats().getNumFalses());
cs.setNumNulls(csd.getBooleanStats().getNumNulls());
cs.setAvgColLen(JavaDataModel.get().primitive1());
- } else if (colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.BINARY_TYPE_NAME)) {
cs.setAvgColLen(csd.getBinaryStats().getAvgColLen());
cs.setNumNulls(csd.getBinaryStats().getNumNulls());
- } else if (colType.equalsIgnoreCase(serdeConstants.TIMESTAMP_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) {
cs.setAvgColLen(JavaDataModel.get().lengthOfTimestamp());
- } else if (colType.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
+ } else if (colTypeLowerCase.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
cs.setAvgColLen(JavaDataModel.get().lengthOfDecimal());
cs.setCountDistint(csd.getDecimalStats().getNumDVs());
cs.setNumNulls(csd.getDecimalStats().getNumNulls());
@@ -697,7 +698,7 @@ public class StatsUtils {
BigDecimal minVal = HiveDecimal.
create(new BigInteger(val.getUnscaled()), val.getScale()).bigDecimalValue();
cs.setRange(minVal, maxVal);
- } else if (colType.equalsIgnoreCase(serdeConstants.DATE_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) {
cs.setAvgColLen(JavaDataModel.get().lengthOfDate());
} else {
// Columns statistics for complex datatypes are not supported yet
@@ -741,7 +742,9 @@ public class StatsUtils {
List<ColStatistics> stats = new ArrayList<ColStatistics>(colStats.size());
for (ColumnStatisticsObj statObj : colStats) {
ColStatistics cs = getColStatistics(statObj, tabName, statObj.getColName());
- stats.add(cs);
+ if (cs != null) {
+ stats.add(cs);
+ }
}
return stats;
}
@@ -776,8 +779,8 @@ public class StatsUtils {
String colType) {
long configVarLen = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_STATS_MAX_VARIABLE_LENGTH);
-
- if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)) {
+ String colTypeLowCase = colType.toLowerCase();
+ if (colTypeLowCase.equals(serdeConstants.STRING_TYPE_NAME)) {
// constant string projection Ex: select "hello" from table
if (oi instanceof ConstantObjectInspector) {
@@ -793,7 +796,7 @@ public class StatsUtils {
// return the variable length from config
return configVarLen;
}
- } else if (colType.startsWith(serdeConstants.VARCHAR_TYPE_NAME)) {
+ } else if (colTypeLowCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME)) {
// constant varchar projection
if (oi instanceof ConstantObjectInspector) {
@@ -806,7 +809,7 @@ public class StatsUtils {
VarcharTypeInfo type = (VarcharTypeInfo) ((HiveVarcharObjectInspector) oi).getTypeInfo();
return type.getLength();
}
- } else if (colType.startsWith(serdeConstants.CHAR_TYPE_NAME)) {
+ } else if (colTypeLowCase.startsWith(serdeConstants.CHAR_TYPE_NAME)) {
// constant char projection
if (oi instanceof ConstantObjectInspector) {
@@ -819,7 +822,7 @@ public class StatsUtils {
CharTypeInfo type = (CharTypeInfo) ((HiveCharObjectInspector) oi).getTypeInfo();
return type.getLength();
}
- } else if (colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)) {
+ } else if (colTypeLowCase.equals(serdeConstants.BINARY_TYPE_NAME)) {
// constant byte arrays
if (oi instanceof ConstantObjectInspector) {
@@ -858,17 +861,17 @@ public class StatsUtils {
switch (oi.getCategory()) {
case PRIMITIVE:
- String colType = oi.getTypeName();
- if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)
- || colType.startsWith(serdeConstants.VARCHAR_TYPE_NAME)
- || colType.startsWith(serdeConstants.CHAR_TYPE_NAME)) {
- int avgColLen = (int) getAvgColLenOfVariableLengthTypes(conf, oi, colType);
+ String colTypeLowerCase = oi.getTypeName().toLowerCase();
+ if (colTypeLowerCase.equals(serdeConstants.STRING_TYPE_NAME)
+ || colTypeLowerCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME)
+ || colTypeLowerCase.startsWith(serdeConstants.CHAR_TYPE_NAME)) {
+ int avgColLen = (int) getAvgColLenOfVariableLengthTypes(conf, oi, colTypeLowerCase);
result += JavaDataModel.get().lengthForStringOfLength(avgColLen);
- } else if (colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)) {
- int avgColLen = (int) getAvgColLenOfVariableLengthTypes(conf, oi, colType);
+ } else if (colTypeLowerCase.equals(serdeConstants.BINARY_TYPE_NAME)) {
+ int avgColLen = (int) getAvgColLenOfVariableLengthTypes(conf, oi, colTypeLowerCase);
result += JavaDataModel.get().lengthForByteArrayOfSize(avgColLen);
} else {
- result += getAvgColLenOfFixedLengthTypes(colType);
+ result += getAvgColLenOfFixedLengthTypes(colTypeLowerCase);
}
break;
case LIST:
@@ -952,21 +955,22 @@ public class StatsUtils {
* @return raw data size
*/
public static long getAvgColLenOfFixedLengthTypes(String colType) {
- if (colType.equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.SMALLINT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.INT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME)) {
+ String colTypeLowerCase = colType.toLowerCase();
+ if (colTypeLowerCase.equals(serdeConstants.TINYINT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.INT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.BOOLEAN_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.FLOAT_TYPE_NAME)) {
return JavaDataModel.get().primitive1();
- } else if (colType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)
- || colType.equalsIgnoreCase("long")) {
+ } else if (colTypeLowerCase.equals(serdeConstants.DOUBLE_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.BIGINT_TYPE_NAME)
+ || colTypeLowerCase.equals("long")) {
return JavaDataModel.get().primitive2();
- } else if (colType.equalsIgnoreCase(serdeConstants.TIMESTAMP_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) {
return JavaDataModel.get().lengthOfTimestamp();
- } else if (colType.equalsIgnoreCase(serdeConstants.DATE_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) {
return JavaDataModel.get().lengthOfDate();
- } else if (colType.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
+ } else if (colTypeLowerCase.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
return JavaDataModel.get().lengthOfDecimal();
} else {
return 0;
@@ -982,25 +986,26 @@ public class StatsUtils {
* @return raw data size
*/
public static long getSizeOfPrimitiveTypeArraysFromType(String colType, int length) {
- if (colType.equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.SMALLINT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.INT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME)) {
+ String colTypeLowerCase = colType.toLowerCase();
+ if (colTypeLowerCase.equals(serdeConstants.TINYINT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.INT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.FLOAT_TYPE_NAME)) {
return JavaDataModel.get().lengthForIntArrayOfSize(length);
- } else if (colType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.DOUBLE_TYPE_NAME)) {
return JavaDataModel.get().lengthForDoubleArrayOfSize(length);
- } else if (colType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)
- || colType.equalsIgnoreCase("long")) {
+ } else if (colTypeLowerCase.equals(serdeConstants.BIGINT_TYPE_NAME)
+ || colTypeLowerCase.equals("long")) {
return JavaDataModel.get().lengthForLongArrayOfSize(length);
- } else if (colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.BINARY_TYPE_NAME)) {
return JavaDataModel.get().lengthForByteArrayOfSize(length);
- } else if (colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.BOOLEAN_TYPE_NAME)) {
return JavaDataModel.get().lengthForBooleanArrayOfSize(length);
- } else if (colType.equalsIgnoreCase(serdeConstants.TIMESTAMP_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) {
return JavaDataModel.get().lengthForTimestampArrayOfSize(length);
- } else if (colType.equalsIgnoreCase(serdeConstants.DATE_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) {
return JavaDataModel.get().lengthForDateArrayOfSize(length);
- } else if (colType.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
+ } else if (colTypeLowerCase.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
return JavaDataModel.get().lengthForDecimalArrayOfSize(length);
} else {
return 0;
@@ -1267,8 +1272,9 @@ public class StatsUtils {
throw new IllegalArgumentException("not supported expr type " + end.getClass());
}
- if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)
+ colType = colType.toLowerCase();
+ if (colType.equals(serdeConstants.STRING_TYPE_NAME)
+ || colType.equals(serdeConstants.BINARY_TYPE_NAME)
|| colType.startsWith(serdeConstants.VARCHAR_TYPE_NAME)
|| colType.startsWith(serdeConstants.CHAR_TYPE_NAME)
|| colType.startsWith(serdeConstants.LIST_TYPE_NAME)
@@ -1380,30 +1386,30 @@ public class StatsUtils {
for (ColStatistics cs : colStats) {
if (cs != null) {
- String colType = cs.getColumnType();
+ String colTypeLowerCase = cs.getColumnType().toLowerCase();
long nonNullCount = numRows - cs.getNumNulls();
double sizeOf = 0;
- if (colType.equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.SMALLINT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.INT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME)) {
+ if (colTypeLowerCase.equals(serdeConstants.TINYINT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.INT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.BIGINT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.BOOLEAN_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.FLOAT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.DOUBLE_TYPE_NAME)) {
sizeOf = cs.getAvgColLen();
- } else if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)
- || colType.startsWith(serdeConstants.VARCHAR_TYPE_NAME)
- || colType.startsWith(serdeConstants.CHAR_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.STRING_TYPE_NAME)
+ || colTypeLowerCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME)
+ || colTypeLowerCase.startsWith(serdeConstants.CHAR_TYPE_NAME)) {
int acl = (int) Math.round(cs.getAvgColLen());
sizeOf = JavaDataModel.get().lengthForStringOfLength(acl);
- } else if (colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.BINARY_TYPE_NAME)) {
int acl = (int) Math.round(cs.getAvgColLen());
sizeOf = JavaDataModel.get().lengthForByteArrayOfSize(acl);
- } else if (colType.equalsIgnoreCase(serdeConstants.TIMESTAMP_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) {
sizeOf = JavaDataModel.get().lengthOfTimestamp();
- } else if (colType.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
+ } else if (colTypeLowerCase.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
sizeOf = JavaDataModel.get().lengthOfDecimal();
- } else if (colType.equalsIgnoreCase(serdeConstants.DATE_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) {
sizeOf = JavaDataModel.get().lengthOfDate();
} else {
sizeOf = cs.getAvgColLen();
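
The underlying bug is simple: the old code mixed case-insensitive equals with case-sensitive startsWith, so type names reported in upper or mixed case (for example VARCHAR(10) or DECIMAL(10,2)) silently fell through the startsWith branches for varchar, char and decimal. A two-line demonstration:

    String colType = "VARCHAR(10)";               // mixed-case type name
    colType.startsWith("varchar");                // false: stats were skipped
    colType.toLowerCase().startsWith("varchar");  // true: stats extracted after the fix
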
[14/50] [abbrv] hive git commit: HIVE-11860: StatsNoJobTask fails to
collect fast stats when table have subdirectories (Prasanth Jayachandran
reviewed by Sergey Shelukhin)
Posted by xu...@apache.org.
HIVE-11860: StatsNoJobTask fails to collect fast stats when table have subdirectories (Prasanth Jayachandran reviewed by Sergey Shelukhin)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f08a0330
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f08a0330
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f08a0330
Branch: refs/heads/beeline-cli
Commit: f08a0330bc98fbf80b878fd931f74df5e724a5cf
Parents: 3672a27
Author: Prasanth Jayachandran <j....@gmail.com>
Authored: Fri Sep 18 17:13:13 2015 -0500
Committer: Prasanth Jayachandran <j....@gmail.com>
Committed: Fri Sep 18 17:13:13 2015 -0500
----------------------------------------------------------------------
.../test/resources/testconfiguration.properties | 1 +
.../hadoop/hive/ql/exec/StatsNoJobTask.java | 25 +-
.../queries/clientpositive/union_fast_stats.q | 68 +++
.../clientpositive/tez/union_fast_stats.q.out | 526 +++++++++++++++++++
.../clientpositive/union_fast_stats.q.out | 526 +++++++++++++++++++
5 files changed, 1128 insertions(+), 18 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/f08a0330/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 0d3e1cc..b47d1b5 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -177,6 +177,7 @@ minitez.query.files.shared=alter_merge_2_orc.q,\
union9.q,\
unionDistinct_1.q,\
unionDistinct_2.q,\
+ union_fast_stats.q,\
update_after_multiple_inserts.q,\
update_all_non_partitioned.q,\
update_all_partitioned.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/f08a0330/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java
index 4ecb20f..0d99cbc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java
@@ -32,7 +32,7 @@ import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.hive.common.HiveStatsUtils;
import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
@@ -46,7 +46,6 @@ import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec;
import org.apache.hadoop.hive.ql.plan.StatsNoJobWork;
import org.apache.hadoop.hive.ql.plan.api.StageType;
-import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
@@ -144,9 +143,8 @@ public class StatsNoJobTask extends Task<StatsNoJobWork> implements Serializable
long fileSize = 0;
long numFiles = 0;
FileSystem fs = dir.getFileSystem(conf);
- List<FileStatus> fileList =
- ShimLoader.getHadoopShims().listLocatedStatus(fs, dir,
- hiddenFileFilter);
+ FileStatus[] fileList = HiveStatsUtils.getFileStatusRecurse(dir, -1, fs);
+
boolean statsAvailable = false;
for(FileStatus file: fileList) {
if (!file.isDir()) {
@@ -155,7 +153,6 @@ public class StatsNoJobTask extends Task<StatsNoJobWork> implements Serializable
InputSplit dummySplit = new FileSplit(file.getPath(), 0, 0,
new String[] { partn.getLocation() });
org.apache.hadoop.mapred.RecordReader<?, ?> recordReader =
- (org.apache.hadoop.mapred.RecordReader<?, ?>)
inputFormat.getRecordReader(dummySplit, jc, Reporter.NULL);
StatsProvidingRecordReader statsRR;
if (recordReader instanceof StatsProvidingRecordReader) {
@@ -242,9 +239,8 @@ public class StatsNoJobTask extends Task<StatsNoJobWork> implements Serializable
long fileSize = 0;
long numFiles = 0;
FileSystem fs = dir.getFileSystem(conf);
- List<FileStatus> fileList =
- ShimLoader.getHadoopShims().listLocatedStatus(fs, dir,
- hiddenFileFilter);
+ FileStatus[] fileList = HiveStatsUtils.getFileStatusRecurse(dir, -1, fs);
+
boolean statsAvailable = false;
for(FileStatus file: fileList) {
if (!file.isDir()) {
@@ -252,8 +248,8 @@ public class StatsNoJobTask extends Task<StatsNoJobWork> implements Serializable
table.getInputFormatClass(), jc);
InputSplit dummySplit = new FileSplit(file.getPath(), 0, 0, new String[] { table
.getDataLocation().toString() });
- org.apache.hadoop.mapred.RecordReader<?, ?> recordReader = (org.apache.hadoop.mapred.RecordReader<?, ?>) inputFormat
- .getRecordReader(dummySplit, jc, Reporter.NULL);
+ org.apache.hadoop.mapred.RecordReader<?, ?> recordReader =
+ inputFormat.getRecordReader(dummySplit, jc, Reporter.NULL);
StatsProvidingRecordReader statsRR;
if (recordReader instanceof StatsProvidingRecordReader) {
statsRR = (StatsProvidingRecordReader) recordReader;
@@ -354,13 +350,6 @@ public class StatsNoJobTask extends Task<StatsNoJobWork> implements Serializable
}
}
- private static final PathFilter hiddenFileFilter = new PathFilter() {
- public boolean accept(Path p) {
- String name = p.getName();
- return !name.startsWith("_") && !name.startsWith(".");
- }
- };
-
private String toString(Map<String, String> parameters) {
StringBuilder builder = new StringBuilder();
for (String statType : StatsSetupConst.supportedStats) {
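
The fix swaps a flat directory listing for a recursive walk: getFileStatusRecurse with level -1 descends into subdirectories, whose data files the old listLocatedStatus call never returned, so the !file.isDir() loop skipped everything and fast stats stayed at zero. A sketch of the layout that used to break, with illustrative union-style subdirectory names:

    // table_dir/
    //   1/000000_0   <- written by a union branch; invisible to the flat listing
    //   2/000000_0
    FileStatus[] files = HiveStatsUtils.getFileStatusRecurse(dir, -1, fs); // sees both files
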
http://git-wip-us.apache.org/repos/asf/hive/blob/f08a0330/ql/src/test/queries/clientpositive/union_fast_stats.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/union_fast_stats.q b/ql/src/test/queries/clientpositive/union_fast_stats.q
new file mode 100644
index 0000000..616af43
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/union_fast_stats.q
@@ -0,0 +1,68 @@
+set hive.stats.dbclass=fs;
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+set hive.merge.tezfiles=false;
+
+drop table small_alltypesorc1a;
+drop table small_alltypesorc2a;
+drop table small_alltypesorc3a;
+drop table small_alltypesorc4a;
+drop table small_alltypesorc_a;
+
+create table small_alltypesorc1a as select * from alltypesorc where cint is not null and ctinyint is not null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5;
+create table small_alltypesorc2a as select * from alltypesorc where cint is null and ctinyint is not null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5;
+create table small_alltypesorc3a as select * from alltypesorc where cint is not null and ctinyint is null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5;
+create table small_alltypesorc4a as select * from alltypesorc where cint is null and ctinyint is null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5;
+
+create table small_alltypesorc_a stored as orc as select * from
+(select * from (select * from small_alltypesorc1a) sq1
+ union all
+ select * from (select * from small_alltypesorc2a) sq2
+ union all
+ select * from (select * from small_alltypesorc3a) sq3
+ union all
+ select * from (select * from small_alltypesorc4a) sq4) q;
+
+desc formatted small_alltypesorc_a;
+
+ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS;
+
+desc formatted small_alltypesorc_a;
+
+insert into table small_alltypesorc_a select * from small_alltypesorc1a;
+
+desc formatted small_alltypesorc_a;
+
+set hive.merge.mapfiles=true;
+set hive.merge.mapredfiles=true;
+set hive.merge.tezfiles=true;
+
+drop table small_alltypesorc1a;
+drop table small_alltypesorc2a;
+drop table small_alltypesorc3a;
+drop table small_alltypesorc4a;
+drop table small_alltypesorc_a;
+
+create table small_alltypesorc1a as select * from alltypesorc where cint is not null and ctinyint is not null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5;
+create table small_alltypesorc2a as select * from alltypesorc where cint is null and ctinyint is not null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5;
+create table small_alltypesorc3a as select * from alltypesorc where cint is not null and ctinyint is null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5;
+create table small_alltypesorc4a as select * from alltypesorc where cint is null and ctinyint is null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5;
+
+create table small_alltypesorc_a stored as orc as select * from
+(select * from (select * from small_alltypesorc1a) sq1
+ union all
+ select * from (select * from small_alltypesorc2a) sq2
+ union all
+ select * from (select * from small_alltypesorc3a) sq3
+ union all
+ select * from (select * from small_alltypesorc4a) sq4) q;
+
+desc formatted small_alltypesorc_a;
+
+ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS;
+
+desc formatted small_alltypesorc_a;
+
+insert into table small_alltypesorc_a select * from small_alltypesorc1a;
+
+desc formatted small_alltypesorc_a;
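
The script above runs the same scenario twice: first with the hive.merge.* flags off, so the union CTAS leaves one ORC file per branch, then with them on. Each pass compares the fast stats recorded at write time (hive.stats.dbclass=fs) against the values recomputed by ANALYZE and then updated by a follow-up insert. Stripped of the specifics, the pattern it checks looks like this (t, src_a and src_b are hypothetical tables, not part of the test):

-- illustrative sketch only
set hive.merge.mapfiles=false;           -- keep per-branch output files
create table t stored as orc as
  select * from src_a union all select * from src_b;
desc formatted t;                        -- fast stats gathered at write time
analyze table t compute statistics;      -- full scan recomputes numRows/rawDataSize
desc formatted t;                        -- should now describe the data exactly
insert into table t select * from src_a;
desc formatted t;                        -- stats must stay consistent after the append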
http://git-wip-us.apache.org/repos/asf/hive/blob/f08a0330/ql/src/test/results/clientpositive/tez/union_fast_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/union_fast_stats.q.out b/ql/src/test/results/clientpositive/tez/union_fast_stats.q.out
new file mode 100644
index 0000000..d0d82a0
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/union_fast_stats.q.out
@@ -0,0 +1,526 @@
+PREHOOK: query: drop table small_alltypesorc1a
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table small_alltypesorc1a
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table small_alltypesorc2a
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table small_alltypesorc2a
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table small_alltypesorc3a
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table small_alltypesorc3a
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table small_alltypesorc4a
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table small_alltypesorc4a
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table small_alltypesorc_a
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table small_alltypesorc_a
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table small_alltypesorc1a as select * from alltypesorc where cint is not null and ctinyint is not null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: database:default
+PREHOOK: Output: default@small_alltypesorc1a
+POSTHOOK: query: create table small_alltypesorc1a as select * from alltypesorc where cint is not null and ctinyint is not null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@small_alltypesorc1a
+PREHOOK: query: create table small_alltypesorc2a as select * from alltypesorc where cint is null and ctinyint is not null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: database:default
+PREHOOK: Output: default@small_alltypesorc2a
+POSTHOOK: query: create table small_alltypesorc2a as select * from alltypesorc where cint is null and ctinyint is not null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@small_alltypesorc2a
+PREHOOK: query: create table small_alltypesorc3a as select * from alltypesorc where cint is not null and ctinyint is null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: database:default
+PREHOOK: Output: default@small_alltypesorc3a
+POSTHOOK: query: create table small_alltypesorc3a as select * from alltypesorc where cint is not null and ctinyint is null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@small_alltypesorc3a
+PREHOOK: query: create table small_alltypesorc4a as select * from alltypesorc where cint is null and ctinyint is null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: database:default
+PREHOOK: Output: default@small_alltypesorc4a
+POSTHOOK: query: create table small_alltypesorc4a as select * from alltypesorc where cint is null and ctinyint is null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@small_alltypesorc4a
+PREHOOK: query: create table small_alltypesorc_a stored as orc as select * from
+(select * from (select * from small_alltypesorc1a) sq1
+ union all
+ select * from (select * from small_alltypesorc2a) sq2
+ union all
+ select * from (select * from small_alltypesorc3a) sq3
+ union all
+ select * from (select * from small_alltypesorc4a) sq4) q
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@small_alltypesorc1a
+PREHOOK: Input: default@small_alltypesorc2a
+PREHOOK: Input: default@small_alltypesorc3a
+PREHOOK: Input: default@small_alltypesorc4a
+PREHOOK: Output: database:default
+PREHOOK: Output: default@small_alltypesorc_a
+POSTHOOK: query: create table small_alltypesorc_a stored as orc as select * from
+(select * from (select * from small_alltypesorc1a) sq1
+ union all
+ select * from (select * from small_alltypesorc2a) sq2
+ union all
+ select * from (select * from small_alltypesorc3a) sq3
+ union all
+ select * from (select * from small_alltypesorc4a) sq4) q
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@small_alltypesorc1a
+POSTHOOK: Input: default@small_alltypesorc2a
+POSTHOOK: Input: default@small_alltypesorc3a
+POSTHOOK: Input: default@small_alltypesorc4a
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@small_alltypesorc_a
+PREHOOK: query: desc formatted small_alltypesorc_a
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@small_alltypesorc_a
+POSTHOOK: query: desc formatted small_alltypesorc_a
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@small_alltypesorc_a
+# col_name data_type comment
+
+ctinyint tinyint
+csmallint smallint
+cint int
+cbigint bigint
+cfloat float
+cdouble double
+cstring1 string
+cstring2 string
+ctimestamp1 timestamp
+ctimestamp2 timestamp
+cboolean1 boolean
+cboolean2 boolean
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 4
+ numRows 0
+ rawDataSize 0
+ totalSize 3915
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_alltypesorc_a
+PREHOOK: Output: default@small_alltypesorc_a
+POSTHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_alltypesorc_a
+POSTHOOK: Output: default@small_alltypesorc_a
+PREHOOK: query: desc formatted small_alltypesorc_a
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@small_alltypesorc_a
+POSTHOOK: query: desc formatted small_alltypesorc_a
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@small_alltypesorc_a
+# col_name data_type comment
+
+ctinyint tinyint
+csmallint smallint
+cint int
+cbigint bigint
+cfloat float
+cdouble double
+cstring1 string
+cstring2 string
+ctimestamp1 timestamp
+ctimestamp2 timestamp
+cboolean1 boolean
+cboolean2 boolean
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 4
+ numRows 15
+ rawDataSize 3483
+ totalSize 3915
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: insert into table small_alltypesorc_a select * from small_alltypesorc1a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_alltypesorc1a
+PREHOOK: Output: default@small_alltypesorc_a
+POSTHOOK: query: insert into table small_alltypesorc_a select * from small_alltypesorc1a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_alltypesorc1a
+POSTHOOK: Output: default@small_alltypesorc_a
+POSTHOOK: Lineage: small_alltypesorc_a.cbigint SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:cbigint, type:bigint, comment:null), ]
+POSTHOOK: Lineage: small_alltypesorc_a.cboolean1 SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:cboolean1, type:boolean, comment:null), ]
+POSTHOOK: Lineage: small_alltypesorc_a.cboolean2 SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:cboolean2, type:boolean, comment:null), ]
+POSTHOOK: Lineage: small_alltypesorc_a.cdouble SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: small_alltypesorc_a.cfloat SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: small_alltypesorc_a.cint SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: small_alltypesorc_a.csmallint SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:csmallint, type:smallint, comment:null), ]
+POSTHOOK: Lineage: small_alltypesorc_a.cstring1 SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: small_alltypesorc_a.cstring2 SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:cstring2, type:string, comment:null), ]
+POSTHOOK: Lineage: small_alltypesorc_a.ctimestamp1 SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: small_alltypesorc_a.ctimestamp2 SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: small_alltypesorc_a.ctinyint SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+PREHOOK: query: desc formatted small_alltypesorc_a
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@small_alltypesorc_a
+POSTHOOK: query: desc formatted small_alltypesorc_a
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@small_alltypesorc_a
+# col_name data_type comment
+
+ctinyint tinyint
+csmallint smallint
+cint int
+cbigint bigint
+cfloat float
+cdouble double
+cstring1 string
+cstring2 string
+ctimestamp1 timestamp
+ctimestamp2 timestamp
+cboolean1 boolean
+cboolean2 boolean
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 5
+ numRows 20
+ rawDataSize 4552
+ totalSize 5225
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: drop table small_alltypesorc1a
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@small_alltypesorc1a
+PREHOOK: Output: default@small_alltypesorc1a
+POSTHOOK: query: drop table small_alltypesorc1a
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@small_alltypesorc1a
+POSTHOOK: Output: default@small_alltypesorc1a
+PREHOOK: query: drop table small_alltypesorc2a
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@small_alltypesorc2a
+PREHOOK: Output: default@small_alltypesorc2a
+POSTHOOK: query: drop table small_alltypesorc2a
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@small_alltypesorc2a
+POSTHOOK: Output: default@small_alltypesorc2a
+PREHOOK: query: drop table small_alltypesorc3a
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@small_alltypesorc3a
+PREHOOK: Output: default@small_alltypesorc3a
+POSTHOOK: query: drop table small_alltypesorc3a
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@small_alltypesorc3a
+POSTHOOK: Output: default@small_alltypesorc3a
+PREHOOK: query: drop table small_alltypesorc4a
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@small_alltypesorc4a
+PREHOOK: Output: default@small_alltypesorc4a
+POSTHOOK: query: drop table small_alltypesorc4a
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@small_alltypesorc4a
+POSTHOOK: Output: default@small_alltypesorc4a
+PREHOOK: query: drop table small_alltypesorc_a
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@small_alltypesorc_a
+PREHOOK: Output: default@small_alltypesorc_a
+POSTHOOK: query: drop table small_alltypesorc_a
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@small_alltypesorc_a
+POSTHOOK: Output: default@small_alltypesorc_a
+PREHOOK: query: create table small_alltypesorc1a as select * from alltypesorc where cint is not null and ctinyint is not null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: database:default
+PREHOOK: Output: default@small_alltypesorc1a
+POSTHOOK: query: create table small_alltypesorc1a as select * from alltypesorc where cint is not null and ctinyint is not null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@small_alltypesorc1a
+PREHOOK: query: create table small_alltypesorc2a as select * from alltypesorc where cint is null and ctinyint is not null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: database:default
+PREHOOK: Output: default@small_alltypesorc2a
+POSTHOOK: query: create table small_alltypesorc2a as select * from alltypesorc where cint is null and ctinyint is not null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@small_alltypesorc2a
+PREHOOK: query: create table small_alltypesorc3a as select * from alltypesorc where cint is not null and ctinyint is null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: database:default
+PREHOOK: Output: default@small_alltypesorc3a
+POSTHOOK: query: create table small_alltypesorc3a as select * from alltypesorc where cint is not null and ctinyint is null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@small_alltypesorc3a
+PREHOOK: query: create table small_alltypesorc4a as select * from alltypesorc where cint is null and ctinyint is null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: database:default
+PREHOOK: Output: default@small_alltypesorc4a
+POSTHOOK: query: create table small_alltypesorc4a as select * from alltypesorc where cint is null and ctinyint is null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@small_alltypesorc4a
+PREHOOK: query: create table small_alltypesorc_a stored as orc as select * from
+(select * from (select * from small_alltypesorc1a) sq1
+ union all
+ select * from (select * from small_alltypesorc2a) sq2
+ union all
+ select * from (select * from small_alltypesorc3a) sq3
+ union all
+ select * from (select * from small_alltypesorc4a) sq4) q
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@small_alltypesorc1a
+PREHOOK: Input: default@small_alltypesorc2a
+PREHOOK: Input: default@small_alltypesorc3a
+PREHOOK: Input: default@small_alltypesorc4a
+PREHOOK: Output: database:default
+PREHOOK: Output: default@small_alltypesorc_a
+POSTHOOK: query: create table small_alltypesorc_a stored as orc as select * from
+(select * from (select * from small_alltypesorc1a) sq1
+ union all
+ select * from (select * from small_alltypesorc2a) sq2
+ union all
+ select * from (select * from small_alltypesorc3a) sq3
+ union all
+ select * from (select * from small_alltypesorc4a) sq4) q
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@small_alltypesorc1a
+POSTHOOK: Input: default@small_alltypesorc2a
+POSTHOOK: Input: default@small_alltypesorc3a
+POSTHOOK: Input: default@small_alltypesorc4a
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@small_alltypesorc_a
+PREHOOK: query: desc formatted small_alltypesorc_a
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@small_alltypesorc_a
+POSTHOOK: query: desc formatted small_alltypesorc_a
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@small_alltypesorc_a
+# col_name data_type comment
+
+ctinyint tinyint
+csmallint smallint
+cint int
+cbigint bigint
+cfloat float
+cdouble double
+cstring1 string
+cstring2 string
+ctimestamp1 timestamp
+ctimestamp2 timestamp
+cboolean1 boolean
+cboolean2 boolean
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 1
+ numRows 5
+ rawDataSize 1069
+ totalSize 3177
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_alltypesorc_a
+PREHOOK: Output: default@small_alltypesorc_a
+POSTHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_alltypesorc_a
+POSTHOOK: Output: default@small_alltypesorc_a
+PREHOOK: query: desc formatted small_alltypesorc_a
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@small_alltypesorc_a
+POSTHOOK: query: desc formatted small_alltypesorc_a
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@small_alltypesorc_a
+# col_name data_type comment
+
+ctinyint tinyint
+csmallint smallint
+cint int
+cbigint bigint
+cfloat float
+cdouble double
+cstring1 string
+cstring2 string
+ctimestamp1 timestamp
+ctimestamp2 timestamp
+cboolean1 boolean
+cboolean2 boolean
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 1
+ numRows 15
+ rawDataSize 3320
+ totalSize 3177
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: insert into table small_alltypesorc_a select * from small_alltypesorc1a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_alltypesorc1a
+PREHOOK: Output: default@small_alltypesorc_a
+POSTHOOK: query: insert into table small_alltypesorc_a select * from small_alltypesorc1a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_alltypesorc1a
+POSTHOOK: Output: default@small_alltypesorc_a
+POSTHOOK: Lineage: small_alltypesorc_a.cbigint SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:cbigint, type:bigint, comment:null), ]
+POSTHOOK: Lineage: small_alltypesorc_a.cboolean1 SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:cboolean1, type:boolean, comment:null), ]
+POSTHOOK: Lineage: small_alltypesorc_a.cboolean2 SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:cboolean2, type:boolean, comment:null), ]
+POSTHOOK: Lineage: small_alltypesorc_a.cdouble SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: small_alltypesorc_a.cfloat SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: small_alltypesorc_a.cint SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: small_alltypesorc_a.csmallint SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:csmallint, type:smallint, comment:null), ]
+POSTHOOK: Lineage: small_alltypesorc_a.cstring1 SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: small_alltypesorc_a.cstring2 SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:cstring2, type:string, comment:null), ]
+POSTHOOK: Lineage: small_alltypesorc_a.ctimestamp1 SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: small_alltypesorc_a.ctimestamp2 SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: small_alltypesorc_a.ctinyint SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+PREHOOK: query: desc formatted small_alltypesorc_a
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@small_alltypesorc_a
+POSTHOOK: query: desc formatted small_alltypesorc_a
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@small_alltypesorc_a
+# col_name data_type comment
+
+ctinyint tinyint
+csmallint smallint
+cint int
+cbigint bigint
+cfloat float
+cdouble double
+cstring1 string
+cstring2 string
+ctimestamp1 timestamp
+ctimestamp2 timestamp
+cboolean1 boolean
+cboolean2 boolean
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 2
+ numRows 20
+ rawDataSize 4389
+ totalSize 4487
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
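
Reading the Tez golden output above: immediately after the union CTAS the fast stats record numFiles 4 but numRows 0 and rawDataSize 0; ANALYZE then corrects them to numRows 15, consistent with three of the four five-row inputs contributing rows (5 + 5 + 5 + 0 = 15, the cint-is-null-and-ctinyint-is-null filter evidently matching no rows of alltypesorc); and the subsequent five-row insert brings the table to numFiles 5 and numRows 20. In the second, merge-enabled pass the CTAS writes a single file whose fast stats undercount at numRows 5 until ANALYZE restores 15.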
http://git-wip-us.apache.org/repos/asf/hive/blob/f08a0330/ql/src/test/results/clientpositive/union_fast_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/union_fast_stats.q.out b/ql/src/test/results/clientpositive/union_fast_stats.q.out
new file mode 100644
index 0000000..71a0486
--- /dev/null
+++ b/ql/src/test/results/clientpositive/union_fast_stats.q.out
@@ -0,0 +1,526 @@
+PREHOOK: query: drop table small_alltypesorc1a
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table small_alltypesorc1a
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table small_alltypesorc2a
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table small_alltypesorc2a
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table small_alltypesorc3a
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table small_alltypesorc3a
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table small_alltypesorc4a
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table small_alltypesorc4a
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table small_alltypesorc_a
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table small_alltypesorc_a
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table small_alltypesorc1a as select * from alltypesorc where cint is not null and ctinyint is not null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: database:default
+PREHOOK: Output: default@small_alltypesorc1a
+POSTHOOK: query: create table small_alltypesorc1a as select * from alltypesorc where cint is not null and ctinyint is not null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@small_alltypesorc1a
+PREHOOK: query: create table small_alltypesorc2a as select * from alltypesorc where cint is null and ctinyint is not null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: database:default
+PREHOOK: Output: default@small_alltypesorc2a
+POSTHOOK: query: create table small_alltypesorc2a as select * from alltypesorc where cint is null and ctinyint is not null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@small_alltypesorc2a
+PREHOOK: query: create table small_alltypesorc3a as select * from alltypesorc where cint is not null and ctinyint is null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: database:default
+PREHOOK: Output: default@small_alltypesorc3a
+POSTHOOK: query: create table small_alltypesorc3a as select * from alltypesorc where cint is not null and ctinyint is null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@small_alltypesorc3a
+PREHOOK: query: create table small_alltypesorc4a as select * from alltypesorc where cint is null and ctinyint is null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: database:default
+PREHOOK: Output: default@small_alltypesorc4a
+POSTHOOK: query: create table small_alltypesorc4a as select * from alltypesorc where cint is null and ctinyint is null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@small_alltypesorc4a
+PREHOOK: query: create table small_alltypesorc_a stored as orc as select * from
+(select * from (select * from small_alltypesorc1a) sq1
+ union all
+ select * from (select * from small_alltypesorc2a) sq2
+ union all
+ select * from (select * from small_alltypesorc3a) sq3
+ union all
+ select * from (select * from small_alltypesorc4a) sq4) q
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@small_alltypesorc1a
+PREHOOK: Input: default@small_alltypesorc2a
+PREHOOK: Input: default@small_alltypesorc3a
+PREHOOK: Input: default@small_alltypesorc4a
+PREHOOK: Output: database:default
+PREHOOK: Output: default@small_alltypesorc_a
+POSTHOOK: query: create table small_alltypesorc_a stored as orc as select * from
+(select * from (select * from small_alltypesorc1a) sq1
+ union all
+ select * from (select * from small_alltypesorc2a) sq2
+ union all
+ select * from (select * from small_alltypesorc3a) sq3
+ union all
+ select * from (select * from small_alltypesorc4a) sq4) q
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@small_alltypesorc1a
+POSTHOOK: Input: default@small_alltypesorc2a
+POSTHOOK: Input: default@small_alltypesorc3a
+POSTHOOK: Input: default@small_alltypesorc4a
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@small_alltypesorc_a
+PREHOOK: query: desc formatted small_alltypesorc_a
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@small_alltypesorc_a
+POSTHOOK: query: desc formatted small_alltypesorc_a
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@small_alltypesorc_a
+# col_name data_type comment
+
+ctinyint tinyint
+csmallint smallint
+cint int
+cbigint bigint
+cfloat float
+cdouble double
+cstring1 string
+cstring2 string
+ctimestamp1 timestamp
+ctimestamp2 timestamp
+cboolean1 boolean
+cboolean2 boolean
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 4
+ numRows 15
+ rawDataSize 3483
+ totalSize 3915
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_alltypesorc_a
+PREHOOK: Output: default@small_alltypesorc_a
+POSTHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_alltypesorc_a
+POSTHOOK: Output: default@small_alltypesorc_a
+PREHOOK: query: desc formatted small_alltypesorc_a
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@small_alltypesorc_a
+POSTHOOK: query: desc formatted small_alltypesorc_a
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@small_alltypesorc_a
+# col_name data_type comment
+
+ctinyint tinyint
+csmallint smallint
+cint int
+cbigint bigint
+cfloat float
+cdouble double
+cstring1 string
+cstring2 string
+ctimestamp1 timestamp
+ctimestamp2 timestamp
+cboolean1 boolean
+cboolean2 boolean
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 4
+ numRows 15
+ rawDataSize 3483
+ totalSize 3915
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: insert into table small_alltypesorc_a select * from small_alltypesorc1a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_alltypesorc1a
+PREHOOK: Output: default@small_alltypesorc_a
+POSTHOOK: query: insert into table small_alltypesorc_a select * from small_alltypesorc1a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_alltypesorc1a
+POSTHOOK: Output: default@small_alltypesorc_a
+POSTHOOK: Lineage: small_alltypesorc_a.cbigint SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:cbigint, type:bigint, comment:null), ]
+POSTHOOK: Lineage: small_alltypesorc_a.cboolean1 SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:cboolean1, type:boolean, comment:null), ]
+POSTHOOK: Lineage: small_alltypesorc_a.cboolean2 SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:cboolean2, type:boolean, comment:null), ]
+POSTHOOK: Lineage: small_alltypesorc_a.cdouble SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: small_alltypesorc_a.cfloat SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: small_alltypesorc_a.cint SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: small_alltypesorc_a.csmallint SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:csmallint, type:smallint, comment:null), ]
+POSTHOOK: Lineage: small_alltypesorc_a.cstring1 SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: small_alltypesorc_a.cstring2 SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:cstring2, type:string, comment:null), ]
+POSTHOOK: Lineage: small_alltypesorc_a.ctimestamp1 SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: small_alltypesorc_a.ctimestamp2 SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: small_alltypesorc_a.ctinyint SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+PREHOOK: query: desc formatted small_alltypesorc_a
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@small_alltypesorc_a
+POSTHOOK: query: desc formatted small_alltypesorc_a
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@small_alltypesorc_a
+# col_name data_type comment
+
+ctinyint tinyint
+csmallint smallint
+cint int
+cbigint bigint
+cfloat float
+cdouble double
+cstring1 string
+cstring2 string
+ctimestamp1 timestamp
+ctimestamp2 timestamp
+cboolean1 boolean
+cboolean2 boolean
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 5
+ numRows 20
+ rawDataSize 4552
+ totalSize 5225
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: drop table small_alltypesorc1a
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@small_alltypesorc1a
+PREHOOK: Output: default@small_alltypesorc1a
+POSTHOOK: query: drop table small_alltypesorc1a
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@small_alltypesorc1a
+POSTHOOK: Output: default@small_alltypesorc1a
+PREHOOK: query: drop table small_alltypesorc2a
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@small_alltypesorc2a
+PREHOOK: Output: default@small_alltypesorc2a
+POSTHOOK: query: drop table small_alltypesorc2a
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@small_alltypesorc2a
+POSTHOOK: Output: default@small_alltypesorc2a
+PREHOOK: query: drop table small_alltypesorc3a
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@small_alltypesorc3a
+PREHOOK: Output: default@small_alltypesorc3a
+POSTHOOK: query: drop table small_alltypesorc3a
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@small_alltypesorc3a
+POSTHOOK: Output: default@small_alltypesorc3a
+PREHOOK: query: drop table small_alltypesorc4a
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@small_alltypesorc4a
+PREHOOK: Output: default@small_alltypesorc4a
+POSTHOOK: query: drop table small_alltypesorc4a
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@small_alltypesorc4a
+POSTHOOK: Output: default@small_alltypesorc4a
+PREHOOK: query: drop table small_alltypesorc_a
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@small_alltypesorc_a
+PREHOOK: Output: default@small_alltypesorc_a
+POSTHOOK: query: drop table small_alltypesorc_a
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@small_alltypesorc_a
+POSTHOOK: Output: default@small_alltypesorc_a
+PREHOOK: query: create table small_alltypesorc1a as select * from alltypesorc where cint is not null and ctinyint is not null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: database:default
+PREHOOK: Output: default@small_alltypesorc1a
+POSTHOOK: query: create table small_alltypesorc1a as select * from alltypesorc where cint is not null and ctinyint is not null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@small_alltypesorc1a
+PREHOOK: query: create table small_alltypesorc2a as select * from alltypesorc where cint is null and ctinyint is not null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: database:default
+PREHOOK: Output: default@small_alltypesorc2a
+POSTHOOK: query: create table small_alltypesorc2a as select * from alltypesorc where cint is null and ctinyint is not null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@small_alltypesorc2a
+PREHOOK: query: create table small_alltypesorc3a as select * from alltypesorc where cint is not null and ctinyint is null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: database:default
+PREHOOK: Output: default@small_alltypesorc3a
+POSTHOOK: query: create table small_alltypesorc3a as select * from alltypesorc where cint is not null and ctinyint is null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@small_alltypesorc3a
+PREHOOK: query: create table small_alltypesorc4a as select * from alltypesorc where cint is null and ctinyint is null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: database:default
+PREHOOK: Output: default@small_alltypesorc4a
+POSTHOOK: query: create table small_alltypesorc4a as select * from alltypesorc where cint is null and ctinyint is null order by ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 limit 5
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@small_alltypesorc4a
+PREHOOK: query: create table small_alltypesorc_a stored as orc as select * from
+(select * from (select * from small_alltypesorc1a) sq1
+ union all
+ select * from (select * from small_alltypesorc2a) sq2
+ union all
+ select * from (select * from small_alltypesorc3a) sq3
+ union all
+ select * from (select * from small_alltypesorc4a) sq4) q
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@small_alltypesorc1a
+PREHOOK: Input: default@small_alltypesorc2a
+PREHOOK: Input: default@small_alltypesorc3a
+PREHOOK: Input: default@small_alltypesorc4a
+PREHOOK: Output: database:default
+PREHOOK: Output: default@small_alltypesorc_a
+POSTHOOK: query: create table small_alltypesorc_a stored as orc as select * from
+(select * from (select * from small_alltypesorc1a) sq1
+ union all
+ select * from (select * from small_alltypesorc2a) sq2
+ union all
+ select * from (select * from small_alltypesorc3a) sq3
+ union all
+ select * from (select * from small_alltypesorc4a) sq4) q
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@small_alltypesorc1a
+POSTHOOK: Input: default@small_alltypesorc2a
+POSTHOOK: Input: default@small_alltypesorc3a
+POSTHOOK: Input: default@small_alltypesorc4a
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@small_alltypesorc_a
+PREHOOK: query: desc formatted small_alltypesorc_a
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@small_alltypesorc_a
+POSTHOOK: query: desc formatted small_alltypesorc_a
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@small_alltypesorc_a
+# col_name data_type comment
+
+ctinyint tinyint
+csmallint smallint
+cint int
+cbigint bigint
+cfloat float
+cdouble double
+cstring1 string
+cstring2 string
+ctimestamp1 timestamp
+ctimestamp2 timestamp
+cboolean1 boolean
+cboolean2 boolean
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 1
+ numRows 15
+ rawDataSize 3483
+ totalSize 3176
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_alltypesorc_a
+PREHOOK: Output: default@small_alltypesorc_a
+POSTHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_alltypesorc_a
+POSTHOOK: Output: default@small_alltypesorc_a
+PREHOOK: query: desc formatted small_alltypesorc_a
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@small_alltypesorc_a
+POSTHOOK: query: desc formatted small_alltypesorc_a
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@small_alltypesorc_a
+# col_name data_type comment
+
+ctinyint tinyint
+csmallint smallint
+cint int
+cbigint bigint
+cfloat float
+cdouble double
+cstring1 string
+cstring2 string
+ctimestamp1 timestamp
+ctimestamp2 timestamp
+cboolean1 boolean
+cboolean2 boolean
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 1
+ numRows 15
+ rawDataSize 3320
+ totalSize 3176
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: insert into table small_alltypesorc_a select * from small_alltypesorc1a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_alltypesorc1a
+PREHOOK: Output: default@small_alltypesorc_a
+POSTHOOK: query: insert into table small_alltypesorc_a select * from small_alltypesorc1a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_alltypesorc1a
+POSTHOOK: Output: default@small_alltypesorc_a
+POSTHOOK: Lineage: small_alltypesorc_a.cbigint SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:cbigint, type:bigint, comment:null), ]
+POSTHOOK: Lineage: small_alltypesorc_a.cboolean1 SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:cboolean1, type:boolean, comment:null), ]
+POSTHOOK: Lineage: small_alltypesorc_a.cboolean2 SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:cboolean2, type:boolean, comment:null), ]
+POSTHOOK: Lineage: small_alltypesorc_a.cdouble SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: small_alltypesorc_a.cfloat SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: small_alltypesorc_a.cint SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: small_alltypesorc_a.csmallint SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:csmallint, type:smallint, comment:null), ]
+POSTHOOK: Lineage: small_alltypesorc_a.cstring1 SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: small_alltypesorc_a.cstring2 SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:cstring2, type:string, comment:null), ]
+POSTHOOK: Lineage: small_alltypesorc_a.ctimestamp1 SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: small_alltypesorc_a.ctimestamp2 SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: small_alltypesorc_a.ctinyint SIMPLE [(small_alltypesorc1a)small_alltypesorc1a.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+PREHOOK: query: desc formatted small_alltypesorc_a
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@small_alltypesorc_a
+POSTHOOK: query: desc formatted small_alltypesorc_a
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@small_alltypesorc_a
+# col_name data_type comment
+
+ctinyint tinyint
+csmallint smallint
+cint int
+cbigint bigint
+cfloat float
+cdouble double
+cstring1 string
+cstring2 string
+ctimestamp1 timestamp
+ctimestamp2 timestamp
+cboolean1 boolean
+cboolean2 boolean
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 2
+ numRows 20
+ rawDataSize 4389
+ totalSize 4486
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
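
The MapReduce variant of the same test differs mainly in the write-time numbers the golden file pins down: with merging disabled the CTAS already reports numFiles 4 and numRows 15, and with merging enabled it produces a single merged ORC file (numFiles 1, totalSize 3176) whose write-time rawDataSize of 3483 is corrected to 3320 by ANALYZE. The two q.out files record these engine-specific values verbatim.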
[25/50] [abbrv] hive git commit: HIVE-11711: Merge hbase-metastore branch to trunk
Posted by xu...@apache.org.
HIVE-11711: Merge hbase-metastore branch to trunk
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/52383033
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/52383033
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/52383033
Branch: refs/heads/beeline-cli
Commit: 52383033822b87b02853eaaf15db1b2904617615
Parents: 514ab79 4c17ecf
Author: Daniel Dai <da...@hortonworks.com>
Authored: Mon Sep 21 22:02:22 2015 -0700
Committer: Daniel Dai <da...@hortonworks.com>
Committed: Mon Sep 21 22:02:22 2015 -0700
----------------------------------------------------------------------
.../apache/hadoop/hive/ant/QTestGenTask.java | 11 +
bin/ext/hbaseimport.cmd | 35 +
bin/ext/hbaseimport.sh | 27 +
bin/ext/hbaseschematool.sh | 27 +
.../apache/hadoop/hive/common/ObjectPair.java | 5 +
.../org/apache/hadoop/hive/conf/HiveConf.java | 98 +-
.../apache/hive/common/util/BloomFilter.java | 20 +-
data/conf/tez/hive-site.xml | 10 +
itests/hive-unit/pom.xml | 35 +
.../hadoop/hive/metastore/TestAdminUser.java | 4 +-
.../hive/metastore/TestHiveMetaStore.java | 3 +
.../metastore/hbase/HBaseIntegrationTests.java | 117 +
.../TestHBaseAggrStatsCacheIntegration.java | 691 +
.../hive/metastore/hbase/TestHBaseImport.java | 650 +
.../metastore/hbase/TestHBaseMetastoreSql.java | 223 +
.../hbase/TestHBaseStoreIntegration.java | 1794 +
.../hbase/TestStorageDescriptorSharing.java | 191 +
itests/qtest/pom.xml | 10 +-
itests/util/pom.xml | 32 +
.../metastore/hbase/HBaseStoreTestUtil.java | 45 +
.../org/apache/hadoop/hive/ql/QTestUtil.java | 41 +-
metastore/if/hive_metastore.thrift | 54 +
metastore/pom.xml | 82 +
.../metastore/hbase/HbaseMetastoreProto.java | 34901 +++++++++++++++++
.../gen/thrift/gen-cpp/ThriftHiveMetastore.cpp | 6919 ++--
.../gen/thrift/gen-cpp/ThriftHiveMetastore.h | 664 +
.../ThriftHiveMetastore_server.skeleton.cpp | 25 +
.../gen/thrift/gen-cpp/hive_metastore_types.cpp | 1294 +-
.../gen/thrift/gen-cpp/hive_metastore_types.h | 371 +
.../hive/metastore/api/AbortTxnRequest.java | 2 +-
.../metastore/api/AddDynamicPartitions.java | 2 +-
.../metastore/api/AddPartitionsRequest.java | 2 +-
.../hive/metastore/api/AddPartitionsResult.java | 2 +-
.../hadoop/hive/metastore/api/AggrStats.java | 2 +-
.../metastore/api/AlreadyExistsException.java | 2 +-
.../metastore/api/BinaryColumnStatsData.java | 2 +-
.../metastore/api/BooleanColumnStatsData.java | 2 +-
.../hive/metastore/api/CheckLockRequest.java | 2 +-
.../metastore/api/ClearFileMetadataRequest.java | 438 +
.../metastore/api/ClearFileMetadataResult.java | 283 +
.../hive/metastore/api/ColumnStatistics.java | 2 +-
.../metastore/api/ColumnStatisticsDesc.java | 2 +-
.../hive/metastore/api/ColumnStatisticsObj.java | 2 +-
.../hive/metastore/api/CommitTxnRequest.java | 2 +-
.../hive/metastore/api/CompactionRequest.java | 2 +-
.../api/ConfigValSecurityException.java | 2 +-
.../api/CurrentNotificationEventId.java | 2 +-
.../hadoop/hive/metastore/api/Database.java | 2 +-
.../apache/hadoop/hive/metastore/api/Date.java | 2 +-
.../hive/metastore/api/DateColumnStatsData.java | 2 +-
.../hadoop/hive/metastore/api/Decimal.java | 2 +-
.../metastore/api/DecimalColumnStatsData.java | 2 +-
.../metastore/api/DoubleColumnStatsData.java | 2 +-
.../hive/metastore/api/DropPartitionsExpr.java | 2 +-
.../metastore/api/DropPartitionsRequest.java | 2 +-
.../metastore/api/DropPartitionsResult.java | 2 +-
.../hive/metastore/api/EnvironmentContext.java | 2 +-
.../hadoop/hive/metastore/api/FieldSchema.java | 2 +-
.../hive/metastore/api/FireEventRequest.java | 2 +-
.../hive/metastore/api/FireEventResponse.java | 2 +-
.../hadoop/hive/metastore/api/Function.java | 2 +-
.../metastore/api/GetAllFunctionsResponse.java | 38 +-
.../api/GetFileMetadataByExprRequest.java | 548 +
.../api/GetFileMetadataByExprResult.java | 703 +
.../metastore/api/GetFileMetadataRequest.java | 438 +
.../metastore/api/GetFileMetadataResult.java | 540 +
.../metastore/api/GetOpenTxnsInfoResponse.java | 2 +-
.../hive/metastore/api/GetOpenTxnsResponse.java | 2 +-
.../api/GetPrincipalsInRoleRequest.java | 2 +-
.../api/GetPrincipalsInRoleResponse.java | 2 +-
.../api/GetRoleGrantsForPrincipalRequest.java | 2 +-
.../api/GetRoleGrantsForPrincipalResponse.java | 2 +-
.../api/GrantRevokePrivilegeRequest.java | 2 +-
.../api/GrantRevokePrivilegeResponse.java | 2 +-
.../metastore/api/GrantRevokeRoleRequest.java | 2 +-
.../metastore/api/GrantRevokeRoleResponse.java | 2 +-
.../hive/metastore/api/HeartbeatRequest.java | 2 +-
.../metastore/api/HeartbeatTxnRangeRequest.java | 2 +-
.../api/HeartbeatTxnRangeResponse.java | 2 +-
.../hive/metastore/api/HiveObjectPrivilege.java | 2 +-
.../hive/metastore/api/HiveObjectRef.java | 2 +-
.../apache/hadoop/hive/metastore/api/Index.java | 2 +-
.../api/IndexAlreadyExistsException.java | 2 +-
.../metastore/api/InsertEventRequestData.java | 2 +-
.../metastore/api/InvalidInputException.java | 2 +-
.../metastore/api/InvalidObjectException.java | 2 +-
.../api/InvalidOperationException.java | 2 +-
.../api/InvalidPartitionException.java | 2 +-
.../hive/metastore/api/LockComponent.java | 2 +-
.../hadoop/hive/metastore/api/LockRequest.java | 2 +-
.../hadoop/hive/metastore/api/LockResponse.java | 2 +-
.../hive/metastore/api/LongColumnStatsData.java | 2 +-
.../hive/metastore/api/MetaException.java | 2 +-
.../hive/metastore/api/MetadataPpdResult.java | 508 +
.../hive/metastore/api/NoSuchLockException.java | 2 +-
.../metastore/api/NoSuchObjectException.java | 2 +-
.../hive/metastore/api/NoSuchTxnException.java | 2 +-
.../hive/metastore/api/NotificationEvent.java | 2 +-
.../metastore/api/NotificationEventRequest.java | 2 +-
.../api/NotificationEventResponse.java | 2 +-
.../hive/metastore/api/OpenTxnRequest.java | 2 +-
.../hive/metastore/api/OpenTxnsResponse.java | 2 +-
.../apache/hadoop/hive/metastore/api/Order.java | 2 +-
.../hadoop/hive/metastore/api/Partition.java | 2 +-
.../api/PartitionListComposingSpec.java | 2 +-
.../hive/metastore/api/PartitionSpec.java | 2 +-
.../api/PartitionSpecWithSharedSD.java | 2 +-
.../hive/metastore/api/PartitionWithoutSD.java | 2 +-
.../metastore/api/PartitionsByExprRequest.java | 2 +-
.../metastore/api/PartitionsByExprResult.java | 2 +-
.../metastore/api/PartitionsStatsRequest.java | 2 +-
.../metastore/api/PartitionsStatsResult.java | 2 +-
.../metastore/api/PrincipalPrivilegeSet.java | 2 +-
.../hadoop/hive/metastore/api/PrivilegeBag.java | 2 +-
.../hive/metastore/api/PrivilegeGrantInfo.java | 2 +-
.../metastore/api/PutFileMetadataRequest.java | 588 +
.../metastore/api/PutFileMetadataResult.java | 283 +
.../hadoop/hive/metastore/api/ResourceUri.java | 2 +-
.../apache/hadoop/hive/metastore/api/Role.java | 2 +-
.../hive/metastore/api/RolePrincipalGrant.java | 2 +-
.../hadoop/hive/metastore/api/Schema.java | 2 +-
.../hadoop/hive/metastore/api/SerDeInfo.java | 2 +-
.../api/SetPartitionsStatsRequest.java | 2 +-
.../hive/metastore/api/ShowCompactRequest.java | 2 +-
.../hive/metastore/api/ShowCompactResponse.java | 2 +-
.../api/ShowCompactResponseElement.java | 2 +-
.../hive/metastore/api/ShowLocksRequest.java | 2 +-
.../hive/metastore/api/ShowLocksResponse.java | 2 +-
.../metastore/api/ShowLocksResponseElement.java | 2 +-
.../hadoop/hive/metastore/api/SkewedInfo.java | 2 +-
.../hive/metastore/api/StorageDescriptor.java | 2 +-
.../metastore/api/StringColumnStatsData.java | 2 +-
.../apache/hadoop/hive/metastore/api/Table.java | 2 +-
.../hive/metastore/api/TableStatsRequest.java | 2 +-
.../hive/metastore/api/TableStatsResult.java | 2 +-
.../hive/metastore/api/ThriftHiveMetastore.java | 8422 ++--
.../hive/metastore/api/TxnAbortedException.java | 2 +-
.../hadoop/hive/metastore/api/TxnInfo.java | 2 +-
.../hive/metastore/api/TxnOpenException.java | 2 +-
.../apache/hadoop/hive/metastore/api/Type.java | 2 +-
.../hive/metastore/api/UnknownDBException.java | 2 +-
.../api/UnknownPartitionException.java | 2 +-
.../metastore/api/UnknownTableException.java | 2 +-
.../hive/metastore/api/UnlockRequest.java | 2 +-
.../hadoop/hive/metastore/api/Version.java | 2 +-
.../gen-php/metastore/ThriftHiveMetastore.php | 2810 +-
.../src/gen/thrift/gen-php/metastore/Types.php | 1009 +-
.../hive_metastore/ThriftHiveMetastore-remote | 49 +-
.../hive_metastore/ThriftHiveMetastore.py | 1563 +-
.../gen/thrift/gen-py/hive_metastore/ttypes.py | 734 +-
.../gen/thrift/gen-rb/hive_metastore_types.rb | 167 +
.../gen/thrift/gen-rb/thrift_hive_metastore.rb | 267 +
.../hadoop/hive/metastore/HiveAlterHandler.java | 38 +-
.../hadoop/hive/metastore/HiveMetaStore.java | 266 +-
.../hive/metastore/HiveMetaStoreClient.java | 112 +-
.../hadoop/hive/metastore/IMetaStoreClient.java | 6 +
.../hadoop/hive/metastore/ObjectStore.java | 418 +-
.../hive/metastore/PartFilterExprUtil.java | 149 +
.../apache/hadoop/hive/metastore/RawStore.java | 66 +-
.../hadoop/hive/metastore/RawStoreProxy.java | 5 +-
.../hbase/AggrStatsInvalidatorFilter.java | 121 +
.../hadoop/hive/metastore/hbase/Counter.java | 53 +
.../hive/metastore/hbase/HBaseConnection.java | 96 +
.../metastore/hbase/HBaseFilterPlanUtil.java | 612 +
.../hive/metastore/hbase/HBaseImport.java | 535 +
.../hive/metastore/hbase/HBaseReadWrite.java | 2106 +
.../hive/metastore/hbase/HBaseSchemaTool.java | 239 +
.../hadoop/hive/metastore/hbase/HBaseStore.java | 2387 ++
.../hadoop/hive/metastore/hbase/HBaseUtils.java | 1340 +
.../hive/metastore/hbase/ObjectCache.java | 81 +
.../hive/metastore/hbase/PartitionCache.java | 168 +
.../metastore/hbase/PartitionKeyComparator.java | 292 +
.../hbase/SharedStorageDescriptor.java | 251 +
.../hadoop/hive/metastore/hbase/StatsCache.java | 326 +
.../metastore/hbase/TephraHBaseConnection.java | 127 +
.../metastore/hbase/VanillaHBaseConnection.java | 137 +
.../stats/BinaryColumnStatsAggregator.java | 35 +
.../stats/BooleanColumnStatsAggregator.java | 35 +
.../hbase/stats/ColumnStatsAggregator.java | 26 +
.../stats/ColumnStatsAggregatorFactory.java | 94 +
.../stats/DecimalColumnStatsAggregator.java | 43 +
.../stats/DoubleColumnStatsAggregator.java | 36 +
.../hbase/stats/LongColumnStatsAggregator.java | 36 +
.../stats/StringColumnStatsAggregator.java | 36 +
.../hive/metastore/parser/ExpressionTree.java | 9 +-
.../metastore/hbase/hbase_metastore_proto.proto | 282 +
.../DummyRawStoreControlledCommit.java | 56 +-
.../DummyRawStoreForJdoConnection.java | 50 +-
.../hadoop/hive/metastore/TestObjectStore.java | 43 +-
.../hadoop/hive/metastore/hbase/MockUtils.java | 199 +
.../hbase/TestHBaseAggregateStatsCache.java | 316 +
.../hbase/TestHBaseFilterPlanUtil.java | 483 +
.../hive/metastore/hbase/TestHBaseStore.java | 1307 +
.../metastore/hbase/TestHBaseStoreCached.java | 378 +
.../hbase/TestSharedStorageDescriptor.java | 153 +
pom.xml | 3 +-
.../hadoop/hive/ql/plan/api/Adjacency.java | 2 +-
.../apache/hadoop/hive/ql/plan/api/Graph.java | 2 +-
.../hadoop/hive/ql/plan/api/Operator.java | 2 +-
.../apache/hadoop/hive/ql/plan/api/Query.java | 2 +-
.../hadoop/hive/ql/plan/api/QueryPlan.java | 2 +-
.../apache/hadoop/hive/ql/plan/api/Stage.java | 2 +-
.../apache/hadoop/hive/ql/plan/api/Task.java | 2 +-
.../java/org/apache/hadoop/hive/ql/Driver.java | 6 +
.../org/apache/hadoop/hive/ql/exec/DDLTask.java | 64 +-
.../apache/hadoop/hive/ql/metadata/Hive.java | 9 +-
.../hadoop/hive/ql/metadata/Partition.java | 29 +-
.../AuthorizationPreEventListener.java | 2 +-
.../apache/hadoop/hive/ql/stats/StatsUtils.java | 3 +
.../hive/metastore/TestMetastoreExpr.java | 2 +-
.../dynpart_sort_opt_vectorization.q | 2 +
.../clientpositive/dynpart_sort_optimization.q | 2 +
.../dynpart_sort_opt_vectorization.q.out | 12 +-
.../dynpart_sort_optimization.q.out | 12 +-
.../tez/dynpart_sort_opt_vectorization.q.out | 12 +-
.../tez/dynpart_sort_optimization.q.out | 12 +-
ql/src/test/templates/TestCliDriver.vm | 3 +-
.../hadoop/hive/serde/test/InnerStruct.java | 2 +-
.../hadoop/hive/serde/test/ThriftTestObj.java | 2 +-
.../hadoop/hive/serde2/thrift/test/Complex.java | 2 +-
.../hive/serde2/thrift/test/IntString.java | 2 +-
.../hive/serde2/thrift/test/MegaStruct.java | 2 +-
.../hive/serde2/thrift/test/MiniStruct.java | 2 +-
.../hive/serde2/thrift/test/SetIntString.java | 2 +-
.../BinarySortableSerDeWithEndPrefix.java | 41 +
.../hadoop/hive/service/HiveClusterStatus.java | 2 +-
.../hive/service/HiveServerException.java | 2 +-
.../apache/hadoop/hive/service/ThriftHive.java | 2 +-
.../service/cli/thrift/TArrayTypeEntry.java | 2 +-
.../hive/service/cli/thrift/TBinaryColumn.java | 2 +-
.../hive/service/cli/thrift/TBoolColumn.java | 2 +-
.../hive/service/cli/thrift/TBoolValue.java | 2 +-
.../hive/service/cli/thrift/TByteColumn.java | 2 +-
.../hive/service/cli/thrift/TByteValue.java | 2 +-
.../hive/service/cli/thrift/TCLIService.java | 2 +-
.../cli/thrift/TCancelDelegationTokenReq.java | 2 +-
.../cli/thrift/TCancelDelegationTokenResp.java | 2 +-
.../service/cli/thrift/TCancelOperationReq.java | 2 +-
.../cli/thrift/TCancelOperationResp.java | 2 +-
.../service/cli/thrift/TCloseOperationReq.java | 2 +-
.../service/cli/thrift/TCloseOperationResp.java | 2 +-
.../service/cli/thrift/TCloseSessionReq.java | 2 +-
.../service/cli/thrift/TCloseSessionResp.java | 2 +-
.../hive/service/cli/thrift/TColumnDesc.java | 2 +-
.../hive/service/cli/thrift/TDoubleColumn.java | 2 +-
.../hive/service/cli/thrift/TDoubleValue.java | 2 +-
.../cli/thrift/TExecuteStatementReq.java | 2 +-
.../cli/thrift/TExecuteStatementResp.java | 2 +-
.../service/cli/thrift/TFetchResultsReq.java | 2 +-
.../service/cli/thrift/TFetchResultsResp.java | 2 +-
.../service/cli/thrift/TGetCatalogsReq.java | 2 +-
.../service/cli/thrift/TGetCatalogsResp.java | 2 +-
.../hive/service/cli/thrift/TGetColumnsReq.java | 2 +-
.../service/cli/thrift/TGetColumnsResp.java | 2 +-
.../cli/thrift/TGetDelegationTokenReq.java | 2 +-
.../cli/thrift/TGetDelegationTokenResp.java | 2 +-
.../service/cli/thrift/TGetFunctionsReq.java | 2 +-
.../service/cli/thrift/TGetFunctionsResp.java | 2 +-
.../hive/service/cli/thrift/TGetInfoReq.java | 2 +-
.../hive/service/cli/thrift/TGetInfoResp.java | 2 +-
.../cli/thrift/TGetOperationStatusReq.java | 2 +-
.../cli/thrift/TGetOperationStatusResp.java | 2 +-
.../cli/thrift/TGetResultSetMetadataReq.java | 2 +-
.../cli/thrift/TGetResultSetMetadataResp.java | 2 +-
.../hive/service/cli/thrift/TGetSchemasReq.java | 2 +-
.../service/cli/thrift/TGetSchemasResp.java | 2 +-
.../service/cli/thrift/TGetTableTypesReq.java | 2 +-
.../service/cli/thrift/TGetTableTypesResp.java | 2 +-
.../hive/service/cli/thrift/TGetTablesReq.java | 2 +-
.../hive/service/cli/thrift/TGetTablesResp.java | 2 +-
.../service/cli/thrift/TGetTypeInfoReq.java | 2 +-
.../service/cli/thrift/TGetTypeInfoResp.java | 2 +-
.../service/cli/thrift/THandleIdentifier.java | 2 +-
.../hive/service/cli/thrift/TI16Column.java | 2 +-
.../hive/service/cli/thrift/TI16Value.java | 2 +-
.../hive/service/cli/thrift/TI32Column.java | 2 +-
.../hive/service/cli/thrift/TI32Value.java | 2 +-
.../hive/service/cli/thrift/TI64Column.java | 2 +-
.../hive/service/cli/thrift/TI64Value.java | 2 +-
.../hive/service/cli/thrift/TMapTypeEntry.java | 2 +-
.../service/cli/thrift/TOpenSessionReq.java | 2 +-
.../service/cli/thrift/TOpenSessionResp.java | 2 +-
.../service/cli/thrift/TOperationHandle.java | 2 +-
.../service/cli/thrift/TPrimitiveTypeEntry.java | 2 +-
.../cli/thrift/TRenewDelegationTokenReq.java | 2 +-
.../cli/thrift/TRenewDelegationTokenResp.java | 2 +-
.../apache/hive/service/cli/thrift/TRow.java | 2 +-
.../apache/hive/service/cli/thrift/TRowSet.java | 2 +-
.../hive/service/cli/thrift/TSessionHandle.java | 2 +-
.../apache/hive/service/cli/thrift/TStatus.java | 2 +-
.../hive/service/cli/thrift/TStringColumn.java | 2 +-
.../hive/service/cli/thrift/TStringValue.java | 2 +-
.../service/cli/thrift/TStructTypeEntry.java | 2 +-
.../hive/service/cli/thrift/TTableSchema.java | 2 +-
.../hive/service/cli/thrift/TTypeDesc.java | 2 +-
.../service/cli/thrift/TTypeQualifiers.java | 2 +-
.../service/cli/thrift/TUnionTypeEntry.java | 2 +-
.../cli/thrift/TUserDefinedTypeEntry.java | 2 +-
.../gen-py/hive_service/ThriftHive-remote | 49 +-
299 files changed, 74878 insertions(+), 7084 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/52383033/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/52383033/data/conf/tez/hive-site.xml
----------------------------------------------------------------------
diff --cc data/conf/tez/hive-site.xml
index b4abe90,bcda3ea..c4c2a12
--- a/data/conf/tez/hive-site.xml
+++ b/data/conf/tez/hive-site.xml
@@@ -254,13 -254,13 +254,23 @@@
</property>
<property>
+ <name>hive.tez.java.opts</name>
+ <value> -Dlog4j.configurationFile=tez-container-log4j2.xml -Dtez.container.log.level=INFO -Dtez.container.root.logger=CLA </value>
+</property>
+
+<property>
+ <name>tez.am.launch.cmd-opts</name>
+ <value> -Dlog4j.configurationFile=tez-container-log4j2.xml -Dtez.container.log.level=INFO -Dtez.container.root.logger=CLA </value>
+</property>
+
++<property>
+ <name>hive.metastore.fastpath</name>
+ <value>true</value>
+ </property>
+
+ <property>
+ <name>hive.metastore.rawstore.impl</name>
+ <value>org.apache.hadoop.hive.metastore.hbase.HBaseStore</value>
+ </property>
+
</configuration>
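
The two properties appended at the end of this diff point the Tez test configuration at the HBase-backed metastore. For illustration, a minimal sketch of making the same switch programmatically (client-side usage assumed here, not part of this commit); HiveConf extends Hadoop's Configuration, so plain string keys work:

import org.apache.hadoop.hive.conf.HiveConf;

public class HBaseMetastoreConfSketch {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    // Same keys and values as the hive-site.xml entries above.
    conf.set("hive.metastore.fastpath", "true");
    conf.set("hive.metastore.rawstore.impl",
        "org.apache.hadoop.hive.metastore.hbase.HBaseStore");
    System.out.println("rawstore.impl = " + conf.get("hive.metastore.rawstore.impl"));
  }
}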
http://git-wip-us.apache.org/repos/asf/hive/blob/52383033/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/52383033/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/52383033/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/52383033/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/52383033/pom.xml
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/52383033/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/52383033/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/52383033/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/52383033/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/52383033/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/52383033/ql/src/test/results/clientpositive/dynpart_sort_optimization.q.out
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/52383033/ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/52383033/ql/src/test/results/clientpositive/tez/dynpart_sort_optimization.q.out
----------------------------------------------------------------------
[29/50] [abbrv] hive git commit: HIVE-11897 : JDO rollback can throw
pointless exceptions (Sergey Shelukhin, reviewed by Ashutosh Chauhan)
Posted by xu...@apache.org.
HIVE-11897 : JDO rollback can throw pointless exceptions (Sergey Shelukhin, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/44741dab
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/44741dab
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/44741dab
Branch: refs/heads/beeline-cli
Commit: 44741dabf0a4e7a9bf21fb2ee1a0b00d9d8eeddb
Parents: 451381c
Author: Sergey Shelukhin <se...@apache.org>
Authored: Tue Sep 22 15:39:47 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Tue Sep 22 15:39:47 2015 -0700
----------------------------------------------------------------------
.../apache/hadoop/hive/metastore/ObjectStore.java | 16 +++++++++++++++-
1 file changed, 15 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/44741dab/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
index 5d2dc29..d9ed883 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
@@ -43,6 +43,7 @@ import java.util.concurrent.locks.ReentrantLock;
import java.util.regex.Pattern;
import javax.jdo.JDODataStoreException;
+import javax.jdo.JDOException;
import javax.jdo.JDOHelper;
import javax.jdo.JDOObjectNotFoundException;
import javax.jdo.PersistenceManager;
@@ -2431,7 +2432,20 @@ public class ObjectStore implements RawStore, Configurable {
throw new MetaException(ex.getMessage());
}
if (!isInTxn) {
- rollbackTransaction();
+ JDOException rollbackEx = null;
+ try {
+ rollbackTransaction();
+ } catch (JDOException jex) {
+ rollbackEx = jex;
+ }
+ if (rollbackEx != null) {
+ // Datanucleus propagates some pointless exceptions and rolls back in the finally.
+ if (currentTransaction != null && currentTransaction.isActive()) {
+ throw rollbackEx; // Throw if the tx wasn't rolled back.
+ }
+ LOG.info("Ignoring exception, rollback succeeded: " + rollbackEx.getMessage());
+ }
+
start = doTrace ? System.nanoTime() : 0;
openTransaction();
if (table != null) {
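
The guard above separates a rollback that genuinely failed (the transaction is still active afterwards) from a DataNucleus exception thrown even though the rollback already succeeded internally. A minimal standalone sketch of that pattern, with a hypothetical Transaction interface standing in for the JDO transaction (not Hive code):

interface Transaction {
  void rollback();
  boolean isActive();
}

final class TolerantRollback {
  static void rollbackQuietly(Transaction tx) {
    RuntimeException rollbackEx = null;
    try {
      tx.rollback();
    } catch (RuntimeException ex) {
      // The store may throw even though it rolled back internally.
      rollbackEx = ex;
    }
    if (rollbackEx != null) {
      if (tx.isActive()) {
        throw rollbackEx; // Still active: the rollback really failed, so propagate.
      }
      System.err.println("Ignoring exception, rollback succeeded: "
          + rollbackEx.getMessage());
    }
  }
}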
[18/50] [abbrv] hive git commit: HIVE-11891 - Add basic performance
logging to metastore calls (Brock via Szehon)
Posted by xu...@apache.org.
HIVE-11891 - Add basic performance logging to metastore calls (Brock via Szehon)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/21861592
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/21861592
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/21861592
Branch: refs/heads/beeline-cli
Commit: 2186159209db49f2aeab06be7c38203fbbb5550c
Parents: 93a6627
Author: Brock Noland <br...@apache.org>
Authored: Sun Sep 20 15:49:01 2015 -0700
Committer: Brock Noland <br...@apache.org>
Committed: Sun Sep 20 15:49:01 2015 -0700
----------------------------------------------------------------------
.../apache/hadoop/hive/ql/log/PerfLogger.java | 196 +++++++++++++++++++
.../hive/metastore/RetryingHMSHandler.java | 33 +++-
.../java/org/apache/hadoop/hive/ql/Driver.java | 11 +-
.../hadoop/hive/ql/exec/MapJoinOperator.java | 3 +-
.../ql/exec/SparkHashTableSinkOperator.java | 3 +-
.../apache/hadoop/hive/ql/exec/Utilities.java | 11 +-
.../hadoop/hive/ql/exec/spark/SparkPlan.java | 3 +-
.../hive/ql/exec/spark/SparkPlanGenerator.java | 3 +-
.../hive/ql/exec/spark/SparkRecordHandler.java | 3 +-
.../hadoop/hive/ql/exec/spark/SparkTask.java | 2 +-
.../ql/exec/spark/status/SparkJobMonitor.java | 2 +-
.../hive/ql/exec/tez/RecordProcessor.java | 3 +-
.../hive/ql/exec/tez/ReduceRecordProcessor.java | 1 -
.../hive/ql/exec/tez/ReduceRecordSource.java | 3 +-
.../hadoop/hive/ql/exec/tez/TezJobMonitor.java | 4 +-
.../hadoop/hive/ql/exec/tez/TezProcessor.java | 3 +-
.../apache/hadoop/hive/ql/exec/tez/TezTask.java | 2 +-
.../hive/ql/io/CombineHiveInputFormat.java | 10 +-
.../hadoop/hive/ql/io/HiveInputFormat.java | 5 +-
.../apache/hadoop/hive/ql/log/PerfLogger.java | 195 ------------------
.../hive/ql/optimizer/ppr/PartitionPruner.java | 7 +-
.../hive/ql/parse/spark/SparkCompiler.java | 3 +-
.../hadoop/hive/ql/session/SessionState.java | 37 ++--
23 files changed, 293 insertions(+), 250 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/21861592/common/src/java/org/apache/hadoop/hive/ql/log/PerfLogger.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/ql/log/PerfLogger.java b/common/src/java/org/apache/hadoop/hive/ql/log/PerfLogger.java
new file mode 100644
index 0000000..6263a6d
--- /dev/null
+++ b/common/src/java/org/apache/hadoop/hive/ql/log/PerfLogger.java
@@ -0,0 +1,196 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.log;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.util.ReflectionUtils;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * PerfLogger.
+ *
+ * Can be used to measure and log the time spent by a piece of code.
+ */
+public class PerfLogger {
+ public static final String ACQUIRE_READ_WRITE_LOCKS = "acquireReadWriteLocks";
+ public static final String COMPILE = "compile";
+ public static final String PARSE = "parse";
+ public static final String ANALYZE = "semanticAnalyze";
+ public static final String DO_AUTHORIZATION = "doAuthorization";
+ public static final String DRIVER_EXECUTE = "Driver.execute";
+ public static final String INPUT_SUMMARY = "getInputSummary";
+ public static final String GET_SPLITS = "getSplits";
+ public static final String RUN_TASKS = "runTasks";
+ public static final String SERIALIZE_PLAN = "serializePlan";
+ public static final String DESERIALIZE_PLAN = "deserializePlan";
+ public static final String CLONE_PLAN = "clonePlan";
+ public static final String TASK = "task.";
+ public static final String RELEASE_LOCKS = "releaseLocks";
+ public static final String PRUNE_LISTING = "prune-listing";
+ public static final String PARTITION_RETRIEVING = "partition-retrieving";
+ public static final String PRE_HOOK = "PreHook.";
+ public static final String POST_HOOK = "PostHook.";
+ public static final String FAILURE_HOOK = "FailureHook.";
+ public static final String DRIVER_RUN = "Driver.run";
+ public static final String TIME_TO_SUBMIT = "TimeToSubmit";
+ public static final String TEZ_SUBMIT_TO_RUNNING = "TezSubmitToRunningDag";
+ public static final String TEZ_BUILD_DAG = "TezBuildDag";
+ public static final String TEZ_SUBMIT_DAG = "TezSubmitDag";
+ public static final String TEZ_RUN_DAG = "TezRunDag";
+ public static final String TEZ_CREATE_VERTEX = "TezCreateVertex.";
+ public static final String TEZ_RUN_VERTEX = "TezRunVertex.";
+ public static final String TEZ_INITIALIZE_PROCESSOR = "TezInitializeProcessor";
+ public static final String TEZ_RUN_PROCESSOR = "TezRunProcessor";
+ public static final String TEZ_INIT_OPERATORS = "TezInitializeOperators";
+ public static final String LOAD_HASHTABLE = "LoadHashtable";
+
+ public static final String SPARK_SUBMIT_TO_RUNNING = "SparkSubmitToRunning";
+ public static final String SPARK_BUILD_PLAN = "SparkBuildPlan";
+ public static final String SPARK_BUILD_RDD_GRAPH = "SparkBuildRDDGraph";
+ public static final String SPARK_SUBMIT_JOB = "SparkSubmitJob";
+ public static final String SPARK_RUN_JOB = "SparkRunJob";
+ public static final String SPARK_CREATE_TRAN = "SparkCreateTran.";
+ public static final String SPARK_RUN_STAGE = "SparkRunStage.";
+ public static final String SPARK_INIT_OPERATORS = "SparkInitializeOperators";
+ public static final String SPARK_GENERATE_TASK_TREE = "SparkGenerateTaskTree";
+ public static final String SPARK_OPTIMIZE_OPERATOR_TREE = "SparkOptimizeOperatorTree";
+ public static final String SPARK_OPTIMIZE_TASK_TREE = "SparkOptimizeTaskTree";
+ public static final String SPARK_FLUSH_HASHTABLE = "SparkFlushHashTable.";
+
+ protected final Map<String, Long> startTimes = new HashMap<String, Long>();
+ protected final Map<String, Long> endTimes = new HashMap<String, Long>();
+
+ static final private Log LOG = LogFactory.getLog(PerfLogger.class.getName());
+ protected static final ThreadLocal<PerfLogger> perfLogger = new ThreadLocal<PerfLogger>();
+
+
+ public PerfLogger() {
+ // Use getPerfLogger to get an instance of PerfLogger
+ }
+
+ public static PerfLogger getPerfLogger(HiveConf conf, boolean resetPerfLogger) {
+ PerfLogger result = perfLogger.get();
+ if (resetPerfLogger || result == null) {
+ if (conf == null) {
+ result = new PerfLogger();
+ } else {
+ try {
+ result = (PerfLogger) ReflectionUtils.newInstance(conf.getClassByName(
+ conf.getVar(HiveConf.ConfVars.HIVE_PERF_LOGGER)), conf);
+ } catch (ClassNotFoundException e) {
+ LOG.error("Performance Logger Class not found:" + e.getMessage());
+ result = new PerfLogger();
+ }
+ }
+ perfLogger.set(result);
+ }
+ return result;
+ }
+
+ /**
+ * Call this function when you start to measure time spent by a piece of code.
+ * @param callerName name of the calling class, logged as the "from" field.
+ * @param method method or ID that identifies this perf log element.
+ */
+ public void PerfLogBegin(String callerName, String method) {
+ long startTime = System.currentTimeMillis();
+ LOG.info("<PERFLOG method=" + method + " from=" + callerName + ">");
+ startTimes.put(method, new Long(startTime));
+ }
+ /**
+ * Call this function, after a matching PerfLogBegin, to mark the end of the measurement.
+ * @param callerName name of the calling class, logged as the "from" field.
+ * @param method method or ID that identifies this perf log element.
+ * @return the duration in milliseconds between now and startTime, or -1 if startTime is null
+ */
+ public long PerfLogEnd(String callerName, String method) {
+ return PerfLogEnd(callerName, method, null);
+ }
+
+ /**
+ * Call this function, after a matching PerfLogBegin, to mark the end of the measurement.
+ * @param callerName name of the calling class, logged as the "from" field.
+ * @param method method or ID that identifies this perf log element.
+ * @param additionalInfo extra text appended to the log message; may be null.
+ * @return the duration in milliseconds between now and startTime, or -1 if startTime is null
+ */
+ public long PerfLogEnd(String callerName, String method, String additionalInfo) {
+ Long startTime = startTimes.get(method);
+ long endTime = System.currentTimeMillis();
+ long duration = -1;
+
+ endTimes.put(method, new Long(endTime));
+
+ StringBuilder sb = new StringBuilder("</PERFLOG method=").append(method);
+ if (startTime != null) {
+ sb.append(" start=").append(startTime);
+ }
+ sb.append(" end=").append(endTime);
+ if (startTime != null) {
+ duration = endTime - startTime.longValue();
+ sb.append(" duration=").append(duration);
+ }
+ sb.append(" from=").append(callerName);
+ if (additionalInfo != null) {
+ sb.append(" ").append(additionalInfo);
+ }
+ sb.append(">");
+ LOG.info(sb);
+
+ return duration;
+ }
+
+ public Long getStartTime(String method) {
+ long startTime = 0L;
+
+ if (startTimes.containsKey(method)) {
+ startTime = startTimes.get(method);
+ }
+ return startTime;
+ }
+
+ public Long getEndTime(String method) {
+ long endTime = 0L;
+
+ if (endTimes.containsKey(method)) {
+ endTime = endTimes.get(method);
+ }
+ return endTime;
+ }
+
+ public boolean startTimeHasMethod(String method) {
+ return startTimes.containsKey(method);
+ }
+
+ public boolean endTimeHasMethod(String method) {
+ return endTimes.containsKey(method);
+ }
+
+ public Long getDuration(String method) {
+ long duration = 0;
+ if (startTimes.containsKey(method) && endTimes.containsKey(method)) {
+ duration = endTimes.get(method) - startTimes.get(method);
+ }
+ return duration;
+ }
+
+}
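
Assuming the class above is on the classpath, typical usage brackets the measured code with PerfLogBegin/PerfLogEnd under the same method key; per getPerfLogger above, a null conf falls back to the base PerfLogger:

import org.apache.hadoop.hive.ql.log.PerfLogger;

public class PerfLoggerUsageSketch {
  private static final String CLASS_NAME = PerfLoggerUsageSketch.class.getName();

  public static void main(String[] args) throws InterruptedException {
    PerfLogger perfLogger = PerfLogger.getPerfLogger(null, false);
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.COMPILE);
    Thread.sleep(50); // stand-in for the work being measured
    long durationMs = perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.COMPILE);
    System.out.println("compile took " + durationMs + " ms");
  }
}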
http://git-wip-us.apache.org/repos/asf/hive/blob/21861592/metastore/src/java/org/apache/hadoop/hive/metastore/RetryingHMSHandler.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/RetryingHMSHandler.java b/metastore/src/java/org/apache/hadoop/hive/metastore/RetryingHMSHandler.java
index 892aef4..56276b6 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/RetryingHMSHandler.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/RetryingHMSHandler.java
@@ -34,6 +34,7 @@ import org.apache.hadoop.hive.common.classification.InterfaceStability;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
+import org.apache.hadoop.hive.ql.log.PerfLogger;
import org.datanucleus.exceptions.NucleusException;
@InterfaceAudience.Private
@@ -41,6 +42,17 @@ import org.datanucleus.exceptions.NucleusException;
public class RetryingHMSHandler implements InvocationHandler {
private static final Log LOG = LogFactory.getLog(RetryingHMSHandler.class);
+ private static final String CLASS_NAME = RetryingHMSHandler.class.getName();
+
+ private static class Result {
+ private final Object result;
+ private final int numRetries;
+
+ public Result(Object result, int numRetries) {
+ this.result = result;
+ this.numRetries = numRetries;
+ }
+ }
private final IHMSHandler baseHandler;
private final MetaStoreInit.MetaStoreInitData metaStoreInitData =
@@ -78,6 +90,25 @@ public class RetryingHMSHandler implements InvocationHandler {
@Override
public Object invoke(final Object proxy, final Method method, final Object[] args) throws Throwable {
+ int retryCount = -1;
+ int threadId = HiveMetaStore.HMSHandler.get();
+ boolean error = true;
+ PerfLogger perfLogger = PerfLogger.getPerfLogger(origConf, false);
+ perfLogger.PerfLogBegin(CLASS_NAME, method.getName());
+ try {
+ Result result = invokeInternal(proxy, method, args);
+ retryCount = result.numRetries;
+ error = false;
+ return result.result;
+ } finally {
+ StringBuffer additionalInfo = new StringBuffer();
+ additionalInfo.append("threadId=").append(threadId).append(" retryCount=").append(retryCount)
+ .append(" error=").append(error);
+ perfLogger.PerfLogEnd(CLASS_NAME, method.getName(), additionalInfo.toString());
+ }
+ }
+
+ public Result invokeInternal(final Object proxy, final Method method, final Object[] args) throws Throwable {
boolean gotNewConnectUrl = false;
boolean reloadConf = HiveConf.getBoolVar(origConf,
@@ -106,7 +137,7 @@ public class RetryingHMSHandler implements InvocationHandler {
Deadline.startTimer(method.getName());
Object object = method.invoke(baseHandler, args);
Deadline.stopTimer();
- return object;
+ return new Result(object, retryCount);
} catch (javax.jdo.JDOException e) {
caughtException = e;
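
The invoke wrapper above follows a general java.lang.reflect dynamic-proxy pattern: time every proxied call in a finally block and record whether it ended in error. A self-contained sketch of just that pattern (TimedHandler is hypothetical, not Hive code, and unlike RetryingHMSHandler it does no retries):

import java.lang.reflect.InvocationHandler;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.lang.reflect.Proxy;

final class TimedHandler implements InvocationHandler {
  private final Object delegate;

  private TimedHandler(Object delegate) { this.delegate = delegate; }

  @Override
  public Object invoke(Object proxy, Method method, Object[] args) throws Throwable {
    long start = System.currentTimeMillis();
    boolean error = true;
    try {
      Object result = method.invoke(delegate, args);
      error = false;
      return result;
    } catch (InvocationTargetException e) {
      throw e.getCause(); // surface the delegate's real exception
    } finally {
      // Analogous to the additionalInfo string built in the finally block above.
      System.out.println("<PERFLOG method=" + method.getName()
          + " durationMs=" + (System.currentTimeMillis() - start)
          + " error=" + error + ">");
    }
  }

  @SuppressWarnings("unchecked")
  static <T> T wrap(Class<T> iface, T impl) {
    return (T) Proxy.newProxyInstance(iface.getClassLoader(),
        new Class<?>[] { iface }, new TimedHandler(impl));
  }
}

For example, TimedHandler.wrap(List.class, new ArrayList<String>()) yields a List whose every method call is timed and logged.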
http://git-wip-us.apache.org/repos/asf/hive/blob/21861592/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
index 4030075..43159c6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
@@ -364,7 +364,7 @@ public class Driver implements CommandProcessor {
* @return 0 for ok
*/
public int compile(String command, boolean resetTaskIds) {
- PerfLogger perfLogger = PerfLogger.getPerfLogger();
+ PerfLogger perfLogger = SessionState.getPerfLogger();
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.COMPILE);
//holder for parent command type/string when executing reentrant queries
@@ -953,7 +953,7 @@ public class Driver implements CommandProcessor {
* @param startTxnImplicitly in AC=false, the 1st DML starts a txn
**/
private int acquireLocksAndOpenTxn(boolean startTxnImplicitly) {
- PerfLogger perfLogger = PerfLogger.getPerfLogger();
+ PerfLogger perfLogger = SessionState.getPerfLogger();
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.ACQUIRE_READ_WRITE_LOCKS);
SessionState ss = SessionState.get();
@@ -1039,7 +1039,7 @@ public class Driver implements CommandProcessor {
**/
private void releaseLocksAndCommitOrRollback(List<HiveLock> hiveLocks, boolean commit)
throws LockException {
- PerfLogger perfLogger = PerfLogger.getPerfLogger();
+ PerfLogger perfLogger = SessionState.getPerfLogger();
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.RELEASE_LOCKS);
SessionState ss = SessionState.get();
@@ -1194,7 +1194,7 @@ public class Driver implements CommandProcessor {
}
// Reset the perf logger
- PerfLogger perfLogger = PerfLogger.getPerfLogger(true);
+ PerfLogger perfLogger = SessionState.getPerfLogger(true);
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.DRIVER_RUN);
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TIME_TO_SUBMIT);
@@ -1282,7 +1282,6 @@ public class Driver implements CommandProcessor {
}
perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.DRIVER_RUN);
- perfLogger.close(LOG, plan);
// Take all the driver run hooks and post-execute them.
try {
@@ -1406,7 +1405,7 @@ public class Driver implements CommandProcessor {
}
public int execute() throws CommandNeedRetryException {
- PerfLogger perfLogger = PerfLogger.getPerfLogger();
+ PerfLogger perfLogger = SessionState.getPerfLogger();
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.DRIVER_EXECUTE);
boolean noName = StringUtils.isEmpty(conf.getVar(HiveConf.ConfVars.HADOOPJOBNAME));
int maxlen = conf.getIntVar(HiveConf.ConfVars.HIVEJOBNAMELENGTH);
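
Every call site in this commit swaps the old static PerfLogger.getPerfLogger() for SessionState.getPerfLogger(), so the session controls which conf (and hence which PerfLogger subclass) is used. The committed SessionState change is not shown in this excerpt; a hedged sketch of what such an accessor plausibly delegates to:

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.log.PerfLogger;
import org.apache.hadoop.hive.ql.session.SessionState;

final class SessionPerfLoggerSketch {
  static PerfLogger getPerfLogger(boolean resetPerfLogger) {
    // Resolve the current session's conf if a session exists, else fall back to null.
    SessionState ss = SessionState.get();
    HiveConf conf = (ss == null) ? null : ss.getConf();
    return PerfLogger.getPerfLogger(conf, resetPerfLogger);
  }
}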
http://git-wip-us.apache.org/repos/asf/hive/blob/21861592/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
index a9159a5..02d61eb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
@@ -60,6 +60,7 @@ import org.apache.hadoop.hive.ql.plan.JoinDesc;
import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.plan.api.OperatorType;
+import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeUtils;
@@ -76,7 +77,7 @@ public class MapJoinOperator extends AbstractMapJoinOperator<MapJoinDesc> implem
private static final long serialVersionUID = 1L;
private static final Log LOG = LogFactory.getLog(MapJoinOperator.class.getName());
private static final String CLASS_NAME = MapJoinOperator.class.getName();
- private final PerfLogger perfLogger = PerfLogger.getPerfLogger();
+ private final PerfLogger perfLogger = SessionState.getPerfLogger();
private transient String cacheKey;
private transient ObjectCache cache;
http://git-wip-us.apache.org/repos/asf/hive/blob/21861592/ql/src/java/org/apache/hadoop/hive/ql/exec/SparkHashTableSinkOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/SparkHashTableSinkOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/SparkHashTableSinkOperator.java
index aa8808a..af368eb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/SparkHashTableSinkOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/SparkHashTableSinkOperator.java
@@ -41,6 +41,7 @@ import org.apache.hadoop.hive.ql.plan.MapredLocalWork;
import org.apache.hadoop.hive.ql.plan.SparkBucketMapJoinContext;
import org.apache.hadoop.hive.ql.plan.SparkHashTableSinkDesc;
import org.apache.hadoop.hive.ql.plan.api.OperatorType;
+import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
public class SparkHashTableSinkOperator
@@ -48,7 +49,7 @@ public class SparkHashTableSinkOperator
private static final int MIN_REPLICATION = 10;
private static final long serialVersionUID = 1L;
private final String CLASS_NAME = this.getClass().getName();
- private final PerfLogger perfLogger = PerfLogger.getPerfLogger();
+ private final PerfLogger perfLogger = SessionState.getPerfLogger();
protected static final Log LOG = LogFactory.getLog(SparkHashTableSinkOperator.class.getName());
private final HashTableSinkOperator htsOperator;
http://git-wip-us.apache.org/repos/asf/hive/blob/21861592/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index ca86301..bcf85a4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -82,7 +82,6 @@ import java.util.zip.DeflaterOutputStream;
import java.util.zip.InflaterInputStream;
import org.antlr.runtime.CommonToken;
-import org.apache.calcite.util.ChunkList;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.WordUtils;
@@ -936,7 +935,7 @@ public final class Utilities {
}
private static void serializePlan(Object plan, OutputStream out, Configuration conf, boolean cloningPlan) {
- PerfLogger perfLogger = PerfLogger.getPerfLogger();
+ PerfLogger perfLogger = SessionState.getPerfLogger();
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SERIALIZE_PLAN);
String serializationType = conf.get(HiveConf.ConfVars.PLAN_SERIALIZATION.varname, "kryo");
LOG.info("Serializing " + plan.getClass().getSimpleName() + " via " + serializationType);
@@ -962,7 +961,7 @@ public final class Utilities {
}
private static <T> T deserializePlan(InputStream in, Class<T> planClass, Configuration conf, boolean cloningPlan) {
- PerfLogger perfLogger = PerfLogger.getPerfLogger();
+ PerfLogger perfLogger = SessionState.getPerfLogger();
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.DESERIALIZE_PLAN);
T plan;
String serializationType = conf.get(HiveConf.ConfVars.PLAN_SERIALIZATION.varname, "kryo");
@@ -997,7 +996,7 @@ public final class Utilities {
*/
public static MapredWork clonePlan(MapredWork plan) {
// TODO: need proper clone. Meanwhile, let's at least keep this horror in one place
- PerfLogger perfLogger = PerfLogger.getPerfLogger();
+ PerfLogger perfLogger = SessionState.getPerfLogger();
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.CLONE_PLAN);
ByteArrayOutputStream baos = new ByteArrayOutputStream(4096);
Configuration conf = new HiveConf();
@@ -1014,7 +1013,7 @@ public final class Utilities {
* @return The clone.
*/
public static BaseWork cloneBaseWork(BaseWork plan) {
- PerfLogger perfLogger = PerfLogger.getPerfLogger();
+ PerfLogger perfLogger = SessionState.getPerfLogger();
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.CLONE_PLAN);
ByteArrayOutputStream baos = new ByteArrayOutputStream(4096);
Configuration conf = new HiveConf();
@@ -2530,7 +2529,7 @@ public final class Utilities {
*/
public static ContentSummary getInputSummary(final Context ctx, MapWork work, PathFilter filter)
throws IOException {
- PerfLogger perfLogger = PerfLogger.getPerfLogger();
+ PerfLogger perfLogger = SessionState.getPerfLogger();
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.INPUT_SUMMARY);
long[] summary = {0, 0, 0};
http://git-wip-us.apache.org/repos/asf/hive/blob/21861592/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlan.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlan.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlan.java
index daf9698..9906118 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlan.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlan.java
@@ -30,6 +30,7 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.io.HiveKey;
import org.apache.hadoop.hive.ql.log.PerfLogger;
+import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.io.BytesWritable;
import org.apache.spark.api.java.JavaPairRDD;
@@ -39,7 +40,7 @@ import com.google.common.base.Preconditions;
public class SparkPlan {
private static final String CLASS_NAME = SparkPlan.class.getName();
private static final Log LOG = LogFactory.getLog(SparkPlan.class);
- private final PerfLogger perfLogger = PerfLogger.getPerfLogger();
+ private final PerfLogger perfLogger = SessionState.getPerfLogger();
private final Set<SparkTran> rootTrans = new HashSet<SparkTran>();
private final Set<SparkTran> leafTrans = new HashSet<SparkTran>();
http://git-wip-us.apache.org/repos/asf/hive/blob/21861592/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java
index 762ce7d..4c3ee4b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java
@@ -33,6 +33,7 @@ import org.apache.hadoop.hive.ql.io.merge.MergeFileMapper;
import org.apache.hadoop.hive.ql.io.merge.MergeFileOutputFormat;
import org.apache.hadoop.hive.ql.io.merge.MergeFileWork;
import org.apache.hadoop.hive.ql.log.PerfLogger;
+import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.Partitioner;
import org.apache.hadoop.hive.conf.HiveConf;
@@ -61,7 +62,7 @@ import org.apache.spark.api.java.JavaSparkContext;
@SuppressWarnings("rawtypes")
public class SparkPlanGenerator {
private static final String CLASS_NAME = SparkPlanGenerator.class.getName();
- private final PerfLogger perfLogger = PerfLogger.getPerfLogger();
+ private final PerfLogger perfLogger = SessionState.getPerfLogger();
private static final Log LOG = LogFactory.getLog(SparkPlanGenerator.class);
private JavaSparkContext sc;
http://git-wip-us.apache.org/repos/asf/hive/blob/21861592/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkRecordHandler.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkRecordHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkRecordHandler.java
index 97b3471..3d37753 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkRecordHandler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkRecordHandler.java
@@ -22,6 +22,7 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.MapredContext;
import org.apache.hadoop.hive.ql.log.PerfLogger;
+import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
@@ -35,7 +36,7 @@ import java.util.Iterator;
public abstract class SparkRecordHandler {
protected static final String CLASS_NAME = SparkRecordHandler.class.getName();
- protected final PerfLogger perfLogger = PerfLogger.getPerfLogger();
+ protected final PerfLogger perfLogger = SessionState.getPerfLogger();
private static final Log LOG = LogFactory.getLog(SparkRecordHandler.class);
// used to log memory usage periodically
http://git-wip-us.apache.org/repos/asf/hive/blob/21861592/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
index a36dc6e..eac812f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
@@ -80,7 +80,7 @@ public class SparkTask extends Task<SparkWork> {
private static final String CLASS_NAME = SparkTask.class.getName();
private static final Log LOG = LogFactory.getLog(CLASS_NAME);
private static final LogHelper console = new LogHelper(LOG);
- private final PerfLogger perfLogger = PerfLogger.getPerfLogger();
+ private final PerfLogger perfLogger = SessionState.getPerfLogger();
private static final long serialVersionUID = 1L;
private SparkCounters sparkCounters;
http://git-wip-us.apache.org/repos/asf/hive/blob/21861592/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/SparkJobMonitor.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/SparkJobMonitor.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/SparkJobMonitor.java
index 3fceeb0..6fc20c7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/SparkJobMonitor.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/SparkJobMonitor.java
@@ -38,7 +38,7 @@ abstract class SparkJobMonitor {
protected static final String CLASS_NAME = SparkJobMonitor.class.getName();
protected static final Log LOG = LogFactory.getLog(CLASS_NAME);
protected static SessionState.LogHelper console = new SessionState.LogHelper(LOG);
- protected final PerfLogger perfLogger = PerfLogger.getPerfLogger();
+ protected final PerfLogger perfLogger = SessionState.getPerfLogger();
protected final int checkInterval = 1000;
protected final long monitorTimeoutInteval;
http://git-wip-us.apache.org/repos/asf/hive/blob/21861592/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/RecordProcessor.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/RecordProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/RecordProcessor.java
index c563d9d..87fded1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/RecordProcessor.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/RecordProcessor.java
@@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.log.PerfLogger;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.BaseWork;
import org.apache.hadoop.hive.ql.plan.MapWork;
+import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.tez.mapreduce.processor.MRTaskReporter;
@@ -64,7 +65,7 @@ public abstract class RecordProcessor {
protected boolean isLogTraceEnabled = false;
protected MRTaskReporter reporter;
- protected PerfLogger perfLogger = PerfLogger.getPerfLogger();
+ protected PerfLogger perfLogger = SessionState.getPerfLogger();
protected String CLASS_NAME = RecordProcessor.class.getName();
public RecordProcessor(JobConf jConf, ProcessorContext processorContext) {
http://git-wip-us.apache.org/repos/asf/hive/blob/21861592/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java
index d649672..91ba2bb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java
@@ -51,7 +51,6 @@ import org.apache.tez.runtime.api.LogicalInput;
import org.apache.tez.runtime.api.LogicalOutput;
import org.apache.tez.runtime.api.ProcessorContext;
import org.apache.tez.runtime.api.Reader;
-import org.apache.tez.runtime.library.api.KeyValuesReader;
/**
* Process input from tez LogicalInput and write output - for a map plan
http://git-wip-us.apache.org/repos/asf/hive/blob/21861592/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java
index 89f7572..1f2f9f9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java
@@ -40,6 +40,7 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterF
import org.apache.hadoop.hive.ql.log.PerfLogger;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.TableDesc;
+import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
@@ -115,7 +116,7 @@ public class ReduceRecordSource implements RecordSource {
private ObjectInspector valueObjectInspector;
- private final PerfLogger perfLogger = PerfLogger.getPerfLogger();
+ private final PerfLogger perfLogger = SessionState.getPerfLogger();
private Iterable<Object> valueWritables;
http://git-wip-us.apache.org/repos/asf/hive/blob/21861592/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java
index 1e1603b..754c332 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java
@@ -61,8 +61,6 @@ import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
-import jline.TerminalFactory;
-
/**
* TezJobMonitor keeps track of a tez job while it's being executed. It will
* print status to the console and retrieve final status of the job after
@@ -100,7 +98,7 @@ public class TezJobMonitor {
private String separator;
private transient LogHelper console;
- private final PerfLogger perfLogger = PerfLogger.getPerfLogger();
+ private final PerfLogger perfLogger = SessionState.getPerfLogger();
private final int checkInterval = 200;
private final int maxRetryInterval = 2500;
private final int printInterval = 3000;
http://git-wip-us.apache.org/repos/asf/hive/blob/21861592/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezProcessor.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezProcessor.java
index 39f9db6..fad79f8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezProcessor.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezProcessor.java
@@ -26,6 +26,7 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.log.PerfLogger;
+import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.util.StringUtils;
@@ -54,7 +55,7 @@ public class TezProcessor extends AbstractLogicalIOProcessor {
protected JobConf jobConf;
private static final String CLASS_NAME = TezProcessor.class.getName();
- private final PerfLogger perfLogger = PerfLogger.getPerfLogger();
+ private final PerfLogger perfLogger = SessionState.getPerfLogger();
protected ProcessorContext processorContext;
http://git-wip-us.apache.org/repos/asf/hive/blob/21861592/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
index 4a1a712..2d740ed 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
@@ -77,7 +77,7 @@ import org.json.JSONObject;
public class TezTask extends Task<TezWork> {
private static final String CLASS_NAME = TezTask.class.getName();
- private final PerfLogger perfLogger = PerfLogger.getPerfLogger();
+ private final PerfLogger perfLogger = SessionState.getPerfLogger();
private TezCounters counters;
http://git-wip-us.apache.org/repos/asf/hive/blob/21861592/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
index 11740d1..53bc1fa 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
@@ -25,11 +25,8 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
-import java.util.Iterator;
-import java.util.LinkedList;
import java.util.List;
import java.util.Map;
-import java.util.Queue;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
@@ -39,11 +36,9 @@ import java.util.concurrent.Future;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
-import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.Utilities;
@@ -52,19 +47,18 @@ import org.apache.hadoop.hive.ql.parse.SplitSample;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
+import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.shims.HadoopShims.CombineFileInputFormatShim;
import org.apache.hadoop.hive.shims.HadoopShimsSecure.InputSplitShim;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.lib.CombineFileSplit;
@@ -462,7 +456,7 @@ public class CombineHiveInputFormat<K extends WritableComparable, V extends Writ
*/
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
- PerfLogger perfLogger = PerfLogger.getPerfLogger();
+ PerfLogger perfLogger = SessionState.getPerfLogger();
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.GET_SPLITS);
init(job);
http://git-wip-us.apache.org/repos/asf/hive/blob/21861592/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
index fd16b35..1ac1669 100755
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
@@ -37,7 +37,6 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil;
-import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.ql.exec.spark.SparkDynamicPartitionPruner;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.exec.Operator;
@@ -46,10 +45,10 @@ import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.log.PerfLogger;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.MapWork;
-import org.apache.hadoop.hive.ql.plan.MergeJoinWork;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
+import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.io.Writable;
@@ -351,7 +350,7 @@ public class HiveInputFormat<K extends WritableComparable, V extends Writable>
}
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
- PerfLogger perfLogger = PerfLogger.getPerfLogger();
+ PerfLogger perfLogger = SessionState.getPerfLogger();
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.GET_SPLITS);
init(job);
Path[] dirs = getInputPaths(job);
http://git-wip-us.apache.org/repos/asf/hive/blob/21861592/ql/src/java/org/apache/hadoop/hive/ql/log/PerfLogger.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/log/PerfLogger.java b/ql/src/java/org/apache/hadoop/hive/ql/log/PerfLogger.java
deleted file mode 100644
index 20ca195..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/log/PerfLogger.java
+++ /dev/null
@@ -1,195 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.log;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hive.ql.QueryPlan;
-import org.apache.hadoop.hive.ql.session.SessionState;
-
-import java.util.HashMap;
-import java.util.Map;
-
-/**
- * PerfLogger.
- *
- * Can be used to measure and log the time spent by a piece of code.
- */
-public class PerfLogger {
- public static final String ACQUIRE_READ_WRITE_LOCKS = "acquireReadWriteLocks";
- public static final String COMPILE = "compile";
- public static final String PARSE = "parse";
- public static final String ANALYZE = "semanticAnalyze";
- public static final String DO_AUTHORIZATION = "doAuthorization";
- public static final String DRIVER_EXECUTE = "Driver.execute";
- public static final String INPUT_SUMMARY = "getInputSummary";
- public static final String GET_SPLITS = "getSplits";
- public static final String RUN_TASKS = "runTasks";
- public static final String SERIALIZE_PLAN = "serializePlan";
- public static final String DESERIALIZE_PLAN = "deserializePlan";
- public static final String CLONE_PLAN = "clonePlan";
- public static final String TASK = "task.";
- public static final String RELEASE_LOCKS = "releaseLocks";
- public static final String PRUNE_LISTING = "prune-listing";
- public static final String PARTITION_RETRIEVING = "partition-retrieving";
- public static final String PRE_HOOK = "PreHook.";
- public static final String POST_HOOK = "PostHook.";
- public static final String FAILURE_HOOK = "FailureHook.";
- public static final String DRIVER_RUN = "Driver.run";
- public static final String TIME_TO_SUBMIT = "TimeToSubmit";
- public static final String TEZ_SUBMIT_TO_RUNNING = "TezSubmitToRunningDag";
- public static final String TEZ_BUILD_DAG = "TezBuildDag";
- public static final String TEZ_SUBMIT_DAG = "TezSubmitDag";
- public static final String TEZ_RUN_DAG = "TezRunDag";
- public static final String TEZ_CREATE_VERTEX = "TezCreateVertex.";
- public static final String TEZ_RUN_VERTEX = "TezRunVertex.";
- public static final String TEZ_INITIALIZE_PROCESSOR = "TezInitializeProcessor";
- public static final String TEZ_RUN_PROCESSOR = "TezRunProcessor";
- public static final String TEZ_INIT_OPERATORS = "TezInitializeOperators";
- public static final String LOAD_HASHTABLE = "LoadHashtable";
-
- public static final String SPARK_SUBMIT_TO_RUNNING = "SparkSubmitToRunning";
- public static final String SPARK_BUILD_PLAN = "SparkBuildPlan";
- public static final String SPARK_BUILD_RDD_GRAPH = "SparkBuildRDDGraph";
- public static final String SPARK_SUBMIT_JOB = "SparkSubmitJob";
- public static final String SPARK_RUN_JOB = "SparkRunJob";
- public static final String SPARK_CREATE_TRAN = "SparkCreateTran.";
- public static final String SPARK_RUN_STAGE = "SparkRunStage.";
- public static final String SPARK_INIT_OPERATORS = "SparkInitializeOperators";
- public static final String SPARK_GENERATE_TASK_TREE = "SparkGenerateTaskTree";
- public static final String SPARK_OPTIMIZE_OPERATOR_TREE = "SparkOptimizeOperatorTree";
- public static final String SPARK_OPTIMIZE_TASK_TREE = "SparkOptimizeTaskTree";
- public static final String SPARK_FLUSH_HASHTABLE = "SparkFlushHashTable.";
-
- protected static final ThreadLocal<PerfLogger> perfLogger = new ThreadLocal<PerfLogger>();
-
- protected final Map<String, Long> startTimes = new HashMap<String, Long>();
- protected final Map<String, Long> endTimes = new HashMap<String, Long>();
-
- static final private Log LOG = LogFactory.getLog(PerfLogger.class.getName());
-
- public PerfLogger() {
- // Use getPerfLogger to get an instance of PerfLogger
- }
-
- public static PerfLogger getPerfLogger() {
- return getPerfLogger(false);
- }
-
- /**
- * Call this function to get an instance of PerfLogger.
- *
- * Use resetPerfLogger to require a new instance. Useful at the beginning of execution.
- *
- * @return Session perflogger if there's a sessionstate, otherwise return the thread local instance
- */
- public static PerfLogger getPerfLogger(boolean resetPerfLogger) {
- if (SessionState.get() == null) {
- if (perfLogger.get() == null || resetPerfLogger) {
- perfLogger.set(new PerfLogger());
- }
- return perfLogger.get();
- } else {
- return SessionState.get().getPerfLogger(resetPerfLogger);
- }
- }
-
- /**
- * Call this function when you start to measure time spent by a piece of code.
- * @param _log the logging object to be used.
- * @param method method or ID that identifies this perf log element.
- */
- public void PerfLogBegin(String callerName, String method) {
- long startTime = System.currentTimeMillis();
- LOG.info("<PERFLOG method=" + method + " from=" + callerName + ">");
- startTimes.put(method, new Long(startTime));
- }
-
- /**
- * Call this function in correspondence of PerfLogBegin to mark the end of the measurement.
- * @param _log
- * @param method
- * @return long duration the difference between now and startTime, or -1 if startTime is null
- */
- public long PerfLogEnd(String callerName, String method) {
- Long startTime = startTimes.get(method);
- long endTime = System.currentTimeMillis();
- long duration = -1;
-
- endTimes.put(method, new Long(endTime));
-
- StringBuilder sb = new StringBuilder("</PERFLOG method=").append(method);
- if (startTime != null) {
- sb.append(" start=").append(startTime);
- }
- sb.append(" end=").append(endTime);
- if (startTime != null) {
- duration = endTime - startTime.longValue();
- sb.append(" duration=").append(duration);
- }
- sb.append(" from=").append(callerName).append(">");
- LOG.info(sb);
-
- return duration;
- }
-
- /**
- * Call this function at the end of processing a query (any time after the last call to PerfLogEnd
- * for a given query) to run any cleanup/final steps that need to be run
- * @param _log
- */
- public void close(Log _log, QueryPlan queryPlan) {
-
- }
-
- public Long getStartTime(String method) {
- long startTime = 0L;
-
- if (startTimes.containsKey(method)) {
- startTime = startTimes.get(method);
- }
- return startTime;
- }
-
- public Long getEndTime(String method) {
- long endTime = 0L;
-
- if (endTimes.containsKey(method)) {
- endTime = endTimes.get(method);
- }
- return endTime;
- }
-
- public boolean startTimeHasMethod(String method) {
- return startTimes.containsKey(method);
- }
-
- public boolean endTimeHasMethod(String method) {
- return endTimes.containsKey(method);
- }
-
- public Long getDuration(String method) {
- long duration = 0;
- if (startTimes.containsKey(method) && endTimes.containsKey(method)) {
- duration = endTimes.get(method) - startTimes.get(method);
- }
- return duration;
- }
-
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/21861592/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
index d264559..8eab603 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
@@ -53,6 +53,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
@@ -400,7 +401,7 @@ public class PartitionPruner implements Transform {
// Now filter.
List<Partition> partitions = new ArrayList<Partition>();
boolean hasUnknownPartitions = false;
- PerfLogger perfLogger = PerfLogger.getPerfLogger();
+ PerfLogger perfLogger = SessionState.getPerfLogger();
if (!doEvalClientSide) {
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
try {
@@ -432,7 +433,7 @@ public class PartitionPruner implements Transform {
}
private static Set<Partition> getAllPartitions(Table tab) throws HiveException {
- PerfLogger perfLogger = PerfLogger.getPerfLogger();
+ PerfLogger perfLogger = SessionState.getPerfLogger();
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
Set<Partition> result = Hive.get().getAllPartitionsOf(tab);
perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
@@ -450,7 +451,7 @@ public class PartitionPruner implements Transform {
*/
static private boolean pruneBySequentialScan(Table tab, List<Partition> partitions,
ExprNodeGenericFuncDesc prunerExpr, HiveConf conf) throws HiveException, MetaException {
- PerfLogger perfLogger = PerfLogger.getPerfLogger();
+ PerfLogger perfLogger = SessionState.getPerfLogger();
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PRUNE_LISTING);
List<String> partNames = Hive.get().getPartitionNames(
http://git-wip-us.apache.org/repos/asf/hive/blob/21861592/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
index 27a1d99..9ec7fd6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
@@ -87,6 +87,7 @@ import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.ql.plan.MoveWork;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.SparkWork;
+import org.apache.hadoop.hive.ql.session.SessionState;
/**
* SparkCompiler translates the operator plan into SparkTasks.
@@ -95,7 +96,7 @@ import org.apache.hadoop.hive.ql.plan.SparkWork;
*/
public class SparkCompiler extends TaskCompiler {
private static final String CLASS_NAME = SparkCompiler.class.getName();
- private static final PerfLogger PERF_LOGGER = PerfLogger.getPerfLogger();
+ private static final PerfLogger PERF_LOGGER = SessionState.getPerfLogger();
private static final Log LOGGER = LogFactory.getLog(SparkCompiler.class);
public SparkCompiler() {
http://git-wip-us.apache.org/repos/asf/hive/blob/21861592/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
index 7ed8e5f..5f528167 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
@@ -104,6 +104,7 @@ public class SessionState {
private static final String LOCAL_SESSION_PATH_KEY = "_hive.local.session.path";
private static final String HDFS_SESSION_PATH_KEY = "_hive.hdfs.session.path";
private static final String TMP_TABLE_SPACE_KEY = "_hive.tmp_table_space";
+
private final Map<String, Map<String, Table>> tempTables = new HashMap<String, Map<String, Table>>();
private final Map<String, Map<String, ColumnStatisticsObj>> tempTableColStats =
new HashMap<String, Map<String, ColumnStatisticsObj>>();
@@ -596,7 +597,7 @@ public class SessionState {
* Create a given path if it doesn't exist.
*
* @param conf
- * @param pathString
+ * @param path
* @param permission
* @param isLocal
* @param isCleanUp
@@ -1523,25 +1524,37 @@ public class SessionState {
}
/**
- * @param resetPerfLogger
* @return Tries to return an instance of the class whose name is configured in
* hive.exec.perf.logger, but if it can't it just returns an instance of
* the base PerfLogger class
+ *
+ */
+ public static PerfLogger getPerfLogger() {
+ return getPerfLogger(false);
+ }
+ /**
+ * @param resetPerfLogger
+ * @return Tries to return an instance of the class whose name is configured in
+ * hive.exec.perf.logger, but if it can't it just returns an instance of
+ * the base PerfLogger class
+ *
*/
- public PerfLogger getPerfLogger(boolean resetPerfLogger) {
- if ((perfLogger == null) || resetPerfLogger) {
- try {
- perfLogger = (PerfLogger) ReflectionUtils.newInstance(conf.getClassByName(
- conf.getVar(ConfVars.HIVE_PERF_LOGGER)), conf);
- } catch (ClassNotFoundException e) {
- LOG.error("Performance Logger Class not found:" + e.getMessage());
- perfLogger = new PerfLogger();
- }
+ public static PerfLogger getPerfLogger(boolean resetPerfLogger) {
+ SessionState ss = get();
+ if (ss == null) {
+ return PerfLogger.getPerfLogger(null, resetPerfLogger);
+ } else if (ss.perfLogger != null && !resetPerfLogger) {
+ return ss.perfLogger;
+ } else {
+ PerfLogger perfLogger = PerfLogger.getPerfLogger(ss.getConf(), resetPerfLogger);
+ ss.perfLogger = perfLogger;
+ return perfLogger;
}
- return perfLogger;
}
+
+
public TezSessionState getTezSession() {
return tezSessionState;
}
[09/50] [abbrv] hive git commit: HIVE-11695 : If a user has no permission to create a LOCAL DIRECTORY, the HQL does not throw any exception and fails silently. (WangMeng via Ashutosh Chauhan)
Posted by xu...@apache.org.
HIVE-11695 : If a user has no permission to create a LOCAL DIRECTORY, the HQL does not throw any exception and fails silently. (WangMeng via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b934a804
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b934a804
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b934a804
Branch: refs/heads/beeline-cli
Commit: b934a804a75b7e01cf5ff043db4cc54c82d91641
Parents: c7de9b9
Author: WangMeng <me...@qiyi.com>
Authored: Wed Sep 16 21:00:00 2015 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Fri Sep 18 10:10:47 2015 -0700
----------------------------------------------------------------------
ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/b934a804/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
index 6a19cc3..7e257e5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
@@ -132,7 +132,10 @@ public class MoveTask extends Task<MoveWork> implements Serializable {
try {
// create the destination if it does not exist
if (!dstFs.exists(targetPath)) {
- FileUtils.mkdir(dstFs, targetPath, false, conf);
+ if (!FileUtils.mkdir(dstFs, targetPath, false, conf)) {
+ throw new HiveException(
+ "Failed to create local target directory for copy:" + targetPath);
+ }
}
} catch (IOException e) {
throw new HiveException("Unable to create target directory for copy" + targetPath, e);
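The fix works because FileUtils.mkdir returns false, rather than throwing, when the directory cannot be created (for example, for lack of permissions). A hedged sketch of the same check-and-throw pattern as a standalone helper; the helper and class names are hypothetical, while the other names mirror the hunk above:

    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.common.FileUtils;
    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.ql.metadata.HiveException;

    final class LocalTargetDirSketch {
      private LocalTargetDirSketch() {
      }

      // Surface a failed mkdir as a HiveException so that statements like
      // INSERT OVERWRITE LOCAL DIRECTORY fail fast instead of silently.
      static void ensureTargetDir(FileSystem dstFs, Path targetPath, HiveConf conf)
          throws Exception {
        if (!dstFs.exists(targetPath)) {
          if (!FileUtils.mkdir(dstFs, targetPath, false, conf)) {
            throw new HiveException(
                "Failed to create local target directory for copy: " + targetPath);
          }
        }
      }
    }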
[06/50] [abbrv] hive git commit: HIVE-11706 : Implement show create database (Navis via Ashutosh Chauhan)
Posted by xu...@apache.org.
HIVE-11706 : Implement show create database (Navis via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3cf7bd9e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3cf7bd9e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3cf7bd9e
Branch: refs/heads/beeline-cli
Commit: 3cf7bd9e871e7f258d764b2d988cabfb356b6c71
Parents: 8da2ed3
Author: Navis Ryu <na...@apache.org>
Authored: Tue Sep 8 22:01:00 2015 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Thu Sep 17 13:35:17 2015 -0700
----------------------------------------------------------------------
.../org/apache/hadoop/hive/ql/exec/DDLTask.java | 269 ++++++++-----------
.../hive/ql/parse/DDLSemanticAnalyzer.java | 17 ++
.../apache/hadoop/hive/ql/parse/HiveParser.g | 7 +-
.../hive/ql/parse/SemanticAnalyzerFactory.java | 2 +
.../org/apache/hadoop/hive/ql/plan/DDLWork.java | 21 ++
.../hadoop/hive/ql/plan/HiveOperation.java | 1 +
.../hive/ql/plan/ShowCreateDatabaseDesc.java | 94 +++++++
.../authorization/plugin/HiveOperationType.java | 1 +
.../plugin/sqlstd/Operation2Privilege.java | 2 +
.../clientpositive/show_create_database.q | 3 +
.../clientpositive/show_create_database.q.out | 19 ++
.../tez/show_create_database.q.out | 19 ++
12 files changed, 296 insertions(+), 159 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/3cf7bd9e/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
index 734742c..210736b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
@@ -163,6 +163,7 @@ import org.apache.hadoop.hive.ql.plan.RoleDDLDesc;
import org.apache.hadoop.hive.ql.plan.ShowColumnsDesc;
import org.apache.hadoop.hive.ql.plan.ShowCompactionsDesc;
import org.apache.hadoop.hive.ql.plan.ShowConfDesc;
+import org.apache.hadoop.hive.ql.plan.ShowCreateDatabaseDesc;
import org.apache.hadoop.hive.ql.plan.ShowCreateTableDesc;
import org.apache.hadoop.hive.ql.plan.ShowDatabasesDesc;
import org.apache.hadoop.hive.ql.plan.ShowFunctionsDesc;
@@ -440,6 +441,11 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
return showPartitions(db, showParts);
}
+ ShowCreateDatabaseDesc showCreateDb = work.getShowCreateDbDesc();
+ if (showCreateDb != null) {
+ return showCreateDatabase(db, showCreateDb);
+ }
+
ShowCreateTableDesc showCreateTbl = work.getShowCreateTblDesc();
if (showCreateTbl != null) {
return showCreateTable(db, showCreateTbl);
@@ -545,9 +551,23 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
return 0;
}
- private DataOutputStream getOutputStream(Path outputFile) throws Exception {
- FileSystem fs = outputFile.getFileSystem(conf);
- return fs.create(outputFile);
+ private DataOutputStream getOutputStream(String resFile) throws HiveException {
+ try {
+ return getOutputStream(new Path(resFile));
+ } catch (HiveException e) {
+ throw e;
+ } catch (Exception e) {
+ throw new HiveException(e);
+ }
+ }
+
+ private DataOutputStream getOutputStream(Path outputFile) throws HiveException {
+ try {
+ FileSystem fs = outputFile.getFileSystem(conf);
+ return fs.create(outputFile);
+ } catch (Exception e) {
+ throw new HiveException(e);
+ }
}
/**
@@ -1891,16 +1911,9 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
}
// write the results in the file
- DataOutputStream outStream = null;
+ DataOutputStream outStream = getOutputStream(showParts.getResFile());
try {
- Path resFile = new Path(showParts.getResFile());
- FileSystem fs = resFile.getFileSystem(conf);
- outStream = fs.create(resFile);
-
formatter.showTablePartitions(outStream, parts);
-
- outStream.close();
- outStream = null;
} catch (Exception e) {
throw new HiveException(e, ErrorMsg.GENERIC_ERROR, "show partitions for table " + tabName);
} finally {
@@ -1918,6 +1931,40 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
"NULL DEFINED AS"
};
+ private int showCreateDatabase(Hive db, ShowCreateDatabaseDesc showCreateDb) throws HiveException {
+ DataOutputStream outStream = getOutputStream(showCreateDb.getResFile());
+ try {
+ String dbName = showCreateDb.getDatabaseName();
+ return showCreateDatabase(db, outStream, dbName);
+ } catch (Exception e) {
+ throw new HiveException(e);
+ } finally {
+ IOUtils.closeStream(outStream);
+ }
+ }
+
+ private int showCreateDatabase(Hive db, DataOutputStream outStream, String databaseName)
+ throws Exception {
+ Database database = db.getDatabase(databaseName);
+
+ StringBuilder createDb_str = new StringBuilder();
+ createDb_str.append("CREATE DATABASE `").append(database.getName()).append("`\n");
+ if (database.getDescription() != null) {
+ createDb_str.append("COMMENT\n '");
+ createDb_str.append(escapeHiveCommand(database.getDescription())).append("'\n");
+ }
+ createDb_str.append("LOCATION\n '");
+ createDb_str.append(database.getLocationUri()).append("'\n");
+ String propertiesToString = propertiesToString(database.getParameters(), null);
+ if (!propertiesToString.isEmpty()) {
+ createDb_str.append("WITH DBPROPERTIES (\n");
+ createDb_str.append(propertiesToString).append(")\n");
+ }
+
+ outStream.write(createDb_str.toString().getBytes("UTF-8"));
+ return 0;
+ }
+
/**
* Write a statement of how to create a table to a file.
*
@@ -1931,6 +1978,19 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
*/
private int showCreateTable(Hive db, ShowCreateTableDesc showCreateTbl) throws HiveException {
// get the create table statement for the table and populate the output
+ DataOutputStream outStream = getOutputStream(showCreateTbl.getResFile());
+ try {
+ String tableName = showCreateTbl.getTableName();
+ return showCreateTable(db, outStream, tableName);
+ } catch (Exception e) {
+ throw new HiveException(e);
+ } finally {
+ IOUtils.closeStream(outStream);
+ }
+ }
+
+ private int showCreateTable(Hive db, DataOutputStream outStream, String tableName)
+ throws HiveException {
final String EXTERNAL = "external";
final String TEMPORARY = "temporary";
final String LIST_COLUMNS = "columns";
@@ -1943,22 +2003,14 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
boolean needsLocation = true;
StringBuilder createTab_str = new StringBuilder();
- String tableName = showCreateTbl.getTableName();
Table tbl = db.getTable(tableName, false);
- DataOutputStream outStream = null;
List<String> duplicateProps = new ArrayList<String>();
try {
- Path resFile = new Path(showCreateTbl.getResFile());
- FileSystem fs = resFile.getFileSystem(conf);
- outStream = fs.create(resFile);
-
needsLocation = doesTableNeedLocation(tbl);
if (tbl.isView()) {
String createTab_stmt = "CREATE VIEW `" + tableName + "` AS " + tbl.getViewExpandedText();
outStream.writeBytes(createTab_stmt.toString());
- outStream.close();
- outStream = null;
return 0;
}
@@ -2115,18 +2167,7 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
String tbl_location = " '" + escapeHiveCommand(sd.getLocation()) + "'";
// Table properties
- String tbl_properties = "";
- if (!tbl.getParameters().isEmpty()) {
- Map<String, String> properties = new TreeMap<String, String>(tbl.getParameters());
- List<String> realProps = new ArrayList<String>();
- for (String key : properties.keySet()) {
- if (properties.get(key) != null && !duplicateProps.contains(key)) {
- realProps.add(" '" + key + "'='" +
- escapeHiveCommand(StringEscapeUtils.escapeJava(properties.get(key))) + "'");
- }
- }
- tbl_properties += StringUtils.join(realProps, ", \n");
- }
+ String tbl_properties = propertiesToString(tbl.getParameters(), duplicateProps);
createTab_stmt.add(TEMPORARY, tbl_temp);
createTab_stmt.add(EXTERNAL, tbl_external);
@@ -2142,23 +2183,30 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
createTab_stmt.add(TBL_PROPERTIES, tbl_properties);
outStream.writeBytes(createTab_stmt.render());
- outStream.close();
- outStream = null;
- } catch (FileNotFoundException e) {
- LOG.info("show create table: " + stringifyException(e));
- return 1;
} catch (IOException e) {
LOG.info("show create table: " + stringifyException(e));
return 1;
- } catch (Exception e) {
- throw new HiveException(e);
- } finally {
- IOUtils.closeStream(outStream);
}
return 0;
}
+ private String propertiesToString(Map<String, String> props, List<String> exclude) {
+ String prop_string = "";
+ if (!props.isEmpty()) {
+ Map<String, String> properties = new TreeMap<String, String>(props);
+ List<String> realProps = new ArrayList<String>();
+ for (String key : properties.keySet()) {
+ if (properties.get(key) != null && (exclude == null || !exclude.contains(key))) {
+ realProps.add(" '" + key + "'='" +
+ escapeHiveCommand(StringEscapeUtils.escapeJava(properties.get(key))) + "'");
+ }
+ }
+ prop_string += StringUtils.join(realProps, ", \n");
+ }
+ return prop_string;
+ }
+
private boolean containsNonNull(String[] values) {
for (String value : values) {
if (value != null) {
@@ -2202,12 +2250,8 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
indexes = db.getIndexes(tbl.getDbName(), tbl.getTableName(), (short) -1);
// write the results in the file
- DataOutputStream outStream = null;
+ DataOutputStream outStream = getOutputStream(showIndexes.getResFile());
try {
- Path resFile = new Path(showIndexes.getResFile());
- FileSystem fs = resFile.getFileSystem(conf);
- outStream = fs.create(resFile);
-
if (showIndexes.isFormatted()) {
// column headers
outStream.writeBytes(MetaDataFormatUtils.getIndexColumnsHeader());
@@ -2219,10 +2263,6 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
{
outStream.writeBytes(MetaDataFormatUtils.getAllColumnsInformation(index));
}
-
- outStream.close();
- outStream = null;
-
} catch (FileNotFoundException e) {
LOG.info("show indexes: " + stringifyException(e));
throw new HiveException(e.toString());
@@ -2259,15 +2299,9 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
LOG.info("results : " + databases.size());
// write the results in the file
- DataOutputStream outStream = null;
+ DataOutputStream outStream = getOutputStream(showDatabasesDesc.getResFile());
try {
- Path resFile = new Path(showDatabasesDesc.getResFile());
- FileSystem fs = resFile.getFileSystem(conf);
- outStream = fs.create(resFile);
-
formatter.showDatabases(outStream, databases);
- outStream.close();
- outStream = null;
} catch (Exception e) {
throw new HiveException(e, ErrorMsg.GENERIC_ERROR, "show databases");
} finally {
@@ -2304,16 +2338,10 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
}
// write the results in the file
- DataOutputStream outStream = null;
+ DataOutputStream outStream = getOutputStream(showTbls.getResFile());
try {
- Path resFile = new Path(showTbls.getResFile());
- FileSystem fs = resFile.getFileSystem(conf);
- outStream = fs.create(resFile);
-
SortedSet<String> sortedTbls = new TreeSet<String>(tbls);
formatter.showTables(outStream, sortedTbls);
- outStream.close();
- outStream = null;
} catch (Exception e) {
throw new HiveException(e, ErrorMsg.GENERIC_ERROR, "in database" + dbName);
} finally {
@@ -2328,12 +2356,8 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
Table table = db.getTable(showCols.getTableName());
// write the results in the file
- DataOutputStream outStream = null;
+ DataOutputStream outStream = getOutputStream(showCols.getResFile());
try {
- Path resFile = new Path(showCols.getResFile());
- FileSystem fs = resFile.getFileSystem(conf);
- outStream = fs.create(resFile);
-
List<FieldSchema> cols = table.getCols();
cols.addAll(table.getPartCols());
// In case the query is served by HiveServer2, don't pad it with spaces,
@@ -2341,8 +2365,6 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
boolean isOutputPadded = !SessionState.get().isHiveServerQuery();
outStream.writeBytes(MetaDataFormatUtils.getAllColumnsInformation(
cols, false, isOutputPadded, null));
- outStream.close();
- outStream = null;
} catch (IOException e) {
throw new HiveException(e, ErrorMsg.GENERIC_ERROR);
} finally {
@@ -2377,11 +2399,8 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
}
// write the results in the file
- DataOutputStream outStream = null;
+ DataOutputStream outStream = getOutputStream(showFuncs.getResFile());
try {
- Path resFile = new Path(showFuncs.getResFile());
- FileSystem fs = resFile.getFileSystem(conf);
- outStream = fs.create(resFile);
SortedSet<String> sortedFuncs = new TreeSet<String>(funcs);
// To remove the primitive types
sortedFuncs.removeAll(serdeConstants.PrimitiveTypes);
@@ -2392,8 +2411,6 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
outStream.writeBytes(iterFuncs.next());
outStream.write(terminator);
}
- outStream.close();
- outStream = null;
} catch (FileNotFoundException e) {
LOG.warn("show function: " + stringifyException(e));
return 1;
@@ -2430,11 +2447,8 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
}
// write the results in the file
- DataOutputStream outStream = null;
+ DataOutputStream outStream = getOutputStream(showLocks.getResFile());
try {
- Path resFile = new Path(showLocks.getResFile());
- FileSystem fs = resFile.getFileSystem(conf);
- outStream = fs.create(resFile);
List<HiveLock> locks = null;
if (showLocks.getTableName() == null) {
@@ -2490,8 +2504,6 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
}
outStream.write(terminator);
}
- outStream.close();
- outStream = null;
} catch (FileNotFoundException e) {
LOG.warn("show function: " + stringifyException(e));
return 1;
@@ -2518,12 +2530,8 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
ShowLocksResponse rsp = lockMgr.getLocks();
// write the results in the file
- DataOutputStream os = null;
+ DataOutputStream os = getOutputStream(showLocks.getResFile());
try {
- Path resFile = new Path(showLocks.getResFile());
- FileSystem fs = resFile.getFileSystem(conf);
- os = fs.create(resFile);
-
// Write a header
os.writeBytes("Lock ID");
os.write(separator);
@@ -2577,9 +2585,6 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
}
}
-
- os.close();
- os = null;
} catch (FileNotFoundException e) {
LOG.warn("show function: " + stringifyException(e));
return 1;
@@ -2599,12 +2604,8 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
ShowCompactResponse rsp = db.showCompactions();
// Write the results into the file
- DataOutputStream os = null;
+ DataOutputStream os = getOutputStream(desc.getResFile());
try {
- Path resFile = new Path(desc.getResFile());
- FileSystem fs = resFile.getFileSystem(conf);
- os = fs.create(resFile);
-
// Write a header
os.writeBytes("Database");
os.write(separator);
@@ -2641,7 +2642,6 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
os.write(terminator);
}
}
- os.close();
} catch (IOException e) {
LOG.warn("show compactions: " + stringifyException(e));
return 1;
@@ -2656,12 +2656,8 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
GetOpenTxnsInfoResponse rsp = db.showTransactions();
// Write the results into the file
- DataOutputStream os = null;
+ DataOutputStream os = getOutputStream(desc.getResFile());
try {
- Path resFile = new Path(desc.getResFile());
- FileSystem fs = resFile.getFileSystem(conf);
- os = fs.create(resFile);
-
// Write a header
os.writeBytes("Transaction ID");
os.write(separator);
@@ -2682,7 +2678,6 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
os.writeBytes(txn.getHostname());
os.write(terminator);
}
- os.close();
} catch (IOException e) {
LOG.warn("show transactions: " + stringifyException(e));
return 1;
@@ -2763,12 +2758,8 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
String funcName = descFunc.getName();
// write the results in the file
- DataOutputStream outStream = null;
+ DataOutputStream outStream = getOutputStream(descFunc.getResFile());
try {
- Path resFile = new Path(descFunc.getResFile());
- FileSystem fs = resFile.getFileSystem(conf);
- outStream = fs.create(resFile);
-
// get the function documentation
Description desc = null;
Class<?> funcClass = null;
@@ -2801,9 +2792,6 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
}
outStream.write(terminator);
-
- outStream.close();
- outStream = null;
} catch (FileNotFoundException e) {
LOG.warn("describe function: " + stringifyException(e));
return 1;
@@ -2819,12 +2807,8 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
}
private int descDatabase(DescDatabaseDesc descDatabase) throws HiveException {
- DataOutputStream outStream = null;
+ DataOutputStream outStream = getOutputStream(descDatabase.getResFile());
try {
- Path resFile = new Path(descDatabase.getResFile());
- FileSystem fs = resFile.getFileSystem(conf);
- outStream = fs.create(resFile);
-
Database database = db.getDatabase(descDatabase.getDatabaseName());
if (database == null) {
@@ -2851,9 +2835,7 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
database.getDescription(), location,
database.getOwnerName(), (null == ownerType) ? null : ownerType.name(), params);
- outStream.close();
- outStream = null;
- } catch (IOException e) {
+ } catch (Exception e) {
throw new HiveException(e, ErrorMsg.GENERIC_ERROR);
} finally {
IOUtils.closeStream(outStream);
@@ -2899,16 +2881,9 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
}
// write the results in the file
- DataOutputStream outStream = null;
+ DataOutputStream outStream = getOutputStream(showTblStatus.getResFile());
try {
- Path resFile = new Path(showTblStatus.getResFile());
- FileSystem fs = resFile.getFileSystem(conf);
- outStream = fs.create(resFile);
-
formatter.showTableStatus(outStream, db, conf, tbls, part, par);
-
- outStream.close();
- outStream = null;
} catch (Exception e) {
throw new HiveException(e, ErrorMsg.GENERIC_ERROR, "show table status");
} finally {
@@ -3011,40 +2986,22 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
// describe the table - populate the output stream
Table tbl = db.getTable(tableName, false);
+ if (tbl == null) {
+ throw new HiveException(ErrorMsg.INVALID_TABLE, tableName);
+ }
Partition part = null;
- DataOutputStream outStream = null;
- try {
- Path resFile = new Path(descTbl.getResFile());
- if (tbl == null) {
- FileSystem fs = resFile.getFileSystem(conf);
- outStream = fs.create(resFile);
- outStream.close();
- outStream = null;
- throw new HiveException(ErrorMsg.INVALID_TABLE, tableName);
- }
- if (descTbl.getPartSpec() != null) {
- part = db.getPartition(tbl, descTbl.getPartSpec(), false);
- if (part == null) {
- FileSystem fs = resFile.getFileSystem(conf);
- outStream = fs.create(resFile);
- outStream.close();
- outStream = null;
- throw new HiveException(ErrorMsg.INVALID_PARTITION,
- StringUtils.join(descTbl.getPartSpec().keySet(), ','), tableName);
- }
- tbl = part.getTable();
+ if (descTbl.getPartSpec() != null) {
+ part = db.getPartition(tbl, descTbl.getPartSpec(), false);
+ if (part == null) {
+ throw new HiveException(ErrorMsg.INVALID_PARTITION,
+ StringUtils.join(descTbl.getPartSpec().keySet(), ','), tableName);
}
- } catch (IOException e) {
- throw new HiveException(e, ErrorMsg.GENERIC_ERROR, tableName);
- } finally {
- IOUtils.closeStream(outStream);
+ tbl = part.getTable();
}
+ DataOutputStream outStream = getOutputStream(descTbl.getResFile());
try {
LOG.info("DDLTask: got data for " + tbl.getTableName());
- Path resFile = new Path(descTbl.getResFile());
- FileSystem fs = resFile.getFileSystem(conf);
- outStream = fs.create(resFile);
List<FieldSchema> cols = null;
List<ColumnStatisticsObj> colStats = null;
@@ -3092,13 +3049,9 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
descTbl.isPretty(), isOutputPadded, colStats);
LOG.info("DDLTask: written data for " + tbl.getTableName());
- outStream.close();
- outStream = null;
} catch (SQLException e) {
throw new HiveException(e, ErrorMsg.GENERIC_ERROR, tableName);
- } catch (IOException e) {
- throw new HiveException(e, ErrorMsg.GENERIC_ERROR, tableName);
} finally {
IOUtils.closeStream(outStream);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/3cf7bd9e/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
index 9f8c756..2d7d9d4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
@@ -106,6 +106,7 @@ import org.apache.hadoop.hive.ql.plan.RoleDDLDesc;
import org.apache.hadoop.hive.ql.plan.ShowColumnsDesc;
import org.apache.hadoop.hive.ql.plan.ShowCompactionsDesc;
import org.apache.hadoop.hive.ql.plan.ShowConfDesc;
+import org.apache.hadoop.hive.ql.plan.ShowCreateDatabaseDesc;
import org.apache.hadoop.hive.ql.plan.ShowCreateTableDesc;
import org.apache.hadoop.hive.ql.plan.ShowDatabasesDesc;
import org.apache.hadoop.hive.ql.plan.ShowFunctionsDesc;
@@ -413,6 +414,10 @@ public class DDLSemanticAnalyzer extends BaseSemanticAnalyzer {
ctx.setResFile(ctx.getLocalTmpPath());
analyzeShowPartitions(ast);
break;
+ case HiveParser.TOK_SHOW_CREATEDATABASE:
+ ctx.setResFile(ctx.getLocalTmpPath());
+ analyzeShowCreateDatabase(ast);
+ break;
case HiveParser.TOK_SHOW_CREATETABLE:
ctx.setResFile(ctx.getLocalTmpPath());
analyzeShowCreateTable(ast);
@@ -2078,6 +2083,18 @@ public class DDLSemanticAnalyzer extends BaseSemanticAnalyzer {
setFetchTask(createFetchTask(showPartsDesc.getSchema()));
}
+ private void analyzeShowCreateDatabase(ASTNode ast) throws SemanticException {
+ String dbName = getUnescapedName((ASTNode)ast.getChild(0));
+ ShowCreateDatabaseDesc showCreateDbDesc =
+ new ShowCreateDatabaseDesc(dbName, ctx.getResFile().toString());
+
+ Database database = getDatabase(dbName);
+ inputs.add(new ReadEntity(database));
+ rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(),
+ showCreateDbDesc), conf));
+ setFetchTask(createFetchTask(showCreateDbDesc.getSchema()));
+ }
+
private void analyzeShowCreateTable(ASTNode ast) throws SemanticException {
ShowCreateTableDesc showCreateTblDesc;
String tableName = getUnescapedName((ASTNode)ast.getChild(0));
http://git-wip-us.apache.org/repos/asf/hive/blob/3cf7bd9e/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
index 3969a54..3df67e9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
@@ -178,6 +178,7 @@ TOK_SHOWTABLES;
TOK_SHOWCOLUMNS;
TOK_SHOWFUNCTIONS;
TOK_SHOWPARTITIONS;
+TOK_SHOW_CREATEDATABASE;
TOK_SHOW_CREATETABLE;
TOK_SHOW_TABLESTATUS;
TOK_SHOW_TBLPROPERTIES;
@@ -1374,7 +1375,11 @@ showStatement
-> ^(TOK_SHOWCOLUMNS tableName $db_name?)
| KW_SHOW KW_FUNCTIONS (KW_LIKE showFunctionIdentifier|showFunctionIdentifier)? -> ^(TOK_SHOWFUNCTIONS KW_LIKE? showFunctionIdentifier?)
| KW_SHOW KW_PARTITIONS tabName=tableName partitionSpec? -> ^(TOK_SHOWPARTITIONS $tabName partitionSpec?)
- | KW_SHOW KW_CREATE KW_TABLE tabName=tableName -> ^(TOK_SHOW_CREATETABLE $tabName)
+ | KW_SHOW KW_CREATE (
+ (KW_DATABASE|KW_SCHEMA) => (KW_DATABASE|KW_SCHEMA) db_name=identifier -> ^(TOK_SHOW_CREATEDATABASE $db_name)
+ |
+ KW_TABLE tabName=tableName -> ^(TOK_SHOW_CREATETABLE $tabName)
+ )
| KW_SHOW KW_TABLE KW_EXTENDED ((KW_FROM|KW_IN) db_name=identifier)? KW_LIKE showStmtIdentifier partitionSpec?
-> ^(TOK_SHOW_TABLESTATUS showStmtIdentifier $db_name? partitionSpec?)
| KW_SHOW KW_TBLPROPERTIES tableName (LPAREN prptyName=StringLiteral RPAREN)? -> ^(TOK_SHOW_TBLPROPERTIES tableName $prptyName?)
http://git-wip-us.apache.org/repos/asf/hive/blob/3cf7bd9e/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java
index a2fbc11..0affe84 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java
@@ -65,6 +65,7 @@ public final class SemanticAnalyzerFactory {
commandType.put(HiveParser.TOK_SHOWCOLUMNS, HiveOperation.SHOWCOLUMNS);
commandType.put(HiveParser.TOK_SHOW_TABLESTATUS, HiveOperation.SHOW_TABLESTATUS);
commandType.put(HiveParser.TOK_SHOW_TBLPROPERTIES, HiveOperation.SHOW_TBLPROPERTIES);
+ commandType.put(HiveParser.TOK_SHOW_CREATEDATABASE, HiveOperation.SHOW_CREATEDATABASE);
commandType.put(HiveParser.TOK_SHOW_CREATETABLE, HiveOperation.SHOW_CREATETABLE);
commandType.put(HiveParser.TOK_SHOWFUNCTIONS, HiveOperation.SHOWFUNCTIONS);
commandType.put(HiveParser.TOK_SHOWINDEXES, HiveOperation.SHOWINDEXES);
@@ -227,6 +228,7 @@ public final class SemanticAnalyzerFactory {
case HiveParser.TOK_SHOWCOLUMNS:
case HiveParser.TOK_SHOW_TABLESTATUS:
case HiveParser.TOK_SHOW_TBLPROPERTIES:
+ case HiveParser.TOK_SHOW_CREATEDATABASE:
case HiveParser.TOK_SHOW_CREATETABLE:
case HiveParser.TOK_SHOWFUNCTIONS:
case HiveParser.TOK_SHOWPARTITIONS:
http://git-wip-us.apache.org/repos/asf/hive/blob/3cf7bd9e/ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java
index 8dbb3c1..a4c3db1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java
@@ -57,6 +57,7 @@ public class DDLWork implements Serializable {
private ShowTxnsDesc showTxnsDesc;
private DescFunctionDesc descFunctionDesc;
private ShowPartitionsDesc showPartsDesc;
+ private ShowCreateDatabaseDesc showCreateDbDesc;
private ShowCreateTableDesc showCreateTblDesc;
private DescTableDesc descTblDesc;
private AddPartitionDesc addPartitionDesc;
@@ -367,6 +368,16 @@ public class DDLWork implements Serializable {
}
/**
+ * @param showCreateDbDesc
+ */
+ public DDLWork(HashSet<ReadEntity> inputs, HashSet<WriteEntity> outputs,
+ ShowCreateDatabaseDesc showCreateDbDesc) {
+ this(inputs, outputs);
+
+ this.showCreateDbDesc = showCreateDbDesc;
+ }
+
+ /**
* @param showCreateTblDesc
*/
public DDLWork(HashSet<ReadEntity> inputs, HashSet<WriteEntity> outputs,
@@ -832,6 +843,16 @@ public class DDLWork implements Serializable {
this.showPartsDesc = showPartsDesc;
}
+ @Explain(displayName = "Show Create Database Operator",
+ explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED })
+ public ShowCreateDatabaseDesc getShowCreateDbDesc() {
+ return showCreateDbDesc;
+ }
+
+ public void setShowCreateDbDesc(ShowCreateDatabaseDesc showCreateDbDesc) {
+ this.showCreateDbDesc = showCreateDbDesc;
+ }
+
/**
* @return the showCreateTblDesc
*/
http://git-wip-us.apache.org/repos/asf/hive/blob/3cf7bd9e/ql/src/java/org/apache/hadoop/hive/ql/plan/HiveOperation.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/HiveOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/HiveOperation.java
index dee2136..af7e43e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/HiveOperation.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/HiveOperation.java
@@ -64,6 +64,7 @@ public enum HiveOperation {
SHOWCOLUMNS("SHOWCOLUMNS", null, null),
SHOW_TABLESTATUS("SHOW_TABLESTATUS", null, null),
SHOW_TBLPROPERTIES("SHOW_TBLPROPERTIES", null, null),
+ SHOW_CREATEDATABASE("SHOW_CREATEDATABASE", new Privilege[]{Privilege.SELECT}, null),
SHOW_CREATETABLE("SHOW_CREATETABLE", new Privilege[]{Privilege.SELECT}, null),
SHOWFUNCTIONS("SHOWFUNCTIONS", null, null),
SHOWINDEXES("SHOWINDEXES", null, null),
http://git-wip-us.apache.org/repos/asf/hive/blob/3cf7bd9e/ql/src/java/org/apache/hadoop/hive/ql/plan/ShowCreateDatabaseDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ShowCreateDatabaseDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ShowCreateDatabaseDesc.java
new file mode 100644
index 0000000..2b12691
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ShowCreateDatabaseDesc.java
@@ -0,0 +1,94 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.plan;
+
+import java.io.Serializable;
+
+import org.apache.hadoop.hive.ql.plan.Explain.Level;
+
+
+/**
+ * ShowCreateDatabaseDesc.
+ *
+ */
+@Explain(displayName = "Show Create Database",
+ explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED })
+public class ShowCreateDatabaseDesc extends DDLDesc implements Serializable {
+ private static final long serialVersionUID = 1L;
+ String resFile;
+ String dbName;
+
+ /**
+ * thrift ddl for the result of showcreatedatabase.
+ */
+ private static final String schema = "createdb_stmt#string";
+
+ public String getSchema() {
+ return schema;
+ }
+
+ /**
+ * For serialization use only.
+ */
+ public ShowCreateDatabaseDesc() {
+ }
+
+ /**
+ * @param resFile
+ * @param dbName
+ * name of database to show
+ */
+ public ShowCreateDatabaseDesc(String dbName, String resFile) {
+ this.dbName = dbName;
+ this.resFile = resFile;
+ }
+
+ /**
+ * @return the resFile
+ */
+ @Explain(displayName = "result file", explainLevels = { Level.EXTENDED })
+ public String getResFile() {
+ return resFile;
+ }
+
+ /**
+ * @param resFile
+ * the resFile to set
+ */
+ public void setResFile(String resFile) {
+ this.resFile = resFile;
+ }
+
+ /**
+ * @return the databaseName
+ */
+ @Explain(displayName = "database name",
+ explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED })
+ public String getDatabaseName() {
+ return dbName;
+ }
+
+ /**
+ * @param databaseName
+ * the dbName to set
+ */
+ public void setDatabaseName(String dbName) {
+ this.dbName = dbName;
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/3cf7bd9e/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveOperationType.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveOperationType.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveOperationType.java
index 71be469..5418e9a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveOperationType.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveOperationType.java
@@ -66,6 +66,7 @@ public enum HiveOperationType {
SHOWCOLUMNS,
SHOW_TABLESTATUS,
SHOW_TBLPROPERTIES,
+ SHOW_CREATEDATABASE,
SHOW_CREATETABLE,
SHOWFUNCTIONS,
SHOWINDEXES,
http://git-wip-us.apache.org/repos/asf/hive/blob/3cf7bd9e/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/Operation2Privilege.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/Operation2Privilege.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/Operation2Privilege.java
index 8e61d57..ca8f53f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/Operation2Privilege.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/Operation2Privilege.java
@@ -309,6 +309,8 @@ public class Operation2Privilege {
// for now require select WITH GRANT
op2Priv.put(HiveOperationType.SHOW_CREATETABLE, PrivRequirement.newIOPrivRequirement
(SEL_GRANT_AR, null));
+ op2Priv.put(HiveOperationType.SHOW_CREATEDATABASE, PrivRequirement.newIOPrivRequirement
+(SEL_GRANT_AR, null));
// for now allow only create-view with 'select with grant'
// the owner will also have select with grant privileges on new view
http://git-wip-us.apache.org/repos/asf/hive/blob/3cf7bd9e/ql/src/test/queries/clientpositive/show_create_database.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/show_create_database.q b/ql/src/test/queries/clientpositive/show_create_database.q
new file mode 100644
index 0000000..6136f23
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/show_create_database.q
@@ -0,0 +1,3 @@
+CREATE DATABASE some_database comment 'for show create db test' WITH DBPROPERTIES ('somekey'='somevalue');
+SHOW CREATE DATABASE some_database;
+
http://git-wip-us.apache.org/repos/asf/hive/blob/3cf7bd9e/ql/src/test/results/clientpositive/show_create_database.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/show_create_database.q.out b/ql/src/test/results/clientpositive/show_create_database.q.out
new file mode 100644
index 0000000..4755d2d
--- /dev/null
+++ b/ql/src/test/results/clientpositive/show_create_database.q.out
@@ -0,0 +1,19 @@
+PREHOOK: query: CREATE DATABASE some_database comment 'for show create db test' WITH DBPROPERTIES ('somekey'='somevalue')
+PREHOOK: type: CREATEDATABASE
+PREHOOK: Output: database:some_database
+POSTHOOK: query: CREATE DATABASE some_database comment 'for show create db test' WITH DBPROPERTIES ('somekey'='somevalue')
+POSTHOOK: type: CREATEDATABASE
+POSTHOOK: Output: database:some_database
+PREHOOK: query: SHOW CREATE DATABASE some_database
+PREHOOK: type: SHOW_CREATEDATABASE
+PREHOOK: Input: database:some_database
+POSTHOOK: query: SHOW CREATE DATABASE some_database
+POSTHOOK: type: SHOW_CREATEDATABASE
+POSTHOOK: Input: database:some_database
+CREATE DATABASE `some_database`
+COMMENT
+ 'for show create db test'
+LOCATION
+#### A masked pattern was here ####
+WITH DBPROPERTIES (
+ 'somekey'='somevalue')
http://git-wip-us.apache.org/repos/asf/hive/blob/3cf7bd9e/ql/src/test/results/clientpositive/tez/show_create_database.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/show_create_database.q.out b/ql/src/test/results/clientpositive/tez/show_create_database.q.out
new file mode 100644
index 0000000..4755d2d
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/show_create_database.q.out
@@ -0,0 +1,19 @@
+PREHOOK: query: CREATE DATABASE some_database comment 'for show create db test' WITH DBPROPERTIES ('somekey'='somevalue')
+PREHOOK: type: CREATEDATABASE
+PREHOOK: Output: database:some_database
+POSTHOOK: query: CREATE DATABASE some_database comment 'for show create db test' WITH DBPROPERTIES ('somekey'='somevalue')
+POSTHOOK: type: CREATEDATABASE
+POSTHOOK: Output: database:some_database
+PREHOOK: query: SHOW CREATE DATABASE some_database
+PREHOOK: type: SHOW_CREATEDATABASE
+PREHOOK: Input: database:some_database
+POSTHOOK: query: SHOW CREATE DATABASE some_database
+POSTHOOK: type: SHOW_CREATEDATABASE
+POSTHOOK: Input: database:some_database
+CREATE DATABASE `some_database`
+COMMENT
+ 'for show create db test'
+LOCATION
+#### A masked pattern was here ####
+WITH DBPROPERTIES (
+ 'somekey'='somevalue')
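For reference, the output shape shown in the .q.out files above can be reproduced with a small standalone sketch. Escaping via escapeHiveCommand/StringEscapeUtils is elided here, plain strings stand in for the metastore Database object, and the class and method names are hypothetical:

    import java.util.Map;
    import java.util.TreeMap;

    final class ShowCreateDatabaseSketch {
      // Mirrors how showCreateDatabase() above assembles its result:
      // CREATE DATABASE `name`, then optional COMMENT, LOCATION, and an
      // optional WITH DBPROPERTIES clause of sorted, quoted key/value pairs.
      static String render(String name, String comment, String location,
          Map<String, String> props) {
        StringBuilder sb = new StringBuilder();
        sb.append("CREATE DATABASE `").append(name).append("`\n");
        if (comment != null) {
          sb.append("COMMENT\n '").append(comment).append("'\n");
        }
        sb.append("LOCATION\n '").append(location).append("'\n");
        if (props != null && !props.isEmpty()) {
          StringBuilder kv = new StringBuilder();
          // TreeMap gives the sorted property order used by propertiesToString().
          for (Map.Entry<String, String> e : new TreeMap<String, String>(props).entrySet()) {
            if (kv.length() > 0) {
              kv.append(", \n");
            }
            kv.append(" '").append(e.getKey()).append("'='")
                .append(e.getValue()).append("'");
          }
          sb.append("WITH DBPROPERTIES (\n").append(kv).append(")\n");
        }
        return sb.toString();
      }
    }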
[02/50] [abbrv] hive git commit: HIVE-11842: Improve RuleRegExp by caching some internal data structures (Jesus Camacho Rodriguez, reviewed by Sergey Shelukhin)
Posted by xu...@apache.org.
HIVE-11842: Improve RuleRegExp by caching some internal data structures (Jesus Camacho Rodriguez, reviewed by Sergey Shelukhin)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/79244ab4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/79244ab4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/79244ab4
Branch: refs/heads/beeline-cli
Commit: 79244ab453823b8787b70a08f923e25c2abbd0bf
Parents: 8d524e0
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Thu Sep 17 17:46:55 2015 +0100
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Thu Sep 17 17:46:55 2015 +0100
----------------------------------------------------------------------
.../apache/hadoop/hive/ql/lib/RuleRegExp.java | 61 ++++++++++++++++----
1 file changed, 51 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/79244ab4/ql/src/java/org/apache/hadoop/hive/ql/lib/RuleRegExp.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lib/RuleRegExp.java b/ql/src/java/org/apache/hadoop/hive/ql/lib/RuleRegExp.java
index fd5f133..1e850d6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/lib/RuleRegExp.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/lib/RuleRegExp.java
@@ -19,7 +19,9 @@
package org.apache.hadoop.hive.ql.lib;
import java.util.Arrays;
+import java.util.HashMap;
import java.util.HashSet;
+import java.util.Map;
import java.util.Set;
import java.util.Stack;
import java.util.regex.Matcher;
@@ -125,6 +127,12 @@ public class RuleRegExp implements Rule {
*/
private int costPatternWithoutWildCardChar(Stack<Node> stack) throws SemanticException {
int numElems = (stack != null ? stack.size() : 0);
+
+ // No elements
+ if (numElems == 0) {
+ return -1;
+ }
+
int patLen = patternWithoutWildCardChar.length();
StringBuilder name = new StringBuilder(patLen + numElems);
for (int pos = numElems - 1; pos >= 0; pos--) {
@@ -133,9 +141,8 @@ public class RuleRegExp implements Rule {
if (name.length() >= patLen) {
if (patternWithoutWildCardChar.contentEquals(name)) {
return patLen;
- } else {
- return -1;
}
+ break;
}
}
return -1;
@@ -152,20 +159,54 @@ public class RuleRegExp implements Rule {
*/
private int costPatternWithORWildCardChar(Stack<Node> stack) throws SemanticException {
int numElems = (stack != null ? stack.size() : 0);
+
+ // No elements
+ if (numElems == 0) {
+ return -1;
+ }
+
+ // These DS are used to cache previously created String
+ Map<Integer,String> cachedNames = new HashMap<Integer,String>();
+ int maxDepth = numElems;
+ int maxLength = 0;
+
+ // For every pattern
for (String pattern : patternORWildChar) {
int patLen = pattern.length();
- StringBuilder name = new StringBuilder(patLen + numElems);
- for (int pos = numElems - 1; pos >= 0; pos--) {
- String nodeName = stack.get(pos).getName() + "%";
- name.insert(0, nodeName);
- if (name.length() >= patLen) {
- if (pattern.contentEquals(name)) {
- return patLen;
- } else {
+ // If the stack has already been explored to that depth,
+ // use the cached String
+ if (cachedNames.containsKey(patLen)) {
+ if (pattern.contentEquals(cachedNames.get(patLen))) {
+ return patLen;
+ }
+ } else if (maxLength >= patLen) {
+ // We have already explored the stack deep enough, but
+ // we do not have a match
+ continue;
+ } else {
+ // We are going to build the name
+ StringBuilder name = new StringBuilder(patLen + numElems);
+ if (maxLength != 0) {
+ name.append(cachedNames.get(maxLength));
+ }
+ for (int pos = maxDepth - 1; pos >= 0; pos--) {
+ String nodeName = stack.get(pos).getName() + "%";
+ name.insert(0, nodeName);
+
+ // We cache the values
+ cachedNames.put(name.length(), name.toString());
+ maxLength = name.length();
+ maxDepth--;
+
+ if (name.length() >= patLen) {
+ if (pattern.contentEquals(name)) {
+ return patLen;
+ }
break;
}
}
+
}
}
return -1;
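----------------------------------------------------------------------
The patch above caches each partially built operator-name String, keyed by its length, so later patterns can resume from the longest cached prefix instead of rebuilding the name from the stack for every pattern. A minimal standalone sketch of that prefix-caching idea, assuming a plain List<String> of node names in place of Hive's Node/Stack API (the inputs in main() are illustrative, not Hive classes):

import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class PrefixCacheDemo {

  /**
   * Returns the length of the first pattern that matches the name built
   * from the tail of the node-name list, or -1. Partially built names are
   * cached by length so later (longer) patterns resume from the longest
   * cached prefix instead of re-concatenating from scratch.
   */
  static int matchCost(List<String> nodeNames, List<String> patterns) {
    Map<Integer, String> cachedNames = new HashMap<>();
    int maxDepth = nodeNames.size(); // elements not yet folded into a name
    int maxLength = 0;               // length of the longest cached name

    for (String pattern : patterns) {
      int patLen = pattern.length();
      if (cachedNames.containsKey(patLen)) {
        if (pattern.contentEquals(cachedNames.get(patLen))) {
          return patLen;             // cache hit: no string building at all
        }
      } else if (maxLength >= patLen) {
        continue;                    // explored deep enough already; no match
      } else {
        StringBuilder name = new StringBuilder(patLen + nodeNames.size());
        if (maxLength != 0) {
          name.append(cachedNames.get(maxLength)); // resume from cached prefix
        }
        for (int pos = maxDepth - 1; pos >= 0; pos--) {
          name.insert(0, nodeNames.get(pos) + "%");
          cachedNames.put(name.length(), name.toString());
          maxLength = name.length();
          maxDepth--;
          if (name.length() >= patLen) {
            if (pattern.contentEquals(name)) {
              return patLen;
            }
            break;
          }
        }
      }
    }
    return -1;
  }

  public static void main(String[] args) {
    // Operator names are '%'-terminated, mirroring the Hive convention.
    List<String> stack = List.of("TS", "FIL", "SEL");
    System.out.println(matchCost(stack, List.of("GBY%", "FIL%SEL%"))); // 8
  }
}
----------------------------------------------------------------------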
[12/50] [abbrv] hive git commit: HIVE-11833 : TxnHandler heartbeat
txn doesn't need serializable DB txn level (Sergey Shelukhin,
reviewed by Alan Gates)
Posted by xu...@apache.org.
HIVE-11833 : TxnHandler heartbeat txn doesn't need serializable DB txn level (Sergey Shelukhin, reviewed by Alan Gates)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4c0fb13b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4c0fb13b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4c0fb13b
Branch: refs/heads/beeline-cli
Commit: 4c0fb13b1313f8cafe866105515978524a032c76
Parents: e9c8d7c
Author: Sergey Shelukhin <se...@apache.org>
Authored: Fri Sep 18 13:42:18 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Fri Sep 18 13:42:18 2015 -0700
----------------------------------------------------------------------
.../hadoop/hive/metastore/txn/TxnHandler.java | 61 ++++++++++++--------
1 file changed, 36 insertions(+), 25 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/4c0fb13b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
index 795b2d9..9ecb82a 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
@@ -621,7 +621,7 @@ public class TxnHandler {
try {
Connection dbConn = null;
try {
- dbConn = getDbConn(Connection.TRANSACTION_SERIALIZABLE);
+ dbConn = getDbConn(Connection.TRANSACTION_READ_COMMITTED);
heartbeatLock(dbConn, ids.getLockid());
heartbeatTxn(dbConn, ids.getTxnid());
} catch (SQLException e) {
@@ -1727,32 +1727,17 @@ public class TxnHandler {
try {
stmt = dbConn.createStatement();
long now = getDbTime(dbConn);
- // We need to check whether this transaction is valid and open
- String s = "select txn_state from TXNS where txn_id = " + txnid;
- LOG.debug("Going to execute query <" + s + ">");
- ResultSet rs = stmt.executeQuery(s);
- if (!rs.next()) {
- s = "select count(*) from COMPLETED_TXN_COMPONENTS where CTC_TXNID = " + txnid;
- ResultSet rs2 = stmt.executeQuery(s);
- boolean alreadyCommitted = rs2.next() && rs2.getInt(1) > 0;
- LOG.debug("Going to rollback");
- dbConn.rollback();
- if(alreadyCommitted) {
- //makes the message more informative - helps to find bugs in client code
- throw new NoSuchTxnException("Transaction " + JavaUtils.txnIdToString(txnid) + " is already committed.");
- }
- throw new NoSuchTxnException("No such transaction " + JavaUtils.txnIdToString(txnid));
- }
- if (rs.getString(1).charAt(0) == TXN_ABORTED) {
- LOG.debug("Going to rollback");
+ ensureValidTxn(dbConn, txnid, stmt);
+ String s = "update TXNS set txn_last_heartbeat = " + now +
+ " where txn_id = " + txnid + " and txn_state = '" + TXN_OPEN + "'";
+ LOG.debug("Going to execute update <" + s + ">");
+ int rc = stmt.executeUpdate(s);
+ if (rc < 1) {
+ ensureValidTxn(dbConn, txnid, stmt); // This should now throw some useful exception.
+ LOG.warn("Can neither heartbeat txn nor confirm it as invalid.");
dbConn.rollback();
- throw new TxnAbortedException("Transaction " + JavaUtils.txnIdToString(txnid) +
- " already aborted");//todo: add time of abort, which is not currently tracked
+ throw new NoSuchTxnException("No such txn: " + txnid);
}
- s = "update TXNS set txn_last_heartbeat = " + now +
- " where txn_id = " + txnid;
- LOG.debug("Going to execute update <" + s + ">");
- stmt.executeUpdate(s);
LOG.debug("Going to commit");
dbConn.commit();
} finally {
@@ -1760,6 +1745,32 @@ public class TxnHandler {
}
}
+ private static void ensureValidTxn(Connection dbConn, long txnid, Statement stmt)
+ throws SQLException, NoSuchTxnException, TxnAbortedException {
+ // We need to check whether this transaction is valid and open
+ String s = "select txn_state from TXNS where txn_id = " + txnid;
+ LOG.debug("Going to execute query <" + s + ">");
+ ResultSet rs = stmt.executeQuery(s);
+ if (!rs.next()) {
+ s = "select count(*) from COMPLETED_TXN_COMPONENTS where CTC_TXNID = " + txnid;
+ ResultSet rs2 = stmt.executeQuery(s);
+ boolean alreadyCommitted = rs2.next() && rs2.getInt(1) > 0;
+ LOG.debug("Going to rollback");
+ dbConn.rollback();
+ if(alreadyCommitted) {
+ //makes the message more informative - helps to find bugs in client code
+ throw new NoSuchTxnException("Transaction " + JavaUtils.txnIdToString(txnid) + " is already committed.");
+ }
+ throw new NoSuchTxnException("No such transaction " + JavaUtils.txnIdToString(txnid));
+ }
+ if (rs.getString(1).charAt(0) == TXN_ABORTED) {
+ LOG.debug("Going to rollback");
+ dbConn.rollback();
+ throw new TxnAbortedException("Transaction " + JavaUtils.txnIdToString(txnid) +
+ " already aborted");//todo: add time of abort, which is not currently tracked
+ }
+ }
+
// NEVER call this function without first calling heartbeat(long, long)
private long getTxnIdFromLockId(Connection dbConn, long extLockId)
throws NoSuchLockException, MetaException, SQLException {
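----------------------------------------------------------------------
The reworked heartbeat above replaces a serializable read-then-update with a single guarded UPDATE whose row count reveals atomically whether the txn was still open, which is why READ_COMMITTED suffices. A minimal JDBC sketch of that pattern; the literal 'o' for TXN_OPEN and the plain SQLException are assumptions for illustration, not the Hive code:

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;

public class HeartbeatSketch {

  /**
   * Heartbeat under READ_COMMITTED: one guarded UPDATE whose row count
   * tells us whether the txn was still open, with no serializable
   * read-then-write cycle.
   */
  static void heartbeatTxn(Connection dbConn, long txnid, long now)
      throws SQLException {
    dbConn.setTransactionIsolation(Connection.TRANSACTION_READ_COMMITTED);
    dbConn.setAutoCommit(false);
    String s = "update TXNS set txn_last_heartbeat = ?"
        + " where txn_id = ? and txn_state = 'o'"; // 'o' assumed for TXN_OPEN
    try (PreparedStatement ps = dbConn.prepareStatement(s)) {
      ps.setLong(1, now);
      ps.setLong(2, txnid);
      if (ps.executeUpdate() < 1) {
        // No open row was updated: roll back and report. The real patch
        // re-runs ensureValidTxn() here first to raise a more precise
        // already-committed or already-aborted error.
        dbConn.rollback();
        throw new SQLException("No such open txn: " + txnid);
      }
      dbConn.commit();
    }
  }
}
----------------------------------------------------------------------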
[35/50] [abbrv] hive git commit: HIVE-11468: Vectorize Struct IN()
clauses (Matt McCline, via Gopal V)
Posted by xu...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/7cfe3743/ql/src/test/results/clientpositive/vector_struct_in.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_struct_in.q.out b/ql/src/test/results/clientpositive/vector_struct_in.q.out
new file mode 100644
index 0000000..c78b428
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_struct_in.q.out
@@ -0,0 +1,825 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+-- 2 Strings
+create table test_1 (`id` string, `lineid` string) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_1
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+-- 2 Strings
+create table test_1 (`id` string, `lineid` string) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_1
+PREHOOK: query: insert into table test_1 values ('one','1'), ('seven','1')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@test_1
+POSTHOOK: query: insert into table test_1 values ('one','1'), ('seven','1')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@test_1
+POSTHOOK: Lineage: test_1.id SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: test_1.lineid SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: explain
+select * from test_1 where struct(`id`, `lineid`)
+IN (
+struct('two','3'),
+struct('three','1'),
+struct('one','1'),
+struct('five','2'),
+struct('six','1'),
+struct('eight','1'),
+struct('seven','1'),
+struct('nine','1'),
+struct('ten','1')
+)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select * from test_1 where struct(`id`, `lineid`)
+IN (
+struct('two','3'),
+struct('three','1'),
+struct('one','1'),
+struct('five','2'),
+struct('six','1'),
+struct('eight','1'),
+struct('seven','1'),
+struct('nine','1'),
+struct('ten','1')
+)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: test_1
+ Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (struct(id,lineid)) IN (const struct('two','3'), const struct('three','1'), const struct('one','1'), const struct('five','2'), const struct('six','1'), const struct('eight','1'), const struct('seven','1'), const struct('nine','1'), const struct('ten','1')) (type: boolean)
+ Statistics: Num rows: 1 Data size: 173 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: id (type: string), lineid (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 173 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 173 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from test_1 where struct(`id`, `lineid`)
+IN (
+struct('two','3'),
+struct('three','1'),
+struct('one','1'),
+struct('five','2'),
+struct('six','1'),
+struct('eight','1'),
+struct('seven','1'),
+struct('nine','1'),
+struct('ten','1')
+)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_1 where struct(`id`, `lineid`)
+IN (
+struct('two','3'),
+struct('three','1'),
+struct('one','1'),
+struct('five','2'),
+struct('six','1'),
+struct('eight','1'),
+struct('seven','1'),
+struct('nine','1'),
+struct('ten','1')
+)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_1
+#### A masked pattern was here ####
+one 1
+seven 1
+PREHOOK: query: explain
+select `id`, `lineid`, struct(`id`, `lineid`)
+IN (
+struct('two','3'),
+struct('three','1'),
+struct('one','1'),
+struct('five','2'),
+struct('six','1'),
+struct('eight','1'),
+struct('seven','1'),
+struct('nine','1'),
+struct('ten','1')
+) as b from test_1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select `id`, `lineid`, struct(`id`, `lineid`)
+IN (
+struct('two','3'),
+struct('three','1'),
+struct('one','1'),
+struct('five','2'),
+struct('six','1'),
+struct('eight','1'),
+struct('seven','1'),
+struct('nine','1'),
+struct('ten','1')
+) as b from test_1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: test_1
+ Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: id (type: string), lineid (type: string), (struct(id,lineid)) IN (const struct('two','3'), const struct('three','1'), const struct('one','1'), const struct('five','2'), const struct('six','1'), const struct('eight','1'), const struct('seven','1'), const struct('nine','1'), const struct('ten','1')) (type: boolean)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select `id`, `lineid`, struct(`id`, `lineid`)
+IN (
+struct('two','3'),
+struct('three','1'),
+struct('one','1'),
+struct('five','2'),
+struct('six','1'),
+struct('eight','1'),
+struct('seven','1'),
+struct('nine','1'),
+struct('ten','1')
+) as b from test_1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_1
+#### A masked pattern was here ####
+POSTHOOK: query: select `id`, `lineid`, struct(`id`, `lineid`)
+IN (
+struct('two','3'),
+struct('three','1'),
+struct('one','1'),
+struct('five','2'),
+struct('six','1'),
+struct('eight','1'),
+struct('seven','1'),
+struct('nine','1'),
+struct('ten','1')
+) as b from test_1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_1
+#### A masked pattern was here ####
+one 1 true
+seven 1 true
+PREHOOK: query: -- 2 Integers
+create table test_2 (`id` int, `lineid` int) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_2
+POSTHOOK: query: -- 2 Integers
+create table test_2 (`id` int, `lineid` int) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_2
+PREHOOK: query: insert into table test_2 values (1,1), (7,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@test_2
+POSTHOOK: query: insert into table test_2 values (1,1), (7,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@test_2
+POSTHOOK: Lineage: test_2.id EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: test_2.lineid EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: explain
+select * from test_2 where struct(`id`, `lineid`)
+IN (
+struct(2,3),
+struct(3,1),
+struct(1,1),
+struct(5,2),
+struct(6,1),
+struct(8,1),
+struct(7,1),
+struct(9,1),
+struct(10,1)
+)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select * from test_2 where struct(`id`, `lineid`)
+IN (
+struct(2,3),
+struct(3,1),
+struct(1,1),
+struct(5,2),
+struct(6,1),
+struct(8,1),
+struct(7,1),
+struct(9,1),
+struct(10,1)
+)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: test_2
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (struct(id,lineid)) IN (const struct(2,3), const struct(3,1), const struct(1,1), const struct(5,2), const struct(6,1), const struct(8,1), const struct(7,1), const struct(9,1), const struct(10,1)) (type: boolean)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: id (type: int), lineid (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from test_2 where struct(`id`, `lineid`)
+IN (
+struct(2,3),
+struct(3,1),
+struct(1,1),
+struct(5,2),
+struct(6,1),
+struct(8,1),
+struct(7,1),
+struct(9,1),
+struct(10,1)
+)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_2 where struct(`id`, `lineid`)
+IN (
+struct(2,3),
+struct(3,1),
+struct(1,1),
+struct(5,2),
+struct(6,1),
+struct(8,1),
+struct(7,1),
+struct(9,1),
+struct(10,1)
+)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_2
+#### A masked pattern was here ####
+1 1
+7 1
+PREHOOK: query: explain
+select `id`, `lineid`, struct(`id`, `lineid`)
+IN (
+struct(2,3),
+struct(3,1),
+struct(1,1),
+struct(5,2),
+struct(6,1),
+struct(8,1),
+struct(7,1),
+struct(9,1),
+struct(10,1)
+) as b from test_2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select `id`, `lineid`, struct(`id`, `lineid`)
+IN (
+struct(2,3),
+struct(3,1),
+struct(1,1),
+struct(5,2),
+struct(6,1),
+struct(8,1),
+struct(7,1),
+struct(9,1),
+struct(10,1)
+) as b from test_2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: test_2
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: id (type: int), lineid (type: int), (struct(id,lineid)) IN (const struct(2,3), const struct(3,1), const struct(1,1), const struct(5,2), const struct(6,1), const struct(8,1), const struct(7,1), const struct(9,1), const struct(10,1)) (type: boolean)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select `id`, `lineid`, struct(`id`, `lineid`)
+IN (
+struct(2,3),
+struct(3,1),
+struct(1,1),
+struct(5,2),
+struct(6,1),
+struct(8,1),
+struct(7,1),
+struct(9,1),
+struct(10,1)
+) as b from test_2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_2
+#### A masked pattern was here ####
+POSTHOOK: query: select `id`, `lineid`, struct(`id`, `lineid`)
+IN (
+struct(2,3),
+struct(3,1),
+struct(1,1),
+struct(5,2),
+struct(6,1),
+struct(8,1),
+struct(7,1),
+struct(9,1),
+struct(10,1)
+) as b from test_2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_2
+#### A masked pattern was here ####
+1 1 true
+7 1 true
+PREHOOK: query: -- 1 String and 1 Integer
+create table test_3 (`id` string, `lineid` int) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_3
+POSTHOOK: query: -- 1 String and 1 Integer
+create table test_3 (`id` string, `lineid` int) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_3
+PREHOOK: query: insert into table test_3 values ('one',1), ('seven',1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__3
+PREHOOK: Output: default@test_3
+POSTHOOK: query: insert into table test_3 values ('one',1), ('seven',1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__3
+POSTHOOK: Output: default@test_3
+POSTHOOK: Lineage: test_3.id SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: test_3.lineid EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: explain
+select * from test_3 where struct(`id`, `lineid`)
+IN (
+struct('two',3),
+struct('three',1),
+struct('one',1),
+struct('five',2),
+struct('six',1),
+struct('eight',1),
+struct('seven',1),
+struct('nine',1),
+struct('ten',1)
+)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select * from test_3 where struct(`id`, `lineid`)
+IN (
+struct('two',3),
+struct('three',1),
+struct('one',1),
+struct('five',2),
+struct('six',1),
+struct('eight',1),
+struct('seven',1),
+struct('nine',1),
+struct('ten',1)
+)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: test_3
+ Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (struct(id,lineid)) IN (const struct('two',3), const struct('three',1), const struct('one',1), const struct('five',2), const struct('six',1), const struct('eight',1), const struct('seven',1), const struct('nine',1), const struct('ten',1)) (type: boolean)
+ Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: id (type: string), lineid (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from test_3 where struct(`id`, `lineid`)
+IN (
+struct('two',3),
+struct('three',1),
+struct('one',1),
+struct('five',2),
+struct('six',1),
+struct('eight',1),
+struct('seven',1),
+struct('nine',1),
+struct('ten',1)
+)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_3
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_3 where struct(`id`, `lineid`)
+IN (
+struct('two',3),
+struct('three',1),
+struct('one',1),
+struct('five',2),
+struct('six',1),
+struct('eight',1),
+struct('seven',1),
+struct('nine',1),
+struct('ten',1)
+)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_3
+#### A masked pattern was here ####
+one 1
+seven 1
+PREHOOK: query: explain
+select `id`, `lineid`, struct(`id`, `lineid`)
+IN (
+struct('two',3),
+struct('three',1),
+struct('one',1),
+struct('five',2),
+struct('six',1),
+struct('eight',1),
+struct('seven',1),
+struct('nine',1),
+struct('ten',1)
+) as b from test_3
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select `id`, `lineid`, struct(`id`, `lineid`)
+IN (
+struct('two',3),
+struct('three',1),
+struct('one',1),
+struct('five',2),
+struct('six',1),
+struct('eight',1),
+struct('seven',1),
+struct('nine',1),
+struct('ten',1)
+) as b from test_3
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: test_3
+ Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: id (type: string), lineid (type: int), (struct(id,lineid)) IN (const struct('two',3), const struct('three',1), const struct('one',1), const struct('five',2), const struct('six',1), const struct('eight',1), const struct('seven',1), const struct('nine',1), const struct('ten',1)) (type: boolean)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select `id`, `lineid`, struct(`id`, `lineid`)
+IN (
+struct('two',3),
+struct('three',1),
+struct('one',1),
+struct('five',2),
+struct('six',1),
+struct('eight',1),
+struct('seven',1),
+struct('nine',1),
+struct('ten',1)
+) as b from test_3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_3
+#### A masked pattern was here ####
+POSTHOOK: query: select `id`, `lineid`, struct(`id`, `lineid`)
+IN (
+struct('two',3),
+struct('three',1),
+struct('one',1),
+struct('five',2),
+struct('six',1),
+struct('eight',1),
+struct('seven',1),
+struct('nine',1),
+struct('ten',1)
+) as b from test_3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_3
+#### A masked pattern was here ####
+one 1 true
+seven 1 true
+PREHOOK: query: -- 1 Integer and 1 String and 1 Double
+create table test_4 (`my_bigint` bigint, `my_string` string, `my_double` double) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_4
+POSTHOOK: query: -- 1 Integer and 1 String and 1 Double
+create table test_4 (`my_bigint` bigint, `my_string` string, `my_double` double) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_4
+PREHOOK: query: insert into table test_4 values (1, "b", 1.5), (1, "a", 0.5), (2, "b", 1.5)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__4
+PREHOOK: Output: default@test_4
+POSTHOOK: query: insert into table test_4 values (1, "b", 1.5), (1, "a", 0.5), (2, "b", 1.5)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__4
+POSTHOOK: Output: default@test_4
+POSTHOOK: Lineage: test_4.my_bigint EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: test_4.my_double EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: test_4.my_string SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: explain
+select * from test_4 where struct(`my_bigint`, `my_string`, `my_double`)
+IN (
+struct(1L, "a", 1.5),
+struct(1L, "b", -0.5),
+struct(3L, "b", 1.5),
+struct(1L, "d", 1.5),
+struct(1L, "c", 1.5),
+struct(1L, "b", 2.5),
+struct(1L, "b", 0.5),
+struct(5L, "b", 1.5),
+struct(1L, "a", 0.5),
+struct(3L, "b", 1.5)
+)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select * from test_4 where struct(`my_bigint`, `my_string`, `my_double`)
+IN (
+struct(1L, "a", 1.5),
+struct(1L, "b", -0.5),
+struct(3L, "b", 1.5),
+struct(1L, "d", 1.5),
+struct(1L, "c", 1.5),
+struct(1L, "b", 2.5),
+struct(1L, "b", 0.5),
+struct(5L, "b", 1.5),
+struct(1L, "a", 0.5),
+struct(3L, "b", 1.5)
+)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: test_4
+ Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (struct(my_bigint,my_string,my_double)) IN (const struct(1,'a',1.5), const struct(1,'b',-0.5), const struct(3,'b',1.5), const struct(1,'d',1.5), const struct(1,'c',1.5), const struct(1,'b',2.5), const struct(1,'b',0.5), const struct(5,'b',1.5), const struct(1,'a',0.5), const struct(3,'b',1.5)) (type: boolean)
+ Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: my_bigint (type: bigint), my_string (type: string), my_double (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from test_4 where struct(`my_bigint`, `my_string`, `my_double`)
+IN (
+struct(1L, "a", 1.5),
+struct(1L, "b", -0.5),
+struct(3L, "b", 1.5),
+struct(1L, "d", 1.5),
+struct(1L, "c", 1.5),
+struct(1L, "b", 2.5),
+struct(1L, "b", 0.5),
+struct(5L, "b", 1.5),
+struct(1L, "a", 0.5),
+struct(3L, "b", 1.5)
+)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_4
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_4 where struct(`my_bigint`, `my_string`, `my_double`)
+IN (
+struct(1L, "a", 1.5),
+struct(1L, "b", -0.5),
+struct(3L, "b", 1.5),
+struct(1L, "d", 1.5),
+struct(1L, "c", 1.5),
+struct(1L, "b", 2.5),
+struct(1L, "b", 0.5),
+struct(5L, "b", 1.5),
+struct(1L, "a", 0.5),
+struct(3L, "b", 1.5)
+)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_4
+#### A masked pattern was here ####
+1 a 0.5
+PREHOOK: query: explain
+select `my_bigint`, `my_string`, `my_double`, struct(`my_bigint`, `my_string`, `my_double`)
+IN (
+struct(1L, "a", 1.5),
+struct(1L, "b", -0.5),
+struct(3L, "b", 1.5),
+struct(1L, "d", 1.5),
+struct(1L, "c", 1.5),
+struct(1L, "b", 2.5),
+struct(1L, "b", 0.5),
+struct(5L, "b", 1.5),
+struct(1L, "a", 0.5),
+struct(3L, "b", 1.5)
+) as b from test_4
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select `my_bigint`, `my_string`, `my_double`, struct(`my_bigint`, `my_string`, `my_double`)
+IN (
+struct(1L, "a", 1.5),
+struct(1L, "b", -0.5),
+struct(3L, "b", 1.5),
+struct(1L, "d", 1.5),
+struct(1L, "c", 1.5),
+struct(1L, "b", 2.5),
+struct(1L, "b", 0.5),
+struct(5L, "b", 1.5),
+struct(1L, "a", 0.5),
+struct(3L, "b", 1.5)
+) as b from test_4
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: test_4
+ Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: my_bigint (type: bigint), my_string (type: string), my_double (type: double), (struct(my_bigint,my_string,my_double)) IN (const struct(1,'a',1.5), const struct(1,'b',-0.5), const struct(3,'b',1.5), const struct(1,'d',1.5), const struct(1,'c',1.5), const struct(1,'b',2.5), const struct(1,'b',0.5), const struct(5,'b',1.5), const struct(1,'a',0.5), const struct(3,'b',1.5)) (type: boolean)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select `my_bigint`, `my_string`, `my_double`, struct(`my_bigint`, `my_string`, `my_double`)
+IN (
+struct(1L, "a", 1.5),
+struct(1L, "b", -0.5),
+struct(3L, "b", 1.5),
+struct(1L, "d", 1.5),
+struct(1L, "c", 1.5),
+struct(1L, "b", 2.5),
+struct(1L, "b", 0.5),
+struct(5L, "b", 1.5),
+struct(1L, "a", 0.5),
+struct(3L, "b", 1.5)
+) as b from test_4
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_4
+#### A masked pattern was here ####
+POSTHOOK: query: select `my_bigint`, `my_string`, `my_double`, struct(`my_bigint`, `my_string`, `my_double`)
+IN (
+struct(1L, "a", 1.5),
+struct(1L, "b", -0.5),
+struct(3L, "b", 1.5),
+struct(1L, "d", 1.5),
+struct(1L, "c", 1.5),
+struct(1L, "b", 2.5),
+struct(1L, "b", 0.5),
+struct(5L, "b", 1.5),
+struct(1L, "a", 0.5),
+struct(3L, "b", 1.5)
+) as b from test_4
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_4
+#### A masked pattern was here ####
+1 a 0.5 true
+1 b 1.5 false
+2 b 1.5 false
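----------------------------------------------------------------------
Conceptually, the vectorized struct IN() predicate shown in the plans above reduces to membership of a composite key in a set built once from the constant IN-list, evaluated column-at-a-time over the batch. A minimal sketch of that idea; the Key record and the inputs are illustrative stand-ins, not Hive's vectorized expression classes:

import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class StructInSketch {

  // Composite key standing in for struct(`id`, `lineid`).
  record Key(String id, String lineid) {}

  public static void main(String[] args) {
    // The constant IN-list is hashed once up front.
    Set<Key> inList = new HashSet<>(List.of(
        new Key("two", "3"), new Key("three", "1"), new Key("one", "1"),
        new Key("seven", "1"), new Key("ten", "1")));

    // Column-at-a-time evaluation: one boolean per row, as in the
    // "Execution mode: vectorized" plans above.
    String[] ids = {"one", "seven", "four"};
    String[] lineids = {"1", "1", "2"};
    boolean[] out = new boolean[ids.length];
    for (int row = 0; row < ids.length; row++) {
      out[row] = inList.contains(new Key(ids[row], lineids[row]));
    }
    System.out.println(Arrays.toString(out)); // [true, true, false]
  }
}
----------------------------------------------------------------------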
[28/50] [abbrv] hive git commit: Merge branch 'master' of
https://git-wip-us.apache.org/repos/asf/hive
Posted by xu...@apache.org.
Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/hive
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/451381cf
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/451381cf
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/451381cf
Branch: refs/heads/beeline-cli
Commit: 451381cfb28d1b9ff2775d55d92924956b3a5cdb
Parents: 0679078 5238303
Author: Dmitry Tolpeko <dm...@gmail.com>
Authored: Tue Sep 22 06:41:46 2015 -0700
Committer: Dmitry Tolpeko <dm...@gmail.com>
Committed: Tue Sep 22 06:41:46 2015 -0700
----------------------------------------------------------------------
.../apache/hadoop/hive/ant/QTestGenTask.java | 11 +
beeline/src/main/resources/beeline-log4j2.xml | 5 +-
bin/beeline | 5 +
bin/ext/hbaseimport.cmd | 35 +
bin/ext/hbaseimport.sh | 27 +
bin/ext/hbaseschematool.sh | 27 +
.../apache/hadoop/hive/common/ObjectPair.java | 5 +
.../hadoop/hive/common/jsonexplain/tez/Op.java | 8 +-
.../hive/common/jsonexplain/tez/Stage.java | 14 +-
.../common/jsonexplain/tez/TezJsonParser.java | 17 +-
.../org/apache/hadoop/hive/conf/HiveConf.java | 111 +-
.../apache/hadoop/hive/ql/log/PerfLogger.java | 196 +
.../apache/hive/common/util/BloomFilter.java | 20 +-
common/src/main/resources/hive-log4j2.xml | 5 +-
.../test/resources/hive-exec-log4j2-test.xml | 5 +-
common/src/test/resources/hive-log4j2-test.xml | 5 +-
data/conf/hive-log4j2.xml | 5 +-
data/conf/tez/hive-site.xml | 10 +
data/files/dynpartdata1.txt | 5 +
data/files/dynpartdata2.txt | 6 +
.../HiveHBaseTableSnapshotInputFormat.java | 21 +-
.../queries/positive/hbase_handler_snapshot.q | 4 +
.../positive/hbase_handler_snapshot.q.out | 22 +
.../deployers/config/hive/hive-log4j2.xml | 5 +-
.../svr/src/main/config/webhcat-log4j2.xml | 5 +-
.../benchmark/serde/LazySimpleSerDeBench.java | 453 +
.../hive/thrift/TestHadoop20SAuthBridge.java | 420 -
.../hive/thrift/TestHadoopAuthBridge23.java | 423 +
itests/hive-unit/pom.xml | 35 +
.../hadoop/hive/metastore/TestAdminUser.java | 4 +-
.../hive/metastore/TestHiveMetaStore.java | 3 +
.../metastore/hbase/HBaseIntegrationTests.java | 117 +
.../TestHBaseAggrStatsCacheIntegration.java | 691 +
.../hive/metastore/hbase/TestHBaseImport.java | 650 +
.../metastore/hbase/TestHBaseMetastoreSql.java | 223 +
.../hbase/TestHBaseStoreIntegration.java | 1794 +
.../hbase/TestStorageDescriptorSharing.java | 191 +
.../hive/ql/security/FolderPermissionBase.java | 17 +-
itests/qtest/pom.xml | 10 +-
.../test/resources/testconfiguration.properties | 3 +
itests/util/pom.xml | 32 +
.../metastore/hbase/HBaseStoreTestUtil.java | 45 +
.../org/apache/hadoop/hive/ql/QTestUtil.java | 50 +-
.../hive/jdbc/ZooKeeperHiveClientHelper.java | 32 +-
metastore/if/hive_metastore.thrift | 54 +
metastore/pom.xml | 82 +
.../metastore/hbase/HbaseMetastoreProto.java | 34901 +++++++++++++++++
.../gen/thrift/gen-cpp/ThriftHiveMetastore.cpp | 6919 ++--
.../gen/thrift/gen-cpp/ThriftHiveMetastore.h | 664 +
.../ThriftHiveMetastore_server.skeleton.cpp | 25 +
.../gen/thrift/gen-cpp/hive_metastore_types.cpp | 1294 +-
.../gen/thrift/gen-cpp/hive_metastore_types.h | 371 +
.../hive/metastore/api/AbortTxnRequest.java | 2 +-
.../metastore/api/AddDynamicPartitions.java | 2 +-
.../metastore/api/AddPartitionsRequest.java | 2 +-
.../hive/metastore/api/AddPartitionsResult.java | 2 +-
.../hadoop/hive/metastore/api/AggrStats.java | 2 +-
.../metastore/api/AlreadyExistsException.java | 2 +-
.../metastore/api/BinaryColumnStatsData.java | 2 +-
.../metastore/api/BooleanColumnStatsData.java | 2 +-
.../hive/metastore/api/CheckLockRequest.java | 2 +-
.../metastore/api/ClearFileMetadataRequest.java | 438 +
.../metastore/api/ClearFileMetadataResult.java | 283 +
.../hive/metastore/api/ColumnStatistics.java | 2 +-
.../metastore/api/ColumnStatisticsDesc.java | 2 +-
.../hive/metastore/api/ColumnStatisticsObj.java | 2 +-
.../hive/metastore/api/CommitTxnRequest.java | 2 +-
.../hive/metastore/api/CompactionRequest.java | 2 +-
.../api/ConfigValSecurityException.java | 2 +-
.../api/CurrentNotificationEventId.java | 2 +-
.../hadoop/hive/metastore/api/Database.java | 2 +-
.../apache/hadoop/hive/metastore/api/Date.java | 2 +-
.../hive/metastore/api/DateColumnStatsData.java | 2 +-
.../hadoop/hive/metastore/api/Decimal.java | 2 +-
.../metastore/api/DecimalColumnStatsData.java | 2 +-
.../metastore/api/DoubleColumnStatsData.java | 2 +-
.../hive/metastore/api/DropPartitionsExpr.java | 2 +-
.../metastore/api/DropPartitionsRequest.java | 2 +-
.../metastore/api/DropPartitionsResult.java | 2 +-
.../hive/metastore/api/EnvironmentContext.java | 2 +-
.../hadoop/hive/metastore/api/FieldSchema.java | 2 +-
.../hive/metastore/api/FireEventRequest.java | 2 +-
.../hive/metastore/api/FireEventResponse.java | 2 +-
.../hadoop/hive/metastore/api/Function.java | 2 +-
.../metastore/api/GetAllFunctionsResponse.java | 38 +-
.../api/GetFileMetadataByExprRequest.java | 548 +
.../api/GetFileMetadataByExprResult.java | 703 +
.../metastore/api/GetFileMetadataRequest.java | 438 +
.../metastore/api/GetFileMetadataResult.java | 540 +
.../metastore/api/GetOpenTxnsInfoResponse.java | 2 +-
.../hive/metastore/api/GetOpenTxnsResponse.java | 2 +-
.../api/GetPrincipalsInRoleRequest.java | 2 +-
.../api/GetPrincipalsInRoleResponse.java | 2 +-
.../api/GetRoleGrantsForPrincipalRequest.java | 2 +-
.../api/GetRoleGrantsForPrincipalResponse.java | 2 +-
.../api/GrantRevokePrivilegeRequest.java | 2 +-
.../api/GrantRevokePrivilegeResponse.java | 2 +-
.../metastore/api/GrantRevokeRoleRequest.java | 2 +-
.../metastore/api/GrantRevokeRoleResponse.java | 2 +-
.../hive/metastore/api/HeartbeatRequest.java | 2 +-
.../metastore/api/HeartbeatTxnRangeRequest.java | 2 +-
.../api/HeartbeatTxnRangeResponse.java | 2 +-
.../hive/metastore/api/HiveObjectPrivilege.java | 2 +-
.../hive/metastore/api/HiveObjectRef.java | 2 +-
.../apache/hadoop/hive/metastore/api/Index.java | 2 +-
.../api/IndexAlreadyExistsException.java | 2 +-
.../metastore/api/InsertEventRequestData.java | 2 +-
.../metastore/api/InvalidInputException.java | 2 +-
.../metastore/api/InvalidObjectException.java | 2 +-
.../api/InvalidOperationException.java | 2 +-
.../api/InvalidPartitionException.java | 2 +-
.../hive/metastore/api/LockComponent.java | 2 +-
.../hadoop/hive/metastore/api/LockRequest.java | 2 +-
.../hadoop/hive/metastore/api/LockResponse.java | 2 +-
.../hive/metastore/api/LongColumnStatsData.java | 2 +-
.../hive/metastore/api/MetaException.java | 2 +-
.../hive/metastore/api/MetadataPpdResult.java | 508 +
.../hive/metastore/api/NoSuchLockException.java | 2 +-
.../metastore/api/NoSuchObjectException.java | 2 +-
.../hive/metastore/api/NoSuchTxnException.java | 2 +-
.../hive/metastore/api/NotificationEvent.java | 2 +-
.../metastore/api/NotificationEventRequest.java | 2 +-
.../api/NotificationEventResponse.java | 2 +-
.../hive/metastore/api/OpenTxnRequest.java | 2 +-
.../hive/metastore/api/OpenTxnsResponse.java | 2 +-
.../apache/hadoop/hive/metastore/api/Order.java | 2 +-
.../hadoop/hive/metastore/api/Partition.java | 2 +-
.../api/PartitionListComposingSpec.java | 2 +-
.../hive/metastore/api/PartitionSpec.java | 2 +-
.../api/PartitionSpecWithSharedSD.java | 2 +-
.../hive/metastore/api/PartitionWithoutSD.java | 2 +-
.../metastore/api/PartitionsByExprRequest.java | 2 +-
.../metastore/api/PartitionsByExprResult.java | 2 +-
.../metastore/api/PartitionsStatsRequest.java | 2 +-
.../metastore/api/PartitionsStatsResult.java | 2 +-
.../metastore/api/PrincipalPrivilegeSet.java | 2 +-
.../hadoop/hive/metastore/api/PrivilegeBag.java | 2 +-
.../hive/metastore/api/PrivilegeGrantInfo.java | 2 +-
.../metastore/api/PutFileMetadataRequest.java | 588 +
.../metastore/api/PutFileMetadataResult.java | 283 +
.../hadoop/hive/metastore/api/ResourceUri.java | 2 +-
.../apache/hadoop/hive/metastore/api/Role.java | 2 +-
.../hive/metastore/api/RolePrincipalGrant.java | 2 +-
.../hadoop/hive/metastore/api/Schema.java | 2 +-
.../hadoop/hive/metastore/api/SerDeInfo.java | 2 +-
.../api/SetPartitionsStatsRequest.java | 2 +-
.../hive/metastore/api/ShowCompactRequest.java | 2 +-
.../hive/metastore/api/ShowCompactResponse.java | 2 +-
.../api/ShowCompactResponseElement.java | 2 +-
.../hive/metastore/api/ShowLocksRequest.java | 2 +-
.../hive/metastore/api/ShowLocksResponse.java | 2 +-
.../metastore/api/ShowLocksResponseElement.java | 2 +-
.../hadoop/hive/metastore/api/SkewedInfo.java | 2 +-
.../hive/metastore/api/StorageDescriptor.java | 2 +-
.../metastore/api/StringColumnStatsData.java | 2 +-
.../apache/hadoop/hive/metastore/api/Table.java | 2 +-
.../hive/metastore/api/TableStatsRequest.java | 2 +-
.../hive/metastore/api/TableStatsResult.java | 2 +-
.../hive/metastore/api/ThriftHiveMetastore.java | 8422 ++--
.../hive/metastore/api/TxnAbortedException.java | 2 +-
.../hadoop/hive/metastore/api/TxnInfo.java | 2 +-
.../hive/metastore/api/TxnOpenException.java | 2 +-
.../apache/hadoop/hive/metastore/api/Type.java | 2 +-
.../hive/metastore/api/UnknownDBException.java | 2 +-
.../api/UnknownPartitionException.java | 2 +-
.../metastore/api/UnknownTableException.java | 2 +-
.../hive/metastore/api/UnlockRequest.java | 2 +-
.../hadoop/hive/metastore/api/Version.java | 2 +-
.../gen-php/metastore/ThriftHiveMetastore.php | 2810 +-
.../src/gen/thrift/gen-php/metastore/Types.php | 1009 +-
.../hive_metastore/ThriftHiveMetastore-remote | 49 +-
.../hive_metastore/ThriftHiveMetastore.py | 1563 +-
.../gen/thrift/gen-py/hive_metastore/ttypes.py | 734 +-
.../gen/thrift/gen-rb/hive_metastore_types.rb | 167 +
.../gen/thrift/gen-rb/thrift_hive_metastore.rb | 267 +
.../hadoop/hive/metastore/HiveAlterHandler.java | 38 +-
.../hadoop/hive/metastore/HiveMetaStore.java | 272 +-
.../hive/metastore/HiveMetaStoreClient.java | 112 +-
.../hadoop/hive/metastore/IMetaStoreClient.java | 6 +
.../hadoop/hive/metastore/ObjectStore.java | 437 +-
.../hive/metastore/PartFilterExprUtil.java | 149 +
.../apache/hadoop/hive/metastore/RawStore.java | 66 +-
.../hadoop/hive/metastore/RawStoreProxy.java | 5 +-
.../hive/metastore/RetryingHMSHandler.java | 33 +-
.../hbase/AggrStatsInvalidatorFilter.java | 121 +
.../hadoop/hive/metastore/hbase/Counter.java | 53 +
.../hive/metastore/hbase/HBaseConnection.java | 96 +
.../metastore/hbase/HBaseFilterPlanUtil.java | 612 +
.../hive/metastore/hbase/HBaseImport.java | 535 +
.../hive/metastore/hbase/HBaseReadWrite.java | 2106 +
.../hive/metastore/hbase/HBaseSchemaTool.java | 239 +
.../hadoop/hive/metastore/hbase/HBaseStore.java | 2387 ++
.../hadoop/hive/metastore/hbase/HBaseUtils.java | 1340 +
.../hive/metastore/hbase/ObjectCache.java | 81 +
.../hive/metastore/hbase/PartitionCache.java | 168 +
.../metastore/hbase/PartitionKeyComparator.java | 292 +
.../hbase/SharedStorageDescriptor.java | 251 +
.../hadoop/hive/metastore/hbase/StatsCache.java | 326 +
.../metastore/hbase/TephraHBaseConnection.java | 127 +
.../metastore/hbase/VanillaHBaseConnection.java | 137 +
.../stats/BinaryColumnStatsAggregator.java | 35 +
.../stats/BooleanColumnStatsAggregator.java | 35 +
.../hbase/stats/ColumnStatsAggregator.java | 26 +
.../stats/ColumnStatsAggregatorFactory.java | 94 +
.../stats/DecimalColumnStatsAggregator.java | 43 +
.../stats/DoubleColumnStatsAggregator.java | 36 +
.../hbase/stats/LongColumnStatsAggregator.java | 36 +
.../stats/StringColumnStatsAggregator.java | 36 +
.../hive/metastore/parser/ExpressionTree.java | 9 +-
.../hive/metastore/tools/HiveMetaTool.java | 5 +
.../hadoop/hive/metastore/txn/TxnHandler.java | 61 +-
.../metastore/hbase/hbase_metastore_proto.proto | 282 +
.../DummyRawStoreControlledCommit.java | 56 +-
.../DummyRawStoreForJdoConnection.java | 50 +-
.../hadoop/hive/metastore/TestObjectStore.java | 43 +-
.../hadoop/hive/metastore/hbase/MockUtils.java | 199 +
.../hbase/TestHBaseAggregateStatsCache.java | 316 +
.../hbase/TestHBaseFilterPlanUtil.java | 483 +
.../hive/metastore/hbase/TestHBaseStore.java | 1307 +
.../metastore/hbase/TestHBaseStoreCached.java | 378 +
.../hbase/TestSharedStorageDescriptor.java | 153 +
pom.xml | 25 +-
.../hadoop/hive/ql/plan/api/Adjacency.java | 2 +-
.../apache/hadoop/hive/ql/plan/api/Graph.java | 2 +-
.../hadoop/hive/ql/plan/api/Operator.java | 2 +-
.../apache/hadoop/hive/ql/plan/api/Query.java | 2 +-
.../hadoop/hive/ql/plan/api/QueryPlan.java | 2 +-
.../apache/hadoop/hive/ql/plan/api/Stage.java | 2 +-
.../apache/hadoop/hive/ql/plan/api/Task.java | 2 +-
.../java/org/apache/hadoop/hive/ql/Driver.java | 17 +-
.../org/apache/hadoop/hive/ql/exec/DDLTask.java | 333 +-
.../apache/hadoop/hive/ql/exec/ExplainTask.java | 24 +-
.../hadoop/hive/ql/exec/KeyWrapperFactory.java | 4 +
.../hadoop/hive/ql/exec/MapJoinOperator.java | 8 +-
.../apache/hadoop/hive/ql/exec/MoveTask.java | 31 +-
.../apache/hadoop/hive/ql/exec/Operator.java | 3 +-
.../ql/exec/SparkHashTableSinkOperator.java | 3 +-
.../hadoop/hive/ql/exec/StatsNoJobTask.java | 25 +-
.../apache/hadoop/hive/ql/exec/StatsTask.java | 13 +-
.../apache/hadoop/hive/ql/exec/Utilities.java | 11 +-
.../persistence/BytesBytesMultiHashMap.java | 11 +-
.../persistence/HybridHashTableContainer.java | 68 +-
.../ql/exec/persistence/PTFRowContainer.java | 14 +-
.../hive/ql/exec/persistence/RowContainer.java | 12 +-
.../hadoop/hive/ql/exec/spark/SparkPlan.java | 3 +-
.../hive/ql/exec/spark/SparkPlanGenerator.java | 3 +-
.../hive/ql/exec/spark/SparkRecordHandler.java | 3 +-
.../hadoop/hive/ql/exec/spark/SparkTask.java | 2 +-
.../ql/exec/spark/status/SparkJobMonitor.java | 2 +-
.../hadoop/hive/ql/exec/tez/DagUtils.java | 3 +
.../hive/ql/exec/tez/HashTableLoader.java | 7 +-
.../hadoop/hive/ql/exec/tez/InPlaceUpdates.java | 65 +
.../hive/ql/exec/tez/RecordProcessor.java | 3 +-
.../hive/ql/exec/tez/ReduceRecordProcessor.java | 1 -
.../hive/ql/exec/tez/ReduceRecordSource.java | 3 +-
.../hadoop/hive/ql/exec/tez/TezJobMonitor.java | 70 +-
.../hadoop/hive/ql/exec/tez/TezProcessor.java | 3 +-
.../apache/hadoop/hive/ql/exec/tez/TezTask.java | 20 +-
.../ql/exec/tez/tools/KeyValuesInputMerger.java | 1 -
.../ql/exec/vector/VectorizationContext.java | 10 +-
.../apache/hadoop/hive/ql/hooks/ATSHook.java | 9 +-
.../hadoop/hive/ql/hooks/LineageLogger.java | 95 +-
.../hive/ql/io/CombineHiveInputFormat.java | 10 +-
.../hadoop/hive/ql/io/HiveInputFormat.java | 5 +-
.../hadoop/hive/ql/io/orc/OrcInputFormat.java | 151 +-
.../apache/hadoop/hive/ql/io/orc/OrcSerde.java | 1 +
.../apache/hadoop/hive/ql/io/orc/OrcStruct.java | 2 +-
.../hadoop/hive/ql/io/orc/RecordReaderImpl.java | 4 +-
.../hive/ql/io/parquet/ProjectionPusher.java | 3 +-
.../serde/ParquetHiveArrayInspector.java | 12 +
.../ql/io/parquet/timestamp/NanoTimeUtils.java | 23 +-
.../hive/ql/io/sarg/ConvertAstToSearchArg.java | 4 +
.../apache/hadoop/hive/ql/lib/RuleRegExp.java | 61 +-
.../apache/hadoop/hive/ql/log/PerfLogger.java | 195 -
.../apache/hadoop/hive/ql/metadata/Hive.java | 34 +-
.../hadoop/hive/ql/metadata/Partition.java | 29 +-
.../ql/optimizer/ColumnPrunerProcFactory.java | 3 +
.../hive/ql/optimizer/ConvertJoinMapJoin.java | 22 +-
.../hive/ql/optimizer/GenMapRedUtils.java | 57 +-
.../ql/optimizer/ReduceSinkMapJoinProc.java | 19 +-
.../calcite/reloperators/HiveBetween.java | 75 +
.../optimizer/calcite/reloperators/HiveIn.java | 41 +
.../calcite/reloperators/HiveLimit.java | 57 -
.../calcite/reloperators/HiveSort.java | 110 -
.../calcite/reloperators/HiveSortLimit.java | 110 +
.../rules/HiveAggregateProjectMergeRule.java | 151 +
.../calcite/rules/HivePreFilteringRule.java | 37 +-
.../calcite/rules/HiveRelFieldTrimmer.java | 145 +-
.../calcite/stats/HiveRelMdMemory.java | 9 +-
.../calcite/stats/HiveRelMdParallelism.java | 4 +-
.../calcite/translator/ASTConverter.java | 24 +-
.../calcite/translator/HiveOpConverter.java | 8 +-
.../translator/PlanModifierForASTConv.java | 14 +-
.../translator/PlanModifierForReturnPath.java | 4 -
.../calcite/translator/PlanModifierUtil.java | 4 +-
.../translator/SqlFunctionConverter.java | 16 +-
.../hive/ql/optimizer/lineage/LineageCtx.java | 8 +-
.../hive/ql/optimizer/ppr/PartitionPruner.java | 7 +-
.../hadoop/hive/ql/parse/CalcitePlanner.java | 28 +-
.../hive/ql/parse/DDLSemanticAnalyzer.java | 17 +
.../apache/hadoop/hive/ql/parse/HiveParser.g | 9 +-
.../apache/hadoop/hive/ql/parse/QBSubQuery.java | 7 -
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 4 +-
.../hive/ql/parse/SemanticAnalyzerFactory.java | 2 +
.../hadoop/hive/ql/parse/SubQueryUtils.java | 11 -
.../hive/ql/parse/spark/SparkCompiler.java | 3 +-
.../org/apache/hadoop/hive/ql/plan/DDLWork.java | 21 +
.../hadoop/hive/ql/plan/HiveOperation.java | 1 +
.../hive/ql/plan/ShowCreateDatabaseDesc.java | 94 +
.../AuthorizationPreEventListener.java | 2 +-
.../authorization/plugin/HiveOperationType.java | 1 +
.../plugin/sqlstd/Operation2Privilege.java | 2 +
.../sqlstd/SQLStdHiveAccessController.java | 5 +
.../hadoop/hive/ql/session/SessionState.java | 37 +-
.../apache/hadoop/hive/ql/stats/StatsUtils.java | 87 +-
.../org/apache/hadoop/hive/ql/udf/UDFJson.java | 2 +
.../hive/ql/udf/generic/GenericUDAFMax.java | 16 +-
ql/src/main/resources/hive-exec-log4j2.xml | 5 +-
ql/src/main/resources/tez-container-log4j2.xml | 5 +-
.../hive/metastore/TestMetastoreExpr.java | 2 +-
.../persistence/TestBytesBytesMultiHashMap.java | 3 +
.../ql/exec/persistence/TestHashPartition.java | 29 +
.../exec/persistence/TestPTFRowContainer.java | 31 +-
.../hadoop/hive/ql/io/orc/TestOrcStruct.java | 2 +
.../serde/TestParquetTimestampUtils.java | 38 +-
...nMapRedUtilsUsePartitionColumnsNegative.java | 73 +
...nMapRedUtilsUsePartitionColumnsPositive.java | 61 +
.../authorization_set_show_current_role.q | 3 +
.../clientpositive/bucket_map_join_tez1.q | 31 +
.../queries/clientpositive/cbo_rp_auto_join17.q | 14 +
.../cbo_rp_cross_product_check_2.q | 31 +
.../clientpositive/drop_table_with_index.q | 35 +
.../test/queries/clientpositive/dynpart_merge.q | 28 +
.../dynpart_sort_opt_vectorization.q | 2 +
.../clientpositive/dynpart_sort_optimization.q | 2 +
.../queries/clientpositive/exchgpartition2lel.q | 32 +
ql/src/test/queries/clientpositive/lineage3.q | 26 +
.../test/queries/clientpositive/load_orc_part.q | 5 +
.../parquet_mixed_partition_formats.q | 42 +
.../clientpositive/parquet_ppd_boolean.q | 42 +-
.../queries/clientpositive/parquet_ppd_char.q | 46 +-
.../queries/clientpositive/parquet_ppd_date.q | 64 +-
.../clientpositive/parquet_ppd_decimal.q | 106 +-
.../clientpositive/parquet_ppd_timestamp.q | 62 +-
.../clientpositive/parquet_ppd_varchar.q | 46 +-
.../clientpositive/parquet_predicate_pushdown.q | 20 +-
.../clientpositive/show_create_database.q | 3 +
.../queries/clientpositive/subquery_views.q | 22 +-
.../queries/clientpositive/union_fast_stats.q | 68 +
.../queries/clientpositive/vector_char_cast.q | 9 +
.../queries/clientpositive/windowing_udaf.q | 4 +
.../subquery_exists_implicit_gby.q.out | 8 +-
.../subquery_nested_subquery.q.out | 4 +-
.../subquery_notexists_implicit_gby.q.out | 8 +-
.../subquery_windowing_corr.q.out | 7 +-
.../alter_partition_coltype.q.out | 8 +-
.../clientpositive/annotate_stats_groupby.q.out | 106 +-
.../annotate_stats_groupby2.q.out | 28 +-
.../authorization_explain.q.java1.7.out | 2 +-
.../authorization_explain.q.java1.8.out | 2 +-
.../authorization_set_show_current_role.q.out | 8 +
.../results/clientpositive/auto_join18.q.out | 12 +-
.../auto_join18_multi_distinct.q.out | 12 +-
.../results/clientpositive/auto_join27.q.out | 18 +-
.../results/clientpositive/auto_join32.q.out | 4 +-
.../clientpositive/binarysortable_1.q.out | Bin 4329 -> 4325 bytes
.../clientpositive/cbo_rp_auto_join17.q.out | 118 +
.../cbo_rp_cross_product_check_2.q.out | 699 +
.../clientpositive/correlationoptimizer2.q.out | 220 +-
.../clientpositive/correlationoptimizer6.q.out | 232 +-
ql/src/test/results/clientpositive/count.q.out | 14 +-
.../results/clientpositive/ctas_colname.q.out | 52 +-
.../test/results/clientpositive/database.q.out | 2 +-
.../clientpositive/decimal_precision.q.out | 4 +-
.../results/clientpositive/decimal_udf.q.out | 30 +-
.../results/clientpositive/distinct_stats.q.out | 14 +-
.../clientpositive/drop_table_with_index.q.out | 152 +
.../results/clientpositive/dynpart_merge.q.out | 99 +
.../dynpart_sort_opt_vectorization.q.out | 117 +-
.../dynpart_sort_optimization.q.out | 117 +-
...ryption_select_read_only_encrypted_tbl.q.out | 4 +-
.../clientpositive/exchgpartition2lel.q.out | 182 +
.../clientpositive/explain_dependency.q.out | 18 +-
.../clientpositive/explain_dependency2.q.out | 16 +-
.../clientpositive/explain_logical.q.out | 78 +-
.../clientpositive/fetch_aggregation.q.out | 4 +-
.../test/results/clientpositive/gby_star.q.out | 54 +-
.../test/results/clientpositive/groupby12.q.out | 6 +-
.../results/clientpositive/groupby5_map.q.out | 4 +-
.../clientpositive/groupby5_map_skew.q.out | 4 +-
.../results/clientpositive/groupby_cube1.q.out | 12 +-
.../groupby_distinct_samekey.q.out | 6 +-
.../clientpositive/groupby_grouping_sets2.q.out | 10 +-
.../clientpositive/groupby_grouping_sets3.q.out | 12 +-
.../clientpositive/groupby_grouping_sets5.q.out | 8 +-
.../clientpositive/groupby_grouping_sets6.q.out | 8 +-
.../clientpositive/groupby_position.q.out | 36 +-
.../clientpositive/groupby_resolution.q.out | 60 +-
.../clientpositive/groupby_rollup1.q.out | 12 +-
.../clientpositive/groupby_sort_10.q.out | 8 +-
.../clientpositive/groupby_sort_11.q.out | 10 +-
.../results/clientpositive/groupby_sort_8.q.out | 12 +-
ql/src/test/results/clientpositive/having.q.out | 62 +-
.../test/results/clientpositive/having2.q.out | 12 +-
.../clientpositive/index_auto_mult_tables.q.out | 12 +-
.../clientpositive/index_auto_self_join.q.out | 12 +-
.../clientpositive/index_auto_update.q.out | 6 +-
.../index_bitmap_auto_partitioned.q.out | 6 +-
.../index_bitmap_compression.q.out | 6 +-
.../infer_bucket_sort_dyn_part.q.out | 4 +-
.../infer_bucket_sort_map_operators.q.out | 4 +-
.../results/clientpositive/input4.q.java1.7.out | 2 +-
.../results/clientpositive/input4.q.java1.8.out | 2 +-
.../results/clientpositive/join0.q.java1.7.out | 2 +-
.../results/clientpositive/join0.q.java1.8.out | 4 +-
ql/src/test/results/clientpositive/join18.q.out | 12 +-
.../clientpositive/join18_multi_distinct.q.out | 12 +-
ql/src/test/results/clientpositive/join31.q.out | 36 +-
.../limit_partition_metadataonly.q.out | 4 +-
.../results/clientpositive/limit_pushdown.q.out | 36 +-
.../test/results/clientpositive/lineage2.q.out | 2 +-
.../test/results/clientpositive/lineage3.q.out | 72 +-
.../list_bucket_dml_6.q.java1.7.out | 12 +-
.../list_bucket_dml_6.q.java1.8.out | 12 +-
.../clientpositive/list_bucket_dml_7.q.out | 12 +-
.../list_bucket_query_multiskew_3.q.out | 2 +-
.../results/clientpositive/load_orc_part.q.out | 26 +
.../clientpositive/mapjoin_mapjoin.q.out | 32 +-
.../clientpositive/metadata_only_queries.q.out | 4 +-
.../results/clientpositive/metadataonly1.q.out | 112 +-
.../results/clientpositive/multiMapJoin2.q.out | 226 +-
.../nonblock_op_deduplicate.q.out | 8 +-
.../results/clientpositive/nonmr_fetch.q.out | 14 +-
.../results/clientpositive/parallel_join0.q.out | 2 +-
.../parquet_mixed_partition_formats.q.out | 303 +
.../clientpositive/parquet_ppd_boolean.q.out | 194 +-
.../clientpositive/parquet_ppd_char.q.out | 224 +-
.../clientpositive/parquet_ppd_date.q.out | 324 +-
.../clientpositive/parquet_ppd_decimal.q.out | 594 +-
.../clientpositive/parquet_ppd_timestamp.q.out | 314 +-
.../clientpositive/parquet_ppd_varchar.q.out | 224 +-
.../parquet_predicate_pushdown.q.out | 118 +-
.../clientpositive/partition_multilevels.q.out | 8 +-
.../clientpositive/plan_json.q.java1.7.out | 2 +-
.../clientpositive/plan_json.q.java1.8.out | 2 +-
.../test/results/clientpositive/ppd_gby.q.out | 12 +-
.../test/results/clientpositive/ppd_gby2.q.out | 60 +-
.../clientpositive/ppd_join_filter.q.out | 98 +-
.../ql_rewrite_gbtoidx_cbo_1.q.out | 168 +-
.../ql_rewrite_gbtoidx_cbo_2.q.out | 94 +-
.../reduce_deduplicate_extended.q.out | 32 +-
.../clientpositive/selectDistinctStar.q.out | 44 +-
.../clientpositive/show_create_database.q.out | 19 +
.../clientpositive/spark/auto_join18.q.out | 10 +-
.../spark/auto_join18_multi_distinct.q.out | 12 +-
.../clientpositive/spark/auto_join27.q.out | 18 +-
.../clientpositive/spark/auto_join32.q.out | 53 +-
.../spark/bucket_map_join_tez1.q.out | 357 +
.../results/clientpositive/spark/count.q.out | 14 +-
.../clientpositive/spark/groupby5_map.q.out | 4 +-
.../spark/groupby5_map_skew.q.out | 4 +-
.../clientpositive/spark/groupby_cube1.q.out | 12 +-
.../clientpositive/spark/groupby_position.q.out | 18 +-
.../spark/groupby_resolution.q.out | 60 +-
.../clientpositive/spark/groupby_rollup1.q.out | 12 +-
.../results/clientpositive/spark/having.q.out | 62 +-
.../spark/infer_bucket_sort_map_operators.q.out | 4 +-
.../results/clientpositive/spark/join18.q.out | 10 +-
.../spark/join18_multi_distinct.q.out | 12 +-
.../results/clientpositive/spark/join31.q.out | 36 +-
.../spark/limit_partition_metadataonly.q.out | 4 +-
.../clientpositive/spark/limit_pushdown.q.out | 34 +-
.../clientpositive/spark/mapjoin_mapjoin.q.out | 24 +-
.../spark/metadata_only_queries.q.out | 4 +-
.../clientpositive/spark/ppd_join_filter.q.out | 90 +-
.../spark/ql_rewrite_gbtoidx_cbo_1.q.out | 168 +-
.../clientpositive/spark/stats_only_null.q.out | 8 +-
.../clientpositive/spark/subquery_in.q.out | 36 +-
.../results/clientpositive/spark/union11.q.out | 42 +-
.../results/clientpositive/spark/union14.q.out | 28 +-
.../results/clientpositive/spark/union15.q.out | 28 +-
.../results/clientpositive/spark/union28.q.out | 4 +-
.../results/clientpositive/spark/union30.q.out | 4 +-
.../results/clientpositive/spark/union33.q.out | 8 +-
.../results/clientpositive/spark/union5.q.out | 34 +-
.../results/clientpositive/spark/union7.q.out | 28 +-
.../clientpositive/spark/union_remove_21.q.out | 4 +-
.../spark/vector_count_distinct.q.out | 4 +-
.../spark/vector_decimal_aggregate.q.out | 12 +-
.../spark/vector_distinct_2.q.out | 28 +-
.../clientpositive/spark/vector_groupby_3.q.out | 30 +-
.../spark/vector_mapjoin_reduce.q.out | 36 +-
.../clientpositive/spark/vector_orderby_5.q.out | 6 +-
.../clientpositive/spark/vectorization_0.q.out | 16 +-
.../clientpositive/spark/vectorization_13.q.out | 32 +-
.../clientpositive/spark/vectorization_15.q.out | 16 +-
.../clientpositive/spark/vectorization_16.q.out | 16 +-
.../clientpositive/spark/vectorization_9.q.out | 16 +-
.../spark/vectorization_pushdown.q.out | 4 +-
.../spark/vectorization_short_regress.q.out | 74 +-
.../spark/vectorized_nested_mapjoin.q.out | 18 +-
.../spark/vectorized_timestamp_funcs.q.out | 12 +-
.../clientpositive/stats_only_null.q.out | 8 +-
.../results/clientpositive/stats_ppr_all.q.out | 16 +-
.../subq_where_serialization.q.out | 18 +-
.../clientpositive/subquery_exists_having.q.out | 48 +-
.../results/clientpositive/subquery_in.q.out | 36 +-
.../clientpositive/subquery_in_having.q.out | 260 +-
.../clientpositive/subquery_notexists.q.out | 18 +-
.../subquery_notexists_having.q.out | 26 +-
.../results/clientpositive/subquery_notin.q.out | 24 +-
.../subquery_notin_having.q.java1.7.out | 50 +-
.../subquery_unqualcolumnrefs.q.out | 74 +-
.../results/clientpositive/subquery_views.q.out | 124 +-
.../tez/bucket_map_join_tez1.q.out | 333 +
.../clientpositive/tez/constprog_dpp.q.out | 4 +-
.../test/results/clientpositive/tez/count.q.out | 14 +-
.../tez/dynamic_partition_pruning.q.out | 88 +-
.../tez/dynpart_sort_opt_vectorization.q.out | 102 +-
.../tez/dynpart_sort_optimization.q.out | 101 +-
.../clientpositive/tez/explainuser_1.q.out | 2799 +-
.../clientpositive/tez/explainuser_2.q.out | 4004 +-
.../clientpositive/tez/explainuser_3.q.out | 10 +-
.../results/clientpositive/tez/having.q.out | 62 +-
.../clientpositive/tez/limit_pushdown.q.out | 34 +-
.../clientpositive/tez/mapjoin_mapjoin.q.out | 24 +-
.../tez/metadata_only_queries.q.out | 4 +-
.../clientpositive/tez/metadataonly1.q.out | 44 +-
.../test/results/clientpositive/tez/mrr.q.out | 94 +-
.../clientpositive/tez/selectDistinctStar.q.out | 44 +-
.../tez/show_create_database.q.out | 19 +
.../clientpositive/tez/stats_only_null.q.out | 8 +-
.../clientpositive/tez/subquery_in.q.out | 36 +-
.../results/clientpositive/tez/tez_dml.q.out | 6 +-
.../results/clientpositive/tez/union5.q.out | 44 +-
.../results/clientpositive/tez/union7.q.out | 28 +-
.../clientpositive/tez/unionDistinct_1.q.out | 8 +-
.../clientpositive/tez/union_fast_stats.q.out | 526 +
.../clientpositive/tez/vector_aggregate_9.q.out | 4 +-
.../tez/vector_binary_join_groupby.q.out | 4 +-
.../clientpositive/tez/vector_char_cast.q.out | 35 +
.../tez/vector_count_distinct.q.out | 4 +-
.../tez/vector_decimal_aggregate.q.out | 12 +-
.../tez/vector_decimal_precision.q.out | 4 +-
.../clientpositive/tez/vector_decimal_udf.q.out | 30 +-
.../clientpositive/tez/vector_distinct_2.q.out | 28 +-
.../clientpositive/tez/vector_groupby_3.q.out | 30 +-
.../tez/vector_groupby_reduce.q.out | 8 +-
.../tez/vector_grouping_sets.q.out | 8 +-
.../tez/vector_mapjoin_reduce.q.out | 36 +-
.../clientpositive/tez/vector_orderby_5.q.out | 6 +-
.../clientpositive/tez/vector_outer_join2.q.out | 20 +-
.../tez/vector_partition_diff_num_cols.q.out | 20 +-
.../tez/vector_partitioned_date_time.q.out | 12 +-
.../tez/vector_reduce_groupby_decimal.q.out | 24 +-
.../clientpositive/tez/vectorization_0.q.out | 16 +-
.../clientpositive/tez/vectorization_13.q.out | 32 +-
.../clientpositive/tez/vectorization_15.q.out | 16 +-
.../clientpositive/tez/vectorization_16.q.out | 16 +-
.../clientpositive/tez/vectorization_9.q.out | 16 +-
.../tez/vectorization_limit.q.out | 14 +-
.../tez/vectorization_pushdown.q.out | 4 +-
.../tez/vectorization_short_regress.q.out | 74 +-
.../tez/vectorized_distinct_gby.q.out | 8 +-
.../vectorized_dynamic_partition_pruning.q.out | 88 +-
.../tez/vectorized_nested_mapjoin.q.out | 18 +-
.../clientpositive/tez/vectorized_parquet.q.out | 6 +-
.../tez/vectorized_timestamp_funcs.q.out | 12 +-
ql/src/test/results/clientpositive/udf8.q.out | 4 +-
.../test/results/clientpositive/udf_count.q.out | 16 +-
.../test/results/clientpositive/union11.q.out | 70 +-
.../test/results/clientpositive/union14.q.out | 32 +-
.../test/results/clientpositive/union15.q.out | 38 +-
.../test/results/clientpositive/union28.q.out | 8 +-
.../test/results/clientpositive/union30.q.out | 8 +-
.../test/results/clientpositive/union33.q.out | 8 +-
ql/src/test/results/clientpositive/union5.q.out | 48 +-
ql/src/test/results/clientpositive/union7.q.out | 32 +-
.../clientpositive/unionDistinct_1.q.out | 8 +-
.../clientpositive/union_fast_stats.q.out | 526 +
.../clientpositive/union_remove_21.q.out | 8 +-
.../clientpositive/vector_aggregate_9.q.out | 4 +-
.../vector_aggregate_without_gby.q.out | 4 +-
.../vector_binary_join_groupby.q.out | 4 +-
.../clientpositive/vector_char_cast.q.out | 35 +
.../clientpositive/vector_count_distinct.q.out | 6 +-
.../vector_decimal_aggregate.q.out | 12 +-
.../vector_decimal_precision.q.out | 4 +-
.../clientpositive/vector_decimal_udf.q.out | 30 +-
.../clientpositive/vector_distinct_2.q.out | 28 +-
.../clientpositive/vector_groupby_3.q.out | 30 +-
.../clientpositive/vector_groupby_reduce.q.out | 8 +-
.../clientpositive/vector_grouping_sets.q.out | 8 +-
.../clientpositive/vector_left_outer_join.q.out | 8 +-
.../clientpositive/vector_mapjoin_reduce.q.out | 36 +-
.../clientpositive/vector_orderby_5.q.out | 6 +-
.../clientpositive/vector_outer_join1.q.out | 8 +-
.../clientpositive/vector_outer_join2.q.out | 28 +-
.../clientpositive/vector_outer_join3.q.out | 24 +-
.../clientpositive/vector_outer_join4.q.out | 8 +-
.../clientpositive/vector_outer_join5.q.out | 48 +-
.../vector_partition_diff_num_cols.q.out | 20 +-
.../vector_partitioned_date_time.q.out | 12 +-
.../vector_reduce_groupby_decimal.q.out | 24 +-
.../clientpositive/vectorization_0.q.out | 16 +-
.../clientpositive/vectorization_13.q.out | 32 +-
.../clientpositive/vectorization_15.q.out | 16 +-
.../clientpositive/vectorization_16.q.out | 16 +-
.../clientpositive/vectorization_9.q.out | 16 +-
.../clientpositive/vectorization_limit.q.out | 16 +-
.../clientpositive/vectorization_pushdown.q.out | 4 +-
.../vectorization_short_regress.q.out | 74 +-
.../vectorized_distinct_gby.q.out | 12 +-
.../vectorized_nested_mapjoin.q.out | 26 +-
.../clientpositive/vectorized_parquet.q.out | 6 +-
.../vectorized_parquet_types.q.out | 6 +-
.../vectorized_timestamp_funcs.q.out | 12 +-
.../results/clientpositive/windowing_udaf.q.out | 12 +
ql/src/test/templates/TestCliDriver.vm | 3 +-
.../hadoop/hive/serde/test/InnerStruct.java | 2 +-
.../hadoop/hive/serde/test/ThriftTestObj.java | 2 +-
.../hadoop/hive/serde2/thrift/test/Complex.java | 2 +-
.../hive/serde2/thrift/test/IntString.java | 2 +-
.../hive/serde2/thrift/test/MegaStruct.java | 2 +-
.../hive/serde2/thrift/test/MiniStruct.java | 2 +-
.../hive/serde2/thrift/test/SetIntString.java | 2 +-
.../hive/serde2/ColumnProjectionUtils.java | 22 +
.../apache/hadoop/hive/serde2/WriteBuffers.java | 10 +-
.../BinarySortableSerDeWithEndPrefix.java | 41 +
.../hadoop/hive/serde2/lazy/LazyByte.java | 4 +
.../hadoop/hive/serde2/lazy/LazyDouble.java | 4 +
.../hadoop/hive/serde2/lazy/LazyFloat.java | 4 +
.../hadoop/hive/serde2/lazy/LazyInteger.java | 4 +
.../hadoop/hive/serde2/lazy/LazyLong.java | 4 +
.../hadoop/hive/serde2/lazy/LazyShort.java | 4 +
.../hadoop/hive/serde2/lazy/LazyUtils.java | 28 +
.../hadoop/hive/service/HiveClusterStatus.java | 2 +-
.../hive/service/HiveServerException.java | 2 +-
.../apache/hadoop/hive/service/ThriftHive.java | 2 +-
.../service/cli/thrift/TArrayTypeEntry.java | 2 +-
.../hive/service/cli/thrift/TBinaryColumn.java | 2 +-
.../hive/service/cli/thrift/TBoolColumn.java | 2 +-
.../hive/service/cli/thrift/TBoolValue.java | 2 +-
.../hive/service/cli/thrift/TByteColumn.java | 2 +-
.../hive/service/cli/thrift/TByteValue.java | 2 +-
.../hive/service/cli/thrift/TCLIService.java | 2 +-
.../cli/thrift/TCancelDelegationTokenReq.java | 2 +-
.../cli/thrift/TCancelDelegationTokenResp.java | 2 +-
.../service/cli/thrift/TCancelOperationReq.java | 2 +-
.../cli/thrift/TCancelOperationResp.java | 2 +-
.../service/cli/thrift/TCloseOperationReq.java | 2 +-
.../service/cli/thrift/TCloseOperationResp.java | 2 +-
.../service/cli/thrift/TCloseSessionReq.java | 2 +-
.../service/cli/thrift/TCloseSessionResp.java | 2 +-
.../hive/service/cli/thrift/TColumnDesc.java | 2 +-
.../hive/service/cli/thrift/TDoubleColumn.java | 2 +-
.../hive/service/cli/thrift/TDoubleValue.java | 2 +-
.../cli/thrift/TExecuteStatementReq.java | 2 +-
.../cli/thrift/TExecuteStatementResp.java | 2 +-
.../service/cli/thrift/TFetchResultsReq.java | 2 +-
.../service/cli/thrift/TFetchResultsResp.java | 2 +-
.../service/cli/thrift/TGetCatalogsReq.java | 2 +-
.../service/cli/thrift/TGetCatalogsResp.java | 2 +-
.../hive/service/cli/thrift/TGetColumnsReq.java | 2 +-
.../service/cli/thrift/TGetColumnsResp.java | 2 +-
.../cli/thrift/TGetDelegationTokenReq.java | 2 +-
.../cli/thrift/TGetDelegationTokenResp.java | 2 +-
.../service/cli/thrift/TGetFunctionsReq.java | 2 +-
.../service/cli/thrift/TGetFunctionsResp.java | 2 +-
.../hive/service/cli/thrift/TGetInfoReq.java | 2 +-
.../hive/service/cli/thrift/TGetInfoResp.java | 2 +-
.../cli/thrift/TGetOperationStatusReq.java | 2 +-
.../cli/thrift/TGetOperationStatusResp.java | 2 +-
.../cli/thrift/TGetResultSetMetadataReq.java | 2 +-
.../cli/thrift/TGetResultSetMetadataResp.java | 2 +-
.../hive/service/cli/thrift/TGetSchemasReq.java | 2 +-
.../service/cli/thrift/TGetSchemasResp.java | 2 +-
.../service/cli/thrift/TGetTableTypesReq.java | 2 +-
.../service/cli/thrift/TGetTableTypesResp.java | 2 +-
.../hive/service/cli/thrift/TGetTablesReq.java | 2 +-
.../hive/service/cli/thrift/TGetTablesResp.java | 2 +-
.../service/cli/thrift/TGetTypeInfoReq.java | 2 +-
.../service/cli/thrift/TGetTypeInfoResp.java | 2 +-
.../service/cli/thrift/THandleIdentifier.java | 2 +-
.../hive/service/cli/thrift/TI16Column.java | 2 +-
.../hive/service/cli/thrift/TI16Value.java | 2 +-
.../hive/service/cli/thrift/TI32Column.java | 2 +-
.../hive/service/cli/thrift/TI32Value.java | 2 +-
.../hive/service/cli/thrift/TI64Column.java | 2 +-
.../hive/service/cli/thrift/TI64Value.java | 2 +-
.../hive/service/cli/thrift/TMapTypeEntry.java | 2 +-
.../service/cli/thrift/TOpenSessionReq.java | 2 +-
.../service/cli/thrift/TOpenSessionResp.java | 2 +-
.../service/cli/thrift/TOperationHandle.java | 2 +-
.../service/cli/thrift/TPrimitiveTypeEntry.java | 2 +-
.../cli/thrift/TRenewDelegationTokenReq.java | 2 +-
.../cli/thrift/TRenewDelegationTokenResp.java | 2 +-
.../apache/hive/service/cli/thrift/TRow.java | 2 +-
.../apache/hive/service/cli/thrift/TRowSet.java | 2 +-
.../hive/service/cli/thrift/TSessionHandle.java | 2 +-
.../apache/hive/service/cli/thrift/TStatus.java | 2 +-
.../hive/service/cli/thrift/TStringColumn.java | 2 +-
.../hive/service/cli/thrift/TStringValue.java | 2 +-
.../service/cli/thrift/TStructTypeEntry.java | 2 +-
.../hive/service/cli/thrift/TTableSchema.java | 2 +-
.../hive/service/cli/thrift/TTypeDesc.java | 2 +-
.../service/cli/thrift/TTypeQualifiers.java | 2 +-
.../service/cli/thrift/TUnionTypeEntry.java | 2 +-
.../cli/thrift/TUserDefinedTypeEntry.java | 2 +-
.../gen-py/hive_service/ThriftHive-remote | 49 +-
.../auth/LdapAuthenticationProviderImpl.java | 82 +-
.../org/apache/hive/service/cli/Column.java | 2 +-
.../thrift/RetryingThriftCLIServiceClient.java | 331 +
.../org/apache/hive/service/cli/TestColumn.java | 129 +
.../cli/TestRetryingThriftCLIServiceClient.java | 133 +
.../hadoop/hive/shims/Hadoop20SShims.java | 5 +-
.../apache/hadoop/hive/shims/Hadoop23Shims.java | 4 +-
.../hive/thrift/HadoopThriftAuthBridge.java | 3 +
.../hive/ql/io/sarg/SearchArgumentFactory.java | 5 +-
.../hive/ql/io/sarg/SearchArgumentImpl.java | 7 +-
testutils/ptest2/src/main/resources/log4j2.xml | 5 +-
721 files changed, 90748 insertions(+), 16405 deletions(-)
----------------------------------------------------------------------
[32/50] [abbrv] hive git commit: HIVE-11572: Datanucleus loads Log4j1.x Logger from AppClassLoader (Prasanth Jayachandran reviewed by Gopal V)
Posted by xu...@apache.org.
HIVE-11572: Datanucleus loads Log4j1.x Logger from AppClassLoader (Prasanth Jayachandran reviewed by Gopal V)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e82bf253
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e82bf253
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e82bf253
Branch: refs/heads/beeline-cli
Commit: e82bf253fa62881f6d976e97d1bf4646ad4187c6
Parents: 1c52a7e
Author: Prasanth Jayachandran <j....@gmail.com>
Authored: Tue Sep 22 19:06:51 2015 -0500
Committer: Prasanth Jayachandran <j....@gmail.com>
Committed: Tue Sep 22 19:06:51 2015 -0500
----------------------------------------------------------------------
bin/hive | 2 +-
packaging/src/main/assembly/bin.xml | 1 +
2 files changed, 2 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/e82bf253/bin/hive
----------------------------------------------------------------------
diff --git a/bin/hive b/bin/hive
index ad7139e..505aee0 100755
--- a/bin/hive
+++ b/bin/hive
@@ -171,7 +171,7 @@ export HADOOP_HOME_WARN_SUPPRESS=true
# pass classpath to hadoop
if [ "$HADOOP_CLASSPATH" != "" ]; then
- export HADOOP_CLASSPATH="${HADOOP_CLASSPATH}:${CLASSPATH}"
+ export HADOOP_CLASSPATH="${CLASSPATH}:${HADOOP_CLASSPATH}"
else
export HADOOP_CLASSPATH="$CLASSPATH"
fi
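The one-line swap above is the heart of the fix: when the same class is available from more than one classpath entry, the JVM loads it from whichever entry appears first, so placing Hive's own CLASSPATH ahead of HADOOP_CLASSPATH lets the logging jars Hive ships take precedence over any Log4j 1.x copy on the Hadoop classpath. A minimal sketch of the resulting ordering, with hypothetical jar paths:
# Hypothetical paths, for illustration only.
CLASSPATH="/opt/hive/lib/log4j-api-2.4.jar:/opt/hive/lib/log4j-core-2.4.jar"
HADOOP_CLASSPATH="/opt/hadoop/lib/log4j-1.2.17.jar"
# New ordering: Hive's jars come first, so they win any classpath lookup.
export HADOOP_CLASSPATH="${CLASSPATH}:${HADOOP_CLASSPATH}"
echo "$HADOOP_CLASSPATH"
# /opt/hive/lib/log4j-api-2.4.jar:/opt/hive/lib/log4j-core-2.4.jar:/opt/hadoop/lib/log4j-1.2.17.jar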
http://git-wip-us.apache.org/repos/asf/hive/blob/e82bf253/packaging/src/main/assembly/bin.xml
----------------------------------------------------------------------
diff --git a/packaging/src/main/assembly/bin.xml b/packaging/src/main/assembly/bin.xml
index 0fa6af8..b21732b 100644
--- a/packaging/src/main/assembly/bin.xml
+++ b/packaging/src/main/assembly/bin.xml
@@ -42,6 +42,7 @@
<exclude>org.apache.hadoop:*</exclude>
<exclude>org.apache.hive.hcatalog:*</exclude>
<exclude>org.slf4j:*</exclude>
+ <exclude>log4j:*</exclude>
<exclude>commons-configuration:commons-configuration</exclude>
</excludes>
</dependencySet>
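The packaging change above is complementary: excluding log4j:* keeps Log4j 1.x jars out of the assembled lib directory altogether, so there is nothing for the AppClassLoader to pick up. A quick sanity check one might run over a freshly built, unpacked distribution (layout assumed, illustration only; the pattern deliberately skips Log4j2's log4j-1.2-api bridge jar):
# Assumed unpacked-distribution layout; adjust the path to your build output.
ls apache-hive-*-bin/lib | grep -E '^log4j-1\.[0-9.]+\.jar$' \
  && echo "log4j 1.x still packaged" \
  || echo "no log4j 1.x jars packaged"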
[50/50] [abbrv] hive git commit: HIVE-11958: Merge branch 'master' into beeline-cli
Posted by xu...@apache.org.
HIVE-11958: Merge branch 'master' into beeline-cli
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6b3e82d3
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6b3e82d3
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6b3e82d3
Branch: refs/heads/beeline-cli
Commit: 6b3e82d39c159f11829007fbbeff2fdf1216bb78
Parents: 046c5eb 41a12cb
Author: Ferdinand Xu <ch...@intel.com>
Authored: Thu Sep 24 21:29:17 2015 -0400
Committer: Ferdinand Xu <ch...@intel.com>
Committed: Thu Sep 24 21:34:20 2015 -0400
----------------------------------------------------------------------
NOTICE | 3 +
.../apache/hadoop/hive/ant/GenVectorCode.java | 31 -
.../apache/hadoop/hive/ant/QTestGenTask.java | 11 +
beeline/src/main/resources/beeline-log4j2.xml | 5 +-
bin/beeline | 5 +
bin/ext/hbaseimport.cmd | 35 +
bin/ext/hbaseimport.sh | 27 +
bin/ext/hbaseschematool.sh | 27 +
bin/hive | 2 +-
.../apache/hadoop/hive/common/ObjectPair.java | 5 +
.../hadoop/hive/common/jsonexplain/tez/Op.java | 8 +-
.../hive/common/jsonexplain/tez/Stage.java | 14 +-
.../common/jsonexplain/tez/TezJsonParser.java | 17 +-
.../org/apache/hadoop/hive/conf/HiveConf.java | 113 +-
.../apache/hadoop/hive/ql/log/PerfLogger.java | 196 +
.../apache/hive/common/util/BloomFilter.java | 20 +-
common/src/main/resources/hive-log4j2.xml | 5 +-
.../test/resources/hive-exec-log4j2-test.xml | 5 +-
common/src/test/resources/hive-log4j2-test.xml | 5 +-
data/conf/hive-log4j2.xml | 5 +-
data/conf/tez/hive-site.xml | 10 +
data/files/dynpartdata1.txt | 5 +
data/files/dynpartdata2.txt | 6 +
.../HiveHBaseTableSnapshotInputFormat.java | 21 +-
.../queries/positive/hbase_handler_snapshot.q | 4 +
.../positive/hbase_handler_snapshot.q.out | 22 +
.../deployers/config/hive/hive-log4j2.xml | 5 +-
.../svr/src/main/config/webhcat-log4j2.xml | 5 +-
.../antlr4/org/apache/hive/hplsql/Hplsql.g4 | 70 +-
.../main/java/org/apache/hive/hplsql/Cmp.java | 314 +
.../java/org/apache/hive/hplsql/Column.java | 29 +-
.../main/java/org/apache/hive/hplsql/Conn.java | 21 +
.../main/java/org/apache/hive/hplsql/Copy.java | 50 +-
.../main/java/org/apache/hive/hplsql/Exec.java | 66 +-
.../java/org/apache/hive/hplsql/Expression.java | 33 +-
.../main/java/org/apache/hive/hplsql/File.java | 18 +-
.../main/java/org/apache/hive/hplsql/Meta.java | 28 +-
.../main/java/org/apache/hive/hplsql/Query.java | 18 +
.../java/org/apache/hive/hplsql/Select.java | 23 +-
.../main/java/org/apache/hive/hplsql/Stmt.java | 8 +-
.../main/java/org/apache/hive/hplsql/Var.java | 110 +-
.../apache/hive/hplsql/functions/Function.java | 6 +-
.../hive/hplsql/functions/FunctionMisc.java | 121 +
.../org/apache/hive/hplsql/TestHplsqlLocal.java | 18 +
.../apache/hive/hplsql/TestHplsqlOffline.java | 5 +
hplsql/src/test/queries/db/cmp_row_count.sql | 4 +
hplsql/src/test/queries/db/cmp_sum.sql | 3 +
hplsql/src/test/queries/db/copy_to_file.sql | 2 +
hplsql/src/test/queries/db/copy_to_hdfs.sql | 2 +
hplsql/src/test/queries/db/copy_to_table.sql | 2 +
hplsql/src/test/queries/db/part_count.sql | 17 +
hplsql/src/test/queries/db/part_count_by.sql | 4 +
hplsql/src/test/queries/db/schema.sql | 32 +
hplsql/src/test/queries/db/select_into.sql | 20 +-
hplsql/src/test/queries/db/select_into2.sql | 17 +
.../test/queries/local/create_procedure2.sql | 16 +
hplsql/src/test/queries/local/if2.sql | 5 +
hplsql/src/test/queries/local/include.sql | 2 +
hplsql/src/test/queries/local/include_file.sql | 1 +
hplsql/src/test/queries/local/mult_div.sql | 8 +
hplsql/src/test/queries/offline/select_db2.sql | 5 +
.../src/test/results/db/cmp_row_count.out.txt | 12 +
hplsql/src/test/results/db/cmp_sum.out.txt | 320 +
hplsql/src/test/results/db/copy_to_file.out.txt | 6 +
hplsql/src/test/results/db/copy_to_hdfs.out.txt | 4 +
.../src/test/results/db/copy_to_table.out.txt | 2 +
hplsql/src/test/results/db/part_count.out.txt | 15 +
.../src/test/results/db/part_count_by.out.txt | 13 +
hplsql/src/test/results/db/select_into.out.txt | 58 +-
hplsql/src/test/results/db/select_into2.out.txt | 19 +
.../results/local/create_procedure2.out.txt | 10 +
hplsql/src/test/results/local/if2.out.txt | 4 +
hplsql/src/test/results/local/include.out.txt | 8 +
hplsql/src/test/results/local/mult_div.out.txt | 7 +
.../src/test/results/offline/select_db2.out.txt | 6 +
.../benchmark/serde/LazySimpleSerDeBench.java | 453 +
.../vectorization/VectorizationBench.java | 32 +-
.../hive/thrift/TestHadoop20SAuthBridge.java | 420 -
.../hive/thrift/TestHadoopAuthBridge23.java | 423 +
itests/hive-unit/pom.xml | 35 +
.../hadoop/hive/metastore/TestAdminUser.java | 4 +-
.../hive/metastore/TestHiveMetaStore.java | 3 +
.../metastore/hbase/HBaseIntegrationTests.java | 117 +
.../TestHBaseAggrStatsCacheIntegration.java | 691 +
.../hive/metastore/hbase/TestHBaseImport.java | 650 +
.../metastore/hbase/TestHBaseMetastoreSql.java | 223 +
.../hbase/TestHBaseStoreIntegration.java | 1794 +
.../hbase/TestStorageDescriptorSharing.java | 191 +
.../hive/ql/security/FolderPermissionBase.java | 17 +-
.../org/apache/hive/jdbc/TestJdbcDriver2.java | 80 +-
itests/qtest/pom.xml | 10 +-
.../test/resources/testconfiguration.properties | 4 +
itests/util/pom.xml | 32 +
.../metastore/hbase/HBaseStoreTestUtil.java | 45 +
.../org/apache/hadoop/hive/ql/QTestUtil.java | 50 +-
.../apache/hive/jdbc/HivePreparedStatement.java | 4 +-
.../hive/jdbc/ZooKeeperHiveClientHelper.java | 34 +-
metastore/if/hive_metastore.thrift | 54 +
metastore/pom.xml | 82 +
.../oracle/hive-schema-0.13.0.oracle.sql | 10 +-
.../oracle/hive-schema-0.14.0.oracle.sql | 10 +-
.../oracle/hive-txn-schema-0.13.0.oracle.sql | 10 +-
.../oracle/hive-txn-schema-0.14.0.oracle.sql | 10 +-
.../metastore/hbase/HbaseMetastoreProto.java | 34901 +++++++++++++++++
.../gen/thrift/gen-cpp/ThriftHiveMetastore.cpp | 6919 ++--
.../gen/thrift/gen-cpp/ThriftHiveMetastore.h | 664 +
.../ThriftHiveMetastore_server.skeleton.cpp | 25 +
.../gen/thrift/gen-cpp/hive_metastore_types.cpp | 1294 +-
.../gen/thrift/gen-cpp/hive_metastore_types.h | 371 +
.../hive/metastore/api/AbortTxnRequest.java | 2 +-
.../metastore/api/AddDynamicPartitions.java | 2 +-
.../metastore/api/AddPartitionsRequest.java | 2 +-
.../hive/metastore/api/AddPartitionsResult.java | 2 +-
.../hadoop/hive/metastore/api/AggrStats.java | 2 +-
.../metastore/api/AlreadyExistsException.java | 2 +-
.../metastore/api/BinaryColumnStatsData.java | 2 +-
.../metastore/api/BooleanColumnStatsData.java | 2 +-
.../hive/metastore/api/CheckLockRequest.java | 2 +-
.../metastore/api/ClearFileMetadataRequest.java | 438 +
.../metastore/api/ClearFileMetadataResult.java | 283 +
.../hive/metastore/api/ColumnStatistics.java | 2 +-
.../metastore/api/ColumnStatisticsDesc.java | 2 +-
.../hive/metastore/api/ColumnStatisticsObj.java | 2 +-
.../hive/metastore/api/CommitTxnRequest.java | 2 +-
.../hive/metastore/api/CompactionRequest.java | 2 +-
.../api/ConfigValSecurityException.java | 2 +-
.../api/CurrentNotificationEventId.java | 2 +-
.../hadoop/hive/metastore/api/Database.java | 2 +-
.../apache/hadoop/hive/metastore/api/Date.java | 2 +-
.../hive/metastore/api/DateColumnStatsData.java | 2 +-
.../hadoop/hive/metastore/api/Decimal.java | 2 +-
.../metastore/api/DecimalColumnStatsData.java | 2 +-
.../metastore/api/DoubleColumnStatsData.java | 2 +-
.../hive/metastore/api/DropPartitionsExpr.java | 2 +-
.../metastore/api/DropPartitionsRequest.java | 2 +-
.../metastore/api/DropPartitionsResult.java | 2 +-
.../hive/metastore/api/EnvironmentContext.java | 2 +-
.../hadoop/hive/metastore/api/FieldSchema.java | 2 +-
.../hive/metastore/api/FireEventRequest.java | 2 +-
.../hive/metastore/api/FireEventResponse.java | 2 +-
.../hadoop/hive/metastore/api/Function.java | 2 +-
.../metastore/api/GetAllFunctionsResponse.java | 38 +-
.../api/GetFileMetadataByExprRequest.java | 548 +
.../api/GetFileMetadataByExprResult.java | 703 +
.../metastore/api/GetFileMetadataRequest.java | 438 +
.../metastore/api/GetFileMetadataResult.java | 540 +
.../metastore/api/GetOpenTxnsInfoResponse.java | 2 +-
.../hive/metastore/api/GetOpenTxnsResponse.java | 2 +-
.../api/GetPrincipalsInRoleRequest.java | 2 +-
.../api/GetPrincipalsInRoleResponse.java | 2 +-
.../api/GetRoleGrantsForPrincipalRequest.java | 2 +-
.../api/GetRoleGrantsForPrincipalResponse.java | 2 +-
.../api/GrantRevokePrivilegeRequest.java | 2 +-
.../api/GrantRevokePrivilegeResponse.java | 2 +-
.../metastore/api/GrantRevokeRoleRequest.java | 2 +-
.../metastore/api/GrantRevokeRoleResponse.java | 2 +-
.../hive/metastore/api/HeartbeatRequest.java | 2 +-
.../metastore/api/HeartbeatTxnRangeRequest.java | 2 +-
.../api/HeartbeatTxnRangeResponse.java | 2 +-
.../hive/metastore/api/HiveObjectPrivilege.java | 2 +-
.../hive/metastore/api/HiveObjectRef.java | 2 +-
.../apache/hadoop/hive/metastore/api/Index.java | 2 +-
.../api/IndexAlreadyExistsException.java | 2 +-
.../metastore/api/InsertEventRequestData.java | 2 +-
.../metastore/api/InvalidInputException.java | 2 +-
.../metastore/api/InvalidObjectException.java | 2 +-
.../api/InvalidOperationException.java | 2 +-
.../api/InvalidPartitionException.java | 2 +-
.../hive/metastore/api/LockComponent.java | 2 +-
.../hadoop/hive/metastore/api/LockRequest.java | 2 +-
.../hadoop/hive/metastore/api/LockResponse.java | 2 +-
.../hive/metastore/api/LongColumnStatsData.java | 2 +-
.../hive/metastore/api/MetaException.java | 2 +-
.../hive/metastore/api/MetadataPpdResult.java | 508 +
.../hive/metastore/api/NoSuchLockException.java | 2 +-
.../metastore/api/NoSuchObjectException.java | 2 +-
.../hive/metastore/api/NoSuchTxnException.java | 2 +-
.../hive/metastore/api/NotificationEvent.java | 2 +-
.../metastore/api/NotificationEventRequest.java | 2 +-
.../api/NotificationEventResponse.java | 2 +-
.../hive/metastore/api/OpenTxnRequest.java | 2 +-
.../hive/metastore/api/OpenTxnsResponse.java | 2 +-
.../apache/hadoop/hive/metastore/api/Order.java | 2 +-
.../hadoop/hive/metastore/api/Partition.java | 2 +-
.../api/PartitionListComposingSpec.java | 2 +-
.../hive/metastore/api/PartitionSpec.java | 2 +-
.../api/PartitionSpecWithSharedSD.java | 2 +-
.../hive/metastore/api/PartitionWithoutSD.java | 2 +-
.../metastore/api/PartitionsByExprRequest.java | 2 +-
.../metastore/api/PartitionsByExprResult.java | 2 +-
.../metastore/api/PartitionsStatsRequest.java | 2 +-
.../metastore/api/PartitionsStatsResult.java | 2 +-
.../metastore/api/PrincipalPrivilegeSet.java | 2 +-
.../hadoop/hive/metastore/api/PrivilegeBag.java | 2 +-
.../hive/metastore/api/PrivilegeGrantInfo.java | 2 +-
.../metastore/api/PutFileMetadataRequest.java | 588 +
.../metastore/api/PutFileMetadataResult.java | 283 +
.../hadoop/hive/metastore/api/ResourceUri.java | 2 +-
.../apache/hadoop/hive/metastore/api/Role.java | 2 +-
.../hive/metastore/api/RolePrincipalGrant.java | 2 +-
.../hadoop/hive/metastore/api/Schema.java | 2 +-
.../hadoop/hive/metastore/api/SerDeInfo.java | 2 +-
.../api/SetPartitionsStatsRequest.java | 2 +-
.../hive/metastore/api/ShowCompactRequest.java | 2 +-
.../hive/metastore/api/ShowCompactResponse.java | 2 +-
.../api/ShowCompactResponseElement.java | 2 +-
.../hive/metastore/api/ShowLocksRequest.java | 2 +-
.../hive/metastore/api/ShowLocksResponse.java | 2 +-
.../metastore/api/ShowLocksResponseElement.java | 2 +-
.../hadoop/hive/metastore/api/SkewedInfo.java | 2 +-
.../hive/metastore/api/StorageDescriptor.java | 2 +-
.../metastore/api/StringColumnStatsData.java | 2 +-
.../apache/hadoop/hive/metastore/api/Table.java | 2 +-
.../hive/metastore/api/TableStatsRequest.java | 2 +-
.../hive/metastore/api/TableStatsResult.java | 2 +-
.../hive/metastore/api/ThriftHiveMetastore.java | 8422 ++--
.../hive/metastore/api/TxnAbortedException.java | 2 +-
.../hadoop/hive/metastore/api/TxnInfo.java | 2 +-
.../hive/metastore/api/TxnOpenException.java | 2 +-
.../apache/hadoop/hive/metastore/api/Type.java | 2 +-
.../hive/metastore/api/UnknownDBException.java | 2 +-
.../api/UnknownPartitionException.java | 2 +-
.../metastore/api/UnknownTableException.java | 2 +-
.../hive/metastore/api/UnlockRequest.java | 2 +-
.../hadoop/hive/metastore/api/Version.java | 2 +-
.../gen-php/metastore/ThriftHiveMetastore.php | 2810 +-
.../src/gen/thrift/gen-php/metastore/Types.php | 1009 +-
.../hive_metastore/ThriftHiveMetastore-remote | 49 +-
.../hive_metastore/ThriftHiveMetastore.py | 1563 +-
.../gen/thrift/gen-py/hive_metastore/ttypes.py | 734 +-
.../gen/thrift/gen-rb/hive_metastore_types.rb | 167 +
.../gen/thrift/gen-rb/thrift_hive_metastore.rb | 267 +
.../hadoop/hive/metastore/HiveAlterHandler.java | 38 +-
.../hadoop/hive/metastore/HiveMetaStore.java | 272 +-
.../hive/metastore/HiveMetaStoreClient.java | 112 +-
.../hadoop/hive/metastore/IMetaStoreClient.java | 6 +
.../hadoop/hive/metastore/ObjectStore.java | 453 +-
.../hive/metastore/PartFilterExprUtil.java | 149 +
.../apache/hadoop/hive/metastore/RawStore.java | 66 +-
.../hadoop/hive/metastore/RawStoreProxy.java | 5 +-
.../hive/metastore/RetryingHMSHandler.java | 33 +-
.../hbase/AggrStatsInvalidatorFilter.java | 121 +
.../hadoop/hive/metastore/hbase/Counter.java | 53 +
.../hive/metastore/hbase/HBaseConnection.java | 96 +
.../metastore/hbase/HBaseFilterPlanUtil.java | 612 +
.../hive/metastore/hbase/HBaseImport.java | 535 +
.../hive/metastore/hbase/HBaseReadWrite.java | 2106 +
.../hive/metastore/hbase/HBaseSchemaTool.java | 239 +
.../hadoop/hive/metastore/hbase/HBaseStore.java | 2387 ++
.../hadoop/hive/metastore/hbase/HBaseUtils.java | 1340 +
.../hive/metastore/hbase/ObjectCache.java | 81 +
.../hive/metastore/hbase/PartitionCache.java | 168 +
.../metastore/hbase/PartitionKeyComparator.java | 292 +
.../hbase/SharedStorageDescriptor.java | 251 +
.../hadoop/hive/metastore/hbase/StatsCache.java | 326 +
.../metastore/hbase/TephraHBaseConnection.java | 127 +
.../metastore/hbase/VanillaHBaseConnection.java | 137 +
.../stats/BinaryColumnStatsAggregator.java | 35 +
.../stats/BooleanColumnStatsAggregator.java | 35 +
.../hbase/stats/ColumnStatsAggregator.java | 26 +
.../stats/ColumnStatsAggregatorFactory.java | 94 +
.../stats/DecimalColumnStatsAggregator.java | 43 +
.../stats/DoubleColumnStatsAggregator.java | 36 +
.../hbase/stats/LongColumnStatsAggregator.java | 36 +
.../stats/StringColumnStatsAggregator.java | 36 +
.../hive/metastore/parser/ExpressionTree.java | 9 +-
.../hive/metastore/tools/HiveMetaTool.java | 5 +
.../hadoop/hive/metastore/txn/TxnHandler.java | 66 +-
.../metastore/hbase/hbase_metastore_proto.proto | 282 +
.../DummyRawStoreControlledCommit.java | 56 +-
.../DummyRawStoreForJdoConnection.java | 50 +-
.../hadoop/hive/metastore/TestObjectStore.java | 43 +-
.../hadoop/hive/metastore/hbase/MockUtils.java | 199 +
.../hbase/TestHBaseAggregateStatsCache.java | 316 +
.../hbase/TestHBaseFilterPlanUtil.java | 483 +
.../hive/metastore/hbase/TestHBaseStore.java | 1307 +
.../metastore/hbase/TestHBaseStoreCached.java | 378 +
.../hbase/TestSharedStorageDescriptor.java | 153 +
packaging/src/main/assembly/bin.xml | 1 +
pom.xml | 25 +-
.../hadoop/hive/ql/plan/api/Adjacency.java | 2 +-
.../apache/hadoop/hive/ql/plan/api/Graph.java | 2 +-
.../hadoop/hive/ql/plan/api/Operator.java | 2 +-
.../apache/hadoop/hive/ql/plan/api/Query.java | 2 +-
.../hadoop/hive/ql/plan/api/QueryPlan.java | 2 +-
.../apache/hadoop/hive/ql/plan/api/Stage.java | 2 +-
.../apache/hadoop/hive/ql/plan/api/Task.java | 2 +-
.../ExpressionTemplates/IfExprColumnColumn.txt | 186 -
.../java/org/apache/hadoop/hive/ql/Driver.java | 17 +-
.../org/apache/hadoop/hive/ql/ErrorMsg.java | 2 +-
.../org/apache/hadoop/hive/ql/exec/DDLTask.java | 333 +-
.../apache/hadoop/hive/ql/exec/ExplainTask.java | 24 +-
.../hadoop/hive/ql/exec/FunctionRegistry.java | 3 +-
.../hadoop/hive/ql/exec/KeyWrapperFactory.java | 4 +
.../hadoop/hive/ql/exec/MapJoinOperator.java | 8 +-
.../apache/hadoop/hive/ql/exec/MoveTask.java | 31 +-
.../apache/hadoop/hive/ql/exec/Operator.java | 3 +-
.../ql/exec/SparkHashTableSinkOperator.java | 3 +-
.../hadoop/hive/ql/exec/StatsNoJobTask.java | 25 +-
.../apache/hadoop/hive/ql/exec/StatsTask.java | 13 +-
.../apache/hadoop/hive/ql/exec/Utilities.java | 11 +-
.../persistence/BytesBytesMultiHashMap.java | 11 +-
.../persistence/HybridHashTableContainer.java | 68 +-
.../ql/exec/persistence/PTFRowContainer.java | 14 +-
.../hive/ql/exec/persistence/RowContainer.java | 12 +-
.../hadoop/hive/ql/exec/spark/SparkPlan.java | 3 +-
.../hive/ql/exec/spark/SparkPlanGenerator.java | 3 +-
.../hive/ql/exec/spark/SparkRecordHandler.java | 3 +-
.../hadoop/hive/ql/exec/spark/SparkTask.java | 2 +-
.../ql/exec/spark/status/SparkJobMonitor.java | 2 +-
.../hadoop/hive/ql/exec/tez/DagUtils.java | 3 +
.../hive/ql/exec/tez/HashTableLoader.java | 7 +-
.../hadoop/hive/ql/exec/tez/InPlaceUpdates.java | 65 +
.../hive/ql/exec/tez/RecordProcessor.java | 3 +-
.../hive/ql/exec/tez/ReduceRecordProcessor.java | 1 -
.../hive/ql/exec/tez/ReduceRecordSource.java | 3 +-
.../hadoop/hive/ql/exec/tez/TezJobMonitor.java | 70 +-
.../hadoop/hive/ql/exec/tez/TezProcessor.java | 3 +-
.../apache/hadoop/hive/ql/exec/tez/TezTask.java | 20 +-
.../ql/exec/tez/tools/KeyValuesInputMerger.java | 1 -
.../ql/exec/vector/VectorGroupByOperator.java | 5 +-
.../exec/vector/VectorSMBMapJoinOperator.java | 15 +-
.../ql/exec/vector/VectorizationContext.java | 213 +-
.../expressions/FilterStringColumnInList.java | 13 +-
.../expressions/FilterStructColumnInList.java | 178 +
.../exec/vector/expressions/IStructInExpr.java | 36 +
.../IfExprDoubleColumnDoubleColumn.java | 167 +
.../expressions/IfExprLongColumnLongColumn.java | 166 +
.../vector/expressions/StringColumnInList.java | 4 +
.../vector/expressions/StructColumnInList.java | 174 +
.../apache/hadoop/hive/ql/hooks/ATSHook.java | 9 +-
.../hadoop/hive/ql/hooks/LineageLogger.java | 95 +-
.../hive/ql/io/CombineHiveInputFormat.java | 10 +-
.../hadoop/hive/ql/io/HiveInputFormat.java | 5 +-
.../hadoop/hive/ql/io/orc/OrcInputFormat.java | 155 +-
.../apache/hadoop/hive/ql/io/orc/OrcSerde.java | 1 +
.../apache/hadoop/hive/ql/io/orc/OrcStruct.java | 2 +-
.../hadoop/hive/ql/io/orc/RecordReaderImpl.java | 4 +-
.../hive/ql/io/parquet/ProjectionPusher.java | 3 +-
.../serde/ParquetHiveArrayInspector.java | 12 +
.../ql/io/parquet/timestamp/NanoTimeUtils.java | 23 +-
.../hive/ql/io/sarg/ConvertAstToSearchArg.java | 4 +
.../apache/hadoop/hive/ql/lib/RuleRegExp.java | 61 +-
.../apache/hadoop/hive/ql/log/PerfLogger.java | 195 -
.../apache/hadoop/hive/ql/metadata/Hive.java | 34 +-
.../hadoop/hive/ql/metadata/Partition.java | 29 +-
.../ql/optimizer/ColumnPrunerProcFactory.java | 3 +
.../hive/ql/optimizer/ConvertJoinMapJoin.java | 22 +-
.../hive/ql/optimizer/GenMapRedUtils.java | 57 +-
.../ql/optimizer/ReduceSinkMapJoinProc.java | 19 +-
.../functions/HiveSqlCountAggFunction.java | 72 +
.../functions/HiveSqlMinMaxAggFunction.java | 49 +
.../functions/HiveSqlSumAggFunction.java | 125 +
.../calcite/reloperators/HiveBetween.java | 75 +
.../optimizer/calcite/reloperators/HiveIn.java | 41 +
.../calcite/reloperators/HiveLimit.java | 57 -
.../calcite/reloperators/HiveSort.java | 110 -
.../calcite/reloperators/HiveSortLimit.java | 110 +
.../rules/HiveAggregateJoinTransposeRule.java | 372 +
.../rules/HiveAggregateProjectMergeRule.java | 151 +
.../calcite/rules/HivePreFilteringRule.java | 37 +-
.../calcite/rules/HiveRelFieldTrimmer.java | 145 +-
.../calcite/stats/HiveRelMdMemory.java | 9 +-
.../calcite/stats/HiveRelMdParallelism.java | 4 +-
.../calcite/translator/ASTConverter.java | 24 +-
.../calcite/translator/HiveOpConverter.java | 8 +-
.../translator/PlanModifierForASTConv.java | 14 +-
.../translator/PlanModifierForReturnPath.java | 4 -
.../calcite/translator/PlanModifierUtil.java | 4 +-
.../translator/SqlFunctionConverter.java | 56 +-
.../hive/ql/optimizer/lineage/LineageCtx.java | 8 +-
.../hive/ql/optimizer/physical/Vectorizer.java | 87 +-
.../ql/optimizer/physical/Vectorizer.java.orig | 1744 +
.../ql/optimizer/physical/Vectorizer.java.rej | 86 +
.../hive/ql/optimizer/ppr/PartitionPruner.java | 7 +-
.../hadoop/hive/ql/parse/CalcitePlanner.java | 33 +-
.../hive/ql/parse/DDLSemanticAnalyzer.java | 17 +
.../apache/hadoop/hive/ql/parse/HiveParser.g | 9 +-
.../apache/hadoop/hive/ql/parse/QBSubQuery.java | 7 -
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 12 +-
.../hive/ql/parse/SemanticAnalyzerFactory.java | 2 +
.../hadoop/hive/ql/parse/SubQueryUtils.java | 11 -
.../hive/ql/parse/spark/SparkCompiler.java | 3 +-
.../org/apache/hadoop/hive/ql/plan/DDLWork.java | 21 +
.../hive/ql/plan/ExprNodeGenericFuncDesc.java | 10 +-
.../hadoop/hive/ql/plan/HiveOperation.java | 1 +
.../hive/ql/plan/ShowCreateDatabaseDesc.java | 94 +
.../hadoop/hive/ql/plan/VectorGroupByDesc.java | 10 +
.../AuthorizationPreEventListener.java | 2 +-
.../authorization/plugin/HiveOperationType.java | 1 +
.../plugin/sqlstd/Operation2Privilege.java | 2 +
.../sqlstd/SQLStdHiveAccessController.java | 5 +
.../hadoop/hive/ql/session/SessionState.java | 40 +-
.../apache/hadoop/hive/ql/stats/StatsUtils.java | 269 +-
.../hive/ql/stats/jdbc/JDBCStatsPublisher.java | 13 +-
.../ql/stats/jdbc/JDBCStatsSetupConstants.java | 4 +-
.../org/apache/hadoop/hive/ql/udf/UDFJson.java | 2 +
.../hive/ql/udf/generic/GenericUDAFMax.java | 16 +-
.../hive/ql/udf/generic/GenericUDAFSum.java | 2 +-
.../udf/generic/GenericUDAFSumEmptyIsZero.java | 63 +
.../hive/ql/udf/generic/GenericUDFIf.java | 4 +-
ql/src/main/resources/hive-exec-log4j2.xml | 5 +-
ql/src/main/resources/tez-container-log4j2.xml | 5 +-
.../hive/metastore/TestMetastoreExpr.java | 2 +-
.../persistence/TestBytesBytesMultiHashMap.java | 3 +
.../ql/exec/persistence/TestHashPartition.java | 29 +
.../exec/persistence/TestPTFRowContainer.java | 31 +-
.../exec/vector/TestVectorizationContext.java | 4 +-
.../TestVectorConditionalExpressions.java | 3 +-
.../hive/ql/io/orc/TestInputOutputFormat.java | 19 +
.../hadoop/hive/ql/io/orc/TestOrcStruct.java | 2 +
.../serde/TestParquetTimestampUtils.java | 38 +-
...nMapRedUtilsUsePartitionColumnsNegative.java | 73 +
...nMapRedUtilsUsePartitionColumnsPositive.java | 61 +
.../test/queries/clientnegative/ctasnullcol.q | 2 +
.../authorization_set_show_current_role.q | 3 +
.../clientpositive/bucket_map_join_tez1.q | 31 +
.../queries/clientpositive/cbo_rp_auto_join17.q | 14 +
.../cbo_rp_cross_product_check_2.q | 31 +
.../clientpositive/drop_table_with_index.q | 35 +
.../test/queries/clientpositive/dynpart_merge.q | 28 +
.../dynpart_sort_opt_vectorization.q | 2 +
.../clientpositive/dynpart_sort_optimization.q | 2 +
.../queries/clientpositive/exchgpartition2lel.q | 32 +
.../clientpositive/groupby_join_pushdown.q | 55 +
ql/src/test/queries/clientpositive/lineage3.q | 26 +
.../test/queries/clientpositive/load_orc_part.q | 5 +
.../parquet_mixed_partition_formats.q | 42 +
.../clientpositive/parquet_ppd_boolean.q | 42 +-
.../queries/clientpositive/parquet_ppd_char.q | 46 +-
.../queries/clientpositive/parquet_ppd_date.q | 64 +-
.../clientpositive/parquet_ppd_decimal.q | 106 +-
.../clientpositive/parquet_ppd_timestamp.q | 62 +-
.../clientpositive/parquet_ppd_varchar.q | 46 +-
.../clientpositive/parquet_predicate_pushdown.q | 20 +-
.../clientpositive/show_create_database.q | 3 +
.../queries/clientpositive/subquery_views.q | 22 +-
.../queries/clientpositive/union_fast_stats.q | 68 +
.../clientpositive/vector_auto_smb_mapjoin_14.q | 297 +
.../queries/clientpositive/vector_char_cast.q | 9 +
.../clientpositive/vector_groupby_reduce.q | 62 +-
.../queries/clientpositive/vector_struct_in.q | 247 +
.../clientpositive/vectorization_limit.q | 4 +-
.../queries/clientpositive/windowing_udaf.q | 4 +
.../results/clientnegative/ctasnullcol.q.out | 5 +
.../subquery_exists_implicit_gby.q.out | 8 +-
.../subquery_nested_subquery.q.out | 4 +-
.../subquery_notexists_implicit_gby.q.out | 8 +-
.../subquery_windowing_corr.q.out | 7 +-
.../alter_partition_coltype.q.out | 8 +-
.../clientpositive/annotate_stats_groupby.q.out | 106 +-
.../annotate_stats_groupby2.q.out | 28 +-
.../authorization_explain.q.java1.7.out | 2 +-
.../authorization_explain.q.java1.8.out | 2 +-
.../authorization_set_show_current_role.q.out | 8 +
.../results/clientpositive/auto_join18.q.out | 12 +-
.../auto_join18_multi_distinct.q.out | 12 +-
.../results/clientpositive/auto_join27.q.out | 18 +-
.../results/clientpositive/auto_join32.q.out | 4 +-
.../clientpositive/binarysortable_1.q.out | Bin 4329 -> 4325 bytes
.../clientpositive/cbo_rp_auto_join17.q.out | 118 +
.../cbo_rp_cross_product_check_2.q.out | 699 +
.../clientpositive/correlationoptimizer2.q.out | 220 +-
.../clientpositive/correlationoptimizer6.q.out | 232 +-
ql/src/test/results/clientpositive/count.q.out | 14 +-
.../results/clientpositive/ctas_colname.q.out | 52 +-
.../test/results/clientpositive/database.q.out | 2 +-
.../clientpositive/decimal_precision.q.out | 4 +-
.../results/clientpositive/decimal_udf.q.out | 30 +-
.../results/clientpositive/distinct_stats.q.out | 14 +-
.../clientpositive/drop_table_with_index.q.out | 152 +
.../results/clientpositive/dynpart_merge.q.out | 99 +
.../dynpart_sort_opt_vectorization.q.out | 117 +-
.../dynpart_sort_optimization.q.out | 117 +-
...ryption_select_read_only_encrypted_tbl.q.out | 4 +-
.../clientpositive/exchgpartition2lel.q.out | 182 +
.../clientpositive/explain_dependency.q.out | 18 +-
.../clientpositive/explain_dependency2.q.out | 16 +-
.../clientpositive/explain_logical.q.out | 78 +-
.../clientpositive/fetch_aggregation.q.out | 4 +-
.../test/results/clientpositive/gby_star.q.out | 54 +-
.../test/results/clientpositive/groupby12.q.out | 6 +-
.../results/clientpositive/groupby5_map.q.out | 4 +-
.../clientpositive/groupby5_map_skew.q.out | 4 +-
.../results/clientpositive/groupby_cube1.q.out | 12 +-
.../groupby_distinct_samekey.q.out | 6 +-
.../clientpositive/groupby_grouping_sets2.q.out | 10 +-
.../clientpositive/groupby_grouping_sets3.q.out | 12 +-
.../clientpositive/groupby_grouping_sets5.q.out | 8 +-
.../clientpositive/groupby_grouping_sets6.q.out | 8 +-
.../clientpositive/groupby_join_pushdown.q.out | 1522 +
.../clientpositive/groupby_position.q.out | 36 +-
.../clientpositive/groupby_resolution.q.out | 60 +-
.../clientpositive/groupby_rollup1.q.out | 12 +-
.../clientpositive/groupby_sort_10.q.out | 8 +-
.../clientpositive/groupby_sort_11.q.out | 10 +-
.../results/clientpositive/groupby_sort_8.q.out | 12 +-
ql/src/test/results/clientpositive/having.q.out | 62 +-
.../test/results/clientpositive/having2.q.out | 12 +-
.../clientpositive/index_auto_mult_tables.q.out | 12 +-
.../clientpositive/index_auto_self_join.q.out | 12 +-
.../clientpositive/index_auto_update.q.out | 6 +-
.../index_bitmap_auto_partitioned.q.out | 6 +-
.../index_bitmap_compression.q.out | 6 +-
.../infer_bucket_sort_dyn_part.q.out | 4 +-
.../infer_bucket_sort_map_operators.q.out | 4 +-
.../results/clientpositive/input4.q.java1.7.out | 2 +-
.../results/clientpositive/input4.q.java1.8.out | 2 +-
.../results/clientpositive/join0.q.java1.7.out | 2 +-
.../results/clientpositive/join0.q.java1.8.out | 4 +-
ql/src/test/results/clientpositive/join18.q.out | 12 +-
.../clientpositive/join18_multi_distinct.q.out | 12 +-
ql/src/test/results/clientpositive/join31.q.out | 36 +-
.../limit_partition_metadataonly.q.out | 4 +-
.../results/clientpositive/limit_pushdown.q.out | 36 +-
.../test/results/clientpositive/lineage2.q.out | 2 +-
.../test/results/clientpositive/lineage3.q.out | 72 +-
.../list_bucket_dml_6.q.java1.7.out | 12 +-
.../list_bucket_dml_6.q.java1.8.out | 12 +-
.../clientpositive/list_bucket_dml_7.q.out | 12 +-
.../list_bucket_query_multiskew_3.q.out | 2 +-
.../results/clientpositive/load_orc_part.q.out | 26 +
.../clientpositive/mapjoin_mapjoin.q.out | 32 +-
.../clientpositive/metadata_only_queries.q.out | 4 +-
.../results/clientpositive/metadataonly1.q.out | 112 +-
.../results/clientpositive/multiMapJoin2.q.out | 226 +-
.../nonblock_op_deduplicate.q.out | 8 +-
.../results/clientpositive/nonmr_fetch.q.out | 14 +-
.../results/clientpositive/parallel_join0.q.out | 2 +-
.../parquet_mixed_partition_formats.q.out | 303 +
.../clientpositive/parquet_ppd_boolean.q.out | 194 +-
.../clientpositive/parquet_ppd_char.q.out | 224 +-
.../clientpositive/parquet_ppd_date.q.out | 324 +-
.../clientpositive/parquet_ppd_decimal.q.out | 594 +-
.../clientpositive/parquet_ppd_timestamp.q.out | 314 +-
.../clientpositive/parquet_ppd_varchar.q.out | 224 +-
.../parquet_predicate_pushdown.q.out | 118 +-
.../clientpositive/partition_multilevels.q.out | 8 +-
.../clientpositive/plan_json.q.java1.7.out | 2 +-
.../clientpositive/plan_json.q.java1.8.out | 2 +-
.../test/results/clientpositive/ppd_gby.q.out | 12 +-
.../test/results/clientpositive/ppd_gby2.q.out | 60 +-
.../clientpositive/ppd_join_filter.q.out | 98 +-
.../ql_rewrite_gbtoidx_cbo_1.q.out | 168 +-
.../ql_rewrite_gbtoidx_cbo_2.q.out | 94 +-
.../reduce_deduplicate_extended.q.out | 32 +-
.../clientpositive/selectDistinctStar.q.out | 44 +-
.../clientpositive/show_create_database.q.out | 19 +
.../results/clientpositive/show_functions.q.out | 1 +
.../clientpositive/spark/auto_join18.q.out | 10 +-
.../spark/auto_join18_multi_distinct.q.out | 12 +-
.../clientpositive/spark/auto_join27.q.out | 18 +-
.../clientpositive/spark/auto_join32.q.out | 53 +-
.../spark/bucket_map_join_tez1.q.out | 357 +
.../results/clientpositive/spark/count.q.out | 14 +-
.../clientpositive/spark/groupby5_map.q.out | 4 +-
.../spark/groupby5_map_skew.q.out | 4 +-
.../clientpositive/spark/groupby_cube1.q.out | 12 +-
.../clientpositive/spark/groupby_position.q.out | 18 +-
.../spark/groupby_resolution.q.out | 60 +-
.../clientpositive/spark/groupby_rollup1.q.out | 12 +-
.../results/clientpositive/spark/having.q.out | 62 +-
.../spark/infer_bucket_sort_map_operators.q.out | 4 +-
.../results/clientpositive/spark/join18.q.out | 10 +-
.../spark/join18_multi_distinct.q.out | 12 +-
.../results/clientpositive/spark/join31.q.out | 36 +-
.../spark/limit_partition_metadataonly.q.out | 4 +-
.../clientpositive/spark/limit_pushdown.q.out | 34 +-
.../clientpositive/spark/mapjoin_mapjoin.q.out | 24 +-
.../spark/metadata_only_queries.q.out | 4 +-
.../clientpositive/spark/ppd_join_filter.q.out | 90 +-
.../spark/ql_rewrite_gbtoidx_cbo_1.q.out | 168 +-
.../clientpositive/spark/stats_only_null.q.out | 8 +-
.../clientpositive/spark/subquery_in.q.out | 36 +-
.../results/clientpositive/spark/union11.q.out | 42 +-
.../results/clientpositive/spark/union14.q.out | 28 +-
.../results/clientpositive/spark/union15.q.out | 28 +-
.../results/clientpositive/spark/union28.q.out | 4 +-
.../results/clientpositive/spark/union30.q.out | 4 +-
.../results/clientpositive/spark/union33.q.out | 8 +-
.../results/clientpositive/spark/union5.q.out | 34 +-
.../results/clientpositive/spark/union7.q.out | 28 +-
.../clientpositive/spark/union_remove_21.q.out | 4 +-
.../spark/vector_count_distinct.q.out | 4 +-
.../spark/vector_decimal_aggregate.q.out | 12 +-
.../spark/vector_distinct_2.q.out | 28 +-
.../clientpositive/spark/vector_groupby_3.q.out | 30 +-
.../spark/vector_mapjoin_reduce.q.out | 36 +-
.../clientpositive/spark/vector_orderby_5.q.out | 6 +-
.../clientpositive/spark/vectorization_0.q.out | 16 +-
.../clientpositive/spark/vectorization_13.q.out | 32 +-
.../clientpositive/spark/vectorization_15.q.out | 16 +-
.../clientpositive/spark/vectorization_16.q.out | 16 +-
.../clientpositive/spark/vectorization_9.q.out | 16 +-
.../spark/vectorization_pushdown.q.out | 4 +-
.../spark/vectorization_short_regress.q.out | 74 +-
.../spark/vectorized_nested_mapjoin.q.out | 18 +-
.../spark/vectorized_timestamp_funcs.q.out | 12 +-
.../clientpositive/stats_only_null.q.out | 8 +-
.../results/clientpositive/stats_ppr_all.q.out | 16 +-
.../subq_where_serialization.q.out | 18 +-
.../clientpositive/subquery_exists_having.q.out | 48 +-
.../results/clientpositive/subquery_in.q.out | 36 +-
.../clientpositive/subquery_in_having.q.out | 260 +-
.../clientpositive/subquery_notexists.q.out | 18 +-
.../subquery_notexists_having.q.out | 26 +-
.../results/clientpositive/subquery_notin.q.out | 24 +-
.../subquery_notin_having.q.java1.7.out | 50 +-
.../subquery_unqualcolumnrefs.q.out | 74 +-
.../results/clientpositive/subquery_views.q.out | 124 +-
.../tez/bucket_map_join_tez1.q.out | 333 +
.../clientpositive/tez/constprog_dpp.q.out | 4 +-
.../test/results/clientpositive/tez/count.q.out | 14 +-
.../tez/dynamic_partition_pruning.q.out | 88 +-
.../tez/dynpart_sort_opt_vectorization.q.out | 102 +-
.../tez/dynpart_sort_optimization.q.out | 101 +-
.../clientpositive/tez/explainuser_1.q.out | 2799 +-
.../clientpositive/tez/explainuser_2.q.out | 4004 +-
.../clientpositive/tez/explainuser_3.q.out | 10 +-
.../results/clientpositive/tez/having.q.out | 62 +-
.../clientpositive/tez/limit_pushdown.q.out | 34 +-
.../clientpositive/tez/mapjoin_mapjoin.q.out | 24 +-
.../tez/metadata_only_queries.q.out | 4 +-
.../clientpositive/tez/metadataonly1.q.out | 44 +-
.../test/results/clientpositive/tez/mrr.q.out | 94 +-
.../clientpositive/tez/selectDistinctStar.q.out | 44 +-
.../tez/show_create_database.q.out | 19 +
.../clientpositive/tez/stats_only_null.q.out | 8 +-
.../clientpositive/tez/subquery_in.q.out | 36 +-
.../results/clientpositive/tez/tez_dml.q.out | 6 +-
.../results/clientpositive/tez/union5.q.out | 44 +-
.../results/clientpositive/tez/union7.q.out | 28 +-
.../clientpositive/tez/unionDistinct_1.q.out | 8 +-
.../clientpositive/tez/union_fast_stats.q.out | 526 +
.../clientpositive/tez/vector_aggregate_9.q.out | 4 +-
.../tez/vector_auto_smb_mapjoin_14.q.out | 1576 +
.../tez/vector_binary_join_groupby.q.out | 4 +-
.../clientpositive/tez/vector_char_cast.q.out | 35 +
.../tez/vector_count_distinct.q.out | 4 +-
.../tez/vector_decimal_aggregate.q.out | 12 +-
.../tez/vector_decimal_precision.q.out | 4 +-
.../clientpositive/tez/vector_decimal_udf.q.out | 30 +-
.../clientpositive/tez/vector_distinct_2.q.out | 28 +-
.../clientpositive/tez/vector_groupby_3.q.out | 30 +-
.../tez/vector_groupby_reduce.q.out | 1460 +-
.../tez/vector_grouping_sets.q.out | 8 +-
.../tez/vector_mapjoin_reduce.q.out | 36 +-
.../clientpositive/tez/vector_orderby_5.q.out | 6 +-
.../clientpositive/tez/vector_outer_join2.q.out | 20 +-
.../tez/vector_partition_diff_num_cols.q.out | 20 +-
.../tez/vector_partitioned_date_time.q.out | 12 +-
.../tez/vector_reduce_groupby_decimal.q.out | 24 +-
.../clientpositive/tez/vectorization_0.q.out | 16 +-
.../clientpositive/tez/vectorization_13.q.out | 32 +-
.../clientpositive/tez/vectorization_15.q.out | 16 +-
.../clientpositive/tez/vectorization_16.q.out | 16 +-
.../clientpositive/tez/vectorization_9.q.out | 16 +-
.../tez/vectorization_limit.q.out | 22 +-
.../tez/vectorization_pushdown.q.out | 4 +-
.../tez/vectorization_short_regress.q.out | 74 +-
.../tez/vectorized_distinct_gby.q.out | 8 +-
.../vectorized_dynamic_partition_pruning.q.out | 88 +-
.../tez/vectorized_nested_mapjoin.q.out | 18 +-
.../clientpositive/tez/vectorized_parquet.q.out | 6 +-
.../tez/vectorized_timestamp_funcs.q.out | 12 +-
ql/src/test/results/clientpositive/udf8.q.out | 4 +-
.../test/results/clientpositive/udf_count.q.out | 16 +-
.../test/results/clientpositive/union11.q.out | 70 +-
.../test/results/clientpositive/union14.q.out | 32 +-
.../test/results/clientpositive/union15.q.out | 38 +-
.../test/results/clientpositive/union28.q.out | 8 +-
.../test/results/clientpositive/union30.q.out | 8 +-
.../test/results/clientpositive/union33.q.out | 8 +-
ql/src/test/results/clientpositive/union5.q.out | 48 +-
ql/src/test/results/clientpositive/union7.q.out | 32 +-
.../clientpositive/unionDistinct_1.q.out | 8 +-
.../clientpositive/union_fast_stats.q.out | 526 +
.../clientpositive/union_remove_21.q.out | 8 +-
.../clientpositive/vector_aggregate_9.q.out | 4 +-
.../vector_aggregate_without_gby.q.out | 4 +-
.../vector_auto_smb_mapjoin_14.q.out | 1792 +
.../vector_binary_join_groupby.q.out | 4 +-
.../clientpositive/vector_char_cast.q.out | 35 +
.../clientpositive/vector_count_distinct.q.out | 6 +-
.../vector_decimal_aggregate.q.out | 12 +-
.../vector_decimal_precision.q.out | 4 +-
.../clientpositive/vector_decimal_udf.q.out | 30 +-
.../clientpositive/vector_distinct_2.q.out | 28 +-
.../clientpositive/vector_groupby_3.q.out | 30 +-
.../clientpositive/vector_groupby_reduce.q.out | 1474 +-
.../clientpositive/vector_grouping_sets.q.out | 8 +-
.../clientpositive/vector_left_outer_join.q.out | 8 +-
.../clientpositive/vector_mapjoin_reduce.q.out | 36 +-
.../clientpositive/vector_orderby_5.q.out | 6 +-
.../clientpositive/vector_outer_join1.q.out | 8 +-
.../clientpositive/vector_outer_join2.q.out | 28 +-
.../clientpositive/vector_outer_join3.q.out | 24 +-
.../clientpositive/vector_outer_join4.q.out | 8 +-
.../clientpositive/vector_outer_join5.q.out | 48 +-
.../vector_partition_diff_num_cols.q.out | 20 +-
.../vector_partitioned_date_time.q.out | 12 +-
.../vector_reduce_groupby_decimal.q.out | 24 +-
.../clientpositive/vector_struct_in.q.out | 825 +
.../clientpositive/vectorization_0.q.out | 16 +-
.../clientpositive/vectorization_13.q.out | 32 +-
.../clientpositive/vectorization_15.q.out | 16 +-
.../clientpositive/vectorization_16.q.out | 16 +-
.../clientpositive/vectorization_9.q.out | 16 +-
.../clientpositive/vectorization_limit.q.out | 24 +-
.../clientpositive/vectorization_pushdown.q.out | 4 +-
.../vectorization_short_regress.q.out | 74 +-
.../vectorized_distinct_gby.q.out | 12 +-
.../vectorized_nested_mapjoin.q.out | 26 +-
.../clientpositive/vectorized_parquet.q.out | 6 +-
.../vectorized_parquet_types.q.out | 6 +-
.../vectorized_timestamp_funcs.q.out | 12 +-
.../results/clientpositive/windowing_udaf.q.out | 12 +
ql/src/test/templates/TestCliDriver.vm | 3 +-
.../hadoop/hive/serde/test/InnerStruct.java | 2 +-
.../hadoop/hive/serde/test/ThriftTestObj.java | 2 +-
.../hadoop/hive/serde2/thrift/test/Complex.java | 2 +-
.../hive/serde2/thrift/test/IntString.java | 2 +-
.../hive/serde2/thrift/test/MegaStruct.java | 2 +-
.../hive/serde2/thrift/test/MiniStruct.java | 2 +-
.../hive/serde2/thrift/test/SetIntString.java | 2 +-
.../hive/serde2/ColumnProjectionUtils.java | 22 +
.../apache/hadoop/hive/serde2/WriteBuffers.java | 10 +-
.../BinarySortableSerDeWithEndPrefix.java | 41 +
.../hadoop/hive/serde2/lazy/LazyByte.java | 4 +
.../hadoop/hive/serde2/lazy/LazyDouble.java | 4 +
.../hadoop/hive/serde2/lazy/LazyFloat.java | 4 +
.../hadoop/hive/serde2/lazy/LazyInteger.java | 4 +
.../hadoop/hive/serde2/lazy/LazyLong.java | 4 +
.../hadoop/hive/serde2/lazy/LazyShort.java | 4 +
.../hadoop/hive/serde2/lazy/LazyUtils.java | 28 +
.../hadoop/hive/service/HiveClusterStatus.java | 2 +-
.../hive/service/HiveServerException.java | 2 +-
.../apache/hadoop/hive/service/ThriftHive.java | 2 +-
.../service/cli/thrift/TArrayTypeEntry.java | 2 +-
.../hive/service/cli/thrift/TBinaryColumn.java | 2 +-
.../hive/service/cli/thrift/TBoolColumn.java | 2 +-
.../hive/service/cli/thrift/TBoolValue.java | 2 +-
.../hive/service/cli/thrift/TByteColumn.java | 2 +-
.../hive/service/cli/thrift/TByteValue.java | 2 +-
.../hive/service/cli/thrift/TCLIService.java | 2 +-
.../cli/thrift/TCancelDelegationTokenReq.java | 2 +-
.../cli/thrift/TCancelDelegationTokenResp.java | 2 +-
.../service/cli/thrift/TCancelOperationReq.java | 2 +-
.../cli/thrift/TCancelOperationResp.java | 2 +-
.../service/cli/thrift/TCloseOperationReq.java | 2 +-
.../service/cli/thrift/TCloseOperationResp.java | 2 +-
.../service/cli/thrift/TCloseSessionReq.java | 2 +-
.../service/cli/thrift/TCloseSessionResp.java | 2 +-
.../hive/service/cli/thrift/TColumnDesc.java | 2 +-
.../hive/service/cli/thrift/TDoubleColumn.java | 2 +-
.../hive/service/cli/thrift/TDoubleValue.java | 2 +-
.../cli/thrift/TExecuteStatementReq.java | 2 +-
.../cli/thrift/TExecuteStatementResp.java | 2 +-
.../service/cli/thrift/TFetchResultsReq.java | 2 +-
.../service/cli/thrift/TFetchResultsResp.java | 2 +-
.../service/cli/thrift/TGetCatalogsReq.java | 2 +-
.../service/cli/thrift/TGetCatalogsResp.java | 2 +-
.../hive/service/cli/thrift/TGetColumnsReq.java | 2 +-
.../service/cli/thrift/TGetColumnsResp.java | 2 +-
.../cli/thrift/TGetDelegationTokenReq.java | 2 +-
.../cli/thrift/TGetDelegationTokenResp.java | 2 +-
.../service/cli/thrift/TGetFunctionsReq.java | 2 +-
.../service/cli/thrift/TGetFunctionsResp.java | 2 +-
.../hive/service/cli/thrift/TGetInfoReq.java | 2 +-
.../hive/service/cli/thrift/TGetInfoResp.java | 2 +-
.../cli/thrift/TGetOperationStatusReq.java | 2 +-
.../cli/thrift/TGetOperationStatusResp.java | 2 +-
.../cli/thrift/TGetResultSetMetadataReq.java | 2 +-
.../cli/thrift/TGetResultSetMetadataResp.java | 2 +-
.../hive/service/cli/thrift/TGetSchemasReq.java | 2 +-
.../service/cli/thrift/TGetSchemasResp.java | 2 +-
.../service/cli/thrift/TGetTableTypesReq.java | 2 +-
.../service/cli/thrift/TGetTableTypesResp.java | 2 +-
.../hive/service/cli/thrift/TGetTablesReq.java | 2 +-
.../hive/service/cli/thrift/TGetTablesResp.java | 2 +-
.../service/cli/thrift/TGetTypeInfoReq.java | 2 +-
.../service/cli/thrift/TGetTypeInfoResp.java | 2 +-
.../service/cli/thrift/THandleIdentifier.java | 2 +-
.../hive/service/cli/thrift/TI16Column.java | 2 +-
.../hive/service/cli/thrift/TI16Value.java | 2 +-
.../hive/service/cli/thrift/TI32Column.java | 2 +-
.../hive/service/cli/thrift/TI32Value.java | 2 +-
.../hive/service/cli/thrift/TI64Column.java | 2 +-
.../hive/service/cli/thrift/TI64Value.java | 2 +-
.../hive/service/cli/thrift/TMapTypeEntry.java | 2 +-
.../service/cli/thrift/TOpenSessionReq.java | 2 +-
.../service/cli/thrift/TOpenSessionResp.java | 2 +-
.../service/cli/thrift/TOperationHandle.java | 2 +-
.../service/cli/thrift/TPrimitiveTypeEntry.java | 2 +-
.../cli/thrift/TRenewDelegationTokenReq.java | 2 +-
.../cli/thrift/TRenewDelegationTokenResp.java | 2 +-
.../apache/hive/service/cli/thrift/TRow.java | 2 +-
.../apache/hive/service/cli/thrift/TRowSet.java | 2 +-
.../hive/service/cli/thrift/TSessionHandle.java | 2 +-
.../apache/hive/service/cli/thrift/TStatus.java | 2 +-
.../hive/service/cli/thrift/TStringColumn.java | 2 +-
.../hive/service/cli/thrift/TStringValue.java | 2 +-
.../service/cli/thrift/TStructTypeEntry.java | 2 +-
.../hive/service/cli/thrift/TTableSchema.java | 2 +-
.../hive/service/cli/thrift/TTypeDesc.java | 2 +-
.../service/cli/thrift/TTypeQualifiers.java | 2 +-
.../service/cli/thrift/TUnionTypeEntry.java | 2 +-
.../cli/thrift/TUserDefinedTypeEntry.java | 2 +-
.../gen-py/hive_service/ThriftHive-remote | 49 +-
.../auth/LdapAuthenticationProviderImpl.java | 82 +-
.../org/apache/hive/service/cli/Column.java | 2 +-
.../thrift/RetryingThriftCLIServiceClient.java | 331 +
.../org/apache/hive/service/cli/TestColumn.java | 129 +
.../cli/TestRetryingThriftCLIServiceClient.java | 133 +
.../hadoop/hive/shims/Hadoop20SShims.java | 5 +-
shims/0.23/pom.xml | 1 -
.../apache/hadoop/hive/shims/Hadoop23Shims.java | 27 +-
.../hive/thrift/HadoopThriftAuthBridge.java | 3 +
.../hive/ql/io/sarg/SearchArgumentFactory.java | 5 +-
.../hive/ql/io/sarg/SearchArgumentImpl.java | 7 +-
testutils/ptest2/src/main/resources/log4j2.xml | 5 +-
821 files changed, 105344 insertions(+), 16997 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/6b3e82d3/bin/beeline
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/6b3e82d3/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/6b3e82d3/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
----------------------------------------------------------------------
[41/50] [abbrv] hive git commit: HIVE-11911 : The stats table limits are too large for innodb (Sergey Shelukhin, reviewed by Ashutosh Chauhan)
Posted by xu...@apache.org.
HIVE-11911 : The stats table limits are too large for innodb (Sergey Shelukhin, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f73157fe
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f73157fe
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f73157fe
Branch: refs/heads/beeline-cli
Commit: f73157fe45a0c9ea7efeef11ca1c02e47136a63c
Parents: cdc65dc
Author: Sergey Shelukhin <se...@apache.org>
Authored: Wed Sep 23 14:39:23 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Wed Sep 23 14:39:23 2015 -0700
----------------------------------------------------------------------
.../hadoop/hive/ql/stats/jdbc/JDBCStatsPublisher.java | 13 +++++++++++--
.../hive/ql/stats/jdbc/JDBCStatsSetupConstants.java | 4 ++--
2 files changed, 13 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/f73157fe/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsPublisher.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsPublisher.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsPublisher.java
index 4228957..aeb3d27 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsPublisher.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsPublisher.java
@@ -289,7 +289,16 @@ public class JDBCStatsPublisher implements StatsPublisher {
boolean tblExists = rs.next();
if (!tblExists) { // Table does not exist, create it
String createTable = JDBCStatsUtils.getCreate("");
- stmt.executeUpdate(createTable);
+ try {
+ stmt.executeUpdate(createTable);
+ } catch (SQLException ex) {
+ String msg = ex.getMessage();
+ if (msg != null && msg.contains("Specified key was too long")) {
+ throw new RuntimeException(msg + "; try using innodb with "
+ + "Barracuda file format and innodb_large_prefix", ex);
+ }
+ throw ex;
+ }
} else {
// Upgrade column name to allow for longer paths.
String idColName = JDBCStatsUtils.getIdColumnName();
@@ -301,7 +310,7 @@ public class JDBCStatsPublisher implements StatsPublisher {
colSize = rs.getInt("COLUMN_SIZE");
if (colSize < JDBCStatsSetupConstants.ID_COLUMN_VARCHAR_SIZE) {
String alterTable = JDBCStatsUtils.getAlterIdColumn();
- stmt.executeUpdate(alterTable);
+ stmt.executeUpdate(alterTable);
}
} else {
LOG.warn("Failed to update " + idColName + " - column not found");
http://git-wip-us.apache.org/repos/asf/hive/blob/f73157fe/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsSetupConstants.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsSetupConstants.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsSetupConstants.java
index 17e109a..e39fc5b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsSetupConstants.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsSetupConstants.java
@@ -34,6 +34,6 @@ public final class JDBCStatsSetupConstants {
public static final String PART_STAT_RAW_DATA_SIZE_COLUMN_NAME = "RAW_DATA_SIZE";
- // MySQL - 65535, SQL Server - 8000, Oracle - 4000, Derby - 32762, Postgres - large.
- public static final int ID_COLUMN_VARCHAR_SIZE = 4000;
+ // MySQL - 3072/3 (innodb+utf8), SQL Server - 8000, Oracle - 4000, Derby - 32762, Postgres - large.
+ public static final int ID_COLUMN_VARCHAR_SIZE = 1000;
}
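For context on the new limit: MySQL's utf8 charset needs up to three bytes per character, and InnoDB caps index key prefixes at 767 bytes by default, or 3072 bytes with the Barracuda file format plus innodb_large_prefix. 3072 / 3 allows at most 1024 indexable characters, and the patch settles on 1000 for headroom. A minimal MySQL sketch of the working configuration (table and column names are illustrative, not Hive's actual stats schema):

SET GLOBAL innodb_file_format = 'Barracuda';   -- pre-MySQL-8.0 setting
SET GLOBAL innodb_large_prefix = ON;
CREATE TABLE stats_demo (
  ID VARCHAR(1000) CHARACTER SET utf8,         -- 1000 * 3 = 3000 bytes <= 3072
  STAT_VALUE BIGINT,
  PRIMARY KEY (ID)
) ENGINE=InnoDB ROW_FORMAT=DYNAMIC;            -- large prefixes need DYNAMIC or COMPRESSED
-- With VARCHAR(4000) the key could need 12000 bytes and MySQL rejects it with
-- "Specified key was too long", the message the new catch block above tests for.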
[19/50] [abbrv] hive git commit: HIVE-11843: Add 'sort by c' to Parquet PPD q-tests to avoid different output issues with hadoop-1 (Sergio Pena, reviewed by Ferdinand Xu)
Posted by xu...@apache.org.
HIVE-11843: Add 'sort by c' to Parquet PPD q-tests to avoid different output issues with hadoop-1 (Sergio Pena, reviewed by Ferdinand Xu)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/92b42ae9
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/92b42ae9
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/92b42ae9
Branch: refs/heads/beeline-cli
Commit: 92b42ae9efaa2da352c123a0bb74adf2d3ea267d
Parents: 2186159
Author: Sergio Pena <se...@cloudera.com>
Authored: Mon Sep 21 10:06:10 2015 -0500
Committer: Sergio Pena <se...@cloudera.com>
Committed: Mon Sep 21 10:06:10 2015 -0500
----------------------------------------------------------------------
.../clientpositive/parquet_ppd_boolean.q | 4 +-
.../queries/clientpositive/parquet_ppd_char.q | 12 +-
.../queries/clientpositive/parquet_ppd_date.q | 16 +-
.../clientpositive/parquet_ppd_decimal.q | 32 +--
.../clientpositive/parquet_ppd_timestamp.q | 16 +-
.../clientpositive/parquet_ppd_varchar.q | 12 +-
.../clientpositive/parquet_ppd_boolean.q.out | 28 +--
.../clientpositive/parquet_ppd_char.q.out | 84 +++----
.../clientpositive/parquet_ppd_date.q.out | 112 +++++-----
.../clientpositive/parquet_ppd_decimal.q.out | 224 +++++++++----------
.../clientpositive/parquet_ppd_timestamp.q.out | 112 +++++-----
.../clientpositive/parquet_ppd_varchar.q.out | 84 +++----
12 files changed, 368 insertions(+), 368 deletions(-)
----------------------------------------------------------------------
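The motivation behind these edits: a plain SELECT in Hive carries no ordering guarantee, so row order follows input split enumeration and can differ between hadoop-1 and hadoop-2 whenever a predicate matches more than one distinct row, which makes the golden .q.out files flaky. Appending "sort by c" pins the order. A minimal sketch against the newtypestbl table these q-files use:

select * from newtypestbl where c <= "hello";            -- row order not guaranteed
select * from newtypestbl where c <= "hello" sort by c;  -- ordered by c (total order with a single reducer)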
http://git-wip-us.apache.org/repos/asf/hive/blob/92b42ae9/ql/src/test/queries/clientpositive/parquet_ppd_boolean.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/parquet_ppd_boolean.q b/ql/src/test/queries/clientpositive/parquet_ppd_boolean.q
index a7848b4..059da68 100644
--- a/ql/src/test/queries/clientpositive/parquet_ppd_boolean.q
+++ b/ql/src/test/queries/clientpositive/parquet_ppd_boolean.q
@@ -12,7 +12,7 @@ select * from newtypestbl where b=true;
select * from newtypestbl where b!=true;
select * from newtypestbl where b<true;
select * from newtypestbl where b>true;
-select * from newtypestbl where b<=true;
+select * from newtypestbl where b<=true sort by c;
select * from newtypestbl where b=false;
select * from newtypestbl where b!=false;
@@ -26,7 +26,7 @@ select * from newtypestbl where b=true;
select * from newtypestbl where b!=true;
select * from newtypestbl where b<true;
select * from newtypestbl where b>true;
-select * from newtypestbl where b<=true;
+select * from newtypestbl where b<=true sort by c;
select * from newtypestbl where b=false;
select * from newtypestbl where b!=false;
http://git-wip-us.apache.org/repos/asf/hive/blob/92b42ae9/ql/src/test/queries/clientpositive/parquet_ppd_char.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/parquet_ppd_char.q b/ql/src/test/queries/clientpositive/parquet_ppd_char.q
index dcad622..eaddcb4 100644
--- a/ql/src/test/queries/clientpositive/parquet_ppd_char.q
+++ b/ql/src/test/queries/clientpositive/parquet_ppd_char.q
@@ -28,10 +28,10 @@ set hive.optimize.index.filter=true;
select * from newtypestbl where c<"hello";
set hive.optimize.index.filter=false;
-select * from newtypestbl where c<="hello";
+select * from newtypestbl where c<="hello" sort by c;
set hive.optimize.index.filter=true;
-select * from newtypestbl where c<="hello";
+select * from newtypestbl where c<="hello" sort by c;
set hive.optimize.index.filter=false;
select * from newtypestbl where c="apple ";
@@ -46,10 +46,10 @@ set hive.optimize.index.filter=true;
select * from newtypestbl where c in ("apple", "carrot");
set hive.optimize.index.filter=false;
-select * from newtypestbl where c in ("apple", "hello");
+select * from newtypestbl where c in ("apple", "hello") sort by c;
set hive.optimize.index.filter=true;
-select * from newtypestbl where c in ("apple", "hello");
+select * from newtypestbl where c in ("apple", "hello") sort by c;
set hive.optimize.index.filter=false;
select * from newtypestbl where c in ("carrot");
@@ -64,10 +64,10 @@ set hive.optimize.index.filter=true;
select * from newtypestbl where c between "apple" and "carrot";
set hive.optimize.index.filter=false;
-select * from newtypestbl where c between "apple" and "zombie";
+select * from newtypestbl where c between "apple" and "zombie" sort by c;
set hive.optimize.index.filter=true;
-select * from newtypestbl where c between "apple" and "zombie";
+select * from newtypestbl where c between "apple" and "zombie" sort by c;
set hive.optimize.index.filter=false;
select * from newtypestbl where c between "carrot" and "carrot1";
http://git-wip-us.apache.org/repos/asf/hive/blob/92b42ae9/ql/src/test/queries/clientpositive/parquet_ppd_date.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/parquet_ppd_date.q b/ql/src/test/queries/clientpositive/parquet_ppd_date.q
index a05d358..41d0d64 100644
--- a/ql/src/test/queries/clientpositive/parquet_ppd_date.q
+++ b/ql/src/test/queries/clientpositive/parquet_ppd_date.q
@@ -41,10 +41,10 @@ set hive.optimize.index.filter=true;
select * from newtypestbl where da<'1970-02-27';
set hive.optimize.index.filter=false;
-select * from newtypestbl where da<'1970-02-29';
+select * from newtypestbl where da<'1970-02-29' sort by c;
set hive.optimize.index.filter=true;
-select * from newtypestbl where da<'1970-02-29';
+select * from newtypestbl where da<'1970-02-29' sort by c;
set hive.optimize.index.filter=false;
select * from newtypestbl where da<'1970-02-15';
@@ -59,10 +59,10 @@ set hive.optimize.index.filter=true;
select * from newtypestbl where da<='1970-02-20';
set hive.optimize.index.filter=false;
-select * from newtypestbl where da<='1970-02-27';
+select * from newtypestbl where da<='1970-02-27' sort by c;
set hive.optimize.index.filter=true;
-select * from newtypestbl where da<='1970-02-27';
+select * from newtypestbl where da<='1970-02-27' sort by c;
set hive.optimize.index.filter=false;
select * from newtypestbl where da in (cast('1970-02-21' as date), cast('1970-02-27' as date));
@@ -71,10 +71,10 @@ set hive.optimize.index.filter=true;
select * from newtypestbl where da in (cast('1970-02-21' as date), cast('1970-02-27' as date));
set hive.optimize.index.filter=false;
-select * from newtypestbl where da in (cast('1970-02-20' as date), cast('1970-02-27' as date));
+select * from newtypestbl where da in (cast('1970-02-20' as date), cast('1970-02-27' as date)) sort by c;
set hive.optimize.index.filter=true;
-select * from newtypestbl where da in (cast('1970-02-20' as date), cast('1970-02-27' as date));
+select * from newtypestbl where da in (cast('1970-02-20' as date), cast('1970-02-27' as date)) sort by c;
set hive.optimize.index.filter=false;
select * from newtypestbl where da in (cast('1970-02-21' as date), cast('1970-02-22' as date));
@@ -89,10 +89,10 @@ set hive.optimize.index.filter=true;
select * from newtypestbl where da between '1970-02-19' and '1970-02-22';
set hive.optimize.index.filter=false;
-select * from newtypestbl where da between '1970-02-19' and '1970-02-28';
+select * from newtypestbl where da between '1970-02-19' and '1970-02-28' sort by c;
set hive.optimize.index.filter=true;
-select * from newtypestbl where da between '1970-02-19' and '1970-02-28';
+select * from newtypestbl where da between '1970-02-19' and '1970-02-28' sort by c;
set hive.optimize.index.filter=false;
select * from newtypestbl where da between '1970-02-18' and '1970-02-19';
http://git-wip-us.apache.org/repos/asf/hive/blob/92b42ae9/ql/src/test/queries/clientpositive/parquet_ppd_decimal.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/parquet_ppd_decimal.q b/ql/src/test/queries/clientpositive/parquet_ppd_decimal.q
index cf7cba0..dfca486 100644
--- a/ql/src/test/queries/clientpositive/parquet_ppd_decimal.q
+++ b/ql/src/test/queries/clientpositive/parquet_ppd_decimal.q
@@ -67,22 +67,22 @@ set hive.optimize.index.filter=true;
select * from newtypestbl where d<1;
set hive.optimize.index.filter=false;
-select * from newtypestbl where d<=11.22;
+select * from newtypestbl where d<=11.22 sort by c;
set hive.optimize.index.filter=true;
-select * from newtypestbl where d<=11.22;
+select * from newtypestbl where d<=11.22 sort by c;
set hive.optimize.index.filter=false;
-select * from newtypestbl where d<='11.22';
+select * from newtypestbl where d<='11.22' sort by c;
set hive.optimize.index.filter=true;
-select * from newtypestbl where d<='11.22';
+select * from newtypestbl where d<='11.22' sort by c;
set hive.optimize.index.filter=false;
-select * from newtypestbl where d<=cast('11.22' as float);
+select * from newtypestbl where d<=cast('11.22' as float) sort by c;
set hive.optimize.index.filter=true;
-select * from newtypestbl where d<=cast('11.22' as float);
+select * from newtypestbl where d<=cast('11.22' as float) sort by c;
set hive.optimize.index.filter=false;
select * from newtypestbl where d<=cast('11.22' as decimal);
@@ -91,16 +91,16 @@ set hive.optimize.index.filter=true;
select * from newtypestbl where d<=cast('11.22' as decimal);
set hive.optimize.index.filter=false;
-select * from newtypestbl where d<=11.22BD;
+select * from newtypestbl where d<=11.22BD sort by c;
set hive.optimize.index.filter=true;
-select * from newtypestbl where d<=11.22BD;
+select * from newtypestbl where d<=11.22BD sort by c;
set hive.optimize.index.filter=false;
-select * from newtypestbl where d<=12;
+select * from newtypestbl where d<=12 sort by c;
set hive.optimize.index.filter=true;
-select * from newtypestbl where d<=12;
+select * from newtypestbl where d<=12 sort by c;
set hive.optimize.index.filter=false;
select * from newtypestbl where d in ('0.22', '1.0');
@@ -109,10 +109,10 @@ set hive.optimize.index.filter=true;
select * from newtypestbl where d in ('0.22', '1.0');
set hive.optimize.index.filter=false;
-select * from newtypestbl where d in ('0.22', '11.22');
+select * from newtypestbl where d in ('0.22', '11.22') sort by c;
set hive.optimize.index.filter=true;
-select * from newtypestbl where d in ('0.22', '11.22');
+select * from newtypestbl where d in ('0.22', '11.22') sort by c;
set hive.optimize.index.filter=false;
select * from newtypestbl where d in ('0.9', '1.0');
@@ -127,10 +127,10 @@ set hive.optimize.index.filter=true;
select * from newtypestbl where d in ('0.9', 0.22);
set hive.optimize.index.filter=false;
-select * from newtypestbl where d in ('0.9', 0.22, cast('11.22' as float));
+select * from newtypestbl where d in ('0.9', 0.22, cast('11.22' as float)) sort by c;
set hive.optimize.index.filter=true;
-select * from newtypestbl where d in ('0.9', 0.22, cast('11.22' as float));
+select * from newtypestbl where d in ('0.9', 0.22, cast('11.22' as float)) sort by c;
set hive.optimize.index.filter=false;
select * from newtypestbl where d between 0 and 1;
@@ -139,10 +139,10 @@ set hive.optimize.index.filter=true;
select * from newtypestbl where d between 0 and 1;
set hive.optimize.index.filter=false;
-select * from newtypestbl where d between 0 and 1000;
+select * from newtypestbl where d between 0 and 1000 sort by c;
set hive.optimize.index.filter=true;
-select * from newtypestbl where d between 0 and 1000;
+select * from newtypestbl where d between 0 and 1000 sort by c;
set hive.optimize.index.filter=false;
select * from newtypestbl where d between 0 and '2.0';
http://git-wip-us.apache.org/repos/asf/hive/blob/92b42ae9/ql/src/test/queries/clientpositive/parquet_ppd_timestamp.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/parquet_ppd_timestamp.q b/ql/src/test/queries/clientpositive/parquet_ppd_timestamp.q
index 6ed1e55..1b9f6ff 100644
--- a/ql/src/test/queries/clientpositive/parquet_ppd_timestamp.q
+++ b/ql/src/test/queries/clientpositive/parquet_ppd_timestamp.q
@@ -38,10 +38,10 @@ set hive.optimize.index.filter=true;
select * from newtypestbl where ts<cast('2011-01-20 01:01:01' as timestamp);
set hive.optimize.index.filter=false;
-select * from newtypestbl where ts<cast('2011-01-22 01:01:01' as timestamp);
+select * from newtypestbl where ts<cast('2011-01-22 01:01:01' as timestamp) sort by c;
set hive.optimize.index.filter=true;
-select * from newtypestbl where ts<cast('2011-01-22 01:01:01' as timestamp);
+select * from newtypestbl where ts<cast('2011-01-22 01:01:01' as timestamp) sort by c;
set hive.optimize.index.filter=false;
select * from newtypestbl where ts<cast('2010-10-01 01:01:01' as timestamp);
@@ -56,10 +56,10 @@ set hive.optimize.index.filter=true;
select * from newtypestbl where ts<=cast('2011-01-01 01:01:01' as timestamp);
set hive.optimize.index.filter=false;
-select * from newtypestbl where ts<=cast('2011-01-20 01:01:01' as timestamp);
+select * from newtypestbl where ts<=cast('2011-01-20 01:01:01' as timestamp) sort by c;
set hive.optimize.index.filter=true;
-select * from newtypestbl where ts<=cast('2011-01-20 01:01:01' as timestamp);
+select * from newtypestbl where ts<=cast('2011-01-20 01:01:01' as timestamp) sort by c;
set hive.optimize.index.filter=false;
select * from newtypestbl where ts in (cast('2011-01-02 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp));
@@ -68,10 +68,10 @@ set hive.optimize.index.filter=true;
select * from newtypestbl where ts in (cast('2011-01-02 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp));
set hive.optimize.index.filter=false;
-select * from newtypestbl where ts in (cast('2011-01-01 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp));
+select * from newtypestbl where ts in (cast('2011-01-01 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp)) sort by c;
set hive.optimize.index.filter=true;
-select * from newtypestbl where ts in (cast('2011-01-01 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp));
+select * from newtypestbl where ts in (cast('2011-01-01 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp)) sort by c;
set hive.optimize.index.filter=false;
select * from newtypestbl where ts in (cast('2011-01-02 01:01:01' as timestamp), cast('2011-01-08 01:01:01' as timestamp));
@@ -86,10 +86,10 @@ set hive.optimize.index.filter=true;
select * from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-08 01:01:01' as timestamp);
set hive.optimize.index.filter=false;
-select * from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-25 01:01:01' as timestamp);
+select * from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-25 01:01:01' as timestamp) sort by c;
set hive.optimize.index.filter=true;
-select * from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-25 01:01:01' as timestamp);
+select * from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-25 01:01:01' as timestamp) sort by c;
set hive.optimize.index.filter=false;
select * from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2010-11-01 01:01:01' as timestamp);
http://git-wip-us.apache.org/repos/asf/hive/blob/92b42ae9/ql/src/test/queries/clientpositive/parquet_ppd_varchar.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/parquet_ppd_varchar.q b/ql/src/test/queries/clientpositive/parquet_ppd_varchar.q
index 41bf7df..6449c6d 100644
--- a/ql/src/test/queries/clientpositive/parquet_ppd_varchar.q
+++ b/ql/src/test/queries/clientpositive/parquet_ppd_varchar.q
@@ -28,10 +28,10 @@ set hive.optimize.index.filter=true;
select * from newtypestbl where v<"world";
set hive.optimize.index.filter=false;
-select * from newtypestbl where v<="world";
+select * from newtypestbl where v<="world" sort by c;
set hive.optimize.index.filter=true;
-select * from newtypestbl where v<="world";
+select * from newtypestbl where v<="world" sort by c;
set hive.optimize.index.filter=false;
select * from newtypestbl where v="bee ";
@@ -46,10 +46,10 @@ set hive.optimize.index.filter=true;
select * from newtypestbl where v in ("bee", "orange");
set hive.optimize.index.filter=false;
-select * from newtypestbl where v in ("bee", "world");
+select * from newtypestbl where v in ("bee", "world") sort by c;
set hive.optimize.index.filter=true;
-select * from newtypestbl where v in ("bee", "world");
+select * from newtypestbl where v in ("bee", "world") sort by c;
set hive.optimize.index.filter=false;
select * from newtypestbl where v in ("orange");
@@ -64,10 +64,10 @@ set hive.optimize.index.filter=true;
select * from newtypestbl where v between "bee" and "orange";
set hive.optimize.index.filter=false;
-select * from newtypestbl where v between "bee" and "zombie";
+select * from newtypestbl where v between "bee" and "zombie" sort by c;
set hive.optimize.index.filter=true;
-select * from newtypestbl where v between "bee" and "zombie";
+select * from newtypestbl where v between "bee" and "zombie" sort by c;
set hive.optimize.index.filter=false;
select * from newtypestbl where v between "orange" and "pine";
http://git-wip-us.apache.org/repos/asf/hive/blob/92b42ae9/ql/src/test/results/clientpositive/parquet_ppd_boolean.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/parquet_ppd_boolean.q.out b/ql/src/test/results/clientpositive/parquet_ppd_boolean.q.out
index 51ea879..1355849 100644
--- a/ql/src/test/results/clientpositive/parquet_ppd_boolean.q.out
+++ b/ql/src/test/results/clientpositive/parquet_ppd_boolean.q.out
@@ -65,24 +65,24 @@ POSTHOOK: query: select * from newtypestbl where b>true
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-PREHOOK: query: select * from newtypestbl where b<=true
+PREHOOK: query: select * from newtypestbl where b<=true sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where b<=true
+POSTHOOK: query: select * from newtypestbl where b<=true sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 false
apple bee 0.22 true
-hello world 11.22 false
apple bee 0.22 true
-hello world 11.22 false
apple bee 0.22 true
-hello world 11.22 false
apple bee 0.22 true
-hello world 11.22 false
apple bee 0.22 true
+hello world 11.22 false
+hello world 11.22 false
+hello world 11.22 false
+hello world 11.22 false
+hello world 11.22 false
PREHOOK: query: select * from newtypestbl where b=false
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
@@ -190,24 +190,24 @@ POSTHOOK: query: select * from newtypestbl where b>true
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-PREHOOK: query: select * from newtypestbl where b<=true
+PREHOOK: query: select * from newtypestbl where b<=true sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where b<=true
+POSTHOOK: query: select * from newtypestbl where b<=true sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 false
apple bee 0.22 true
-hello world 11.22 false
apple bee 0.22 true
-hello world 11.22 false
apple bee 0.22 true
-hello world 11.22 false
apple bee 0.22 true
-hello world 11.22 false
apple bee 0.22 true
+hello world 11.22 false
+hello world 11.22 false
+hello world 11.22 false
+hello world 11.22 false
+hello world 11.22 false
PREHOOK: query: select * from newtypestbl where b=false
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
http://git-wip-us.apache.org/repos/asf/hive/blob/92b42ae9/ql/src/test/results/clientpositive/parquet_ppd_char.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/parquet_ppd_char.q.out b/ql/src/test/results/clientpositive/parquet_ppd_char.q.out
index af4a13c..f224870 100644
--- a/ql/src/test/results/clientpositive/parquet_ppd_char.q.out
+++ b/ql/src/test/results/clientpositive/parquet_ppd_char.q.out
@@ -98,42 +98,42 @@ apple bee 0.22 1970-02-20
apple bee 0.22 1970-02-20
apple bee 0.22 1970-02-20
apple bee 0.22 1970-02-20
-PREHOOK: query: select * from newtypestbl where c<="hello"
+PREHOOK: query: select * from newtypestbl where c<="hello" sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where c<="hello"
+POSTHOOK: query: select * from newtypestbl where c<="hello" sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-PREHOOK: query: select * from newtypestbl where c<="hello"
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+PREHOOK: query: select * from newtypestbl where c<="hello" sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where c<="hello"
+POSTHOOK: query: select * from newtypestbl where c<="hello" sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
PREHOOK: query: select * from newtypestbl where c="apple "
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
@@ -176,42 +176,42 @@ apple bee 0.22 1970-02-20
apple bee 0.22 1970-02-20
apple bee 0.22 1970-02-20
apple bee 0.22 1970-02-20
-PREHOOK: query: select * from newtypestbl where c in ("apple", "hello")
+PREHOOK: query: select * from newtypestbl where c in ("apple", "hello") sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where c in ("apple", "hello")
+POSTHOOK: query: select * from newtypestbl where c in ("apple", "hello") sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-PREHOOK: query: select * from newtypestbl where c in ("apple", "hello")
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+PREHOOK: query: select * from newtypestbl where c in ("apple", "hello") sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where c in ("apple", "hello")
+POSTHOOK: query: select * from newtypestbl where c in ("apple", "hello") sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
PREHOOK: query: select * from newtypestbl where c in ("carrot")
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
@@ -254,42 +254,42 @@ apple bee 0.22 1970-02-20
apple bee 0.22 1970-02-20
apple bee 0.22 1970-02-20
apple bee 0.22 1970-02-20
-PREHOOK: query: select * from newtypestbl where c between "apple" and "zombie"
+PREHOOK: query: select * from newtypestbl where c between "apple" and "zombie" sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where c between "apple" and "zombie"
+POSTHOOK: query: select * from newtypestbl where c between "apple" and "zombie" sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-PREHOOK: query: select * from newtypestbl where c between "apple" and "zombie"
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+PREHOOK: query: select * from newtypestbl where c between "apple" and "zombie" sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where c between "apple" and "zombie"
+POSTHOOK: query: select * from newtypestbl where c between "apple" and "zombie" sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
PREHOOK: query: select * from newtypestbl where c between "carrot" and "carrot1"
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
http://git-wip-us.apache.org/repos/asf/hive/blob/92b42ae9/ql/src/test/results/clientpositive/parquet_ppd_date.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/parquet_ppd_date.q.out b/ql/src/test/results/clientpositive/parquet_ppd_date.q.out
index 60c9a59..e599014 100644
--- a/ql/src/test/results/clientpositive/parquet_ppd_date.q.out
+++ b/ql/src/test/results/clientpositive/parquet_ppd_date.q.out
@@ -163,42 +163,42 @@ apple bee 0.22 1970-02-20
apple bee 0.22 1970-02-20
apple bee 0.22 1970-02-20
apple bee 0.22 1970-02-20
-PREHOOK: query: select * from newtypestbl where da<'1970-02-29'
+PREHOOK: query: select * from newtypestbl where da<'1970-02-29' sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where da<'1970-02-29'
+POSTHOOK: query: select * from newtypestbl where da<'1970-02-29' sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-PREHOOK: query: select * from newtypestbl where da<'1970-02-29'
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+PREHOOK: query: select * from newtypestbl where da<'1970-02-29' sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where da<'1970-02-29'
+POSTHOOK: query: select * from newtypestbl where da<'1970-02-29' sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
PREHOOK: query: select * from newtypestbl where da<'1970-02-15'
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
@@ -241,42 +241,42 @@ apple bee 0.22 1970-02-20
apple bee 0.22 1970-02-20
apple bee 0.22 1970-02-20
apple bee 0.22 1970-02-20
-PREHOOK: query: select * from newtypestbl where da<='1970-02-27'
+PREHOOK: query: select * from newtypestbl where da<='1970-02-27' sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where da<='1970-02-27'
+POSTHOOK: query: select * from newtypestbl where da<='1970-02-27' sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-PREHOOK: query: select * from newtypestbl where da<='1970-02-27'
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+PREHOOK: query: select * from newtypestbl where da<='1970-02-27' sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where da<='1970-02-27'
+POSTHOOK: query: select * from newtypestbl where da<='1970-02-27' sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
PREHOOK: query: select * from newtypestbl where da in (cast('1970-02-21' as date), cast('1970-02-27' as date))
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
@@ -303,42 +303,42 @@ hello world 11.22 1970-02-27
hello world 11.22 1970-02-27
hello world 11.22 1970-02-27
hello world 11.22 1970-02-27
-PREHOOK: query: select * from newtypestbl where da in (cast('1970-02-20' as date), cast('1970-02-27' as date))
+PREHOOK: query: select * from newtypestbl where da in (cast('1970-02-20' as date), cast('1970-02-27' as date)) sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where da in (cast('1970-02-20' as date), cast('1970-02-27' as date))
+POSTHOOK: query: select * from newtypestbl where da in (cast('1970-02-20' as date), cast('1970-02-27' as date)) sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-PREHOOK: query: select * from newtypestbl where da in (cast('1970-02-20' as date), cast('1970-02-27' as date))
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+PREHOOK: query: select * from newtypestbl where da in (cast('1970-02-20' as date), cast('1970-02-27' as date)) sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where da in (cast('1970-02-20' as date), cast('1970-02-27' as date))
+POSTHOOK: query: select * from newtypestbl where da in (cast('1970-02-20' as date), cast('1970-02-27' as date)) sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
PREHOOK: query: select * from newtypestbl where da in (cast('1970-02-21' as date), cast('1970-02-22' as date))
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
@@ -381,42 +381,42 @@ apple bee 0.22 1970-02-20
apple bee 0.22 1970-02-20
apple bee 0.22 1970-02-20
apple bee 0.22 1970-02-20
-PREHOOK: query: select * from newtypestbl where da between '1970-02-19' and '1970-02-28'
+PREHOOK: query: select * from newtypestbl where da between '1970-02-19' and '1970-02-28' sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where da between '1970-02-19' and '1970-02-28'
+POSTHOOK: query: select * from newtypestbl where da between '1970-02-19' and '1970-02-28' sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-PREHOOK: query: select * from newtypestbl where da between '1970-02-19' and '1970-02-28'
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+PREHOOK: query: select * from newtypestbl where da between '1970-02-19' and '1970-02-28' sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where da between '1970-02-19' and '1970-02-28'
+POSTHOOK: query: select * from newtypestbl where da between '1970-02-19' and '1970-02-28' sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
PREHOOK: query: select * from newtypestbl where da between '1970-02-18' and '1970-02-19'
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
http://git-wip-us.apache.org/repos/asf/hive/blob/92b42ae9/ql/src/test/results/clientpositive/parquet_ppd_decimal.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/parquet_ppd_decimal.q.out b/ql/src/test/results/clientpositive/parquet_ppd_decimal.q.out
index ec603eb..7c17733 100644
--- a/ql/src/test/results/clientpositive/parquet_ppd_decimal.q.out
+++ b/ql/src/test/results/clientpositive/parquet_ppd_decimal.q.out
@@ -280,114 +280,114 @@ apple bee 0.22 1970-02-20
apple bee 0.22 1970-02-20
apple bee 0.22 1970-02-20
apple bee 0.22 1970-02-20
-PREHOOK: query: select * from newtypestbl where d<=11.22
+PREHOOK: query: select * from newtypestbl where d<=11.22 sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where d<=11.22
+POSTHOOK: query: select * from newtypestbl where d<=11.22 sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-PREHOOK: query: select * from newtypestbl where d<=11.22
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+PREHOOK: query: select * from newtypestbl where d<=11.22 sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where d<=11.22
+POSTHOOK: query: select * from newtypestbl where d<=11.22 sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-PREHOOK: query: select * from newtypestbl where d<='11.22'
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+PREHOOK: query: select * from newtypestbl where d<='11.22' sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where d<='11.22'
+POSTHOOK: query: select * from newtypestbl where d<='11.22' sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-PREHOOK: query: select * from newtypestbl where d<='11.22'
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+PREHOOK: query: select * from newtypestbl where d<='11.22' sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where d<='11.22'
+POSTHOOK: query: select * from newtypestbl where d<='11.22' sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-PREHOOK: query: select * from newtypestbl where d<=cast('11.22' as float)
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+PREHOOK: query: select * from newtypestbl where d<=cast('11.22' as float) sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where d<=cast('11.22' as float)
+POSTHOOK: query: select * from newtypestbl where d<=cast('11.22' as float) sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-PREHOOK: query: select * from newtypestbl where d<=cast('11.22' as float)
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+PREHOOK: query: select * from newtypestbl where d<=cast('11.22' as float) sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where d<=cast('11.22' as float)
+POSTHOOK: query: select * from newtypestbl where d<=cast('11.22' as float) sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
PREHOOK: query: select * from newtypestbl where d<=cast('11.22' as decimal)
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
@@ -414,78 +414,78 @@ apple bee 0.22 1970-02-20
apple bee 0.22 1970-02-20
apple bee 0.22 1970-02-20
apple bee 0.22 1970-02-20
-PREHOOK: query: select * from newtypestbl where d<=11.22BD
+PREHOOK: query: select * from newtypestbl where d<=11.22BD sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where d<=11.22BD
+POSTHOOK: query: select * from newtypestbl where d<=11.22BD sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-PREHOOK: query: select * from newtypestbl where d<=11.22BD
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+PREHOOK: query: select * from newtypestbl where d<=11.22BD sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where d<=11.22BD
+POSTHOOK: query: select * from newtypestbl where d<=11.22BD sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-PREHOOK: query: select * from newtypestbl where d<=12
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+PREHOOK: query: select * from newtypestbl where d<=12 sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where d<=12
+POSTHOOK: query: select * from newtypestbl where d<=12 sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-PREHOOK: query: select * from newtypestbl where d<=12
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+PREHOOK: query: select * from newtypestbl where d<=12 sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where d<=12
+POSTHOOK: query: select * from newtypestbl where d<=12 sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
PREHOOK: query: select * from newtypestbl where d in ('0.22', '1.0')
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
@@ -512,42 +512,42 @@ apple bee 0.22 1970-02-20
apple bee 0.22 1970-02-20
apple bee 0.22 1970-02-20
apple bee 0.22 1970-02-20
-PREHOOK: query: select * from newtypestbl where d in ('0.22', '11.22')
+PREHOOK: query: select * from newtypestbl where d in ('0.22', '11.22') sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where d in ('0.22', '11.22')
+POSTHOOK: query: select * from newtypestbl where d in ('0.22', '11.22') sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-PREHOOK: query: select * from newtypestbl where d in ('0.22', '11.22')
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+PREHOOK: query: select * from newtypestbl where d in ('0.22', '11.22') sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where d in ('0.22', '11.22')
+POSTHOOK: query: select * from newtypestbl where d in ('0.22', '11.22') sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
PREHOOK: query: select * from newtypestbl where d in ('0.9', '1.0')
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
@@ -590,42 +590,42 @@ apple bee 0.22 1970-02-20
apple bee 0.22 1970-02-20
apple bee 0.22 1970-02-20
apple bee 0.22 1970-02-20
-PREHOOK: query: select * from newtypestbl where d in ('0.9', 0.22, cast('11.22' as float))
+PREHOOK: query: select * from newtypestbl where d in ('0.9', 0.22, cast('11.22' as float)) sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where d in ('0.9', 0.22, cast('11.22' as float))
+POSTHOOK: query: select * from newtypestbl where d in ('0.9', 0.22, cast('11.22' as float)) sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-PREHOOK: query: select * from newtypestbl where d in ('0.9', 0.22, cast('11.22' as float))
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+PREHOOK: query: select * from newtypestbl where d in ('0.9', 0.22, cast('11.22' as float)) sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where d in ('0.9', 0.22, cast('11.22' as float))
+POSTHOOK: query: select * from newtypestbl where d in ('0.9', 0.22, cast('11.22' as float)) sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
PREHOOK: query: select * from newtypestbl where d between 0 and 1
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
@@ -652,42 +652,42 @@ apple bee 0.22 1970-02-20
apple bee 0.22 1970-02-20
apple bee 0.22 1970-02-20
apple bee 0.22 1970-02-20
-PREHOOK: query: select * from newtypestbl where d between 0 and 1000
+PREHOOK: query: select * from newtypestbl where d between 0 and 1000 sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where d between 0 and 1000
+POSTHOOK: query: select * from newtypestbl where d between 0 and 1000 sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-PREHOOK: query: select * from newtypestbl where d between 0 and 1000
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+PREHOOK: query: select * from newtypestbl where d between 0 and 1000 sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where d between 0 and 1000
+POSTHOOK: query: select * from newtypestbl where d between 0 and 1000 sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
PREHOOK: query: select * from newtypestbl where d between 0 and '2.0'
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
http://git-wip-us.apache.org/repos/asf/hive/blob/92b42ae9/ql/src/test/results/clientpositive/parquet_ppd_timestamp.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/parquet_ppd_timestamp.q.out b/ql/src/test/results/clientpositive/parquet_ppd_timestamp.q.out
index 3693879..e314c10 100644
--- a/ql/src/test/results/clientpositive/parquet_ppd_timestamp.q.out
+++ b/ql/src/test/results/clientpositive/parquet_ppd_timestamp.q.out
@@ -150,42 +150,42 @@ apple bee 0.22 2011-01-01 01:01:01
apple bee 0.22 2011-01-01 01:01:01
apple bee 0.22 2011-01-01 01:01:01
apple bee 0.22 2011-01-01 01:01:01
-PREHOOK: query: select * from newtypestbl where ts<cast('2011-01-22 01:01:01' as timestamp)
+PREHOOK: query: select * from newtypestbl where ts<cast('2011-01-22 01:01:01' as timestamp) sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where ts<cast('2011-01-22 01:01:01' as timestamp)
+POSTHOOK: query: select * from newtypestbl where ts<cast('2011-01-22 01:01:01' as timestamp) sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 2011-01-20 01:01:01
apple bee 0.22 2011-01-01 01:01:01
-hello world 11.22 2011-01-20 01:01:01
apple bee 0.22 2011-01-01 01:01:01
-hello world 11.22 2011-01-20 01:01:01
apple bee 0.22 2011-01-01 01:01:01
-hello world 11.22 2011-01-20 01:01:01
apple bee 0.22 2011-01-01 01:01:01
-hello world 11.22 2011-01-20 01:01:01
apple bee 0.22 2011-01-01 01:01:01
-PREHOOK: query: select * from newtypestbl where ts<cast('2011-01-22 01:01:01' as timestamp)
+hello world 11.22 2011-01-20 01:01:01
+hello world 11.22 2011-01-20 01:01:01
+hello world 11.22 2011-01-20 01:01:01
+hello world 11.22 2011-01-20 01:01:01
+hello world 11.22 2011-01-20 01:01:01
+PREHOOK: query: select * from newtypestbl where ts<cast('2011-01-22 01:01:01' as timestamp) sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where ts<cast('2011-01-22 01:01:01' as timestamp)
+POSTHOOK: query: select * from newtypestbl where ts<cast('2011-01-22 01:01:01' as timestamp) sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 2011-01-20 01:01:01
apple bee 0.22 2011-01-01 01:01:01
-hello world 11.22 2011-01-20 01:01:01
apple bee 0.22 2011-01-01 01:01:01
-hello world 11.22 2011-01-20 01:01:01
apple bee 0.22 2011-01-01 01:01:01
-hello world 11.22 2011-01-20 01:01:01
apple bee 0.22 2011-01-01 01:01:01
-hello world 11.22 2011-01-20 01:01:01
apple bee 0.22 2011-01-01 01:01:01
+hello world 11.22 2011-01-20 01:01:01
+hello world 11.22 2011-01-20 01:01:01
+hello world 11.22 2011-01-20 01:01:01
+hello world 11.22 2011-01-20 01:01:01
+hello world 11.22 2011-01-20 01:01:01
PREHOOK: query: select * from newtypestbl where ts<cast('2010-10-01 01:01:01' as timestamp)
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
@@ -228,42 +228,42 @@ apple bee 0.22 2011-01-01 01:01:01
apple bee 0.22 2011-01-01 01:01:01
apple bee 0.22 2011-01-01 01:01:01
apple bee 0.22 2011-01-01 01:01:01
-PREHOOK: query: select * from newtypestbl where ts<=cast('2011-01-20 01:01:01' as timestamp)
+PREHOOK: query: select * from newtypestbl where ts<=cast('2011-01-20 01:01:01' as timestamp) sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where ts<=cast('2011-01-20 01:01:01' as timestamp)
+POSTHOOK: query: select * from newtypestbl where ts<=cast('2011-01-20 01:01:01' as timestamp) sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 2011-01-20 01:01:01
apple bee 0.22 2011-01-01 01:01:01
-hello world 11.22 2011-01-20 01:01:01
apple bee 0.22 2011-01-01 01:01:01
-hello world 11.22 2011-01-20 01:01:01
apple bee 0.22 2011-01-01 01:01:01
-hello world 11.22 2011-01-20 01:01:01
apple bee 0.22 2011-01-01 01:01:01
-hello world 11.22 2011-01-20 01:01:01
apple bee 0.22 2011-01-01 01:01:01
-PREHOOK: query: select * from newtypestbl where ts<=cast('2011-01-20 01:01:01' as timestamp)
+hello world 11.22 2011-01-20 01:01:01
+hello world 11.22 2011-01-20 01:01:01
+hello world 11.22 2011-01-20 01:01:01
+hello world 11.22 2011-01-20 01:01:01
+hello world 11.22 2011-01-20 01:01:01
+PREHOOK: query: select * from newtypestbl where ts<=cast('2011-01-20 01:01:01' as timestamp) sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where ts<=cast('2011-01-20 01:01:01' as timestamp)
+POSTHOOK: query: select * from newtypestbl where ts<=cast('2011-01-20 01:01:01' as timestamp) sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 2011-01-20 01:01:01
apple bee 0.22 2011-01-01 01:01:01
-hello world 11.22 2011-01-20 01:01:01
apple bee 0.22 2011-01-01 01:01:01
-hello world 11.22 2011-01-20 01:01:01
apple bee 0.22 2011-01-01 01:01:01
-hello world 11.22 2011-01-20 01:01:01
apple bee 0.22 2011-01-01 01:01:01
-hello world 11.22 2011-01-20 01:01:01
apple bee 0.22 2011-01-01 01:01:01
+hello world 11.22 2011-01-20 01:01:01
+hello world 11.22 2011-01-20 01:01:01
+hello world 11.22 2011-01-20 01:01:01
+hello world 11.22 2011-01-20 01:01:01
+hello world 11.22 2011-01-20 01:01:01
PREHOOK: query: select * from newtypestbl where ts in (cast('2011-01-02 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp))
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
@@ -290,42 +290,42 @@ hello world 11.22 2011-01-20 01:01:01
hello world 11.22 2011-01-20 01:01:01
hello world 11.22 2011-01-20 01:01:01
hello world 11.22 2011-01-20 01:01:01
-PREHOOK: query: select * from newtypestbl where ts in (cast('2011-01-01 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp))
+PREHOOK: query: select * from newtypestbl where ts in (cast('2011-01-01 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp)) sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where ts in (cast('2011-01-01 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp))
+POSTHOOK: query: select * from newtypestbl where ts in (cast('2011-01-01 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp)) sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 2011-01-20 01:01:01
apple bee 0.22 2011-01-01 01:01:01
-hello world 11.22 2011-01-20 01:01:01
apple bee 0.22 2011-01-01 01:01:01
-hello world 11.22 2011-01-20 01:01:01
apple bee 0.22 2011-01-01 01:01:01
-hello world 11.22 2011-01-20 01:01:01
apple bee 0.22 2011-01-01 01:01:01
-hello world 11.22 2011-01-20 01:01:01
apple bee 0.22 2011-01-01 01:01:01
-PREHOOK: query: select * from newtypestbl where ts in (cast('2011-01-01 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp))
+hello world 11.22 2011-01-20 01:01:01
+hello world 11.22 2011-01-20 01:01:01
+hello world 11.22 2011-01-20 01:01:01
+hello world 11.22 2011-01-20 01:01:01
+hello world 11.22 2011-01-20 01:01:01
+PREHOOK: query: select * from newtypestbl where ts in (cast('2011-01-01 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp)) sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where ts in (cast('2011-01-01 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp))
+POSTHOOK: query: select * from newtypestbl where ts in (cast('2011-01-01 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp)) sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 2011-01-20 01:01:01
apple bee 0.22 2011-01-01 01:01:01
-hello world 11.22 2011-01-20 01:01:01
apple bee 0.22 2011-01-01 01:01:01
-hello world 11.22 2011-01-20 01:01:01
apple bee 0.22 2011-01-01 01:01:01
-hello world 11.22 2011-01-20 01:01:01
apple bee 0.22 2011-01-01 01:01:01
-hello world 11.22 2011-01-20 01:01:01
apple bee 0.22 2011-01-01 01:01:01
+hello world 11.22 2011-01-20 01:01:01
+hello world 11.22 2011-01-20 01:01:01
+hello world 11.22 2011-01-20 01:01:01
+hello world 11.22 2011-01-20 01:01:01
+hello world 11.22 2011-01-20 01:01:01
PREHOOK: query: select * from newtypestbl where ts in (cast('2011-01-02 01:01:01' as timestamp), cast('2011-01-08 01:01:01' as timestamp))
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
@@ -368,42 +368,42 @@ apple bee 0.22 2011-01-01 01:01:01
apple bee 0.22 2011-01-01 01:01:01
apple bee 0.22 2011-01-01 01:01:01
apple bee 0.22 2011-01-01 01:01:01
-PREHOOK: query: select * from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-25 01:01:01' as timestamp)
+PREHOOK: query: select * from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-25 01:01:01' as timestamp) sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-25 01:01:01' as timestamp)
+POSTHOOK: query: select * from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-25 01:01:01' as timestamp) sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 2011-01-20 01:01:01
apple bee 0.22 2011-01-01 01:01:01
-hello world 11.22 2011-01-20 01:01:01
apple bee 0.22 2011-01-01 01:01:01
-hello world 11.22 2011-01-20 01:01:01
apple bee 0.22 2011-01-01 01:01:01
-hello world 11.22 2011-01-20 01:01:01
apple bee 0.22 2011-01-01 01:01:01
-hello world 11.22 2011-01-20 01:01:01
apple bee 0.22 2011-01-01 01:01:01
-PREHOOK: query: select * from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-25 01:01:01' as timestamp)
+hello world 11.22 2011-01-20 01:01:01
+hello world 11.22 2011-01-20 01:01:01
+hello world 11.22 2011-01-20 01:01:01
+hello world 11.22 2011-01-20 01:01:01
+hello world 11.22 2011-01-20 01:01:01
+PREHOOK: query: select * from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-25 01:01:01' as timestamp) sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-25 01:01:01' as timestamp)
+POSTHOOK: query: select * from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-25 01:01:01' as timestamp) sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 2011-01-20 01:01:01
apple bee 0.22 2011-01-01 01:01:01
-hello world 11.22 2011-01-20 01:01:01
apple bee 0.22 2011-01-01 01:01:01
-hello world 11.22 2011-01-20 01:01:01
apple bee 0.22 2011-01-01 01:01:01
-hello world 11.22 2011-01-20 01:01:01
apple bee 0.22 2011-01-01 01:01:01
-hello world 11.22 2011-01-20 01:01:01
apple bee 0.22 2011-01-01 01:01:01
+hello world 11.22 2011-01-20 01:01:01
+hello world 11.22 2011-01-20 01:01:01
+hello world 11.22 2011-01-20 01:01:01
+hello world 11.22 2011-01-20 01:01:01
+hello world 11.22 2011-01-20 01:01:01
PREHOOK: query: select * from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2010-11-01 01:01:01' as timestamp)
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
http://git-wip-us.apache.org/repos/asf/hive/blob/92b42ae9/ql/src/test/results/clientpositive/parquet_ppd_varchar.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/parquet_ppd_varchar.q.out b/ql/src/test/results/clientpositive/parquet_ppd_varchar.q.out
index 0574e5d..2e9f72f 100644
--- a/ql/src/test/results/clientpositive/parquet_ppd_varchar.q.out
+++ b/ql/src/test/results/clientpositive/parquet_ppd_varchar.q.out
@@ -98,42 +98,42 @@ apple bee 0.22 1970-02-20
apple bee 0.22 1970-02-20
apple bee 0.22 1970-02-20
apple bee 0.22 1970-02-20
-PREHOOK: query: select * from newtypestbl where v<="world"
+PREHOOK: query: select * from newtypestbl where v<="world" sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where v<="world"
+POSTHOOK: query: select * from newtypestbl where v<="world" sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-PREHOOK: query: select * from newtypestbl where v<="world"
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+PREHOOK: query: select * from newtypestbl where v<="world" sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where v<="world"
+POSTHOOK: query: select * from newtypestbl where v<="world" sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
PREHOOK: query: select * from newtypestbl where v="bee "
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
@@ -176,42 +176,42 @@ apple bee 0.22 1970-02-20
apple bee 0.22 1970-02-20
apple bee 0.22 1970-02-20
apple bee 0.22 1970-02-20
-PREHOOK: query: select * from newtypestbl where v in ("bee", "world")
+PREHOOK: query: select * from newtypestbl where v in ("bee", "world") sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where v in ("bee", "world")
+POSTHOOK: query: select * from newtypestbl where v in ("bee", "world") sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-PREHOOK: query: select * from newtypestbl where v in ("bee", "world")
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+PREHOOK: query: select * from newtypestbl where v in ("bee", "world") sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where v in ("bee", "world")
+POSTHOOK: query: select * from newtypestbl where v in ("bee", "world") sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
PREHOOK: query: select * from newtypestbl where v in ("orange")
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
@@ -254,42 +254,42 @@ apple bee 0.22 1970-02-20
apple bee 0.22 1970-02-20
apple bee 0.22 1970-02-20
apple bee 0.22 1970-02-20
-PREHOOK: query: select * from newtypestbl where v between "bee" and "zombie"
+PREHOOK: query: select * from newtypestbl where v between "bee" and "zombie" sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where v between "bee" and "zombie"
+POSTHOOK: query: select * from newtypestbl where v between "bee" and "zombie" sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-PREHOOK: query: select * from newtypestbl where v between "bee" and "zombie"
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+PREHOOK: query: select * from newtypestbl where v between "bee" and "zombie" sort by c
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-POSTHOOK: query: select * from newtypestbl where v between "bee" and "zombie"
+POSTHOOK: query: select * from newtypestbl where v between "bee" and "zombie" sort by c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
-hello world 11.22 1970-02-27
apple bee 0.22 1970-02-20
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
+hello world 11.22 1970-02-27
PREHOOK: query: select * from newtypestbl where v between "orange" and "pine"
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
[23/50] [abbrv] hive git commit: HIVE-11875: JDBC Driver does not honor delegation token mechanism when reading params from ZooKeeper (Vaibhav Gumashta reviewed by Jason Dere)
Posted by xu...@apache.org.
HIVE-11875: JDBC Driver does not honor delegation token mechanism when reading params from ZooKeeper (Vaibhav Gumashta reviewed by Jason Dere)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/514ab795
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/514ab795
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/514ab795
Branch: refs/heads/beeline-cli
Commit: 514ab795ffd03a72803f878eac57e3cf82b80045
Parents: 2a65989
Author: Vaibhav Gumashta <vg...@apache.org>
Authored: Mon Sep 21 17:00:24 2015 -0700
Committer: Vaibhav Gumashta <vg...@apache.org>
Committed: Mon Sep 21 17:00:24 2015 -0700
----------------------------------------------------------------------
.../hive/jdbc/ZooKeeperHiveClientHelper.java | 32 ++++++++++++++------
1 file changed, 22 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/514ab795/jdbc/src/java/org/apache/hive/jdbc/ZooKeeperHiveClientHelper.java
----------------------------------------------------------------------
diff --git a/jdbc/src/java/org/apache/hive/jdbc/ZooKeeperHiveClientHelper.java b/jdbc/src/java/org/apache/hive/jdbc/ZooKeeperHiveClientHelper.java
index eeb3cf9..4712d2e 100644
--- a/jdbc/src/java/org/apache/hive/jdbc/ZooKeeperHiveClientHelper.java
+++ b/jdbc/src/java/org/apache/hive/jdbc/ZooKeeperHiveClientHelper.java
@@ -137,20 +137,32 @@ class ZooKeeperHiveClientHelper {
&& !(connParams.getSessionVars().containsKey(JdbcConnectionParams.USE_SSL))) {
connParams.getSessionVars().put(JdbcConnectionParams.USE_SSL, matcher.group(2));
}
- // Set authentication configs
- // Note that in JDBC driver, we have 3 auth modes: NOSASL, Kerberos and password based
- // The use of "JdbcConnectionParams.AUTH_TYPE=JdbcConnectionParams.AUTH_SIMPLE" picks NOSASL
- // The presence of "JdbcConnectionParams.AUTH_PRINCIPAL=<principal>" picks Kerberos
- // Otherwise password based (which includes NONE, PAM, LDAP, CUSTOM)
- if ((matcher.group(1).equals("hive.server2.authentication"))
- && !(connParams.getSessionVars().containsKey(JdbcConnectionParams.AUTH_TYPE))) {
- if (matcher.group(2).equalsIgnoreCase("NOSASL")) {
+ /**
+ * Note: this is pretty messy, but sticking to the current implementation.
+ * Set authentication configs. Note that in JDBC driver, we have 3 auth modes: NOSASL,
+ * Kerberos (including delegation token mechanism) and password based.
+ * The use of JdbcConnectionParams.AUTH_TYPE==JdbcConnectionParams.AUTH_SIMPLE picks NOSASL.
+ * The presence of JdbcConnectionParams.AUTH_PRINCIPAL==<principal> picks Kerberos.
+ * If principal is absent, the presence of
+ * JdbcConnectionParams.AUTH_TYPE==JdbcConnectionParams.AUTH_TOKEN uses delegation token.
+ * Otherwise password based (which includes NONE, PAM, LDAP, CUSTOM)
+ */
+ if (matcher.group(1).equals("hive.server2.authentication")) {
+ // NOSASL
+ if (matcher.group(2).equalsIgnoreCase("NOSASL")
+ && !(connParams.getSessionVars().containsKey(JdbcConnectionParams.AUTH_TYPE) && connParams
+ .getSessionVars().get(JdbcConnectionParams.AUTH_TYPE)
+ .equalsIgnoreCase(JdbcConnectionParams.AUTH_SIMPLE))) {
connParams.getSessionVars().put(JdbcConnectionParams.AUTH_TYPE,
JdbcConnectionParams.AUTH_SIMPLE);
}
}
- // Set server's kerberos principal
- if ((matcher.group(1).equals("hive.server2.authentication.kerberos.principal"))
+ // KERBEROS
+ // If delegation token is passed from the client side, do not set the principal
+ if (matcher.group(1).equalsIgnoreCase("hive.server2.authentication.kerberos.principal")
+ && !(connParams.getSessionVars().containsKey(JdbcConnectionParams.AUTH_TYPE) && connParams
+ .getSessionVars().get(JdbcConnectionParams.AUTH_TYPE)
+ .equalsIgnoreCase(JdbcConnectionParams.AUTH_TOKEN))
&& !(connParams.getSessionVars().containsKey(JdbcConnectionParams.AUTH_PRINCIPAL))) {
connParams.getSessionVars().put(JdbcConnectionParams.AUTH_PRINCIPAL, matcher.group(2));
}
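As background for this fix, here is a minimal client-side sketch (not part of the patch; the ZooKeeper quorum, namespace, and class name are placeholders) of the scenario it repairs: service discovery through ZooKeeper combined with delegation-token authentication, where the principal read from ZooKeeper must no longer clobber auth=delegationToken.

    import java.sql.Connection;
    import java.sql.DriverManager;

    public class DelegationTokenDiscovery {
      public static void main(String[] args) throws Exception {
        // Ensure the Hive JDBC driver is registered (needed on older JDBC versions).
        Class.forName("org.apache.hive.jdbc.HiveDriver");
        // Placeholder quorum and namespace; "auth=delegationToken" corresponds
        // to JdbcConnectionParams.AUTH_TOKEN in the driver.
        String url = "jdbc:hive2://zk1:2181,zk2:2181,zk3:2181/;"
            + "serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2;"
            + "auth=delegationToken";
        try (Connection conn = DriverManager.getConnection(url)) {
          System.out.println("connected: " + !conn.isClosed());
        }
      }
    }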
[48/50] [abbrv] hive git commit: HIVE-11932: JDBC Driver appends an extra / when configuring a connection by reading httpPath from ZooKeeper (Vaibhav Gumashta, reviewed by Thejas Nair)
Posted by xu...@apache.org.
HIVE-11932: JDBC Driver appends an extra / when configuring a connection by reading httpPath from ZooKeeper (Vaibhav Gumashta, reviewed by Thejas Nair)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7b92f44b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7b92f44b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7b92f44b
Branch: refs/heads/beeline-cli
Commit: 7b92f44b674c5455eb3629b75037531efca43126
Parents: 461e38e
Author: Gunther Hagleitner <gu...@apache.org>
Authored: Thu Sep 24 15:20:26 2015 -0700
Committer: Gunther Hagleitner <gu...@apache.org>
Committed: Thu Sep 24 15:24:42 2015 -0700
----------------------------------------------------------------------
jdbc/src/java/org/apache/hive/jdbc/ZooKeeperHiveClientHelper.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/7b92f44b/jdbc/src/java/org/apache/hive/jdbc/ZooKeeperHiveClientHelper.java
----------------------------------------------------------------------
diff --git a/jdbc/src/java/org/apache/hive/jdbc/ZooKeeperHiveClientHelper.java b/jdbc/src/java/org/apache/hive/jdbc/ZooKeeperHiveClientHelper.java
index 4712d2e..6c21423 100644
--- a/jdbc/src/java/org/apache/hive/jdbc/ZooKeeperHiveClientHelper.java
+++ b/jdbc/src/java/org/apache/hive/jdbc/ZooKeeperHiveClientHelper.java
@@ -130,7 +130,7 @@ class ZooKeeperHiveClientHelper {
// Set http path
if ((matcher.group(1).equals("hive.server2.thrift.http.path"))
&& !(connParams.getSessionVars().containsKey(JdbcConnectionParams.HTTP_PATH))) {
- connParams.getSessionVars().put(JdbcConnectionParams.HTTP_PATH, "/" + matcher.group(2));
+ connParams.getSessionVars().put(JdbcConnectionParams.HTTP_PATH, matcher.group(2));
}
// Set SSL
if ((matcher.group(1) != null) && (matcher.group(1).equals("hive.server2.use.SSL"))
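To see why the one-character change matters, a small illustration (host, port, and class name are hypothetical; this is not Hive code): HiveServer2 publishes hive.server2.thrift.http.path without a leading slash, and the path is presumably joined with a separator elsewhere in the driver, so prepending "/" at read time produced a doubled slash in the request path.

    public class HttpPathJoin {
      public static void main(String[] args) {
        String httpPath = "cliservice";          // value registered in ZooKeeper
        String base = "http://hs2-host:10001/";  // hypothetical HTTP endpoint
        System.out.println(base + "/" + httpPath); // before the fix: ...//cliservice
        System.out.println(base + httpPath);       // after the fix:  .../cliservice
      }
    }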
[34/50] [abbrv] hive git commit: HIVE-11217: CTAS statements throw an error when the table is stored as ORC file format and the select clause has a NULL/VOID type column (Yongzhi Chen reviewed by Prasanth Jayachandran)
Posted by xu...@apache.org.
HIVE-11217: CTAS statements throw an error when the table is stored as ORC file format and the select clause has a NULL/VOID type column (Yongzhi Chen reviewed by Prasanth Jayachandran)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2e8324e4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2e8324e4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2e8324e4
Branch: refs/heads/beeline-cli
Commit: 2e8324e439de02c75e173e27147d208720f51964
Parents: 072c5a0
Author: Prasanth Jayachandran <j....@gmail.com>
Authored: Wed Sep 23 00:48:03 2015 -0500
Committer: Prasanth Jayachandran <j....@gmail.com>
Committed: Wed Sep 23 00:48:03 2015 -0500
----------------------------------------------------------------------
ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java | 2 +-
.../org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java | 8 +++++++-
ql/src/test/queries/clientnegative/ctasnullcol.q | 2 ++
ql/src/test/results/clientnegative/ctasnullcol.q.out | 5 +++++
4 files changed, 15 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/2e8324e4/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
index 7abef0b..87c2830 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
@@ -432,7 +432,7 @@ public enum ErrorMsg {
UPDATE_CANNOT_UPDATE_BUCKET_VALUE(10302, "Updating values of bucketing columns is not supported. Column {0}.", true),
IMPORT_INTO_STRICT_REPL_TABLE(10303,"Non-repl import disallowed against table that is a destination of replication."),
CTAS_LOCATION_NONEMPTY(10304, "CREATE-TABLE-AS-SELECT cannot create table with location to a non-empty directory."),
-
+ CTAS_CREATES_VOID_TYPE(10305, "CREATE-TABLE-AS-SELECT creates a VOID type, please use CAST to specify the type, near field: "),
//========================== 20000 range starts here ========================//
SCRIPT_INIT_ERROR(20000, "Unable to initialize custom script."),
SCRIPT_IO_ERROR(20001, "An error occurred while reading or writing to your custom script. "
http://git-wip-us.apache.org/repos/asf/hive/blob/2e8324e4/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 1076dfd..c5f39d3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -6477,7 +6477,13 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
}
colName = fixCtasColumnName(colName);
col.setName(colName);
- col.setType(colInfo.getType().getTypeName());
+ String typeName = colInfo.getType().getTypeName();
+ // CTAS should NOT create a VOID type
+ if (typeName.equals(serdeConstants.VOID_TYPE_NAME)) {
+ throw new SemanticException(ErrorMsg.CTAS_CREATES_VOID_TYPE
+ .getMsg(colName));
+ }
+ col.setType(typeName);
field_schemas.add(col);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/2e8324e4/ql/src/test/queries/clientnegative/ctasnullcol.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientnegative/ctasnullcol.q b/ql/src/test/queries/clientnegative/ctasnullcol.q
new file mode 100644
index 0000000..b03c172
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/ctasnullcol.q
@@ -0,0 +1,2 @@
+drop table if exists orc_table_with_null;
+CREATE TABLE orc_table_with_null STORED AS ORC AS SELECT key, null FROM src;
http://git-wip-us.apache.org/repos/asf/hive/blob/2e8324e4/ql/src/test/results/clientnegative/ctasnullcol.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/ctasnullcol.q.out b/ql/src/test/results/clientnegative/ctasnullcol.q.out
new file mode 100644
index 0000000..6d36bb8
--- /dev/null
+++ b/ql/src/test/results/clientnegative/ctasnullcol.q.out
@@ -0,0 +1,5 @@
+PREHOOK: query: drop table if exists orc_table_with_null
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists orc_table_with_null
+POSTHOOK: type: DROPTABLE
+FAILED: SemanticException [Error 10305]: CREATE-TABLE-AS-SELECT creates a VOID type, please use CAST to specify the type, near field: c1
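A hedged usage sketch of the new behavior (the connection URL, table, and column names are illustrative): the negative test above now fails fast with Error 10305, and casting the NULL to a concrete type, as the message suggests, keeps such a CTAS working.

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.Statement;

    public class CtasVoidColumn {
      public static void main(String[] args) throws Exception {
        String url = "jdbc:hive2://localhost:10000/default"; // placeholder
        try (Connection conn = DriverManager.getConnection(url);
             Statement stmt = conn.createStatement()) {
          // Now rejected with Error 10305 (CTAS_CREATES_VOID_TYPE):
          // stmt.execute("CREATE TABLE t STORED AS ORC AS SELECT key, null FROM src");

          // Accepted: CAST gives the column a concrete type.
          stmt.execute("CREATE TABLE t STORED AS ORC AS "
              + "SELECT key, CAST(null AS string) AS value FROM src");
        }
      }
    }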
[07/50] [abbrv] hive git commit: HIVE-11834: Lineage doesn't work with dynamic partitioning queries (Jimmy, reviewed by Szehon)
Posted by xu...@apache.org.
HIVE-11834: Lineage doesn't work with dynamic partitioning queries (Jimmy, reviewed by Szehon)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2278548e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2278548e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2278548e
Branch: refs/heads/beeline-cli
Commit: 2278548e41bf7b51e7b604fe9c91905b1ca198f1
Parents: 3cf7bd9
Author: Jimmy Xiang <jx...@cloudera.com>
Authored: Wed Sep 16 08:09:41 2015 -0700
Committer: Jimmy Xiang <jx...@cloudera.com>
Committed: Fri Sep 18 07:19:23 2015 -0700
----------------------------------------------------------------------
.../hadoop/hive/ql/hooks/LineageLogger.java | 93 +++++++++++---------
.../hive/ql/optimizer/lineage/LineageCtx.java | 8 +-
ql/src/test/queries/clientpositive/lineage3.q | 26 ++++++
.../test/results/clientpositive/lineage3.q.out | 68 +++++++++++++-
4 files changed, 153 insertions(+), 42 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/2278548e/ql/src/java/org/apache/hadoop/hive/ql/hooks/LineageLogger.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/LineageLogger.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/LineageLogger.java
index f615d81..9988c79 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/hooks/LineageLogger.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/hooks/LineageLogger.java
@@ -215,8 +215,7 @@ public class LineageLogger implements ExecuteWithHookContext {
private List<Edge> getEdges(QueryPlan plan, Index index) {
LinkedHashMap<String, ObjectPair<SelectOperator,
org.apache.hadoop.hive.ql.metadata.Table>> finalSelOps = index.getFinalSelectOps();
- Set<Vertex> allTargets = new LinkedHashSet<Vertex>();
- Map<String, Vertex> allSources = new LinkedHashMap<String, Vertex>();
+ Map<String, Vertex> vertexCache = new LinkedHashMap<String, Vertex>();
List<Edge> edges = new ArrayList<Edge>();
for (ObjectPair<SelectOperator,
org.apache.hadoop.hive.ql.metadata.Table> pair: finalSelOps.values()) {
@@ -244,41 +243,46 @@ public class LineageLogger implements ExecuteWithHookContext {
}
}
}
- int fields = fieldSchemas.size();
Map<ColumnInfo, Dependency> colMap = index.getDependencies(finalSelOp);
List<Dependency> dependencies = colMap != null ? Lists.newArrayList(colMap.values()) : null;
+ int fields = fieldSchemas.size();
+ if (t != null && colMap != null && fields < colMap.size()) {
+ // Dynamic partition keys should be added to field schemas.
+ List<FieldSchema> partitionKeys = t.getPartitionKeys();
+ int dynamicKeyCount = colMap.size() - fields;
+ int keyOffset = partitionKeys.size() - dynamicKeyCount;
+ if (keyOffset >= 0) {
+ fields += dynamicKeyCount;
+ for (int i = 0; i < dynamicKeyCount; i++) {
+ FieldSchema field = partitionKeys.get(keyOffset + i);
+ fieldSchemas.add(field);
+ if (colNames != null) {
+ colNames.add(field.getName());
+ }
+ }
+ }
+ }
if (dependencies == null || dependencies.size() != fields) {
log("Result schema has " + fields
+ " fields, but we don't get as many dependencies");
} else {
// Go through each target column, generate the lineage edges.
+ Set<Vertex> targets = new LinkedHashSet<Vertex>();
for (int i = 0; i < fields; i++) {
- Vertex target = new Vertex(
- getTargetFieldName(i, destTableName, colNames, fieldSchemas));
- allTargets.add(target);
+ Vertex target = getOrCreateVertex(vertexCache,
+ getTargetFieldName(i, destTableName, colNames, fieldSchemas),
+ Vertex.Type.COLUMN);
+ targets.add(target);
Dependency dep = dependencies.get(i);
- String expr = dep.getExpr();
- Set<Vertex> sources = createSourceVertices(allSources, dep.getBaseCols());
- Edge edge = findSimilarEdgeBySources(edges, sources, expr, Edge.Type.PROJECTION);
- if (edge == null) {
- Set<Vertex> targets = new LinkedHashSet<Vertex>();
- targets.add(target);
- edges.add(new Edge(sources, targets, expr, Edge.Type.PROJECTION));
- } else {
- edge.targets.add(target);
- }
+ addEdge(vertexCache, edges, dep.getBaseCols(), target,
+ dep.getExpr(), Edge.Type.PROJECTION);
}
Set<Predicate> conds = index.getPredicates(finalSelOp);
if (conds != null && !conds.isEmpty()) {
for (Predicate cond: conds) {
- String expr = cond.getExpr();
- Set<Vertex> sources = createSourceVertices(allSources, cond.getBaseCols());
- Edge edge = findSimilarEdgeByTargets(edges, allTargets, expr, Edge.Type.PREDICATE);
- if (edge == null) {
- edges.add(new Edge(sources, allTargets, expr, Edge.Type.PREDICATE));
- } else {
- edge.sources.addAll(sources);
- }
+ addEdge(vertexCache, edges, cond.getBaseCols(),
+ new LinkedHashSet<Vertex>(targets), cond.getExpr(),
+ Edge.Type.PREDICATE);
}
}
}
@@ -286,12 +290,35 @@ public class LineageLogger implements ExecuteWithHookContext {
return edges;
}
+ private void addEdge(Map<String, Vertex> vertexCache, List<Edge> edges,
+ Set<BaseColumnInfo> srcCols, Vertex target, String expr, Edge.Type type) {
+ Set<Vertex> targets = new LinkedHashSet<Vertex>();
+ targets.add(target);
+ addEdge(vertexCache, edges, srcCols, targets, expr, type);
+ }
+
+ /**
+ * Find an edge from all edges that has the same source vertices.
+ * If found, add the new targets to this edge's target vertex list.
+ * Otherwise, create a new edge and add it to the edge list.
+ */
+ private void addEdge(Map<String, Vertex> vertexCache, List<Edge> edges,
+ Set<BaseColumnInfo> srcCols, Set<Vertex> targets, String expr, Edge.Type type) {
+ Set<Vertex> sources = createSourceVertices(vertexCache, srcCols);
+ Edge edge = findSimilarEdgeBySources(edges, sources, expr, type);
+ if (edge == null) {
+ edges.add(new Edge(sources, targets, expr, type));
+ } else {
+ edge.targets.addAll(targets);
+ }
+ }
+
/**
* Convert a list of columns to a set of vertices.
* Use cached vertices if possible.
*/
private Set<Vertex> createSourceVertices(
- Map<String, Vertex> srcVertexCache, Collection<BaseColumnInfo> baseCols) {
+ Map<String, Vertex> vertexCache, Collection<BaseColumnInfo> baseCols) {
Set<Vertex> sources = new LinkedHashSet<Vertex>();
if (baseCols != null && !baseCols.isEmpty()) {
for(BaseColumnInfo col: baseCols) {
@@ -308,7 +335,7 @@ public class LineageLogger implements ExecuteWithHookContext {
type = Vertex.Type.COLUMN;
label = tableName + "." + fieldSchema.getName();
}
- sources.add(getOrCreateVertex(srcVertexCache, label, type));
+ sources.add(getOrCreateVertex(vertexCache, label, type));
}
}
return sources;
@@ -342,20 +369,6 @@ public class LineageLogger implements ExecuteWithHookContext {
}
/**
- * Find an edge that has the same type, expression, and targets.
- */
- private Edge findSimilarEdgeByTargets(
- List<Edge> edges, Set<Vertex> targets, String expr, Edge.Type type) {
- for (Edge edge: edges) {
- if (edge.type == type && StringUtils.equals(edge.expr, expr)
- && SetUtils.isEqualSet(edge.targets, targets)) {
- return edge;
- }
- }
- return null;
- }
-
- /**
* Generate normalized name for a given target column.
*/
private String getTargetFieldName(int fieldIndex,
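A worked example of the key-offset arithmetic in the hunk above, using the dest_dp3 insert from lineage3.q; this is an illustrative sketch with plain ints and a hypothetical class name, not the actual Hive objects.

    public class DynamicKeyOffset {
      public static void main(String[] args) {
        int fields = 2;              // fieldSchemas of dest_dp3: first, word
        int colMapSize = 4;          // dependencies: first, word, m, d
        int partitionKeyCount = 3;   // dest_dp3 is partitioned by (y, m, d)
        int dynamicKeyCount = colMapSize - fields;            // 2 -> m and d
        int keyOffset = partitionKeyCount - dynamicKeyCount;  // 1 -> skip static y=2
        // partitionKeys.get(1) and partitionKeys.get(2), i.e. m and d, are
        // appended to the field schemas so the dependency count matches again.
        System.out.println("dynamic keys: " + dynamicKeyCount + ", offset: " + keyOffset);
      }
    }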
http://git-wip-us.apache.org/repos/asf/hive/blob/2278548e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/LineageCtx.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/LineageCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/LineageCtx.java
index c33d775..2d8b9e3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/LineageCtx.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/LineageCtx.java
@@ -18,13 +18,13 @@
package org.apache.hadoop.hive.ql.optimizer.lineage;
-import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;
+import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hive.common.ObjectPair;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
@@ -171,6 +171,12 @@ public class LineageCtx implements NodeProcessorCtx {
conds = new LinkedHashSet<Predicate>();
condMap.put(op, conds);
}
+ for (Predicate p: conds) {
+ if (StringUtils.equals(cond.getExpr(), p.getExpr())) {
+ p.getBaseCols().addAll(cond.getBaseCols());
+ return;
+ }
+ }
conds.add(cond);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/2278548e/ql/src/test/queries/clientpositive/lineage3.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/lineage3.q b/ql/src/test/queries/clientpositive/lineage3.q
index c24ff7d..70d4e57 100644
--- a/ql/src/test/queries/clientpositive/lineage3.q
+++ b/ql/src/test/queries/clientpositive/lineage3.q
@@ -176,3 +176,29 @@ alter view dest_v3 as
select * from dest_v3 limit 2;
+drop table if exists src_dp;
+create table src_dp (first string, word string, year int, month int, day int);
+drop table if exists dest_dp1;
+create table dest_dp1 (first string, word string) partitioned by (year int);
+drop table if exists dest_dp2;
+create table dest_dp2 (first string, word string) partitioned by (y int, m int);
+drop table if exists dest_dp3;
+create table dest_dp3 (first string, word string) partitioned by (y int, m int, d int);
+
+set hive.exec.dynamic.partition.mode=nonstrict;
+
+insert into dest_dp1 partition (year) select first, word, year from src_dp;
+insert into dest_dp2 partition (y, m) select first, word, year, month from src_dp;
+insert into dest_dp2 partition (y=0, m) select first, word, month from src_dp where year=0;
+insert into dest_dp3 partition (y=0, m, d) select first, word, month m, day d from src_dp where year=0;
+
+drop table if exists src_dp1;
+create table src_dp1 (f string, w string, m int);
+
+from src_dp, src_dp1
+insert into dest_dp1 partition (year) select first, word, year
+insert into dest_dp2 partition (y, m) select first, word, year, month
+insert into dest_dp3 partition (y=2, m, d) select first, word, month m, day d where year=2
+insert into dest_dp2 partition (y=1, m) select f, w, m
+insert into dest_dp1 partition (year=0) select f, w;
+
http://git-wip-us.apache.org/repos/asf/hive/blob/2278548e/ql/src/test/results/clientpositive/lineage3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/lineage3.q.out b/ql/src/test/results/clientpositive/lineage3.q.out
index 708abee..ad965c8 100644
--- a/ql/src/test/results/clientpositive/lineage3.q.out
+++ b/ql/src/test/results/clientpositive/lineage3.q.out
@@ -25,7 +25,7 @@ PREHOOK: type: QUERY
PREHOOK: Input: default@alltypesorc
PREHOOK: Output: default@d1
PREHOOK: Output: default@d2
-{"version":"1.0","engine":"mr","hash":"8703e4091ebd4c96afd3cac83e3a2957","queryText":"from (select a.ctinyint x, b.cstring1 y\nfrom alltypesorc a join alltypesorc b on a.cint = b.cbigint) t\ninsert into table d1 select x where y is null\ninsert into table d2 select y where x > 0","edges":[{"sources":[2],"targets":[0],"expression":"UDFToInteger(x)","edgeType":"PROJECTION"},{"sources":[3,4],"targets":[0,1],"expression":"(UDFToLong(a.cint) = b.cbigint)","edgeType":"PREDICATE"},{"sources":[5],"targets":[0,1],"expression":"t.y is null","edgeType":"PREDICATE"},{"sources":[5],"targets":[1],"expression":"CAST( y AS varchar(128))","edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(t.x > 0)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.d1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.d2.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.
cint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"}]}
+{"version":"1.0","engine":"mr","hash":"8703e4091ebd4c96afd3cac83e3a2957","queryText":"from (select a.ctinyint x, b.cstring1 y\nfrom alltypesorc a join alltypesorc b on a.cint = b.cbigint) t\ninsert into table d1 select x where y is null\ninsert into table d2 select y where x > 0","edges":[{"sources":[2],"targets":[0],"expression":"UDFToInteger(x)","edgeType":"PROJECTION"},{"sources":[3,4],"targets":[0,1],"expression":"(UDFToLong(a.cint) = b.cbigint)","edgeType":"PREDICATE"},{"sources":[5],"targets":[0],"expression":"t.y is null","edgeType":"PREDICATE"},{"sources":[5],"targets":[1],"expression":"CAST( y AS varchar(128))","edgeType":"PROJECTION"},{"sources":[2],"targets":[1],"expression":"(t.x > 0)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.d1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.d2.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint
"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"}]}
PREHOOK: query: drop table if exists t
PREHOOK: type: DROPTABLE
PREHOOK: query: create table t as
@@ -320,3 +320,69 @@ PREHOOK: Input: default@dest_v3
{"version":"1.0","engine":"mr","hash":"40bccc0722002f798d0548b59e369e83","queryText":"select * from dest_v3 limit 2","edges":[{"sources":[3,4,5,6,7],"targets":[0],"expression":"(tok_function sum (. (tok_table_or_col $hdt$_0) ctinyint) (tok_windowspec (tok_partitioningspec (tok_distributeby (. (tok_table_or_col $hdt$_0) csmallint)) (tok_orderby (tok_tabsortcolnameasc (. (tok_table_or_col $hdt$_0) csmallint)))) (tok_windowvalues (preceding 2147483647) current)))","edgeType":"PROJECTION"},{"sources":[6],"targets":[1],"expression":"count(default.alltypesorc.cstring1)","edgeType":"PROJECTION"},{"sources":[5],"targets":[2],"edgeType":"PROJECTION"},{"sources":[8],"targets":[0,1,2],"expression":"(a.cboolean2 = true)","edgeType":"PREDICATE"},{"sources":[7],"targets":[0,1,2],"expression":"(a.cint = a.cint)","edgeType":"PREDICATE"},{"sources":[9],"targets":[0,1,2],"expression":"(a.cfloat > 0.0)","edgeType":"PREDICATE"},{"sources":[7],"targets":[0,1,2],"expression":"(count(default.alltypesorc.c
int) > 10)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"dest_v3.a"},{"id":1,"vertexType":"COLUMN","vertexId":"dest_v3.x"},{"id":2,"vertexType":"COLUMN","vertexId":"dest_v3.cboolean1"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.csmallint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean1"},{"id":6,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":7,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":8,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean2"},{"id":9,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cfloat"}]}
38 216 false
38 229 true
+PREHOOK: query: drop table if exists src_dp
+PREHOOK: type: DROPTABLE
+PREHOOK: query: create table src_dp (first string, word string, year int, month int, day int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src_dp
+PREHOOK: query: drop table if exists dest_dp1
+PREHOOK: type: DROPTABLE
+PREHOOK: query: create table dest_dp1 (first string, word string) partitioned by (year int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest_dp1
+PREHOOK: query: drop table if exists dest_dp2
+PREHOOK: type: DROPTABLE
+PREHOOK: query: create table dest_dp2 (first string, word string) partitioned by (y int, m int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest_dp2
+PREHOOK: query: drop table if exists dest_dp3
+PREHOOK: type: DROPTABLE
+PREHOOK: query: create table dest_dp3 (first string, word string) partitioned by (y int, m int, d int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest_dp3
+PREHOOK: query: insert into dest_dp1 partition (year) select first, word, year from src_dp
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_dp
+PREHOOK: Output: default@dest_dp1
+{"version":"1.0","engine":"mr","hash":"b2d38401a3281e74a003d9650df97060","queryText":"insert into dest_dp1 partition (year) select first, word, year from src_dp","edges":[{"sources":[3],"targets":[0],"edgeType":"PROJECTION"},{"sources":[4],"targets":[1],"edgeType":"PROJECTION"},{"sources":[5],"targets":[2],"edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp1.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp1.word"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp1.year"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src_dp.year"}]}
+PREHOOK: query: insert into dest_dp2 partition (y, m) select first, word, year, month from src_dp
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_dp
+PREHOOK: Output: default@dest_dp2
+{"version":"1.0","engine":"mr","hash":"237302d8ffd62b5b71d9544b22de7770","queryText":"insert into dest_dp2 partition (y, m) select first, word, year, month from src_dp","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp2.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp2.word"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp2.y"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_dp2.m"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src_dp.year"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src_dp.month"}]}
+PREHOOK: query: insert into dest_dp2 partition (y=0, m) select first, word, month from src_dp where year=0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_dp
+PREHOOK: Output: default@dest_dp2@y=0
+{"version":"1.0","engine":"mr","hash":"63e990b47e7ab4eb6f2ea09dfb7453ff","queryText":"insert into dest_dp2 partition (y=0, m) select first, word, month from src_dp where year=0","edges":[{"sources":[3],"targets":[0],"edgeType":"PROJECTION"},{"sources":[4],"targets":[1],"edgeType":"PROJECTION"},{"sources":[5],"targets":[2],"edgeType":"PROJECTION"},{"sources":[6],"targets":[0,1,2],"expression":"(src_dp.year = 0)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp2.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp2.word"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp2.m"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src_dp.month"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src_dp.year"}]}
+PREHOOK: query: insert into dest_dp3 partition (y=0, m, d) select first, word, month m, day d from src_dp where year=0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_dp
+PREHOOK: Output: default@dest_dp3@y=0
+{"version":"1.0","engine":"mr","hash":"6bf71a9d02c0612c63b6f40b15c1e8b3","queryText":"insert into dest_dp3 partition (y=0, m, d) select first, word, month m, day d from src_dp where year=0","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[8],"targets":[0,1,2,3],"expression":"(src_dp.year = 0)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp3.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp3.word"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp3.m"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_dp3.d"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src_dp.month"},{"id":7,"vertexType":"CO
LUMN","vertexId":"default.src_dp.day"},{"id":8,"vertexType":"COLUMN","vertexId":"default.src_dp.year"}]}
+PREHOOK: query: drop table if exists src_dp1
+PREHOOK: type: DROPTABLE
+PREHOOK: query: create table src_dp1 (f string, w string, m int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src_dp1
+Warning: Shuffle Join JOIN[4][tables = [src_dp, src_dp1]] in Stage 'Stage-5:MAPRED' is a cross product
+PREHOOK: query: from src_dp, src_dp1
+insert into dest_dp1 partition (year) select first, word, year
+insert into dest_dp2 partition (y, m) select first, word, year, month
+insert into dest_dp3 partition (y=2, m, d) select first, word, month m, day d where year=2
+insert into dest_dp2 partition (y=1, m) select f, w, m
+insert into dest_dp1 partition (year=0) select f, w
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_dp
+PREHOOK: Input: default@src_dp1
+PREHOOK: Output: default@dest_dp1
+PREHOOK: Output: default@dest_dp1@year=0
+PREHOOK: Output: default@dest_dp2
+PREHOOK: Output: default@dest_dp2@y=1
+PREHOOK: Output: default@dest_dp3@y=2
+{"version":"1.0","engine":"mr","hash":"44f16edbf35cfeaf3d4f7b0113a69b74","queryText":"from src_dp, src_dp1\ninsert into dest_dp1 partition (year) select first, word, year\ninsert into dest_dp2 partition (y, m) select first, word, year, month\ninsert into dest_dp3 partition (y=2, m, d) select first, word, month m, day d where year=2\ninsert into dest_dp2 partition (y=1, m) select f, w, m\ninsert into dest_dp1 partition (year=0) select f, w","edges":[{"sources":[11],"targets":[0,1,2],"edgeType":"PROJECTION"},{"sources":[12],"targets":[3,4,5],"edgeType":"PROJECTION"},{"sources":[13],"targets":[6,7],"edgeType":"PROJECTION"},{"sources":[14],"targets":[8,9],"edgeType":"PROJECTION"},{"sources":[15],"targets":[1,0],"edgeType":"PROJECTION"},{"sources":[16],"targets":[4,3],"edgeType":"PROJECTION"},{"sources":[17],"targets":[8],"edgeType":"PROJECTION"},{"sources":[18],"targets":[10],"edgeType":"PROJECTION"},{"sources":[13],"targets":[2,5,9,10],"expression":"(src_dp.year = 2)","edgeType":"PREDI
CATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp1.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp2.first"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp3.first"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_dp1.word"},{"id":4,"vertexType":"COLUMN","vertexId":"default.dest_dp2.word"},{"id":5,"vertexType":"COLUMN","vertexId":"default.dest_dp3.word"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dest_dp1.year"},{"id":7,"vertexType":"COLUMN","vertexId":"default.dest_dp2.y"},{"id":8,"vertexType":"COLUMN","vertexId":"default.dest_dp2.m"},{"id":9,"vertexType":"COLUMN","vertexId":"default.dest_dp3.m"},{"id":10,"vertexType":"COLUMN","vertexId":"default.dest_dp3.d"},{"id":11,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":12,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":13,"vertexType":"COLUMN","vertexId":"default.src_dp.year"},{"id":14,"vertexType":"COLUMN","vertexId":"default.src_dp.month
"},{"id":15,"vertexType":"COLUMN","vertexId":"default.src_dp1.f"},{"id":16,"vertexType":"COLUMN","vertexId":"default.src_dp1.w"},{"id":17,"vertexType":"COLUMN","vertexId":"default.src_dp1.m"},{"id":18,"vertexType":"COLUMN","vertexId":"default.src_dp.day"}]}
[04/50] [abbrv] hive git commit: HIVE-11838: Another positive test case for HIVE-11658 (Prasanth Jayachandran reviewed by Jason Dere)
Posted by xu...@apache.org.
HIVE-11838: Another positive test case for HIVE-11658 (Prasanth Jayachandran reviewed by Jason Dere)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a12e5f5b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a12e5f5b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a12e5f5b
Branch: refs/heads/beeline-cli
Commit: a12e5f5bbc554b2b49d6aa726721aaba9c299409
Parents: 7201c26
Author: Prasanth Jayachandran <j....@gmail.com>
Authored: Thu Sep 17 13:36:25 2015 -0500
Committer: Prasanth Jayachandran <j....@gmail.com>
Committed: Thu Sep 17 13:36:25 2015 -0500
----------------------------------------------------------------------
.../test/queries/clientpositive/load_orc_part.q | 5 ++++
.../results/clientpositive/load_orc_part.q.out | 26 ++++++++++++++++++++
2 files changed, 31 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/a12e5f5b/ql/src/test/queries/clientpositive/load_orc_part.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/load_orc_part.q b/ql/src/test/queries/clientpositive/load_orc_part.q
index 2ff884d..2902c72 100644
--- a/ql/src/test/queries/clientpositive/load_orc_part.q
+++ b/ql/src/test/queries/clientpositive/load_orc_part.q
@@ -17,3 +17,8 @@ alter table orc_test add partition(ds='11');
alter table orc_test partition(ds='11') set fileformat textfile;
load data local inpath '../../data/files/kv1.txt' into table orc_test partition(ds='11');
dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orc_test/ds=11/;
+
+alter table orc_test add partition(ds='12');
+alter table orc_test partition(ds='12') set fileformat textfile;
+load data local inpath '../../data/files/types/primitives' into table orc_test partition(ds='12');
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orc_test/ds=12/;
http://git-wip-us.apache.org/repos/asf/hive/blob/a12e5f5b/ql/src/test/results/clientpositive/load_orc_part.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/load_orc_part.q.out b/ql/src/test/results/clientpositive/load_orc_part.q.out
index 2e02c2e..16346cd 100644
--- a/ql/src/test/results/clientpositive/load_orc_part.q.out
+++ b/ql/src/test/results/clientpositive/load_orc_part.q.out
@@ -86,3 +86,29 @@ POSTHOOK: type: LOAD
POSTHOOK: Output: default@orc_test@ds=11
Found 1 items
#### A masked pattern was here ####
+PREHOOK: query: alter table orc_test add partition(ds='12')
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Output: default@orc_test
+POSTHOOK: query: alter table orc_test add partition(ds='12')
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Output: default@orc_test
+POSTHOOK: Output: default@orc_test@ds=12
+PREHOOK: query: alter table orc_test partition(ds='12') set fileformat textfile
+PREHOOK: type: ALTERPARTITION_FILEFORMAT
+PREHOOK: Input: default@orc_test
+PREHOOK: Output: default@orc_test@ds=12
+POSTHOOK: query: alter table orc_test partition(ds='12') set fileformat textfile
+POSTHOOK: type: ALTERPARTITION_FILEFORMAT
+POSTHOOK: Input: default@orc_test
+POSTHOOK: Input: default@orc_test@ds=12
+POSTHOOK: Output: default@orc_test@ds=12
+PREHOOK: query: load data local inpath '../../data/files/types/primitives' into table orc_test partition(ds='12')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@orc_test@ds=12
+POSTHOOK: query: load data local inpath '../../data/files/types/primitives' into table orc_test partition(ds='12')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@orc_test@ds=12
+Found 4 items
+#### A masked pattern was here ####
[03/50] [abbrv] hive git commit: HIVE-11789: Better support for functions recognition in CBO (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Posted by xu...@apache.org.
HIVE-11789: Better support for functions recognition in CBO (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7201c264
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7201c264
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7201c264
Branch: refs/heads/beeline-cli
Commit: 7201c264a1fe8347fd87fc8c1bb835083e9aac75
Parents: 79244ab
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Thu Sep 17 17:48:01 2015 +0100
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Thu Sep 17 17:48:01 2015 +0100
----------------------------------------------------------------------
.../calcite/reloperators/HiveBetween.java | 75 ++++++++++++++++++++
.../optimizer/calcite/reloperators/HiveIn.java | 41 +++++++++++
.../calcite/rules/HivePreFilteringRule.java | 37 +++-------
.../translator/SqlFunctionConverter.java | 16 ++++-
4 files changed, 142 insertions(+), 27 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/7201c264/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveBetween.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveBetween.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveBetween.java
new file mode 100644
index 0000000..2388939
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveBetween.java
@@ -0,0 +1,75 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators;
+
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeFactory;
+import org.apache.calcite.sql.SqlCallBinding;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.SqlNode;
+import org.apache.calcite.sql.SqlSpecialOperator;
+import org.apache.calcite.sql.type.ReturnTypes;
+import org.apache.calcite.sql.type.SqlOperandTypeInference;
+import org.apache.calcite.sql.type.SqlTypeName;
+
+public class HiveBetween extends SqlSpecialOperator {
+
+ public static final SqlSpecialOperator INSTANCE =
+ new HiveBetween();
+
+ private HiveBetween() {
+ super(
+ "BETWEEN",
+ SqlKind.BETWEEN,
+ 30,
+ true,
+ ReturnTypes.BOOLEAN_NULLABLE,
+ FIRST_BOOLEAN_THEN_FIRST_KNOWN,
+ null);
+ }
+
+ /**
+ * Operand type-inference strategy where an unknown operand type is derived
+ * from the first operand with a known type, except that the first operand is always typed as boolean.
+ */
+ public static final SqlOperandTypeInference FIRST_BOOLEAN_THEN_FIRST_KNOWN =
+ new SqlOperandTypeInference() {
+ public void inferOperandTypes(
+ SqlCallBinding callBinding,
+ RelDataType returnType,
+ RelDataType[] operandTypes) {
+ final RelDataType unknownType =
+ callBinding.getValidator().getUnknownType();
+ RelDataType knownType = unknownType;
+ for (int i = 1; i < callBinding.getCall().getOperandList().size(); i++) {
+ SqlNode operand = callBinding.getCall().getOperandList().get(i);
+ knownType = callBinding.getValidator().deriveType(
+ callBinding.getScope(), operand);
+ if (!knownType.equals(unknownType)) {
+ break;
+ }
+ }
+
+ RelDataTypeFactory typeFactory = callBinding.getTypeFactory();
+ operandTypes[0] = typeFactory.createSqlType(SqlTypeName.BOOLEAN);
+ for (int i = 1; i < operandTypes.length; ++i) {
+ operandTypes[i] = knownType;
+ }
+ }
+ };
+}
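
For illustration, a plain-Java sketch of the FIRST_BOOLEAN_THEN_FIRST_KNOWN strategy above, with Calcite's type machinery replaced by strings (the method and type names here are illustrative, not part of the patch): operand 0 of the rewritten BETWEEN call is always typed BOOLEAN, and every remaining operand inherits the type of the first operand whose type is already known.

  // Sketch only: strings stand in for RelDataType, "UNKNOWN" for the
  // validator's unknown type.
  static String[] inferOperandTypes(String[] declaredTypes) {
    String known = "UNKNOWN";
    for (int i = 1; i < declaredTypes.length; i++) {
      if (!"UNKNOWN".equals(declaredTypes[i])) {
        known = declaredTypes[i];   // first operand with a known type
        break;
      }
    }
    String[] inferred = new String[declaredTypes.length];
    inferred[0] = "BOOLEAN";        // operand 0 is always the boolean flag
    for (int i = 1; i < inferred.length; i++) {
      inferred[i] = known;          // the rest share the first known type
    }
    return inferred;
  }
  // inferOperandTypes({"UNKNOWN", "UNKNOWN", "INT", "UNKNOWN"})
  //   returns {"BOOLEAN", "INT", "INT", "INT"}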
http://git-wip-us.apache.org/repos/asf/hive/blob/7201c264/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveIn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveIn.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveIn.java
new file mode 100644
index 0000000..6d87003
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveIn.java
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators;
+
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.SqlSpecialOperator;
+import org.apache.calcite.sql.type.InferTypes;
+import org.apache.calcite.sql.type.ReturnTypes;
+
+public class HiveIn extends SqlSpecialOperator {
+
+ public static final SqlSpecialOperator INSTANCE =
+ new HiveIn();
+
+ private HiveIn() {
+ super(
+ "IN",
+ SqlKind.IN,
+ 30,
+ true,
+ ReturnTypes.BOOLEAN_NULLABLE,
+ InferTypes.FIRST_KNOWN,
+ null);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/7201c264/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java
index dde6288..3e2311c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.EnumSet;
import java.util.List;
import java.util.Map.Entry;
import java.util.Set;
@@ -41,22 +42,11 @@ import org.apache.calcite.rex.RexUtil;
import org.apache.calcite.sql.SqlKind;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualNS;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.LinkedHashMultimap;
import com.google.common.collect.Multimap;
-import com.google.common.collect.Sets;
public class HivePreFilteringRule extends RelOptRule {
@@ -71,18 +61,13 @@ public class HivePreFilteringRule extends RelOptRule {
private final FilterFactory filterFactory;
- private static final Set<String> COMPARISON_UDFS = Sets.newHashSet(
- GenericUDFOPEqual.class.getAnnotation(Description.class).name(),
- GenericUDFOPEqualNS.class.getAnnotation(Description.class).name(),
- GenericUDFOPEqualOrGreaterThan.class.getAnnotation(Description.class).name(),
- GenericUDFOPEqualOrLessThan.class.getAnnotation(Description.class).name(),
- GenericUDFOPGreaterThan.class.getAnnotation(Description.class).name(),
- GenericUDFOPLessThan.class.getAnnotation(Description.class).name(),
- GenericUDFOPNotEqual.class.getAnnotation(Description.class).name());
- private static final String IN_UDF =
- GenericUDFIn.class.getAnnotation(Description.class).name();
- private static final String BETWEEN_UDF =
- GenericUDFBetween.class.getAnnotation(Description.class).name();
+ private static final Set<SqlKind> COMPARISON = EnumSet.of(
+ SqlKind.EQUALS,
+ SqlKind.GREATER_THAN_OR_EQUAL,
+ SqlKind.LESS_THAN_OR_EQUAL,
+ SqlKind.GREATER_THAN,
+ SqlKind.LESS_THAN,
+ SqlKind.NOT_EQUALS);
private HivePreFilteringRule() {
@@ -176,7 +161,7 @@ public class HivePreFilteringRule extends RelOptRule {
continue;
}
RexCall conjCall = (RexCall) conjunction;
- if(COMPARISON_UDFS.contains(conjCall.getOperator().getName())) {
+ if(COMPARISON.contains(conjCall.getOperator().getKind())) {
if (conjCall.operands.get(0) instanceof RexInputRef &&
conjCall.operands.get(1) instanceof RexLiteral) {
reductionCondition.put(conjCall.operands.get(0).toString(),
@@ -188,11 +173,11 @@ public class HivePreFilteringRule extends RelOptRule {
conjCall);
addedToReductionCondition = true;
}
- } else if(conjCall.getOperator().getName().equals(IN_UDF)) {
+ } else if(conjCall.getOperator().getKind().equals(SqlKind.IN)) {
reductionCondition.put(conjCall.operands.get(0).toString(),
conjCall);
addedToReductionCondition = true;
- } else if(conjCall.getOperator().getName().equals(BETWEEN_UDF)) {
+ } else if(conjCall.getOperator().getKind().equals(SqlKind.BETWEEN)) {
reductionCondition.put(conjCall.operands.get(1).toString(),
conjCall);
addedToReductionCondition = true;
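
The effect of this hunk is that predicate classification now keys off Calcite's SqlKind instead of Hive UDF display names, which is robust to operator aliases and renames. A self-contained sketch of the same dispatch, assuming a local stand-in enum rather than org.apache.calcite.sql.SqlKind:

  import java.util.EnumSet;
  import java.util.Set;

  public class KindDispatchSketch {
    // Local stand-in for org.apache.calcite.sql.SqlKind (sketch only).
    enum Kind { EQUALS, GREATER_THAN_OR_EQUAL, LESS_THAN_OR_EQUAL,
                GREATER_THAN, LESS_THAN, NOT_EQUALS, IN, BETWEEN, OTHER }

    private static final Set<Kind> COMPARISON = EnumSet.of(
        Kind.EQUALS, Kind.GREATER_THAN_OR_EQUAL, Kind.LESS_THAN_OR_EQUAL,
        Kind.GREATER_THAN, Kind.LESS_THAN, Kind.NOT_EQUALS);

    // Mirrors the branch structure above: one bucket for plain
    // comparisons, special cases for IN and BETWEEN.
    static String classify(Kind kind) {
      if (COMPARISON.contains(kind)) {
        return "comparison";
      } else if (kind == Kind.IN) {
        return "in";
      } else if (kind == Kind.BETWEEN) {
        return "between";
      }
      return "other";
    }
  }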
http://git-wip-us.apache.org/repos/asf/hive/blob/7201c264/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
index 219289c..fd78824 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
@@ -45,6 +45,8 @@ import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveBetween;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIn;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.HiveParser;
import org.apache.hadoop.hive.ql.parse.ParseDriver;
@@ -193,7 +195,16 @@ public class SqlFunctionConverter {
HiveToken hToken = calciteToHiveToken.get(op);
ASTNode node;
if (hToken != null) {
- node = (ASTNode) ParseDriver.adaptor.create(hToken.type, hToken.text);
+ switch (op.kind) {
+ case IN:
+ case BETWEEN:
+ case ROW:
+ node = (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_FUNCTION, "TOK_FUNCTION");
+ node.addChild((ASTNode) ParseDriver.adaptor.create(hToken.type, hToken.text));
+ break;
+ default:
+ node = (ASTNode) ParseDriver.adaptor.create(hToken.type, hToken.text);
+ }
} else {
node = (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_FUNCTION, "TOK_FUNCTION");
if (op.kind != SqlKind.CAST) {
@@ -296,6 +307,9 @@ public class SqlFunctionConverter {
hToken(HiveParser.GREATERTHANOREQUALTO, ">="));
registerFunction("!", SqlStdOperatorTable.NOT, hToken(HiveParser.KW_NOT, "not"));
registerFunction("<>", SqlStdOperatorTable.NOT_EQUALS, hToken(HiveParser.NOTEQUAL, "<>"));
+ registerFunction("in", HiveIn.INSTANCE, hToken(HiveParser.Identifier, "in"));
+ registerFunction("between", HiveBetween.INSTANCE, hToken(HiveParser.Identifier, "between"));
+ registerFunction("struct", SqlStdOperatorTable.ROW, hToken(HiveParser.Identifier, "struct"));
}
private void registerFunction(String name, SqlOperator calciteFn, HiveToken hiveToken) {
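
A rough sketch of the AST shapes the new switch produces, using strings instead of the ANTLR ASTNodes that ParseDriver.adaptor actually creates: IN, BETWEEN and ROW now resolve to a Hive token, but that token must still be wrapped in a TOK_FUNCTION node so the expression translates back as a function call.

  // Sketch only: string rendering instead of ASTNode construction.
  static String toAst(String kind, String hiveToken, String... children) {
    switch (kind) {
      case "IN":
      case "BETWEEN":
      case "ROW":
        // wrap the token inside TOK_FUNCTION, as the patched switch does
        return "TOK_FUNCTION(" + hiveToken + ", " + String.join(", ", children) + ")";
      default:
        return hiveToken + "(" + String.join(", ", children) + ")";
    }
  }
  // toAst("IN", "in", "key", "1", "2") renders as "TOK_FUNCTION(in, key, 1, 2)"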
[20/50] [abbrv] hive git commit: HIVE-11826: 'hadoop.proxyuser.hive.groups' configuration doesn't prevent unauthorized user to access metastore
Posted by xu...@apache.org.
HIVE-11826: 'hadoop.proxyuser.hive.groups' configuration doesn't prevent unauthorized user to access metastore
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/262bae6b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/262bae6b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/262bae6b
Branch: refs/heads/beeline-cli
Commit: 262bae6b1e93abba6947e31785e6174b072904dc
Parents: 92b42ae
Author: Aihua Xu <ai...@gmail.com>
Authored: Mon Sep 21 09:44:57 2015 -0700
Committer: Chao Sun <su...@apache.org>
Committed: Mon Sep 21 09:44:57 2015 -0700
----------------------------------------------------------------------
.../hive/thrift/TestHadoop20SAuthBridge.java | 420 ------------------
.../hive/thrift/TestHadoopAuthBridge23.java | 423 +++++++++++++++++++
.../hive/thrift/HadoopThriftAuthBridge.java | 3 +
3 files changed, 426 insertions(+), 420 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/262bae6b/itests/hive-unit-hadoop2/src/test/java/org/apache/hadoop/hive/thrift/TestHadoop20SAuthBridge.java
----------------------------------------------------------------------
diff --git a/itests/hive-unit-hadoop2/src/test/java/org/apache/hadoop/hive/thrift/TestHadoop20SAuthBridge.java b/itests/hive-unit-hadoop2/src/test/java/org/apache/hadoop/hive/thrift/TestHadoop20SAuthBridge.java
deleted file mode 100644
index f6029b1..0000000
--- a/itests/hive-unit-hadoop2/src/test/java/org/apache/hadoop/hive/thrift/TestHadoop20SAuthBridge.java
+++ /dev/null
@@ -1,420 +0,0 @@
-package org.apache.hadoop.hive.thrift;
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-import java.io.ByteArrayInputStream;
-import java.io.DataInputStream;
-import java.io.IOException;
-import java.net.InetAddress;
-import java.net.NetworkInterface;
-import java.net.ServerSocket;
-import java.security.PrivilegedExceptionAction;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Enumeration;
-import java.util.List;
-import java.util.Map;
-
-import junit.framework.TestCase;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.metastore.HiveMetaStore;
-import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
-import org.apache.hadoop.hive.metastore.MetaStoreUtils;
-import org.apache.hadoop.hive.metastore.api.Database;
-import org.apache.hadoop.hive.metastore.api.MetaException;
-import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge.Server.ServerMode;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.security.SaslRpcServer;
-import org.apache.hadoop.security.SaslRpcServer.AuthMethod;
-import org.apache.hadoop.security.UserGroupInformation;
-import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod;
-import org.apache.hadoop.security.authorize.AuthorizationException;
-import org.apache.hadoop.security.authorize.DefaultImpersonationProvider;
-import org.apache.hadoop.security.authorize.ProxyUsers;
-import org.apache.hadoop.security.token.SecretManager.InvalidToken;
-import org.apache.hadoop.security.token.Token;
-import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager.DelegationTokenInformation;
-import org.apache.hadoop.security.token.delegation.DelegationKey;
-import org.apache.hadoop.util.StringUtils;
-import org.apache.thrift.transport.TSaslServerTransport;
-import org.apache.thrift.transport.TTransportException;
-import org.apache.thrift.transport.TTransportFactory;
-
-public class TestHadoop20SAuthBridge extends TestCase {
-
- /**
- * set to true when metastore token manager has intitialized token manager
- * through call to HadoopThriftAuthBridge20S.Server.startDelegationTokenSecretManager
- */
- static volatile boolean isMetastoreTokenManagerInited;
-
- private static class MyHadoopThriftAuthBridge20S extends HadoopThriftAuthBridge {
- @Override
- public Server createServer(String keytabFile, String principalConf)
- throws TTransportException {
- //Create a Server that doesn't interpret any Kerberos stuff
- return new Server();
- }
-
- static class Server extends HadoopThriftAuthBridge.Server {
- public Server() throws TTransportException {
- super();
- }
- @Override
- public TTransportFactory createTransportFactory(Map<String, String> saslProps)
- throws TTransportException {
- TSaslServerTransport.Factory transFactory =
- new TSaslServerTransport.Factory();
- transFactory.addServerDefinition(AuthMethod.DIGEST.getMechanismName(),
- null, SaslRpcServer.SASL_DEFAULT_REALM,
- saslProps,
- new SaslDigestCallbackHandler(secretManager));
-
- return new TUGIAssumingTransportFactory(transFactory, realUgi);
- }
- static DelegationTokenStore TOKEN_STORE = new MemoryTokenStore();
-
- @Override
- protected DelegationTokenStore getTokenStore(Configuration conf) throws IOException {
- return TOKEN_STORE;
- }
-
- @Override
- public void startDelegationTokenSecretManager(Configuration conf, Object hms, ServerMode sm)
- throws IOException{
- super.startDelegationTokenSecretManager(conf, hms, sm);
- isMetastoreTokenManagerInited = true;
- }
-
- }
- }
-
-
- private HiveConf conf;
-
- private void configureSuperUserIPAddresses(Configuration conf,
- String superUserShortName) throws IOException {
- List<String> ipList = new ArrayList<String>();
- Enumeration<NetworkInterface> netInterfaceList = NetworkInterface
- .getNetworkInterfaces();
- while (netInterfaceList.hasMoreElements()) {
- NetworkInterface inf = netInterfaceList.nextElement();
- Enumeration<InetAddress> addrList = inf.getInetAddresses();
- while (addrList.hasMoreElements()) {
- InetAddress addr = addrList.nextElement();
- ipList.add(addr.getHostAddress());
- }
- }
- StringBuilder builder = new StringBuilder();
- for (String ip : ipList) {
- builder.append(ip);
- builder.append(',');
- }
- builder.append("127.0.1.1,");
- builder.append(InetAddress.getLocalHost().getCanonicalHostName());
- conf.setStrings(DefaultImpersonationProvider.getTestProvider().getProxySuperuserIpConfKey(superUserShortName),
- builder.toString());
- }
-
- public void setup() throws Exception {
- isMetastoreTokenManagerInited = false;
- int port = findFreePort();
- System.setProperty(HiveConf.ConfVars.METASTORE_USE_THRIFT_SASL.varname,
- "true");
- System.setProperty(HiveConf.ConfVars.METASTOREURIS.varname,
- "thrift://localhost:" + port);
- System.setProperty(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, new Path(
- System.getProperty("test.build.data", "/tmp")).toString());
- conf = new HiveConf(TestHadoop20SAuthBridge.class);
- MetaStoreUtils.startMetaStore(port, new MyHadoopThriftAuthBridge20S());
- }
-
- /**
- * Test delegation token store/load from shared store.
- * @throws Exception
- */
- public void testDelegationTokenSharedStore() throws Exception {
- UserGroupInformation clientUgi = UserGroupInformation.getCurrentUser();
-
- TokenStoreDelegationTokenSecretManager tokenManager =
- new TokenStoreDelegationTokenSecretManager(0, 60*60*1000, 60*60*1000, 0,
- MyHadoopThriftAuthBridge20S.Server.TOKEN_STORE);
- // initializes current key
- tokenManager.startThreads();
- tokenManager.stopThreads();
-
- String tokenStrForm = tokenManager.getDelegationToken(clientUgi.getShortUserName());
- Token<DelegationTokenIdentifier> t= new Token<DelegationTokenIdentifier>();
- t.decodeFromUrlString(tokenStrForm);
-
- //check whether the username in the token is what we expect
- DelegationTokenIdentifier d = new DelegationTokenIdentifier();
- d.readFields(new DataInputStream(new ByteArrayInputStream(
- t.getIdentifier())));
- assertTrue("Usernames don't match",
- clientUgi.getShortUserName().equals(d.getUser().getShortUserName()));
-
- DelegationTokenInformation tokenInfo = MyHadoopThriftAuthBridge20S.Server.TOKEN_STORE
- .getToken(d);
- assertNotNull("token not in store", tokenInfo);
- assertFalse("duplicate token add",
- MyHadoopThriftAuthBridge20S.Server.TOKEN_STORE.addToken(d, tokenInfo));
-
- // check keys are copied from token store when token is loaded
- TokenStoreDelegationTokenSecretManager anotherManager =
- new TokenStoreDelegationTokenSecretManager(0, 0, 0, 0,
- MyHadoopThriftAuthBridge20S.Server.TOKEN_STORE);
- assertEquals("master keys empty on init", 0,
- anotherManager.getAllKeys().length);
- assertNotNull("token loaded",
- anotherManager.retrievePassword(d));
- anotherManager.renewToken(t, clientUgi.getShortUserName());
- assertEquals("master keys not loaded from store",
- MyHadoopThriftAuthBridge20S.Server.TOKEN_STORE.getMasterKeys().length,
- anotherManager.getAllKeys().length);
-
- // cancel the delegation token
- tokenManager.cancelDelegationToken(tokenStrForm);
- assertNull("token not removed from store after cancel",
- MyHadoopThriftAuthBridge20S.Server.TOKEN_STORE.getToken(d));
- assertFalse("token removed (again)",
- MyHadoopThriftAuthBridge20S.Server.TOKEN_STORE.removeToken(d));
- try {
- anotherManager.retrievePassword(d);
- fail("InvalidToken expected after cancel");
- } catch (InvalidToken ex) {
- // expected
- }
-
- // token expiration
- MyHadoopThriftAuthBridge20S.Server.TOKEN_STORE.addToken(d,
- new DelegationTokenInformation(0, t.getPassword()));
- assertNotNull(MyHadoopThriftAuthBridge20S.Server.TOKEN_STORE.getToken(d));
- anotherManager.removeExpiredTokens();
- assertNull("Expired token not removed",
- MyHadoopThriftAuthBridge20S.Server.TOKEN_STORE.getToken(d));
-
- // key expiration - create an already expired key
- anotherManager.startThreads(); // generates initial key
- anotherManager.stopThreads();
- DelegationKey expiredKey = new DelegationKey(-1, 0, anotherManager.getAllKeys()[0].getKey());
- anotherManager.logUpdateMasterKey(expiredKey); // updates key with sequence number
- assertTrue("expired key not in allKeys",
- anotherManager.reloadKeys().containsKey(expiredKey.getKeyId()));
- anotherManager.rollMasterKeyExt();
- assertFalse("Expired key not removed",
- anotherManager.reloadKeys().containsKey(expiredKey.getKeyId()));
- }
-
- public void testSaslWithHiveMetaStore() throws Exception {
- setup();
- UserGroupInformation clientUgi = UserGroupInformation.getCurrentUser();
- obtainTokenAndAddIntoUGI(clientUgi, null);
- obtainTokenAndAddIntoUGI(clientUgi, "tokenForFooTablePartition");
- }
-
- public void testMetastoreProxyUser() throws Exception {
- setup();
-
- final String proxyUserName = "proxyUser";
- //set the configuration up such that proxyUser can act on
- //behalf of all users belonging to the group foo_bar_group (
- //a dummy group)
- String[] groupNames =
- new String[] { "foo_bar_group" };
- setGroupsInConf(groupNames, proxyUserName);
-
- final UserGroupInformation delegationTokenUser =
- UserGroupInformation.getCurrentUser();
-
- final UserGroupInformation proxyUserUgi =
- UserGroupInformation.createRemoteUser(proxyUserName);
- String tokenStrForm = proxyUserUgi.doAs(new PrivilegedExceptionAction<String>() {
- public String run() throws Exception {
- try {
- //Since the user running the test won't belong to a non-existent group
- //foo_bar_group, the call to getDelegationTokenStr will fail
- return getDelegationTokenStr(delegationTokenUser, proxyUserUgi);
- } catch (AuthorizationException ae) {
- return null;
- }
- }
- });
- assertTrue("Expected the getDelegationToken call to fail",
- tokenStrForm == null);
-
- //set the configuration up such that proxyUser can act on
- //behalf of all users belonging to the real group(s) that the
- //user running the test belongs to
- setGroupsInConf(UserGroupInformation.getCurrentUser().getGroupNames(),
- proxyUserName);
- tokenStrForm = proxyUserUgi.doAs(new PrivilegedExceptionAction<String>() {
- public String run() throws Exception {
- try {
- //Since the user running the test belongs to the group
- //obtained above the call to getDelegationTokenStr will succeed
- return getDelegationTokenStr(delegationTokenUser, proxyUserUgi);
- } catch (AuthorizationException ae) {
- return null;
- }
- }
- });
- assertTrue("Expected the getDelegationToken call to not fail",
- tokenStrForm != null);
- Token<DelegationTokenIdentifier> t= new Token<DelegationTokenIdentifier>();
- t.decodeFromUrlString(tokenStrForm);
- //check whether the username in the token is what we expect
- DelegationTokenIdentifier d = new DelegationTokenIdentifier();
- d.readFields(new DataInputStream(new ByteArrayInputStream(
- t.getIdentifier())));
- assertTrue("Usernames don't match",
- delegationTokenUser.getShortUserName().equals(d.getUser().getShortUserName()));
-
- }
-
- private void setGroupsInConf(String[] groupNames, String proxyUserName)
- throws IOException {
- conf.set(
- DefaultImpersonationProvider.getTestProvider().getProxySuperuserGroupConfKey(proxyUserName),
- StringUtils.join(",", Arrays.asList(groupNames)));
- configureSuperUserIPAddresses(conf, proxyUserName);
- ProxyUsers.refreshSuperUserGroupsConfiguration(conf);
- }
-
- private String getDelegationTokenStr(UserGroupInformation ownerUgi,
- UserGroupInformation realUgi) throws Exception {
- //obtain a token by directly invoking the metastore operation(without going
- //through the thrift interface). Obtaining a token makes the secret manager
- //aware of the user and that it gave the token to the user
- //also set the authentication method explicitly to KERBEROS. Since the
- //metastore checks whether the authentication method is KERBEROS or not
- //for getDelegationToken, and the testcases don't use
- //kerberos, this needs to be done
-
- waitForMetastoreTokenInit();
-
- HadoopThriftAuthBridge.Server.authenticationMethod
- .set(AuthenticationMethod.KERBEROS);
- HadoopThriftAuthBridge.Server.remoteAddress.set(InetAddress.getLocalHost());
- return
- HiveMetaStore.getDelegationToken(ownerUgi.getShortUserName(),
- realUgi.getShortUserName());
- }
-
- /**
- * Wait for metastore to have initialized token manager
- * This does not have to be done in other metastore test cases as they
- * use metastore client which will retry few times on failure
- * @throws InterruptedException
- */
- private void waitForMetastoreTokenInit() throws InterruptedException {
- int waitAttempts = 30;
- while(waitAttempts > 0 && !isMetastoreTokenManagerInited){
- Thread.sleep(1000);
- waitAttempts--;
- }
- }
-
- private void obtainTokenAndAddIntoUGI(UserGroupInformation clientUgi,
- String tokenSig) throws Exception {
- String tokenStrForm = getDelegationTokenStr(clientUgi, clientUgi);
- Token<DelegationTokenIdentifier> t= new Token<DelegationTokenIdentifier>();
- t.decodeFromUrlString(tokenStrForm);
-
- //check whether the username in the token is what we expect
- DelegationTokenIdentifier d = new DelegationTokenIdentifier();
- d.readFields(new DataInputStream(new ByteArrayInputStream(
- t.getIdentifier())));
- assertTrue("Usernames don't match",
- clientUgi.getShortUserName().equals(d.getUser().getShortUserName()));
-
- if (tokenSig != null) {
- conf.set("hive.metastore.token.signature", tokenSig);
- t.setService(new Text(tokenSig));
- }
- //add the token to the clientUgi for securely talking to the metastore
- clientUgi.addToken(t);
- //Create the metastore client as the clientUgi. Doing so this
- //way will give the client access to the token that was added earlier
- //in the clientUgi
- HiveMetaStoreClient hiveClient =
- clientUgi.doAs(new PrivilegedExceptionAction<HiveMetaStoreClient>() {
- public HiveMetaStoreClient run() throws Exception {
- HiveMetaStoreClient hiveClient =
- new HiveMetaStoreClient(conf);
- return hiveClient;
- }
- });
-
- assertTrue("Couldn't connect to metastore", hiveClient != null);
-
- //try out some metastore operations
- createDBAndVerifyExistence(hiveClient);
-
- //check that getDelegationToken fails since we are not authenticating
- //over kerberos
- boolean pass = false;
- try {
- hiveClient.getDelegationToken(clientUgi.getUserName());
- } catch (MetaException ex) {
- pass = true;
- }
- assertTrue("Expected the getDelegationToken call to fail", pass == true);
- hiveClient.close();
-
- //Now cancel the delegation token
- HiveMetaStore.cancelDelegationToken(tokenStrForm);
-
- //now metastore connection should fail
- hiveClient =
- clientUgi.doAs(new PrivilegedExceptionAction<HiveMetaStoreClient>() {
- public HiveMetaStoreClient run() {
- try {
- HiveMetaStoreClient hiveClient =
- new HiveMetaStoreClient(conf);
- return hiveClient;
- } catch (MetaException e) {
- return null;
- }
- }
- });
- assertTrue("Expected metastore operations to fail", hiveClient == null);
- }
-
- private void createDBAndVerifyExistence(HiveMetaStoreClient client)
- throws Exception {
- String dbName = "simpdb";
- Database db = new Database();
- db.setName(dbName);
- client.createDatabase(db);
- Database db1 = client.getDatabase(dbName);
- client.dropDatabase(dbName);
- assertTrue("Databases do not match", db1.getName().equals(db.getName()));
- }
-
- private int findFreePort() throws IOException {
- ServerSocket socket= new ServerSocket(0);
- int port = socket.getLocalPort();
- socket.close();
- return port;
- }
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/262bae6b/itests/hive-unit-hadoop2/src/test/java/org/apache/hadoop/hive/thrift/TestHadoopAuthBridge23.java
----------------------------------------------------------------------
diff --git a/itests/hive-unit-hadoop2/src/test/java/org/apache/hadoop/hive/thrift/TestHadoopAuthBridge23.java b/itests/hive-unit-hadoop2/src/test/java/org/apache/hadoop/hive/thrift/TestHadoopAuthBridge23.java
new file mode 100644
index 0000000..40b161a
--- /dev/null
+++ b/itests/hive-unit-hadoop2/src/test/java/org/apache/hadoop/hive/thrift/TestHadoopAuthBridge23.java
@@ -0,0 +1,423 @@
+package org.apache.hadoop.hive.thrift;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.net.InetAddress;
+import java.net.NetworkInterface;
+import java.net.ServerSocket;
+import java.security.PrivilegedExceptionAction;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Enumeration;
+import java.util.List;
+import java.util.Map;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.HiveMetaStore;
+import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
+import org.apache.hadoop.hive.metastore.MetaStoreUtils;
+import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.security.SaslRpcServer;
+import org.apache.hadoop.security.SaslRpcServer.AuthMethod;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod;
+import org.apache.hadoop.security.authorize.AuthorizationException;
+import org.apache.hadoop.security.authorize.DefaultImpersonationProvider;
+import org.apache.hadoop.security.authorize.ProxyUsers;
+import org.apache.hadoop.security.token.SecretManager.InvalidToken;
+import org.apache.hadoop.security.token.Token;
+import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager.DelegationTokenInformation;
+import org.apache.hadoop.security.token.delegation.DelegationKey;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.thrift.transport.TSaslServerTransport;
+import org.apache.thrift.transport.TTransportException;
+import org.apache.thrift.transport.TTransportFactory;
+
+public class TestHadoopAuthBridge23 extends TestCase {
+
+ /**
+ * set to true when metastore token manager has initialized token manager
+ * through call to HadoopThriftAuthBridge23.Server.startDelegationTokenSecretManager
+ */
+ static volatile boolean isMetastoreTokenManagerInited;
+
+ private static class MyHadoopThriftAuthBridge23 extends HadoopThriftAuthBridge23 {
+ @Override
+ public Server createServer(String keytabFile, String principalConf)
+ throws TTransportException {
+ //Create a Server that doesn't interpret any Kerberos stuff
+ return new Server();
+ }
+
+ static class Server extends HadoopThriftAuthBridge.Server {
+ public Server() throws TTransportException {
+ super();
+ }
+ @Override
+ public TTransportFactory createTransportFactory(Map<String, String> saslProps)
+ throws TTransportException {
+ TSaslServerTransport.Factory transFactory =
+ new TSaslServerTransport.Factory();
+ transFactory.addServerDefinition(AuthMethod.DIGEST.getMechanismName(),
+ null, SaslRpcServer.SASL_DEFAULT_REALM,
+ saslProps,
+ new SaslDigestCallbackHandler(secretManager));
+
+ return new TUGIAssumingTransportFactory(transFactory, realUgi);
+ }
+ static DelegationTokenStore TOKEN_STORE = new MemoryTokenStore();
+
+ @Override
+ protected DelegationTokenStore getTokenStore(Configuration conf) throws IOException {
+ return TOKEN_STORE;
+ }
+
+ @Override
+ public void startDelegationTokenSecretManager(Configuration conf, Object hms, ServerMode sm)
+ throws IOException{
+ super.startDelegationTokenSecretManager(conf, hms, sm);
+ isMetastoreTokenManagerInited = true;
+ }
+
+ }
+ }
+
+
+ private HiveConf conf;
+
+ private void configureSuperUserIPAddresses(Configuration conf,
+ String superUserShortName) throws IOException {
+ List<String> ipList = new ArrayList<String>();
+ Enumeration<NetworkInterface> netInterfaceList = NetworkInterface
+ .getNetworkInterfaces();
+ while (netInterfaceList.hasMoreElements()) {
+ NetworkInterface inf = netInterfaceList.nextElement();
+ Enumeration<InetAddress> addrList = inf.getInetAddresses();
+ while (addrList.hasMoreElements()) {
+ InetAddress addr = addrList.nextElement();
+ ipList.add(addr.getHostAddress());
+ }
+ }
+ StringBuilder builder = new StringBuilder();
+ for (String ip : ipList) {
+ builder.append(ip);
+ builder.append(',');
+ }
+ builder.append("127.0.1.1,");
+ builder.append(InetAddress.getLocalHost().getCanonicalHostName());
+ conf.setStrings(DefaultImpersonationProvider.getTestProvider().getProxySuperuserIpConfKey(superUserShortName),
+ builder.toString());
+ }
+
+ public void setup() throws Exception {
+ isMetastoreTokenManagerInited = false;
+ int port = findFreePort();
+ System.setProperty(HiveConf.ConfVars.METASTORE_USE_THRIFT_SASL.varname,
+ "true");
+ System.setProperty(HiveConf.ConfVars.METASTOREURIS.varname,
+ "thrift://localhost:" + port);
+ System.setProperty(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, new Path(
+ System.getProperty("test.build.data", "/tmp")).toString());
+ conf = new HiveConf(TestHadoopAuthBridge23.class);
+ MetaStoreUtils.startMetaStore(port, new MyHadoopThriftAuthBridge23());
+ }
+
+ /**
+ * Test delegation token store/load from shared store.
+ * @throws Exception
+ */
+ public void testDelegationTokenSharedStore() throws Exception {
+ UserGroupInformation clientUgi = UserGroupInformation.getCurrentUser();
+
+ TokenStoreDelegationTokenSecretManager tokenManager =
+ new TokenStoreDelegationTokenSecretManager(0, 60*60*1000, 60*60*1000, 0,
+ MyHadoopThriftAuthBridge23.Server.TOKEN_STORE);
+ // initializes current key
+ tokenManager.startThreads();
+ tokenManager.stopThreads();
+
+ String tokenStrForm = tokenManager.getDelegationToken(clientUgi.getShortUserName());
+ Token<DelegationTokenIdentifier> t= new Token<DelegationTokenIdentifier>();
+ t.decodeFromUrlString(tokenStrForm);
+
+ //check whether the username in the token is what we expect
+ DelegationTokenIdentifier d = new DelegationTokenIdentifier();
+ d.readFields(new DataInputStream(new ByteArrayInputStream(
+ t.getIdentifier())));
+ assertTrue("Usernames don't match",
+ clientUgi.getShortUserName().equals(d.getUser().getShortUserName()));
+
+ DelegationTokenInformation tokenInfo = MyHadoopThriftAuthBridge23.Server.TOKEN_STORE
+ .getToken(d);
+ assertNotNull("token not in store", tokenInfo);
+ assertFalse("duplicate token add",
+ MyHadoopThriftAuthBridge23.Server.TOKEN_STORE.addToken(d, tokenInfo));
+
+ // check keys are copied from token store when token is loaded
+ TokenStoreDelegationTokenSecretManager anotherManager =
+ new TokenStoreDelegationTokenSecretManager(0, 0, 0, 0,
+ MyHadoopThriftAuthBridge23.Server.TOKEN_STORE);
+ assertEquals("master keys empty on init", 0,
+ anotherManager.getAllKeys().length);
+ assertNotNull("token loaded",
+ anotherManager.retrievePassword(d));
+ anotherManager.renewToken(t, clientUgi.getShortUserName());
+ assertEquals("master keys not loaded from store",
+ MyHadoopThriftAuthBridge23.Server.TOKEN_STORE.getMasterKeys().length,
+ anotherManager.getAllKeys().length);
+
+ // cancel the delegation token
+ tokenManager.cancelDelegationToken(tokenStrForm);
+ assertNull("token not removed from store after cancel",
+ MyHadoopThriftAuthBridge23.Server.TOKEN_STORE.getToken(d));
+ assertFalse("token removed (again)",
+ MyHadoopThriftAuthBridge23.Server.TOKEN_STORE.removeToken(d));
+ try {
+ anotherManager.retrievePassword(d);
+ fail("InvalidToken expected after cancel");
+ } catch (InvalidToken ex) {
+ // expected
+ }
+
+ // token expiration
+ MyHadoopThriftAuthBridge23.Server.TOKEN_STORE.addToken(d,
+ new DelegationTokenInformation(0, t.getPassword()));
+ assertNotNull(MyHadoopThriftAuthBridge23.Server.TOKEN_STORE.getToken(d));
+ anotherManager.removeExpiredTokens();
+ assertNull("Expired token not removed",
+ MyHadoopThriftAuthBridge23.Server.TOKEN_STORE.getToken(d));
+
+ // key expiration - create an already expired key
+ anotherManager.startThreads(); // generates initial key
+ anotherManager.stopThreads();
+ DelegationKey expiredKey = new DelegationKey(-1, 0, anotherManager.getAllKeys()[0].getKey());
+ anotherManager.logUpdateMasterKey(expiredKey); // updates key with sequence number
+ assertTrue("expired key not in allKeys",
+ anotherManager.reloadKeys().containsKey(expiredKey.getKeyId()));
+ anotherManager.rollMasterKeyExt();
+ assertFalse("Expired key not removed",
+ anotherManager.reloadKeys().containsKey(expiredKey.getKeyId()));
+ }
+
+ public void testSaslWithHiveMetaStore() throws Exception {
+ setup();
+
+ final String proxyUserName = UserGroupInformation.getCurrentUser().getShortUserName();
+ setGroupsInConf(new String[] {"*"}, proxyUserName);
+
+ UserGroupInformation clientUgi = UserGroupInformation.getCurrentUser();
+ obtainTokenAndAddIntoUGI(clientUgi, null);
+ obtainTokenAndAddIntoUGI(clientUgi, "tokenForFooTablePartition");
+ }
+
+ public void testMetastoreProxyUser() throws Exception {
+ setup();
+
+ final String proxyUserName = "proxyUser";
+ //set the configuration up such that proxyUser can act on
+ //behalf of all users belonging to the group foo_bar_group (
+ //a dummy group)
+ String[] groupNames =
+ new String[] { "foo_bar_group" };
+ setGroupsInConf(groupNames, proxyUserName);
+
+ final UserGroupInformation delegationTokenUser =
+ UserGroupInformation.getCurrentUser();
+
+ final UserGroupInformation proxyUserUgi =
+ UserGroupInformation.createRemoteUser(proxyUserName);
+ String tokenStrForm = proxyUserUgi.doAs(new PrivilegedExceptionAction<String>() {
+ public String run() throws Exception {
+ try {
+ //Since the user running the test won't belong to a non-existent group
+ //foo_bar_group, the call to getDelegationTokenStr will fail
+ return getDelegationTokenStr(delegationTokenUser, proxyUserUgi);
+ } catch (AuthorizationException ae) {
+ return null;
+ }
+ }
+ });
+ assertTrue("Expected the getDelegationToken call to fail",
+ tokenStrForm == null);
+
+ //set the configuration up such that proxyUser can act on
+ //behalf of all users belonging to the real group(s) that the
+ //user running the test belongs to
+ setGroupsInConf(UserGroupInformation.getCurrentUser().getGroupNames(),
+ proxyUserName);
+ tokenStrForm = proxyUserUgi.doAs(new PrivilegedExceptionAction<String>() {
+ public String run() throws Exception {
+ try {
+ //Since the user running the test belongs to the group
+ //obtained above the call to getDelegationTokenStr will succeed
+ return getDelegationTokenStr(delegationTokenUser, proxyUserUgi);
+ } catch (AuthorizationException ae) {
+ return null;
+ }
+ }
+ });
+ assertTrue("Expected the getDelegationToken call to not fail",
+ tokenStrForm != null);
+ Token<DelegationTokenIdentifier> t= new Token<DelegationTokenIdentifier>();
+ t.decodeFromUrlString(tokenStrForm);
+ //check whether the username in the token is what we expect
+ DelegationTokenIdentifier d = new DelegationTokenIdentifier();
+ d.readFields(new DataInputStream(new ByteArrayInputStream(
+ t.getIdentifier())));
+ assertTrue("Usernames don't match",
+ delegationTokenUser.getShortUserName().equals(d.getUser().getShortUserName()));
+
+ }
+
+ private void setGroupsInConf(String[] groupNames, String proxyUserName)
+ throws IOException {
+ conf.set(
+ DefaultImpersonationProvider.getTestProvider().getProxySuperuserGroupConfKey(proxyUserName),
+ StringUtils.join(",", Arrays.asList(groupNames)));
+ configureSuperUserIPAddresses(conf, proxyUserName);
+ ProxyUsers.refreshSuperUserGroupsConfiguration(conf);
+ }
+
+ private String getDelegationTokenStr(UserGroupInformation ownerUgi,
+ UserGroupInformation realUgi) throws Exception {
+ //obtain a token by directly invoking the metastore operation(without going
+ //through the thrift interface). Obtaining a token makes the secret manager
+ //aware of the user and that it gave the token to the user
+ //also set the authentication method explicitly to KERBEROS. Since the
+ //metastore checks whether the authentication method is KERBEROS or not
+ //for getDelegationToken, and the testcases don't use
+ //kerberos, this needs to be done
+
+ waitForMetastoreTokenInit();
+
+ HadoopThriftAuthBridge.Server.authenticationMethod
+ .set(AuthenticationMethod.KERBEROS);
+ HadoopThriftAuthBridge.Server.remoteAddress.set(InetAddress.getLocalHost());
+ return
+ HiveMetaStore.getDelegationToken(ownerUgi.getShortUserName(),
+ realUgi.getShortUserName());
+ }
+
+ /**
+ * Wait for metastore to have initialized token manager
+ * This does not have to be done in other metastore test cases as they
+ * use the metastore client, which will retry a few times on failure
+ * @throws InterruptedException
+ */
+ private void waitForMetastoreTokenInit() throws InterruptedException {
+ int waitAttempts = 30;
+ while(waitAttempts > 0 && !isMetastoreTokenManagerInited){
+ Thread.sleep(1000);
+ waitAttempts--;
+ }
+ }
+
+ private void obtainTokenAndAddIntoUGI(UserGroupInformation clientUgi,
+ String tokenSig) throws Exception {
+ String tokenStrForm = getDelegationTokenStr(clientUgi, clientUgi);
+ Token<DelegationTokenIdentifier> t= new Token<DelegationTokenIdentifier>();
+ t.decodeFromUrlString(tokenStrForm);
+
+ //check whether the username in the token is what we expect
+ DelegationTokenIdentifier d = new DelegationTokenIdentifier();
+ d.readFields(new DataInputStream(new ByteArrayInputStream(
+ t.getIdentifier())));
+ assertTrue("Usernames don't match",
+ clientUgi.getShortUserName().equals(d.getUser().getShortUserName()));
+
+ if (tokenSig != null) {
+ conf.set("hive.metastore.token.signature", tokenSig);
+ t.setService(new Text(tokenSig));
+ }
+ //add the token to the clientUgi for securely talking to the metastore
+ clientUgi.addToken(t);
+ //Create the metastore client as the clientUgi. Doing so this
+ //way will give the client access to the token that was added earlier
+ //in the clientUgi
+ HiveMetaStoreClient hiveClient =
+ clientUgi.doAs(new PrivilegedExceptionAction<HiveMetaStoreClient>() {
+ public HiveMetaStoreClient run() throws Exception {
+ HiveMetaStoreClient hiveClient =
+ new HiveMetaStoreClient(conf);
+ return hiveClient;
+ }
+ });
+
+ assertTrue("Couldn't connect to metastore", hiveClient != null);
+
+ //try out some metastore operations
+ createDBAndVerifyExistence(hiveClient);
+
+ //check that getDelegationToken fails since we are not authenticating
+ //over kerberos
+ boolean pass = false;
+ try {
+ hiveClient.getDelegationToken(clientUgi.getUserName());
+ } catch (MetaException ex) {
+ pass = true;
+ }
+ assertTrue("Expected the getDelegationToken call to fail", pass == true);
+ hiveClient.close();
+
+ //Now cancel the delegation token
+ HiveMetaStore.cancelDelegationToken(tokenStrForm);
+
+ //now metastore connection should fail
+ hiveClient =
+ clientUgi.doAs(new PrivilegedExceptionAction<HiveMetaStoreClient>() {
+ public HiveMetaStoreClient run() {
+ try {
+ HiveMetaStoreClient hiveClient =
+ new HiveMetaStoreClient(conf);
+ return hiveClient;
+ } catch (MetaException e) {
+ return null;
+ }
+ }
+ });
+ assertTrue("Expected metastore operations to fail", hiveClient == null);
+ }
+
+ private void createDBAndVerifyExistence(HiveMetaStoreClient client)
+ throws Exception {
+ String dbName = "simpdb";
+ Database db = new Database();
+ db.setName(dbName);
+ client.createDatabase(db);
+ Database db1 = client.getDatabase(dbName);
+ client.dropDatabase(dbName);
+ assertTrue("Databases do not match", db1.getName().equals(db.getName()));
+ }
+
+ private int findFreePort() throws IOException {
+ ServerSocket socket= new ServerSocket(0);
+ int port = socket.getLocalPort();
+ socket.close();
+ return port;
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/262bae6b/shims/common/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge.java
----------------------------------------------------------------------
diff --git a/shims/common/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge.java b/shims/common/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge.java
index 9d49ad5..7ed7265 100644
--- a/shims/common/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge.java
+++ b/shims/common/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge.java
@@ -671,6 +671,9 @@ public class HadoopThriftAuthBridge {
if (useProxy) {
clientUgi = UserGroupInformation.createProxyUser(
endUser, UserGroupInformation.getLoginUser());
+
+ ProxyUsers.authorize(clientUgi, getRemoteAddress().getHostAddress(), null);
+
remoteUser.set(clientUgi.getShortUserName());
LOG.debug("Set remoteUser :" + remoteUser.get());
return clientUgi.doAs(new PrivilegedExceptionAction<Boolean>() {
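
With the ProxyUsers.authorize call in place, the standard Hadoop proxyuser policy is actually consulted before an impersonated metastore call is accepted. A minimal sketch of that policy, with illustrative values (the same properties are normally set in core-site.xml):

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.security.authorize.ProxyUsers;

  // Sketch: configure which groups/hosts the 'hive' service user may
  // impersonate, then reload the policy. Values are illustrative.
  Configuration conf = new Configuration();
  conf.set("hadoop.proxyuser.hive.groups", "hadoop,hive");
  conf.set("hadoop.proxyuser.hive.hosts", "metastore.example.com");
  ProxyUsers.refreshSuperUserGroupsConfiguration(conf);
  // After this, ProxyUsers.authorize(clientUgi, remoteAddr, null) throws
  // AuthorizationException for end users outside those groups/hosts.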
[30/50] [abbrv] hive git commit: HIVE-11902 - Abort txn cleanup thread throws SyntaxErrorException (Deepesh Khandelwal via Eugene Koifman)
Posted by xu...@apache.org.
HIVE-11902 - Abort txn cleanup thread throws SyntaxErrorException (Deepesh Khandelwal via Eugene Koifman)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5a5539c3
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5a5539c3
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5a5539c3
Branch: refs/heads/beeline-cli
Commit: 5a5539c36ef2e473edb143dc4320f33e7f380891
Parents: 44741da
Author: Eugene Koifman <ek...@hortonworks.com>
Authored: Tue Sep 22 15:44:16 2015 -0700
Committer: Eugene Koifman <ek...@hortonworks.com>
Committed: Tue Sep 22 15:44:32 2015 -0700
----------------------------------------------------------------------
.../java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/5a5539c3/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
index 9ecb82a..8597d9f 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
@@ -1302,6 +1302,9 @@ public class TxnHandler {
private int abortTxns(Connection dbConn, List<Long> txnids) throws SQLException {
Statement stmt = null;
int updateCnt = 0;
+ if (txnids.isEmpty()) {
+ return 0;
+ }
try {
stmt = dbConn.createStatement();
@@ -1921,7 +1924,7 @@ public class TxnHandler {
abortTxns(dbConn, batchToAbort);
dbConn.commit();
//todo: add TXNS.COMMENT filed and set it to 'aborted by system due to timeout'
- LOG.info("Aborted the following transactions due to timeout: " + timedOutTxns.toString());
+ LOG.info("Aborted the following transactions due to timeout: " + batchToAbort.toString());
}
int numTxnsAborted = (timedOutTxns.size() - 1) * TIMED_OUT_TXN_ABORT_BATCH_SIZE +
timedOutTxns.get(timedOutTxns.size() - 1).size();
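
The empty-list guard matters because abortTxns builds its update statement by joining the transaction ids into an IN clause; with no ids the generated SQL ends in "in ()", which the database rejects with the SyntaxErrorException named in the JIRA. An illustrative reconstruction (the exact statement text in TxnHandler may differ):

  import java.util.Collections;
  import java.util.List;

  // Sketch of the failure mode the guard prevents.
  List<Long> txnids = Collections.emptyList();
  StringBuilder sql =
      new StringBuilder("update TXNS set txn_state = 'a' where txn_id in (");
  for (int i = 0; i < txnids.size(); i++) {
    if (i > 0) sql.append(",");
    sql.append(txnids.get(i));
  }
  sql.append(")");
  // With an empty list this yields
  //   update TXNS set txn_state = 'a' where txn_id in ()
  // which is invalid SQL; returning 0 up front avoids ever running it.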
[42/50] [abbrv] hive git commit: HIVE-11922: Better error message when ORC split generation fails (Prasanth Jayachandran reviewed by Sergey Shelukhin)
Posted by xu...@apache.org.
HIVE-11922: Better error message when ORC split generation fails (Prasanth Jayachandran reviewed by Sergey Shelukhin)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/648f2c6b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/648f2c6b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/648f2c6b
Branch: refs/heads/beeline-cli
Commit: 648f2c6bd47c9fcb555fcaea64c15f8b03a48ab4
Parents: f73157f
Author: Prasanth Jayachandran <j....@gmail.com>
Authored: Wed Sep 23 20:02:00 2015 -0500
Committer: Prasanth Jayachandran <j....@gmail.com>
Committed: Wed Sep 23 20:02:00 2015 -0500
----------------------------------------------------------------------
.../hadoop/hive/ql/io/orc/OrcInputFormat.java | 6 +-----
.../hive/ql/io/orc/TestInputOutputFormat.java | 19 +++++++++++++++++++
2 files changed, 20 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/648f2c6b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 2500fb6..52e1b06 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -21,7 +21,6 @@ package org.apache.hadoop.hive.ql.io.orc;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -50,7 +49,6 @@ import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
import org.apache.hadoop.hive.ql.io.AcidInputFormat;
-import org.apache.hadoop.hive.ql.io.AcidInputFormat.DeltaMetaData;
import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.io.AcidUtils.Directory;
@@ -59,12 +57,10 @@ import org.apache.hadoop.hive.ql.io.InputFormatChecker;
import org.apache.hadoop.hive.ql.io.RecordIdentifier;
import org.apache.hadoop.hive.ql.io.StatsProvidingRecordReader;
import org.apache.hadoop.hive.ql.io.orc.OrcFile.WriterVersion;
-import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.Context;
import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg;
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
-import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -1060,7 +1056,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
} catch (Exception e) {
cancelFutures(pathFutures);
cancelFutures(splitFutures);
- throw new RuntimeException("serious problem", e);
+ throw new RuntimeException("ORC split generation failed with exception: " + e.getMessage(), e);
}
if (context.cacheStripeDetails) {
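
The change itself is the usual exception-wrapping pattern: keep the original exception as the cause and surface its message in the wrapper, so callers see why split generation failed rather than a generic "serious problem". A minimal sketch (generateSplits() is a stand-in name, not a real method in OrcInputFormat):

  try {
    generateSplits();   // stand-in for resolving the path/split futures
  } catch (Exception e) {
    // preserve the cause and surface its message in the wrapper text
    throw new RuntimeException(
        "ORC split generation failed with exception: " + e.getMessage(), e);
  }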
http://git-wip-us.apache.org/repos/asf/hive/blob/648f2c6b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
index 8ba4d2e..f451fce 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
@@ -1311,6 +1311,25 @@ public class TestInputOutputFormat {
assertEquals(null, serde.getSerDeStats());
}
+ @Test(expected = RuntimeException.class)
+ public void testSplitGenFailure() throws IOException {
+ Properties properties = new Properties();
+ HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
+ org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter writer =
+ outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true,
+ properties, Reporter.NULL);
+ writer.close(true);
+ InputFormat<?,?> in = new OrcInputFormat();
+ fs.setPermission(testFilePath, FsPermission.createImmutable((short) 0333));
+ FileInputFormat.setInputPaths(conf, testFilePath.toString());
+ try {
+ in.getSplits(conf, 1);
+ } catch (RuntimeException e) {
+ assertEquals(true, e.getMessage().contains("Permission denied"));
+ throw e;
+ }
+ }
+
static class StringRow implements Writable {
String str;
String str2;