Posted to commits@hive.apache.org by pr...@apache.org on 2015/09/25 20:42:11 UTC
[01/25] hive git commit: HIVE-11897 : JDO rollback can throw pointless exceptions (Sergey Shelukhin, reviewed by Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/llap 3b64bd6c1 -> 3c5b4ceda
HIVE-11897 : JDO rollback can throw pointless exceptions (Sergey Shelukhin, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/44741dab
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/44741dab
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/44741dab
Branch: refs/heads/llap
Commit: 44741dabf0a4e7a9bf21fb2ee1a0b00d9d8eeddb
Parents: 451381c
Author: Sergey Shelukhin <se...@apache.org>
Authored: Tue Sep 22 15:39:47 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Tue Sep 22 15:39:47 2015 -0700
----------------------------------------------------------------------
.../apache/hadoop/hive/metastore/ObjectStore.java | 16 +++++++++++++++-
1 file changed, 15 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/44741dab/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
index 5d2dc29..d9ed883 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
@@ -43,6 +43,7 @@ import java.util.concurrent.locks.ReentrantLock;
import java.util.regex.Pattern;
import javax.jdo.JDODataStoreException;
+import javax.jdo.JDOException;
import javax.jdo.JDOHelper;
import javax.jdo.JDOObjectNotFoundException;
import javax.jdo.PersistenceManager;
@@ -2431,7 +2432,20 @@ public class ObjectStore implements RawStore, Configurable {
throw new MetaException(ex.getMessage());
}
if (!isInTxn) {
- rollbackTransaction();
+ JDOException rollbackEx = null;
+ try {
+ rollbackTransaction();
+ } catch (JDOException jex) {
+ rollbackEx = jex;
+ }
+ if (rollbackEx != null) {
+ // Datanucleus propagates some pointless exceptions and rolls back in the finally.
+ if (currentTransaction != null && currentTransaction.isActive()) {
+ throw rollbackEx; // Throw if the tx wasn't rolled back.
+ }
+ LOG.info("Ignoring exception, rollback succeeded: " + rollbackEx.getMessage());
+ }
+
start = doTrace ? System.nanoTime() : 0;
openTransaction();
if (table != null) {
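A minimal standalone sketch of the rollback pattern this hunk introduces, using a plain javax.jdo.Transaction in place of ObjectStore's currentTransaction and rollbackTransaction() (the class and method names below are illustrative, not Hive code):
----------------------------------------------------------------------
import javax.jdo.JDOException;
import javax.jdo.Transaction;

public class TolerantRollback {
  // Roll back, but only propagate a JDOException if the rollback truly
  // failed. DataNucleus sometimes rolls back in its own finally block and
  // then throws anyway; in that case the transaction is no longer active
  // and the exception carries no useful information.
  static void rollbackQuietly(Transaction tx) {
    JDOException rollbackEx = null;
    try {
      tx.rollback();
    } catch (JDOException jex) {
      rollbackEx = jex;
    }
    if (rollbackEx != null) {
      if (tx.isActive()) {
        throw rollbackEx; // the tx wasn't rolled back; rethrow
      }
      // ObjectStore logs via LOG.info; plain stdout keeps this sketch
      // self-contained.
      System.out.println("Ignoring exception, rollback succeeded: "
          + rollbackEx.getMessage());
    }
  }
}
----------------------------------------------------------------------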
[24/25] hive git commit: HIVE-11827: STORED AS AVRO fails SELECT COUNT(*) when empty (Yongzhi via Xuefu)
Posted by pr...@apache.org.
HIVE-11827: STORED AS AVRO fails SELECT COUNT(*) when empty (Yongzhi via Xuefu)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6c2d71cb
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6c2d71cb
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6c2d71cb
Branch: refs/heads/llap
Commit: 6c2d71cb43b2138a8740a28863b30cfc16973424
Parents: 1c0a314
Author: Xuefu Zhang <xz...@Cloudera.com>
Authored: Fri Sep 25 10:18:28 2015 -0700
Committer: Xuefu Zhang <xz...@Cloudera.com>
Committed: Fri Sep 25 10:19:06 2015 -0700
----------------------------------------------------------------------
.../queries/clientpositive/avrocountemptytbl.q | 8 +++
.../clientpositive/avrocountemptytbl.q.out | 58 ++++++++++++++++++++
.../hadoop/hive/serde2/avro/AvroSerdeUtils.java | 24 +++++++-
3 files changed, 89 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/6c2d71cb/ql/src/test/queries/clientpositive/avrocountemptytbl.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/avrocountemptytbl.q b/ql/src/test/queries/clientpositive/avrocountemptytbl.q
new file mode 100644
index 0000000..9ecfb05
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/avrocountemptytbl.q
@@ -0,0 +1,8 @@
+drop table if exists emptyavro;
+create table emptyavro (a int) stored as avro;
+select count(*) from emptyavro;
+insert into emptyavro select count(*) from emptyavro;
+select count(*) from emptyavro;
+insert into emptyavro select key from src where key = 100 limit 1;
+select * from emptyavro;
+
http://git-wip-us.apache.org/repos/asf/hive/blob/6c2d71cb/ql/src/test/results/clientpositive/avrocountemptytbl.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/avrocountemptytbl.q.out b/ql/src/test/results/clientpositive/avrocountemptytbl.q.out
new file mode 100644
index 0000000..72e8489
--- /dev/null
+++ b/ql/src/test/results/clientpositive/avrocountemptytbl.q.out
@@ -0,0 +1,58 @@
+PREHOOK: query: drop table if exists emptyavro
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists emptyavro
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table emptyavro (a int) stored as avro
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@emptyavro
+POSTHOOK: query: create table emptyavro (a int) stored as avro
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@emptyavro
+PREHOOK: query: select count(*) from emptyavro
+PREHOOK: type: QUERY
+PREHOOK: Input: default@emptyavro
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from emptyavro
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@emptyavro
+#### A masked pattern was here ####
+0
+PREHOOK: query: insert into emptyavro select count(*) from emptyavro
+PREHOOK: type: QUERY
+PREHOOK: Input: default@emptyavro
+PREHOOK: Output: default@emptyavro
+POSTHOOK: query: insert into emptyavro select count(*) from emptyavro
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@emptyavro
+POSTHOOK: Output: default@emptyavro
+POSTHOOK: Lineage: emptyavro.a EXPRESSION [(emptyavro)emptyavro.null, ]
+PREHOOK: query: select count(*) from emptyavro
+PREHOOK: type: QUERY
+PREHOOK: Input: default@emptyavro
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from emptyavro
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@emptyavro
+#### A masked pattern was here ####
+1
+PREHOOK: query: insert into emptyavro select key from src where key = 100 limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@emptyavro
+POSTHOOK: query: insert into emptyavro select key from src where key = 100 limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@emptyavro
+POSTHOOK: Lineage: emptyavro.a EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: select * from emptyavro
+PREHOOK: type: QUERY
+PREHOOK: Input: default@emptyavro
+#### A masked pattern was here ####
+POSTHOOK: query: select * from emptyavro
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@emptyavro
+#### A masked pattern was here ####
+0
+100
http://git-wip-us.apache.org/repos/asf/hive/blob/6c2d71cb/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java
index 4edf654..903ac95 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java
@@ -27,6 +27,9 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.mapred.JobConf;
import java.io.File;
@@ -38,6 +41,7 @@ import java.net.URISyntaxException;
import java.net.URL;
import java.nio.Buffer;
import java.nio.ByteBuffer;
+import java.util.Arrays;
import java.util.List;
import java.util.Properties;
@@ -105,8 +109,26 @@ public class AvroSerdeUtils {
// Try pulling directly from URL
schemaString = properties.getProperty(AvroTableProperties.SCHEMA_URL.getPropName());
- if(schemaString == null || schemaString.equals(SCHEMA_NONE))
+ if (schemaString == null) {
+ final String columnNameProperty = properties.getProperty(serdeConstants.LIST_COLUMNS);
+ final String columnTypeProperty = properties.getProperty(serdeConstants.LIST_COLUMN_TYPES);
+ final String columnCommentProperty = properties.getProperty(AvroSerDe.LIST_COLUMN_COMMENTS);
+ if (columnNameProperty == null || columnNameProperty.isEmpty()
+ || columnTypeProperty == null || columnTypeProperty.isEmpty() ) {
+ throw new AvroSerdeException(EXCEPTION_MESSAGE);
+ }
+ // Get column names and types
+ List<String> columnNames = Arrays.asList(columnNameProperty.split(","));
+ List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
+
+ Schema schema = AvroSerDe.getSchemaFromCols(properties, columnNames, columnTypes, columnCommentProperty);
+ properties.setProperty(AvroTableProperties.SCHEMA_LITERAL.getPropName(), schema.toString());
+ if (conf != null)
+ conf.set(AvroTableProperties.AVRO_SERDE_SCHEMA.getPropName(), schema.toString(false));
+ return schema;
+ } else if(schemaString.equals(SCHEMA_NONE)) {
throw new AvroSerdeException(EXCEPTION_MESSAGE);
+ }
try {
Schema s = getSchemaFromFS(schemaString, conf);
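The surrounding context suggests this hunk sits inside AvroSerdeUtils.determineSchemaOrThrowException(conf, properties). A hedged usage sketch of the new fallback, with made-up column metadata (a single int column, as in the avrocountemptytbl.q test above):
----------------------------------------------------------------------
import java.util.Properties;

import org.apache.avro.Schema;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils;
import org.apache.hadoop.mapred.JobConf;

public class AvroSchemaFallbackDemo {
  public static void main(String[] args) throws Exception {
    // Neither avro.schema.literal nor avro.schema.url is set; only the
    // Hive column metadata is available.
    Properties props = new Properties();
    props.setProperty(serdeConstants.LIST_COLUMNS, "a");
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int");
    // Before this patch the call threw AvroSerdeException; with the
    // fallback it derives a record schema from the columns instead.
    Schema schema =
        AvroSerdeUtils.determineSchemaOrThrowException(new JobConf(), props);
    System.out.println(schema.toString(true));
  }
}
----------------------------------------------------------------------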
[19/25] hive git commit: HIVE-11517 Vectorized auto_smb_mapjoin_14.q produces different results (Matt McCline, reviewed by Vikram Dixit K)
Posted by pr...@apache.org.
HIVE-11517 Vectorized auto_smb_mapjoin_14.q produces different results (Matt McCline, reviewed by Vikram Dixit K)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/461e38ec
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/461e38ec
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/461e38ec
Branch: refs/heads/llap
Commit: 461e38ecee8b9fd1d829ff0884f78c1a75013bd3
Parents: 68d6cfd
Author: Matt McCline <mm...@hortonworks.com>
Authored: Thu Sep 24 15:23:50 2015 -0700
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Thu Sep 24 15:23:50 2015 -0700
----------------------------------------------------------------------
.../test/resources/testconfiguration.properties | 1 +
.../exec/vector/VectorSMBMapJoinOperator.java | 15 +-
.../clientpositive/vector_auto_smb_mapjoin_14.q | 297 +++
.../tez/vector_auto_smb_mapjoin_14.q.out | 1576 +++++++++++++++
.../vector_auto_smb_mapjoin_14.q.out | 1792 ++++++++++++++++++
5 files changed, 3679 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/461e38ec/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index b47d1b5..4f7b25f 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -190,6 +190,7 @@ minitez.query.files.shared=alter_merge_2_orc.q,\
update_two_cols.q,\
vector_acid3.q,\
vector_aggregate_9.q,\
+ vector_auto_smb_mapjoin_14.q,\
vector_between_in.q,\
vector_binary_join_groupby.q,\
vector_bucket.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/461e38ec/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java
index a2f8091..804ba17 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java
@@ -38,7 +38,9 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.SMBJoinDesc;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
/**
* VectorSMBJoinOperator.
@@ -123,8 +125,17 @@ public class VectorSMBMapJoinOperator extends SMBMapJoinOperator implements Vect
@Override
protected List<Object> smbJoinComputeKeys(Object row, byte alias) throws HiveException {
if (alias == this.posBigTable) {
- VectorizedRowBatch inBatch = (VectorizedRowBatch) row;
- return keyEvaluator.evaluate(keyValues[batchIndex]);
+
+ // The keyEvaluator reuses storage. That doesn't work with SMB MapJoin because it
+ // holds references to keys as it is merging.
+ List<Object> singletonListAndObjects = keyEvaluator.evaluate(keyValues[batchIndex]);
+ ArrayList<Object> result = new ArrayList<Object>(singletonListAndObjects.size());
+ for (int i = 0; i < singletonListAndObjects.size(); i++) {
+ result.add(ObjectInspectorUtils.copyToStandardObject(singletonListAndObjects.get(i),
+ joinKeysObjectInspectors[alias].get(i),
+ ObjectInspectorCopyOption.WRITABLE));
+ }
+ return result;
} else {
return super.smbJoinComputeKeys(row, alias);
}
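A minimal illustration (not Hive code) of the aliasing hazard this hunk fixes: an evaluator that reuses its output buffer hands every caller the same List instance, so a key saved while merging silently changes when the next row is evaluated:
----------------------------------------------------------------------
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class ReusedKeyDemo {
  // Stands in for keyEvaluator.evaluate(...), which reuses storage.
  static final List<Object> reused =
      new ArrayList<Object>(Arrays.asList((Object) 0));

  static List<Object> evaluate(int key) {
    reused.set(0, key);
    return reused; // same instance on every call
  }

  public static void main(String[] args) {
    List<Object> savedKey = evaluate(1); // SMB join holds this reference
    evaluate(2);                         // next row overwrites the buffer
    System.out.println(savedKey);        // prints [2], not [1]

    // The fix: copy each element out of the reused buffer, as the patch
    // does with ObjectInspectorUtils.copyToStandardObject(...).
    List<Object> copied = new ArrayList<Object>(evaluate(3));
    evaluate(4);
    System.out.println(copied);          // prints [3], as expected
  }
}
----------------------------------------------------------------------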
http://git-wip-us.apache.org/repos/asf/hive/blob/461e38ec/ql/src/test/queries/clientpositive/vector_auto_smb_mapjoin_14.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_auto_smb_mapjoin_14.q b/ql/src/test/queries/clientpositive/vector_auto_smb_mapjoin_14.q
new file mode 100644
index 0000000..32be5ee
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_auto_smb_mapjoin_14.q
@@ -0,0 +1,297 @@
+SET hive.vectorized.execution.enabled=true;
+set hive.enforce.bucketing = true;
+set hive.enforce.sorting = true;
+set hive.exec.reducers.max = 1;
+
+-- SORT_QUERY_RESULTS
+
+CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS ORC;
+CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS ORC;
+
+insert overwrite table tbl1
+select * from src where key < 10;
+
+insert overwrite table tbl2
+select * from src where key < 10;
+
+set hive.optimize.bucketmapjoin = true;
+set hive.optimize.bucketmapjoin.sortedmerge = true;
+set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
+
+set hive.auto.convert.sortmerge.join=true;
+
+-- The join is being performed as part of a sub-query. It should be converted to a sort-merge join
+explain
+select count(*) from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1;
+
+select count(*) from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1;
+
+-- The join is being performed as part of more than one sub-query. It should be converted to a sort-merge join
+explain
+select count(*) from
+(
+ select key, count(*) from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1
+ group by key
+) subq2;
+
+select count(*) from
+(
+ select key, count(*) from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1
+ group by key
+) subq2;
+
+-- A join is being performed across different sub-queries, where a join is being performed in each of them.
+-- Each sub-query should be converted to a sort-merge join.
+explain
+select src1.key, src1.cnt1, src2.cnt1 from
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1 group by key
+) src1
+join
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq2 group by key
+) src2
+on src1.key = src2.key;
+
+select src1.key, src1.cnt1, src2.cnt1 from
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1 group by key
+) src1
+join
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq2 group by key
+) src2
+on src1.key = src2.key;
+
+-- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should
+-- be converted to a sort-merge join.
+explain
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on subq1.key = subq2.key;
+
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on subq1.key = subq2.key;
+
+-- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should
+-- be converted to a sort-merge join, although there is more than one level of sub-query
+explain
+select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join tbl2 b
+ on subq2.key = b.key;
+
+select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join tbl2 b
+ on subq2.key = b.key;
+
+-- Both the tables are nested sub-queries, i.e. more than 1 level of sub-query.
+-- The join should be converted to a sort-merge join
+explain
+select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq3
+ where key < 6
+ ) subq4
+ on subq2.key = subq4.key;
+
+select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq3
+ where key < 6
+ ) subq4
+ on subq2.key = subq4.key;
+
+-- The subquery itself is being joined. Since the sub-query only contains selects and filters and the join key
+-- is not getting modified, it should be converted to a sort-merge join. Note that the sub-query modifies one
+-- item, but that is not part of the join key.
+explain
+select count(*) from
+ (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1
+ join
+ (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2
+ on subq1.key = subq2.key;
+
+select count(*) from
+ (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1
+ join
+ (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2
+ on subq1.key = subq2.key;
+
+-- Since the join key is modified by the sub-query, neither sort-merge join nor bucketized map-side
+-- join should be performed
+explain
+select count(*) from
+ (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1
+ join
+ (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2
+ on subq1.key = subq2.key;
+
+select count(*) from
+ (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1
+ join
+ (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2
+ on subq1.key = subq2.key;
+
+-- One of the tables is a sub-query and the other is not.
+-- It should be converted to a sort-merge join.
+explain
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join tbl2 a on subq1.key = a.key;
+
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join tbl2 a on subq1.key = a.key;
+
+-- There are more than 2 inputs to the join, all of them being sub-queries.
+-- It should be converted to a sort-merge join
+explain
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on (subq1.key = subq2.key)
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq3
+ on (subq1.key = subq3.key);
+
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on subq1.key = subq2.key
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq3
+ on (subq1.key = subq3.key);
+
+-- The join is being performed on a nested sub-query, and an aggregation is performed after that.
+-- The join should be converted to a sort-merge join
+explain
+select count(*) from (
+ select subq2.key as key, subq2.value as value1, b.value as value2 from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+join tbl2 b
+on subq2.key = b.key) a;
+
+select count(*) from (
+ select subq2.key as key, subq2.value as value1, b.value as value2 from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+join tbl2 b
+on subq2.key = b.key) a;
+
+CREATE TABLE dest1(key int, value string);
+CREATE TABLE dest2(key int, val1 string, val2 string);
+
+-- The join is followed by a multi-table insert. It should be converted to
+-- a sort-merge join
+explain
+from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+insert overwrite table dest1 select key, val1
+insert overwrite table dest2 select key, val1, val2;
+
+from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+insert overwrite table dest1 select key, val1
+insert overwrite table dest2 select key, val1, val2;
+
+select * from dest1;
+select * from dest2;
+
+DROP TABLE dest2;
+CREATE TABLE dest2(key int, cnt int);
+
+-- The join is followed by a multi-table insert, and one of the inserts involves a reducer.
+-- It should be converted to a sort-merge join
+explain
+from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+insert overwrite table dest1 select key, val1
+insert overwrite table dest2 select key, count(*) group by key;
+
+from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+insert overwrite table dest1 select key, val1
+insert overwrite table dest2 select key, count(*) group by key;
+
+select * from dest1;
+select * from dest2;
http://git-wip-us.apache.org/repos/asf/hive/blob/461e38ec/ql/src/test/results/clientpositive/tez/vector_auto_smb_mapjoin_14.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_auto_smb_mapjoin_14.q.out b/ql/src/test/results/clientpositive/tez/vector_auto_smb_mapjoin_14.q.out
new file mode 100644
index 0000000..480c4e1
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/vector_auto_smb_mapjoin_14.q.out
@@ -0,0 +1,1576 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl1
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl1
+PREHOOK: query: CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl2
+POSTHOOK: query: CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl2
+PREHOOK: query: insert overwrite table tbl1
+select * from src where key < 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@tbl1
+POSTHOOK: query: insert overwrite table tbl1
+select * from src where key < 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@tbl1
+POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table tbl2
+select * from src where key < 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@tbl2
+POSTHOOK: query: insert overwrite table tbl2
+select * from src where key < 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@tbl2
+POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- The join is being performed as part of a sub-query. It should be converted to a sort-merge join
+explain
+select count(*) from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The join is being performed as part of a sub-query. It should be converted to a sort-merge join
+explain
+select count(*) from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+POSTHOOK: type: QUERY
+Plan not optimized by CBO due to missing statistics. Please check log for more details.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Reducer 2
+ File Output Operator [FS_13]
+ compressed:false
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}
+ Group By Operator [OP_20]
+ | aggregations:["count(VALUE._col0)"]
+ | outputColumnNames:["_col0"]
+ | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ |<-Map 1 [SIMPLE_EDGE]
+ Reduce Output Operator [RS_10]
+ sort order:
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions:_col0 (type: bigint)
+ Group By Operator [GBY_9]
+ aggregations:["count()"]
+ outputColumnNames:["_col0"]
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator [MERGEJOIN_18]
+ | condition map:[{"":"Inner Join 0 to 1"}]
+ | keys:{"0":"key (type: int)","1":"key (type: int)"}
+ | Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ |
+ |<-Filter Operator [FIL_17]
+ | predicate:key is not null (type: boolean)
+ | Statistics:Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ | TableScan [TS_1]
+ | alias:b
+ | Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ |<-Filter Operator [FIL_16]
+ predicate:key is not null (type: boolean)
+ Statistics:Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ TableScan [TS_0]
+ alias:a
+ Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+
+PREHOOK: query: select count(*) from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+22
+PREHOOK: query: -- The join is being performed as part of more than one sub-query. It should be converted to a sort-merge join
+explain
+select count(*) from
+(
+ select key, count(*) from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1
+ group by key
+) subq2
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The join is being performed as part of more than one sub-query. It should be converted to a sort-merge join
+explain
+select count(*) from
+(
+ select key, count(*) from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1
+ group by key
+) subq2
+POSTHOOK: type: QUERY
+Plan not optimized by CBO due to missing statistics. Please check log for more details.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Reducer 3
+ File Output Operator [FS_18]
+ compressed:false
+ Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}
+ Group By Operator [OP_28]
+ | aggregations:["count(VALUE._col0)"]
+ | outputColumnNames:["_col0"]
+ | Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ |<-Reducer 2 [SIMPLE_EDGE]
+ Reduce Output Operator [RS_15]
+ sort order:
+ Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions:_col0 (type: bigint)
+ Group By Operator [OP_27]
+ aggregations:["count()"]
+ outputColumnNames:["_col0"]
+ Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Select Operator [OP_26]
+ Statistics:Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator [OP_25]
+ | aggregations:["count(VALUE._col0)"]
+ | keys:KEY._col0 (type: int)
+ | outputColumnNames:["_col0","_col1"]
+ | Statistics:Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: NONE
+ |<-Map 1 [SIMPLE_EDGE]
+ Reduce Output Operator [RS_10]
+ key expressions:_col0 (type: int)
+ Map-reduce partition columns:_col0 (type: int)
+ sort order:+
+ Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ value expressions:_col1 (type: bigint)
+ Group By Operator [GBY_9]
+ aggregations:["count()"]
+ keys:_col0 (type: int)
+ outputColumnNames:["_col0","_col1"]
+ Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator [MERGEJOIN_23]
+ | condition map:[{"":"Inner Join 0 to 1"}]
+ | keys:{"0":"key (type: int)","1":"key (type: int)"}
+ | outputColumnNames:["_col0"]
+ | Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ |
+ |<-Filter Operator [FIL_22]
+ | predicate:key is not null (type: boolean)
+ | Statistics:Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ | TableScan [TS_1]
+ | alias:b
+ | Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ |<-Filter Operator [FIL_21]
+ predicate:key is not null (type: boolean)
+ Statistics:Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ TableScan [TS_0]
+ alias:a
+ Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+
+PREHOOK: query: select count(*) from
+(
+ select key, count(*) from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1
+ group by key
+) subq2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from
+(
+ select key, count(*) from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1
+ group by key
+) subq2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+6
+PREHOOK: query: -- A join is being performed across different sub-queries, where a join is being performed in each of them.
+-- Each sub-query should be converted to a sort-merge join.
+explain
+select src1.key, src1.cnt1, src2.cnt1 from
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1 group by key
+) src1
+join
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq2 group by key
+) src2
+on src1.key = src2.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- A join is being performed across different sub-queries, where a join is being performed in each of them.
+-- Each sub-query should be converted to a sort-merge join.
+explain
+select src1.key, src1.cnt1, src2.cnt1 from
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1 group by key
+) src1
+join
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq2 group by key
+) src2
+on src1.key = src2.key
+POSTHOOK: type: QUERY
+Plan not optimized by CBO due to missing statistics. Please check log for more details.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
+Reducer 6 <- Map 5 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Reducer 3
+ File Output Operator [FS_32]
+ compressed:false
+ Statistics:Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE
+ table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}
+ Select Operator [SEL_31]
+ outputColumnNames:["_col0","_col1","_col2"]
+ Statistics:Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator [MERGEJOIN_49]
+ | condition map:[{"":"Inner Join 0 to 1"}]
+ | keys:{"0":"_col0 (type: int)","1":"_col0 (type: int)"}
+ | outputColumnNames:["_col0","_col1","_col3"]
+ | Statistics:Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE
+ |<-Reducer 2 [SIMPLE_EDGE]
+ | Reduce Output Operator [RS_51]
+ | key expressions:_col0 (type: int)
+ | Map-reduce partition columns:_col0 (type: int)
+ | sort order:+
+ | Statistics:Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: NONE
+ | value expressions:_col1 (type: bigint)
+ | Group By Operator [OP_50]
+ | | aggregations:["count(VALUE._col0)"]
+ | | keys:KEY._col0 (type: int)
+ | | outputColumnNames:["_col0","_col1"]
+ | | Statistics:Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: NONE
+ | |<-Map 1 [SIMPLE_EDGE]
+ | Reduce Output Operator [RS_10]
+ | key expressions:_col0 (type: int)
+ | Map-reduce partition columns:_col0 (type: int)
+ | sort order:+
+ | Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ | value expressions:_col1 (type: bigint)
+ | Group By Operator [GBY_9]
+ | aggregations:["count()"]
+ | keys:_col0 (type: int)
+ | outputColumnNames:["_col0","_col1"]
+ | Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ | Merge Join Operator [MERGEJOIN_45]
+ | | condition map:[{"":"Inner Join 0 to 1"}]
+ | | keys:{"0":"key (type: int)","1":"key (type: int)"}
+ | | outputColumnNames:["_col0"]
+ | | Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ | |
+ | |<-Filter Operator [FIL_42]
+ | | predicate:key is not null (type: boolean)
+ | | Statistics:Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ | | TableScan [TS_1]
+ | | alias:b
+ | | Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ | |<-Filter Operator [FIL_41]
+ | predicate:key is not null (type: boolean)
+ | Statistics:Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ | TableScan [TS_0]
+ | alias:a
+ | Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ |<-Reducer 6 [SIMPLE_EDGE]
+ Reduce Output Operator [RS_53]
+ key expressions:_col0 (type: int)
+ Map-reduce partition columns:_col0 (type: int)
+ sort order:+
+ Statistics:Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: NONE
+ value expressions:_col1 (type: bigint)
+ Group By Operator [OP_52]
+ | aggregations:["count(VALUE._col0)"]
+ | keys:KEY._col0 (type: int)
+ | outputColumnNames:["_col0","_col1"]
+ | Statistics:Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: NONE
+ |<-Map 5 [SIMPLE_EDGE]
+ Reduce Output Operator [RS_23]
+ key expressions:_col0 (type: int)
+ Map-reduce partition columns:_col0 (type: int)
+ sort order:+
+ Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ value expressions:_col1 (type: bigint)
+ Group By Operator [GBY_22]
+ aggregations:["count()"]
+ keys:_col0 (type: int)
+ outputColumnNames:["_col0","_col1"]
+ Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator [MERGEJOIN_47]
+ | condition map:[{"":"Inner Join 0 to 1"}]
+ | keys:{"0":"key (type: int)","1":"key (type: int)"}
+ | outputColumnNames:["_col0"]
+ | Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ |
+ |<-Filter Operator [FIL_44]
+ | predicate:key is not null (type: boolean)
+ | Statistics:Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ | TableScan [TS_14]
+ | alias:b
+ | Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ |<-Filter Operator [FIL_43]
+ predicate:key is not null (type: boolean)
+ Statistics:Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ TableScan [TS_13]
+ alias:a
+ Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+
+PREHOOK: query: select src1.key, src1.cnt1, src2.cnt1 from
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1 group by key
+) src1
+join
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq2 group by key
+) src2
+on src1.key = src2.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select src1.key, src1.cnt1, src2.cnt1 from
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1 group by key
+) src1
+join
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq2 group by key
+) src2
+on src1.key = src2.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+0 9 9
+2 1 1
+4 1 1
+5 9 9
+8 1 1
+9 1 1
+PREHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should
+-- be converted to a sort-merge join.
+explain
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on subq1.key = subq2.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should
+-- be converted to a sort-merge join.
+explain
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on subq1.key = subq2.key
+POSTHOOK: type: QUERY
+Plan not optimized by CBO due to missing statistics. Please check log for more details.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Reducer 2
+ File Output Operator [FS_16]
+ compressed:false
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}
+ Group By Operator [OP_23]
+ | aggregations:["count(VALUE._col0)"]
+ | outputColumnNames:["_col0"]
+ | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ |<-Map 1 [SIMPLE_EDGE]
+ Reduce Output Operator [RS_13]
+ sort order:
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions:_col0 (type: bigint)
+ Group By Operator [GBY_12]
+ aggregations:["count()"]
+ outputColumnNames:["_col0"]
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator [MERGEJOIN_21]
+ | condition map:[{"":"Inner Join 0 to 1"}]
+ | keys:{"0":"_col0 (type: int)","1":"_col0 (type: int)"}
+ | Statistics:Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: NONE
+ |
+ |<-Select Operator [SEL_5]
+ | outputColumnNames:["_col0"]
+ | Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ | Filter Operator [FIL_20]
+ | predicate:(key < 6) (type: boolean)
+ | Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ | TableScan [TS_3]
+ | alias:a
+ | Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ |<-Select Operator [SEL_2]
+ outputColumnNames:["_col0"]
+ Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator [FIL_19]
+ predicate:(key < 6) (type: boolean)
+ Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ TableScan [TS_0]
+ alias:a
+ Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+
+PREHOOK: query: select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on subq1.key = subq2.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on subq1.key = subq2.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+20
+PREHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should
+-- be converted to a sort-merge join, although there is more than one level of sub-query
+explain
+select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join tbl2 b
+ on subq2.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should
+-- be converted to a sort-merge join, although there is more than one level of sub-query
+explain
+select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join tbl2 b
+ on subq2.key = b.key
+POSTHOOK: type: QUERY
+Plan not optimized by CBO due to missing statistics. Please check log for more details.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Reducer 2
+ File Output Operator [FS_16]
+ compressed:false
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}
+ Group By Operator [OP_23]
+ | aggregations:["count(VALUE._col0)"]
+ | outputColumnNames:["_col0"]
+ | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ |<-Map 1 [SIMPLE_EDGE]
+ Reduce Output Operator [RS_13]
+ sort order:
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions:_col0 (type: bigint)
+ Group By Operator [GBY_12]
+ aggregations:["count()"]
+ outputColumnNames:["_col0"]
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator [MERGEJOIN_21]
+ | condition map:[{"":"Inner Join 0 to 1"}]
+ | keys:{"0":"_col0 (type: int)","1":"key (type: int)"}
+ | Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ |
+ |<-Filter Operator [FIL_20]
+ | predicate:key is not null (type: boolean)
+ | Statistics:Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ | TableScan [TS_5]
+ | alias:b
+ | Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ |<-Select Operator [SEL_2]
+ outputColumnNames:["_col0"]
+ Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator [FIL_19]
+ predicate:(((key < 8) and (key < 6)) and key is not null) (type: boolean)
+ Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
+ TableScan [TS_0]
+ alias:a
+ Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+
+PREHOOK: query: select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join tbl2 b
+ on subq2.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join tbl2 b
+ on subq2.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+20
+PREHOOK: query: -- Both the tables are nested sub-queries, i.e. more than 1 level of sub-query.
+-- The join should be converted to a sort-merge join
+explain
+select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq3
+ where key < 6
+ ) subq4
+ on subq2.key = subq4.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Both the tables are nested sub-queries, i.e. more than 1 level of sub-query.
+-- The join should be converted to a sort-merge join
+explain
+select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq3
+ where key < 6
+ ) subq4
+ on subq2.key = subq4.key
+POSTHOOK: type: QUERY
+Plan not optimized by CBO due to missing statistics. Please check log for more details.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Reducer 2
+ File Output Operator [FS_20]
+ compressed:false
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}
+ Group By Operator [OP_27]
+ | aggregations:["count(VALUE._col0)"]
+ | outputColumnNames:["_col0"]
+ | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ |<-Map 1 [SIMPLE_EDGE]
+ Reduce Output Operator [RS_17]
+ sort order:
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions:_col0 (type: bigint)
+ Group By Operator [GBY_16]
+ aggregations:["count()"]
+ outputColumnNames:["_col0"]
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator [MERGEJOIN_25]
+ | condition map:[{"":"Inner Join 0 to 1"}]
+ | keys:{"0":"_col0 (type: int)","1":"_col0 (type: int)"}
+ | Statistics:Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: NONE
+ |
+ |<-Select Operator [SEL_7]
+ | outputColumnNames:["_col0"]
+ | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
+ | Filter Operator [FIL_24]
+ | predicate:(((key < 8) and (key < 6)) and key is not null) (type: boolean)
+ | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
+ | TableScan [TS_5]
+ | alias:a
+ | Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ |<-Select Operator [SEL_2]
+ outputColumnNames:["_col0"]
+ Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator [FIL_23]
+ predicate:(((key < 8) and (key < 6)) and key is not null) (type: boolean)
+ Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
+ TableScan [TS_0]
+ alias:a
+ Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+
+PREHOOK: query: select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq3
+ where key < 6
+ ) subq4
+ on subq2.key = subq4.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq3
+ where key < 6
+ ) subq4
+ on subq2.key = subq4.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+#### A masked pattern was here ####
+20
+PREHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters and the join key
+-- is not getting modified, it should be converted to a sort-merge join. Note that the sub-query modifies one
+-- item, but that is not part of the join key.
+explain
+select count(*) from
+ (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1
+ join
+ (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2
+ on subq1.key = subq2.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters and the join key
+-- is not getting modified, it should be converted to a sort-merge join. Note that the sub-query modifies one
+-- item, but that is not part of the join key.
+explain
+select count(*) from
+ (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1
+ join
+ (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2
+ on subq1.key = subq2.key
+POSTHOOK: type: QUERY
+Plan not optimized by CBO due to missing statistics. Please check log for more details.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Reducer 2
+ File Output Operator [FS_16]
+ compressed:false
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}
+ Group By Operator [OP_23]
+ | aggregations:["count(VALUE._col0)"]
+ | outputColumnNames:["_col0"]
+ | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ |<-Map 1 [SIMPLE_EDGE]
+ Reduce Output Operator [RS_13]
+ sort order:
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions:_col0 (type: bigint)
+ Group By Operator [GBY_12]
+ aggregations:["count()"]
+ outputColumnNames:["_col0"]
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator [MERGEJOIN_21]
+ | condition map:[{"":"Inner Join 0 to 1"}]
+ | keys:{"0":"_col0 (type: int)","1":"_col0 (type: int)"}
+ | Statistics:Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: NONE
+ |
+ |<-Select Operator [SEL_5]
+ | outputColumnNames:["_col0"]
+ | Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ | Filter Operator [FIL_20]
+ | predicate:(key < 8) (type: boolean)
+ | Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ | TableScan [TS_3]
+ | alias:a
+ | Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ |<-Select Operator [SEL_2]
+ outputColumnNames:["_col0"]
+ Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator [FIL_19]
+ predicate:(key < 8) (type: boolean)
+ Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ TableScan [TS_0]
+ alias:a
+ Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+
+PREHOOK: query: select count(*) from
+ (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1
+ join
+ (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2
+ on subq1.key = subq2.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from
+ (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1
+ join
+ (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2
+ on subq1.key = subq2.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+20
+PREHOOK: query: -- Since the join key is modified by the sub-query, neither sort-merge join nor bucketized map-side
+-- join should be performed
+explain
+select count(*) from
+ (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1
+ join
+ (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2
+ on subq1.key = subq2.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Since the join key is modified by the sub-query, neither sort-merge join nor bucketized map-side
+-- join should be performed
+explain
+select count(*) from
+ (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1
+ join
+ (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2
+ on subq1.key = subq2.key
+POSTHOOK: type: QUERY
+Plan not optimized by CBO due to missing statistics. Please check log for more details.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Reducer 3
+ File Output Operator [FS_14]
+ compressed:false
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}
+ Group By Operator [OP_26]
+ | aggregations:["count(VALUE._col0)"]
+ | outputColumnNames:["_col0"]
+ | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ |<-Reducer 2 [SIMPLE_EDGE]
+ Reduce Output Operator [RS_11]
+ sort order:
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions:_col0 (type: bigint)
+ Group By Operator [GBY_10]
+ aggregations:["count()"]
+ outputColumnNames:["_col0"]
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator [MERGEJOIN_19]
+ | condition map:[{"":"Inner Join 0 to 1"}]
+ | keys:{"0":"_col0 (type: int)","1":"_col0 (type: int)"}
+ | Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ |<-Map 1 [SIMPLE_EDGE]
+ | Reduce Output Operator [RS_22]
+ | key expressions:_col0 (type: int)
+ | Map-reduce partition columns:_col0 (type: int)
+ | sort order:+
+ | Statistics:Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ | Filter Operator [FIL_21]
+ | predicate:_col0 is not null (type: boolean)
+ | Statistics:Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ | Select Operator [OP_20]
+ | outputColumnNames:["_col0"]
+ | Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ | TableScan [TS_0]
+ | alias:a
+ | Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ |<-Map 4 [SIMPLE_EDGE]
+ Reduce Output Operator [RS_25]
+ key expressions:_col0 (type: int)
+ Map-reduce partition columns:_col0 (type: int)
+ sort order:+
+ Statistics:Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator [FIL_24]
+ predicate:_col0 is not null (type: boolean)
+ Statistics:Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ Select Operator [OP_23]
+ outputColumnNames:["_col0"]
+ Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ TableScan [TS_2]
+ alias:a
+ Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+
+PREHOOK: query: select count(*) from
+ (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1
+ join
+ (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2
+ on subq1.key = subq2.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from
+ (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1
+ join
+ (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2
+ on subq1.key = subq2.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+22
+PREHOOK: query: -- One of the tables is a sub-query and the other is not.
+-- It should be converted to a sort-merge join.
+explain
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join tbl2 a on subq1.key = a.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- One of the tables is a sub-query and the other is not.
+-- It should be converted to a sort-merge join.
+explain
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join tbl2 a on subq1.key = a.key
+POSTHOOK: type: QUERY
+Plan not optimized by CBO due to missing statistics. Please check log for more details.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Reducer 2
+ File Output Operator [FS_14]
+ compressed:false
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}
+ Group By Operator [OP_21]
+ | aggregations:["count(VALUE._col0)"]
+ | outputColumnNames:["_col0"]
+ | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ |<-Map 1 [SIMPLE_EDGE]
+ Reduce Output Operator [RS_11]
+ sort order:
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions:_col0 (type: bigint)
+ Group By Operator [GBY_10]
+ aggregations:["count()"]
+ outputColumnNames:["_col0"]
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator [MERGEJOIN_19]
+ | condition map:[{"":"Inner Join 0 to 1"}]
+ | keys:{"0":"_col0 (type: int)","1":"key (type: int)"}
+ | Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ |
+ |<-Filter Operator [FIL_18]
+ | predicate:key is not null (type: boolean)
+ | Statistics:Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ | TableScan [TS_3]
+ | alias:a
+ | Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ |<-Select Operator [SEL_2]
+ outputColumnNames:["_col0"]
+ Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator [FIL_17]
+ predicate:(key < 6) (type: boolean)
+ Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ TableScan [TS_0]
+ alias:a
+ Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+
+PREHOOK: query: select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join tbl2 a on subq1.key = a.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join tbl2 a on subq1.key = a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+20
+PREHOOK: query: -- There are more than 2 inputs to the join, all of them being sub-queries.
+-- It should be converted to a sort-merge join
+explain
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on (subq1.key = subq2.key)
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq3
+ on (subq1.key = subq3.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- There are more than 2 inputs to the join, all of them being sub-queries.
+-- It should be converted to a sort-merge join
+explain
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on (subq1.key = subq2.key)
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq3
+ on (subq1.key = subq3.key)
+POSTHOOK: type: QUERY
+Plan not optimized by CBO due to missing statistics. Please check log for more details.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Reducer 2
+ File Output Operator [FS_21]
+ compressed:false
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}
+ Group By Operator [OP_34]
+ | aggregations:["count(VALUE._col0)"]
+ | outputColumnNames:["_col0"]
+ | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ |<-Map 1 [SIMPLE_EDGE]
+ Reduce Output Operator [RS_18]
+ sort order:
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions:_col0 (type: bigint)
+ Group By Operator [GBY_17]
+ aggregations:["count()"]
+ outputColumnNames:["_col0"]
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator [MERGEJOIN_31]
+ | condition map:[{"":"Inner Join 0 to 1"},{"":"Inner Join 0 to 2"}]
+ | keys:{"0":"_col0 (type: int)","1":"_col0 (type: int)","2":"_col0 (type: int)"}
+ | Statistics:Num rows: 6 Data size: 613 Basic stats: COMPLETE Column stats: NONE
+ |
+ |<-Select Operator [SEL_5]
+ | outputColumnNames:["_col0"]
+ | Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ | Filter Operator [FIL_29]
+ | predicate:(key < 6) (type: boolean)
+ | Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ | TableScan [TS_3]
+ | alias:a
+ | Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ |
+ |<-Select Operator [SEL_8]
+ | outputColumnNames:["_col0"]
+ | Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ | Filter Operator [FIL_30]
+ | predicate:(key < 6) (type: boolean)
+ | Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ | TableScan [TS_6]
+ | alias:a
+ | Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ |<-Select Operator [SEL_2]
+ outputColumnNames:["_col0"]
+ Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator [FIL_28]
+ predicate:(key < 6) (type: boolean)
+ Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ TableScan [TS_0]
+ alias:a
+ Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+
+PREHOOK: query: select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on subq1.key = subq2.key
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq3
+ on (subq1.key = subq3.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on subq1.key = subq2.key
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq3
+ on (subq1.key = subq3.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+56
+PREHOOK: query: -- The join is being performed on a nested sub-query, and an aggregation is performed after that.
+-- The join should be converted to a sort-merge join
+explain
+select count(*) from (
+ select subq2.key as key, subq2.value as value1, b.value as value2 from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+join tbl2 b
+on subq2.key = b.key) a
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The join is being performed on a nested sub-query, and an aggregation is performed after that.
+-- The join should be converted to a sort-merge join
+explain
+select count(*) from (
+ select subq2.key as key, subq2.value as value1, b.value as value2 from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+join tbl2 b
+on subq2.key = b.key) a
+POSTHOOK: type: QUERY
+Plan not optimized by CBO due to missing statistics. Please check log for more details.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Reducer 2
+ File Output Operator [FS_17]
+ compressed:false
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}
+ Group By Operator [OP_24]
+ | aggregations:["count(VALUE._col0)"]
+ | outputColumnNames:["_col0"]
+ | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ |<-Map 1 [SIMPLE_EDGE]
+ Reduce Output Operator [RS_14]
+ sort order:
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions:_col0 (type: bigint)
+ Group By Operator [GBY_13]
+ aggregations:["count()"]
+ outputColumnNames:["_col0"]
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator [MERGEJOIN_22]
+ | condition map:[{"":"Inner Join 0 to 1"}]
+ | keys:{"0":"_col0 (type: int)","1":"key (type: int)"}
+ | Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ |
+ |<-Filter Operator [FIL_21]
+ | predicate:key is not null (type: boolean)
+ | Statistics:Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ | TableScan [TS_5]
+ | alias:b
+ | Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ |<-Select Operator [SEL_2]
+ outputColumnNames:["_col0"]
+ Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator [FIL_20]
+ predicate:(((key < 8) and (key < 6)) and key is not null) (type: boolean)
+ Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
+ TableScan [TS_0]
+ alias:a
+ Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+
+PREHOOK: query: select count(*) from (
+ select subq2.key as key, subq2.value as value1, b.value as value2 from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+join tbl2 b
+on subq2.key = b.key) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (
+ select subq2.key as key, subq2.value as value1, b.value as value2 from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+join tbl2 b
+on subq2.key = b.key) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+20
+PREHOOK: query: CREATE TABLE dest1(key int, value string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest1
+POSTHOOK: query: CREATE TABLE dest1(key int, value string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest1
+PREHOOK: query: CREATE TABLE dest2(key int, val1 string, val2 string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest2
+POSTHOOK: query: CREATE TABLE dest2(key int, val1 string, val2 string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest2
+PREHOOK: query: -- The join is followed by a multi-table insert. It should be converted to
+-- a sort-merge join
+explain
+from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+insert overwrite table dest1 select key, val1
+insert overwrite table dest2 select key, val1, val2
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The join is followed by a multi-table insert. It should be converted to
+-- a sort-merge join
+explain
+from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+insert overwrite table dest1 select key, val1
+insert overwrite table dest2 select key, val1, val2
+POSTHOOK: type: QUERY
+Plan not optimized by CBO.
+
+Stage-4
+ Stats-Aggr Operator
+ Stage-0
+ Move Operator
+ table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.dest1"}
+ Stage-3
+ Dependency Collection{}
+ Stage-2
+ Map 1
+ File Output Operator [FS_9]
+ compressed:false
+ Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.dest1"}
+ Select Operator [SEL_8]
+ outputColumnNames:["_col0","_col1"]
+ Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ Select Operator [SEL_7]
+ outputColumnNames:["_col0","_col1","_col2"]
+ Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator [MERGEJOIN_16]
+ | condition map:[{"":"Inner Join 0 to 1"}]
+ | keys:{"0":"key (type: int)","1":"key (type: int)"}
+ | outputColumnNames:["_col0","_col1","_col6"]
+ | Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ |
+ |<-Filter Operator [FIL_15]
+ | predicate:key is not null (type: boolean)
+ | Statistics:Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ | TableScan [TS_1]
+ | alias:b
+ | Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ |<-Filter Operator [FIL_14]
+ predicate:key is not null (type: boolean)
+ Statistics:Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ TableScan [TS_0]
+ alias:a
+ Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator [FS_11]
+ compressed:false
+ Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.dest2"}
+ Please refer to the previous Select Operator [SEL_7]
+Stage-5
+ Stats-Aggr Operator
+ Stage-1
+ Move Operator
+ table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.dest2"}
+ Please refer to the previous Stage-3
+
+PREHOOK: query: from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+insert overwrite table dest1 select key, val1
+insert overwrite table dest2 select key, val1, val2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+PREHOOK: Output: default@dest1
+PREHOOK: Output: default@dest2
+POSTHOOK: query: from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+insert overwrite table dest1 select key, val1
+insert overwrite table dest2 select key, val1, val2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+POSTHOOK: Output: default@dest1
+POSTHOOK: Output: default@dest2
+POSTHOOK: Lineage: dest1.key SIMPLE [(tbl1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dest1.value SIMPLE [(tbl1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: dest2.key SIMPLE [(tbl1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dest2.val1 SIMPLE [(tbl1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: dest2.val2 SIMPLE [(tbl2)b.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select * from dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from dest1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+2 val_2
+4 val_4
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+8 val_8
+9 val_9
+PREHOOK: query: select * from dest2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from dest2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest2
+#### A masked pattern was here ####
+0 val_0 val_0
+0 val_0 val_0
+0 val_0 val_0
+0 val_0 val_0
+0 val_0 val_0
+0 val_0 val_0
+0 val_0 val_0
+0 val_0 val_0
+0 val_0 val_0
+2 val_2 val_2
+4 val_4 val_4
+5 val_5 val_5
+5 val_5 val_5
+5 val_5 val_5
+5 val_5 val_5
+5 val_5 val_5
+5 val_5 val_5
+5 val_5 val_5
+5 val_5 val_5
+5 val_5 val_5
+8 val_8 val_8
+9 val_9 val_9
+PREHOOK: query: DROP TABLE dest2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@dest2
+PREHOOK: Output: default@dest2
+POSTHOOK: query: DROP TABLE dest2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@dest2
+POSTHOOK: Output: default@dest2
+PREHOOK: query: CREATE TABLE dest2(key int, cnt int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest2
+POSTHOOK: query: CREATE TABLE dest2(key int, cnt int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest2
+PREHOOK: query: -- The join is followed by a multi-table insert, and one of the inserts involves a reducer.
+-- It should be converted to a sort-merge join
+explain
+from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+insert overwrite table dest1 select key, val1
+insert overwrite table dest2 select key, count(*) group by key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The join is followed by a multi-table insert, and one of the inserts involves a reducer.
+-- It should be converted to a sort-merge join
+explain
+from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+insert overwrite table dest1 select key, val1
+insert overwrite table dest2 select key, count(*) group by key
+POSTHOOK: type: QUERY
+Plan not optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-4
+ Stats-Aggr Operator
+ Stage-0
+ Move Operator
+ table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.dest1"}
+ Stage-3
+ Dependency Collection{}
+ Stage-2
+ Reducer 2
+ File Output Operator [FS_25]
+ compressed:false
+ Statistics:Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: NONE
+ table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.dest2"}
+ Select Operator [OP_24]
+ outputColumnNames:["_col0","_col1"]
+ Statistics:Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator [OP_23]
+ | aggregations:["count(VALUE._col0)"]
+ | keys:KEY._col0 (type: int)
+ | outputColumnNames:["_col0","_col1"]
+ | Statistics:Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: NONE
+ |<-Map 1 [SIMPLE_EDGE]
+ File Output Operator [FS_9]
+ compressed:false
+ Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.dest1"}
+ Merge Join Operator [MERGEJOIN_21]
+ | condition map:[{"":"Inner Join 0 to 1"}]
+ | keys:{"0":"key (type: int)","1":"key (type: int)"}
+ | outputColumnNames:["_col0","_col1"]
+ | Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ |
+ |<-Filter Operator [FIL_20]
+ | predicate:key is not null (type: boolean)
+ | Statistics:Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ | TableScan [TS_1]
+ | alias:b
+ | Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ |<-Filter Operator [FIL_19]
+ predicate:key is not null (type: boolean)
+ Statistics:Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ TableScan [TS_0]
+ alias:a
+ Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator [RS_12]
+ key expressions:_col0 (type: int)
+ Map-reduce partition columns:_col0 (type: int)
+ sort order:+
+ Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ value expressions:_col1 (type: bigint)
+ Group By Operator [GBY_11]
+ aggregations:["count()"]
+ keys:_col0 (type: int)
+ outputColumnNames:["_col0","_col1"]
+ Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ Select Operator [SEL_10]
+ outputColumnNames:["_col0"]
+ Statistics:Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ Please refer to the previous Merge Join Operator [MERGEJOIN_21]
+Stage-5
+ Stats-Aggr Operator
+ Stage-1
+ Move Operator
+ table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.dest2"}
+ Please refer to the previous Stage-3
+
+PREHOOK: query: from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+insert overwrite table dest1 select key, val1
+insert overwrite table dest2 select key, count(*) group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+PREHOOK: Output: default@dest1
+PREHOOK: Output: default@dest2
+POSTHOOK: query: from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+insert overwrite table dest1 select key, val1
+insert overwrite table dest2 select key, count(*) group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+POSTHOOK: Output: default@dest1
+POSTHOOK: Output: default@dest2
+POSTHOOK: Lineage: dest1.key SIMPLE [(tbl1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dest1.value SIMPLE [(tbl1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: dest2.cnt EXPRESSION [(tbl1)a.null, (tbl2)b.null, ]
+POSTHOOK: Lineage: dest2.key SIMPLE [(tbl1)a.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: select * from dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from dest1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+2 val_2
+4 val_4
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+8 val_8
+9 val_9
+PREHOOK: query: select * from dest2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from dest2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest2
+#### A masked pattern was here ####
+0 9
+2 1
+4 1
+5 9
+8 1
+9 1
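
The sort-merge conversions exercised above only apply when both join inputs are bucketed and sorted on the join key into the same number of buckets. A minimal session sketch, assuming tbl1/tbl2 were created with bucketed, sorted DDL (the actual setup DDL is not part of this excerpt, so the bucket count below is illustrative):

set hive.auto.convert.sortmerge.join=true;
set hive.optimize.bucketmapjoin=true;
set hive.optimize.bucketmapjoin.sortedmerge=true;
-- illustrative bucketed, sorted tables matching the plans above:
create table tbl1 (key int, value string)
  clustered by (key) sorted by (key) into 2 buckets;
create table tbl2 (key int, value string)
  clustered by (key) sorted by (key) into 2 buckets;
-- with matching bucketing and sort order, the join compiles to the
-- Merge Join Operator seen in the explain output above:
explain select count(*) from tbl1 a join tbl2 b on a.key = b.key;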
[07/25] hive git commit: HIVE-11468: Vectorize Struct IN() clauses
(Matt McCline, via Gopal V)
Posted by pr...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/7cfe3743/ql/src/test/results/clientpositive/vector_struct_in.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_struct_in.q.out b/ql/src/test/results/clientpositive/vector_struct_in.q.out
new file mode 100644
index 0000000..c78b428
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_struct_in.q.out
@@ -0,0 +1,825 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+-- 2 Strings
+create table test_1 (`id` string, `lineid` string) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_1
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+-- 2 Strings
+create table test_1 (`id` string, `lineid` string) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_1
+PREHOOK: query: insert into table test_1 values ('one','1'), ('seven','1')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@test_1
+POSTHOOK: query: insert into table test_1 values ('one','1'), ('seven','1')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@test_1
+POSTHOOK: Lineage: test_1.id SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: test_1.lineid SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: explain
+select * from test_1 where struct(`id`, `lineid`)
+IN (
+struct('two','3'),
+struct('three','1'),
+struct('one','1'),
+struct('five','2'),
+struct('six','1'),
+struct('eight','1'),
+struct('seven','1'),
+struct('nine','1'),
+struct('ten','1')
+)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select * from test_1 where struct(`id`, `lineid`)
+IN (
+struct('two','3'),
+struct('three','1'),
+struct('one','1'),
+struct('five','2'),
+struct('six','1'),
+struct('eight','1'),
+struct('seven','1'),
+struct('nine','1'),
+struct('ten','1')
+)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: test_1
+ Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (struct(id,lineid)) IN (const struct('two','3'), const struct('three','1'), const struct('one','1'), const struct('five','2'), const struct('six','1'), const struct('eight','1'), const struct('seven','1'), const struct('nine','1'), const struct('ten','1')) (type: boolean)
+ Statistics: Num rows: 1 Data size: 173 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: id (type: string), lineid (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 173 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 173 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from test_1 where struct(`id`, `lineid`)
+IN (
+struct('two','3'),
+struct('three','1'),
+struct('one','1'),
+struct('five','2'),
+struct('six','1'),
+struct('eight','1'),
+struct('seven','1'),
+struct('nine','1'),
+struct('ten','1')
+)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_1 where struct(`id`, `lineid`)
+IN (
+struct('two','3'),
+struct('three','1'),
+struct('one','1'),
+struct('five','2'),
+struct('six','1'),
+struct('eight','1'),
+struct('seven','1'),
+struct('nine','1'),
+struct('ten','1')
+)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_1
+#### A masked pattern was here ####
+one 1
+seven 1
+PREHOOK: query: explain
+select `id`, `lineid`, struct(`id`, `lineid`)
+IN (
+struct('two','3'),
+struct('three','1'),
+struct('one','1'),
+struct('five','2'),
+struct('six','1'),
+struct('eight','1'),
+struct('seven','1'),
+struct('nine','1'),
+struct('ten','1')
+) as b from test_1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select `id`, `lineid`, struct(`id`, `lineid`)
+IN (
+struct('two','3'),
+struct('three','1'),
+struct('one','1'),
+struct('five','2'),
+struct('six','1'),
+struct('eight','1'),
+struct('seven','1'),
+struct('nine','1'),
+struct('ten','1')
+) as b from test_1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: test_1
+ Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: id (type: string), lineid (type: string), (struct(id,lineid)) IN (const struct('two','3'), const struct('three','1'), const struct('one','1'), const struct('five','2'), const struct('six','1'), const struct('eight','1'), const struct('seven','1'), const struct('nine','1'), const struct('ten','1')) (type: boolean)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select `id`, `lineid`, struct(`id`, `lineid`)
+IN (
+struct('two','3'),
+struct('three','1'),
+struct('one','1'),
+struct('five','2'),
+struct('six','1'),
+struct('eight','1'),
+struct('seven','1'),
+struct('nine','1'),
+struct('ten','1')
+) as b from test_1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_1
+#### A masked pattern was here ####
+POSTHOOK: query: select `id`, `lineid`, struct(`id`, `lineid`)
+IN (
+struct('two','3'),
+struct('three','1'),
+struct('one','1'),
+struct('five','2'),
+struct('six','1'),
+struct('eight','1'),
+struct('seven','1'),
+struct('nine','1'),
+struct('ten','1')
+) as b from test_1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_1
+#### A masked pattern was here ####
+one 1 true
+seven 1 true
+PREHOOK: query: -- 2 Integers
+create table test_2 (`id` int, `lineid` int) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_2
+POSTHOOK: query: -- 2 Integers
+create table test_2 (`id` int, `lineid` int) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_2
+PREHOOK: query: insert into table test_2 values (1,1), (7,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@test_2
+POSTHOOK: query: insert into table test_2 values (1,1), (7,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@test_2
+POSTHOOK: Lineage: test_2.id EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: test_2.lineid EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: explain
+select * from test_2 where struct(`id`, `lineid`)
+IN (
+struct(2,3),
+struct(3,1),
+struct(1,1),
+struct(5,2),
+struct(6,1),
+struct(8,1),
+struct(7,1),
+struct(9,1),
+struct(10,1)
+)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select * from test_2 where struct(`id`, `lineid`)
+IN (
+struct(2,3),
+struct(3,1),
+struct(1,1),
+struct(5,2),
+struct(6,1),
+struct(8,1),
+struct(7,1),
+struct(9,1),
+struct(10,1)
+)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: test_2
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (struct(id,lineid)) IN (const struct(2,3), const struct(3,1), const struct(1,1), const struct(5,2), const struct(6,1), const struct(8,1), const struct(7,1), const struct(9,1), const struct(10,1)) (type: boolean)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: id (type: int), lineid (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from test_2 where struct(`id`, `lineid`)
+IN (
+struct(2,3),
+struct(3,1),
+struct(1,1),
+struct(5,2),
+struct(6,1),
+struct(8,1),
+struct(7,1),
+struct(9,1),
+struct(10,1)
+)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_2 where struct(`id`, `lineid`)
+IN (
+struct(2,3),
+struct(3,1),
+struct(1,1),
+struct(5,2),
+struct(6,1),
+struct(8,1),
+struct(7,1),
+struct(9,1),
+struct(10,1)
+)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_2
+#### A masked pattern was here ####
+1 1
+7 1
+PREHOOK: query: explain
+select `id`, `lineid`, struct(`id`, `lineid`)
+IN (
+struct(2,3),
+struct(3,1),
+struct(1,1),
+struct(5,2),
+struct(6,1),
+struct(8,1),
+struct(7,1),
+struct(9,1),
+struct(10,1)
+) as b from test_2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select `id`, `lineid`, struct(`id`, `lineid`)
+IN (
+struct(2,3),
+struct(3,1),
+struct(1,1),
+struct(5,2),
+struct(6,1),
+struct(8,1),
+struct(7,1),
+struct(9,1),
+struct(10,1)
+) as b from test_2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: test_2
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: id (type: int), lineid (type: int), (struct(id,lineid)) IN (const struct(2,3), const struct(3,1), const struct(1,1), const struct(5,2), const struct(6,1), const struct(8,1), const struct(7,1), const struct(9,1), const struct(10,1)) (type: boolean)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select `id`, `lineid`, struct(`id`, `lineid`)
+IN (
+struct(2,3),
+struct(3,1),
+struct(1,1),
+struct(5,2),
+struct(6,1),
+struct(8,1),
+struct(7,1),
+struct(9,1),
+struct(10,1)
+) as b from test_2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_2
+#### A masked pattern was here ####
+POSTHOOK: query: select `id`, `lineid`, struct(`id`, `lineid`)
+IN (
+struct(2,3),
+struct(3,1),
+struct(1,1),
+struct(5,2),
+struct(6,1),
+struct(8,1),
+struct(7,1),
+struct(9,1),
+struct(10,1)
+) as b from test_2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_2
+#### A masked pattern was here ####
+1 1 true
+7 1 true
+PREHOOK: query: -- 1 String and 1 Integer
+create table test_3 (`id` string, `lineid` int) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_3
+POSTHOOK: query: -- 1 String and 1 Integer
+create table test_3 (`id` string, `lineid` int) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_3
+PREHOOK: query: insert into table test_3 values ('one',1), ('seven',1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__3
+PREHOOK: Output: default@test_3
+POSTHOOK: query: insert into table test_3 values ('one',1), ('seven',1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__3
+POSTHOOK: Output: default@test_3
+POSTHOOK: Lineage: test_3.id SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: test_3.lineid EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: explain
+select * from test_3 where struct(`id`, `lineid`)
+IN (
+struct('two',3),
+struct('three',1),
+struct('one',1),
+struct('five',2),
+struct('six',1),
+struct('eight',1),
+struct('seven',1),
+struct('nine',1),
+struct('ten',1)
+)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select * from test_3 where struct(`id`, `lineid`)
+IN (
+struct('two',3),
+struct('three',1),
+struct('one',1),
+struct('five',2),
+struct('six',1),
+struct('eight',1),
+struct('seven',1),
+struct('nine',1),
+struct('ten',1)
+)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: test_3
+ Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (struct(id,lineid)) IN (const struct('two',3), const struct('three',1), const struct('one',1), const struct('five',2), const struct('six',1), const struct('eight',1), const struct('seven',1), const struct('nine',1), const struct('ten',1)) (type: boolean)
+ Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: id (type: string), lineid (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from test_3 where struct(`id`, `lineid`)
+IN (
+struct('two',3),
+struct('three',1),
+struct('one',1),
+struct('five',2),
+struct('six',1),
+struct('eight',1),
+struct('seven',1),
+struct('nine',1),
+struct('ten',1)
+)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_3
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_3 where struct(`id`, `lineid`)
+IN (
+struct('two',3),
+struct('three',1),
+struct('one',1),
+struct('five',2),
+struct('six',1),
+struct('eight',1),
+struct('seven',1),
+struct('nine',1),
+struct('ten',1)
+)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_3
+#### A masked pattern was here ####
+one 1
+seven 1
+PREHOOK: query: explain
+select `id`, `lineid`, struct(`id`, `lineid`)
+IN (
+struct('two',3),
+struct('three',1),
+struct('one',1),
+struct('five',2),
+struct('six',1),
+struct('eight',1),
+struct('seven',1),
+struct('nine',1),
+struct('ten',1)
+) as b from test_3
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select `id`, `lineid`, struct(`id`, `lineid`)
+IN (
+struct('two',3),
+struct('three',1),
+struct('one',1),
+struct('five',2),
+struct('six',1),
+struct('eight',1),
+struct('seven',1),
+struct('nine',1),
+struct('ten',1)
+) as b from test_3
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: test_3
+ Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: id (type: string), lineid (type: int), (struct(id,lineid)) IN (const struct('two',3), const struct('three',1), const struct('one',1), const struct('five',2), const struct('six',1), const struct('eight',1), const struct('seven',1), const struct('nine',1), const struct('ten',1)) (type: boolean)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select `id`, `lineid`, struct(`id`, `lineid`)
+IN (
+struct('two',3),
+struct('three',1),
+struct('one',1),
+struct('five',2),
+struct('six',1),
+struct('eight',1),
+struct('seven',1),
+struct('nine',1),
+struct('ten',1)
+) as b from test_3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_3
+#### A masked pattern was here ####
+POSTHOOK: query: select `id`, `lineid`, struct(`id`, `lineid`)
+IN (
+struct('two',3),
+struct('three',1),
+struct('one',1),
+struct('five',2),
+struct('six',1),
+struct('eight',1),
+struct('seven',1),
+struct('nine',1),
+struct('ten',1)
+) as b from test_3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_3
+#### A masked pattern was here ####
+one 1 true
+seven 1 true
+PREHOOK: query: -- 1 Integer and 1 String and 1 Double
+create table test_4 (`my_bigint` bigint, `my_string` string, `my_double` double) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_4
+POSTHOOK: query: -- 1 Integer and 1 String and 1 Double
+create table test_4 (`my_bigint` bigint, `my_string` string, `my_double` double) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_4
+PREHOOK: query: insert into table test_4 values (1, "b", 1.5), (1, "a", 0.5), (2, "b", 1.5)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__4
+PREHOOK: Output: default@test_4
+POSTHOOK: query: insert into table test_4 values (1, "b", 1.5), (1, "a", 0.5), (2, "b", 1.5)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__4
+POSTHOOK: Output: default@test_4
+POSTHOOK: Lineage: test_4.my_bigint EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: test_4.my_double EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: test_4.my_string SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: explain
+select * from test_4 where struct(`my_bigint`, `my_string`, `my_double`)
+IN (
+struct(1L, "a", 1.5),
+struct(1L, "b", -0.5),
+struct(3L, "b", 1.5),
+struct(1L, "d", 1.5),
+struct(1L, "c", 1.5),
+struct(1L, "b", 2.5),
+struct(1L, "b", 0.5),
+struct(5L, "b", 1.5),
+struct(1L, "a", 0.5),
+struct(3L, "b", 1.5)
+)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select * from test_4 where struct(`my_bigint`, `my_string`, `my_double`)
+IN (
+struct(1L, "a", 1.5),
+struct(1L, "b", -0.5),
+struct(3L, "b", 1.5),
+struct(1L, "d", 1.5),
+struct(1L, "c", 1.5),
+struct(1L, "b", 2.5),
+struct(1L, "b", 0.5),
+struct(5L, "b", 1.5),
+struct(1L, "a", 0.5),
+struct(3L, "b", 1.5)
+)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: test_4
+ Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (struct(my_bigint,my_string,my_double)) IN (const struct(1,'a',1.5), const struct(1,'b',-0.5), const struct(3,'b',1.5), const struct(1,'d',1.5), const struct(1,'c',1.5), const struct(1,'b',2.5), const struct(1,'b',0.5), const struct(5,'b',1.5), const struct(1,'a',0.5), const struct(3,'b',1.5)) (type: boolean)
+ Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: my_bigint (type: bigint), my_string (type: string), my_double (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from test_4 where struct(`my_bigint`, `my_string`, `my_double`)
+IN (
+struct(1L, "a", 1.5),
+struct(1L, "b", -0.5),
+struct(3L, "b", 1.5),
+struct(1L, "d", 1.5),
+struct(1L, "c", 1.5),
+struct(1L, "b", 2.5),
+struct(1L, "b", 0.5),
+struct(5L, "b", 1.5),
+struct(1L, "a", 0.5),
+struct(3L, "b", 1.5)
+)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_4
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_4 where struct(`my_bigint`, `my_string`, `my_double`)
+IN (
+struct(1L, "a", 1.5),
+struct(1L, "b", -0.5),
+struct(3L, "b", 1.5),
+struct(1L, "d", 1.5),
+struct(1L, "c", 1.5),
+struct(1L, "b", 2.5),
+struct(1L, "b", 0.5),
+struct(5L, "b", 1.5),
+struct(1L, "a", 0.5),
+struct(3L, "b", 1.5)
+)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_4
+#### A masked pattern was here ####
+1 a 0.5
+PREHOOK: query: explain
+select `my_bigint`, `my_string`, `my_double`, struct(`my_bigint`, `my_string`, `my_double`)
+IN (
+struct(1L, "a", 1.5),
+struct(1L, "b", -0.5),
+struct(3L, "b", 1.5),
+struct(1L, "d", 1.5),
+struct(1L, "c", 1.5),
+struct(1L, "b", 2.5),
+struct(1L, "b", 0.5),
+struct(5L, "b", 1.5),
+struct(1L, "a", 0.5),
+struct(3L, "b", 1.5)
+) as b from test_4
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select `my_bigint`, `my_string`, `my_double`, struct(`my_bigint`, `my_string`, `my_double`)
+IN (
+struct(1L, "a", 1.5),
+struct(1L, "b", -0.5),
+struct(3L, "b", 1.5),
+struct(1L, "d", 1.5),
+struct(1L, "c", 1.5),
+struct(1L, "b", 2.5),
+struct(1L, "b", 0.5),
+struct(5L, "b", 1.5),
+struct(1L, "a", 0.5),
+struct(3L, "b", 1.5)
+) as b from test_4
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: test_4
+ Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: my_bigint (type: bigint), my_string (type: string), my_double (type: double), (struct(my_bigint,my_string,my_double)) IN (const struct(1,'a',1.5), const struct(1,'b',-0.5), const struct(3,'b',1.5), const struct(1,'d',1.5), const struct(1,'c',1.5), const struct(1,'b',2.5), const struct(1,'b',0.5), const struct(5,'b',1.5), const struct(1,'a',0.5), const struct(3,'b',1.5)) (type: boolean)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select `my_bigint`, `my_string`, `my_double`, struct(`my_bigint`, `my_string`, `my_double`)
+IN (
+struct(1L, "a", 1.5),
+struct(1L, "b", -0.5),
+struct(3L, "b", 1.5),
+struct(1L, "d", 1.5),
+struct(1L, "c", 1.5),
+struct(1L, "b", 2.5),
+struct(1L, "b", 0.5),
+struct(5L, "b", 1.5),
+struct(1L, "a", 0.5),
+struct(3L, "b", 1.5)
+) as b from test_4
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_4
+#### A masked pattern was here ####
+POSTHOOK: query: select `my_bigint`, `my_string`, `my_double`, struct(`my_bigint`, `my_string`, `my_double`)
+IN (
+struct(1L, "a", 1.5),
+struct(1L, "b", -0.5),
+struct(3L, "b", 1.5),
+struct(1L, "d", 1.5),
+struct(1L, "c", 1.5),
+struct(1L, "b", 2.5),
+struct(1L, "b", 0.5),
+struct(5L, "b", 1.5),
+struct(1L, "a", 0.5),
+struct(3L, "b", 1.5)
+) as b from test_4
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_4
+#### A masked pattern was here ####
+1 a 0.5 true
+1 b 1.5 false
+2 b 1.5 false
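
Every plan in the struct IN() results above reports "Execution mode: vectorized" in the Map stage, which is the signal that the new vectorized code path was taken. A hedged sketch of how to reproduce that check, reusing the test_2 table from above:

set hive.vectorized.execution.enabled=true;
-- if vectorization applies, the Map stage of the explain output reports
-- "Execution mode: vectorized"; before this patch, struct IN() fell back
-- to the non-vectorized path:
explain select * from test_2
where struct(`id`, `lineid`) in (struct(1,1), struct(7,1));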
[17/25] hive git commit: HIVE-10785 : Support aggregate push down
through joins (Ashutosh Chauhan via Jesus Camacho Rodriguez)
Posted by pr...@apache.org.
HIVE-10785 : Support aggregate push down through joins (Ashutosh Chauhan via Jesus Camacho Rodriguez)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/68d6cfda
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/68d6cfda
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/68d6cfda
Branch: refs/heads/llap
Commit: 68d6cfda78b3ec6b42cf0d42df62aa1f2716d414
Parents: 1528135
Author: Ashutosh Chauhan <ha...@apache.org>
Authored: Thu Sep 17 21:49:00 2015 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Thu Sep 24 13:58:50 2015 -0700
----------------------------------------------------------------------
.../org/apache/hadoop/hive/conf/HiveConf.java | 2 +-
.../hadoop/hive/ql/exec/FunctionRegistry.java | 3 +-
.../functions/HiveSqlCountAggFunction.java | 72 +
.../functions/HiveSqlMinMaxAggFunction.java | 49 +
.../functions/HiveSqlSumAggFunction.java | 125 ++
.../rules/HiveAggregateJoinTransposeRule.java | 372 +++++
.../translator/SqlFunctionConverter.java | 40 +-
.../hadoop/hive/ql/parse/CalcitePlanner.java | 5 +
.../hive/ql/udf/generic/GenericUDAFSum.java | 2 +-
.../udf/generic/GenericUDAFSumEmptyIsZero.java | 63 +
.../clientpositive/groupby_join_pushdown.q | 55 +
.../clientpositive/groupby_join_pushdown.q.out | 1522 ++++++++++++++++++
.../results/clientpositive/show_functions.q.out | 1 +
13 files changed, 2297 insertions(+), 14 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/68d6cfda/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index f3e2168..dffdb5c 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -814,7 +814,7 @@ public class HiveConf extends Configuration {
+ " expressed as multiple of Local FS write cost"),
HIVE_CBO_COST_MODEL_HDFS_READ("hive.cbo.costmodel.hdfs.read", "1.5", "Default cost of reading a byte from HDFS;"
+ " expressed as multiple of Local FS read cost"),
-
+ AGGR_JOIN_TRANSPOSE("hive.transpose.aggr.join", false, "push aggregates through join"),
// hive.mapjoin.bucket.cache.size has been replaced by hive.smbjoin.cache.row,
// need to remove by hive .13. Also, do not change default (see SMB operator)
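
The new hive.transpose.aggr.join flag defaults to false, so aggregate push-down through joins is opt-in. A hedged usage sketch (table and column names are illustrative, not from this patch):

set hive.transpose.aggr.join=true;
-- with the flag on, the optimizer may aggregate each join input first and
-- combine the partial results, instead of aggregating the joined rows:
explain
select t1.key, count(*)
from tbl1 t1 join tbl2 t2 on t1.key = t2.key
group by t1.key;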
http://git-wip-us.apache.org/repos/asf/hive/blob/68d6cfda/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index f1fe30d..218b2df 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -370,6 +370,7 @@ public final class FunctionRegistry {
system.registerGenericUDAF("min", new GenericUDAFMin());
system.registerGenericUDAF("sum", new GenericUDAFSum());
+ system.registerGenericUDAF("$SUM0", new GenericUDAFSumEmptyIsZero());
system.registerGenericUDAF("count", new GenericUDAFCount());
system.registerGenericUDAF("avg", new GenericUDAFAverage());
system.registerGenericUDAF("std", new GenericUDAFStd());
@@ -960,7 +961,7 @@ public final class FunctionRegistry {
GenericUDAFParameterInfo paramInfo =
new SimpleGenericUDAFParameterInfo(
args, isDistinct, isAllColumns);
-
+
GenericUDAFEvaluator udafEvaluator;
if (udafResolver instanceof GenericUDAFResolver2) {
udafEvaluator =
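
The $SUM0 registration above exists to recombine partial aggregates once they have been pushed below a join: unlike sum(), it yields 0 rather than NULL over empty input (hence GenericUDAFSumEmptyIsZero). A hedged sketch of the algebraic rewrite the rule targets, with illustrative table names:

-- per join key, count(*) over an inner join equals the product of the
-- per-side counts, so each side can be pre-aggregated before the join:
select a.key, a.c * b.c as cnt
from (select key, count(*) as c from tbl1 group by key) a
join (select key, count(*) as c from tbl2 group by key) b
on a.key = b.key;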
http://git-wip-us.apache.org/repos/asf/hive/blob/68d6cfda/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlCountAggFunction.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlCountAggFunction.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlCountAggFunction.java
new file mode 100644
index 0000000..7937040
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlCountAggFunction.java
@@ -0,0 +1,72 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.functions;
+
+import org.apache.calcite.rel.core.AggregateCall;
+import org.apache.calcite.rel.type.RelDataTypeFactory;
+import org.apache.calcite.sql.SqlAggFunction;
+import org.apache.calcite.sql.SqlFunctionCategory;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.SqlSplittableAggFunction;
+import org.apache.calcite.sql.SqlSplittableAggFunction.CountSplitter;
+import org.apache.calcite.sql.type.SqlOperandTypeChecker;
+import org.apache.calcite.sql.type.SqlOperandTypeInference;
+import org.apache.calcite.sql.type.SqlReturnTypeInference;
+import org.apache.calcite.sql.type.SqlTypeName;
+import org.apache.calcite.util.ImmutableIntList;
+
+public class HiveSqlCountAggFunction extends SqlAggFunction {
+
+ final SqlReturnTypeInference returnTypeInference;
+ final SqlOperandTypeInference operandTypeInference;
+ final SqlOperandTypeChecker operandTypeChecker;
+
+ public HiveSqlCountAggFunction(SqlReturnTypeInference returnTypeInference,
+ SqlOperandTypeInference operandTypeInference, SqlOperandTypeChecker operandTypeChecker) {
+ super(
+ "count",
+ SqlKind.OTHER_FUNCTION,
+ returnTypeInference,
+ operandTypeInference,
+ operandTypeChecker,
+ SqlFunctionCategory.NUMERIC);
+ this.returnTypeInference = returnTypeInference;
+ this.operandTypeChecker = operandTypeChecker;
+ this.operandTypeInference = operandTypeInference;
+ }
+
+ @Override
+ public <T> T unwrap(Class<T> clazz) {
+ if (clazz == SqlSplittableAggFunction.class) {
+ return clazz.cast(new HiveCountSplitter());
+ }
+ return super.unwrap(clazz);
+ }
+
+ class HiveCountSplitter extends CountSplitter {
+
+ @Override
+ public AggregateCall other(RelDataTypeFactory typeFactory, AggregateCall e) {
+
+ return AggregateCall.create(
+ new HiveSqlCountAggFunction(returnTypeInference, operandTypeInference, operandTypeChecker),
+ false, ImmutableIntList.of(), -1,
+ typeFactory.createTypeWithNullability(typeFactory.createSqlType(SqlTypeName.BIGINT), true), "count");
+ }
+ }
+}
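
The unwrap override above is the hook that HiveAggregateJoinTransposeRule (added below) keys on. A minimal sketch of the lookup the rule performs, assuming an AggregateCall named aggregateCall is in scope:

    SqlAggFunction agg = aggregateCall.getAggregation();
    SqlSplittableAggFunction splitter = agg.unwrap(SqlSplittableAggFunction.class);
    if (splitter == null) {
      return; // this aggregate cannot be split across a join, so the rule bails out
    }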
http://git-wip-us.apache.org/repos/asf/hive/blob/68d6cfda/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlMinMaxAggFunction.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlMinMaxAggFunction.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlMinMaxAggFunction.java
new file mode 100644
index 0000000..77dca1f
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlMinMaxAggFunction.java
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.functions;
+
+import org.apache.calcite.sql.SqlAggFunction;
+import org.apache.calcite.sql.SqlFunctionCategory;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.SqlSplittableAggFunction;
+import org.apache.calcite.sql.SqlSplittableAggFunction.SelfSplitter;
+import org.apache.calcite.sql.type.SqlOperandTypeChecker;
+import org.apache.calcite.sql.type.SqlOperandTypeInference;
+import org.apache.calcite.sql.type.SqlReturnTypeInference;
+
+public class HiveSqlMinMaxAggFunction extends SqlAggFunction {
+
+ public HiveSqlMinMaxAggFunction(SqlReturnTypeInference returnTypeInference,
+ SqlOperandTypeInference operandTypeInference, SqlOperandTypeChecker operandTypeChecker, boolean isMin) {
+ super(
+ isMin ? "min" : "max",
+ SqlKind.OTHER_FUNCTION,
+ returnTypeInference,
+ operandTypeInference,
+ operandTypeChecker,
+ SqlFunctionCategory.NUMERIC);
+ }
+
+ @Override
+ public <T> T unwrap(Class<T> clazz) {
+ if (clazz == SqlSplittableAggFunction.class) {
+ return clazz.cast(SelfSplitter.INSTANCE);
+ }
+ return super.unwrap(clazz);
+ }
+}
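
MIN and MAX can reuse Calcite's stock SelfSplitter because each function recombines with itself: the minimum of per-side minimums is the minimum over all rows. A self-contained illustration with hypothetical numbers:

    int[] left = {5, 3, 9};
    int[] right = {7, 2};
    int minLeft = java.util.Arrays.stream(left).min().getAsInt();   // 3
    int minRight = java.util.Arrays.stream(right).min().getAsInt(); // 2
    // min(min(left), min(right)) equals min over the union of both sides.
    int overall = Math.min(minLeft, minRight);                      // 2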
http://git-wip-us.apache.org/repos/asf/hive/blob/68d6cfda/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlSumAggFunction.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlSumAggFunction.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlSumAggFunction.java
new file mode 100644
index 0000000..8f62970
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlSumAggFunction.java
@@ -0,0 +1,125 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.functions;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.calcite.rel.core.AggregateCall;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeFactory;
+import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.sql.SqlAggFunction;
+import org.apache.calcite.sql.SqlFunctionCategory;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.SqlSplittableAggFunction;
+import org.apache.calcite.sql.SqlSplittableAggFunction.SumSplitter;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.calcite.sql.type.ReturnTypes;
+import org.apache.calcite.sql.type.SqlOperandTypeChecker;
+import org.apache.calcite.sql.type.SqlOperandTypeInference;
+import org.apache.calcite.sql.type.SqlReturnTypeInference;
+import org.apache.calcite.sql.type.SqlTypeName;
+import org.apache.calcite.util.ImmutableIntList;
+
+import com.google.common.collect.ImmutableList;
+
+/**
+ * <code>Sum</code> is an aggregator which returns the sum of the values which
+ * go into it. It has precisely one argument of numeric type (<code>int</code>,
+ * <code>long</code>, <code>float</code>, <code>double</code>); the result type
+ * is derived from the return-type inference supplied to the constructor.
+ */
+public class HiveSqlSumAggFunction extends SqlAggFunction {
+
+ final SqlReturnTypeInference returnTypeInference;
+ final SqlOperandTypeInference operandTypeInference;
+ final SqlOperandTypeChecker operandTypeChecker;
+
+ //~ Constructors -----------------------------------------------------------
+
+ public HiveSqlSumAggFunction(SqlReturnTypeInference returnTypeInference,
+ SqlOperandTypeInference operandTypeInference, SqlOperandTypeChecker operandTypeChecker) {
+ super(
+ "sum",
+ SqlKind.OTHER_FUNCTION,
+ returnTypeInference,
+ operandTypeInference,
+ operandTypeChecker,
+ SqlFunctionCategory.NUMERIC);
+ this.returnTypeInference = returnTypeInference;
+ this.operandTypeChecker = operandTypeChecker;
+ this.operandTypeInference = operandTypeInference;
+ }
+
+ //~ Methods ----------------------------------------------------------------
+
+
+ @Override
+ public <T> T unwrap(Class<T> clazz) {
+ if (clazz == SqlSplittableAggFunction.class) {
+ return clazz.cast(new HiveSumSplitter());
+ }
+ return super.unwrap(clazz);
+ }
+
+ class HiveSumSplitter extends SumSplitter {
+
+ @Override
+ public AggregateCall other(RelDataTypeFactory typeFactory, AggregateCall e) {
+ RelDataType countRetType = typeFactory.createTypeWithNullability(typeFactory.createSqlType(SqlTypeName.BIGINT), true);
+ return AggregateCall.create(
+ new HiveSqlCountAggFunction(ReturnTypes.explicit(countRetType), operandTypeInference, operandTypeChecker),
+ false, ImmutableIntList.of(), -1, countRetType, "count");
+ }
+
+ @Override
+ public AggregateCall topSplit(RexBuilder rexBuilder,
+ Registry<RexNode> extra, int offset, RelDataType inputRowType,
+ AggregateCall aggregateCall, int leftSubTotal, int rightSubTotal) {
+ final List<RexNode> merges = new ArrayList<>();
+ final List<RelDataTypeField> fieldList = inputRowType.getFieldList();
+ if (leftSubTotal >= 0) {
+ final RelDataType type = fieldList.get(leftSubTotal).getType();
+ merges.add(rexBuilder.makeInputRef(type, leftSubTotal));
+ }
+ if (rightSubTotal >= 0) {
+ final RelDataType type = fieldList.get(rightSubTotal).getType();
+ merges.add(rexBuilder.makeInputRef(type, rightSubTotal));
+ }
+ RexNode node;
+ switch (merges.size()) {
+ case 1:
+ node = merges.get(0);
+ break;
+ case 2:
+ node = rexBuilder.makeCall(SqlStdOperatorTable.MULTIPLY, merges);
+ node = rexBuilder.makeAbstractCast(aggregateCall.type, node);
+ break;
+ default:
+ throw new AssertionError("unexpected count " + merges);
+ }
+ int ordinal = extra.register(node);
+ return AggregateCall.create(new HiveSqlSumAggFunction(returnTypeInference, operandTypeInference, operandTypeChecker),
+ false, ImmutableList.of(ordinal), -1, aggregateCall.type, aggregateCall.name);
+ }
+ }
+}
+
+// End HiveSqlSumAggFunction.java
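
The MULTIPLY branch in topSplit encodes the join arithmetic: when SUM(x) is pushed to one input, each of that side's partial sums is replicated once per matching row on the other side, so the top aggregate scales the sub-total by the other side's row count (the COUNT produced by other() above). Worked numbers, purely illustrative:

    long leftPartialSum = 10 + 20; // SUM(x) over the two left rows sharing a join key
    long rightRowCount  = 3;       // COUNT(*) over the matching right rows
    // SUM(x) over the 2 * 3 joined rows: each left value appears three times,
    // which is exactly the MULTIPLY node built above.
    long joinedSum = leftPartialSum * rightRowCount; // 90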
http://git-wip-us.apache.org/repos/asf/hive/blob/68d6cfda/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java
new file mode 100644
index 0000000..211b6fa
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java
@@ -0,0 +1,372 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.linq4j.Ord;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Aggregate;
+import org.apache.calcite.rel.core.AggregateCall;
+import org.apache.calcite.rel.core.Join;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.core.RelFactories;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rel.rules.AggregateJoinTransposeRule;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.sql.SqlAggFunction;
+import org.apache.calcite.sql.SqlSplittableAggFunction;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.mapping.Mapping;
+import org.apache.calcite.util.mapping.Mappings;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
+
+import com.google.common.base.Function;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
+
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+/**
+ * Planner rule that pushes an
+ * {@link org.apache.calcite.rel.core.Aggregate}
+ * past a {@link org.apache.calcite.rel.core.Join}.
+ */
+public class HiveAggregateJoinTransposeRule extends AggregateJoinTransposeRule {
+
+ /** Extended instance of the rule that can push down aggregate functions. */
+ public static final HiveAggregateJoinTransposeRule INSTANCE =
+ new HiveAggregateJoinTransposeRule(HiveAggregate.class, HiveAggregate.HIVE_AGGR_REL_FACTORY,
+ HiveJoin.class, HiveJoin.HIVE_JOIN_FACTORY, HiveProject.DEFAULT_PROJECT_FACTORY, true);
+
+ private final RelFactories.AggregateFactory aggregateFactory;
+
+ private final RelFactories.JoinFactory joinFactory;
+
+ private final RelFactories.ProjectFactory projectFactory;
+
+ private final boolean allowFunctions;
+
+ /** Creates a HiveAggregateJoinTransposeRule that may push down aggregate functions. */
+ private HiveAggregateJoinTransposeRule(Class<? extends Aggregate> aggregateClass,
+ RelFactories.AggregateFactory aggregateFactory,
+ Class<? extends Join> joinClass,
+ RelFactories.JoinFactory joinFactory,
+ RelFactories.ProjectFactory projectFactory,
+ boolean allowFunctions) {
+ super(aggregateClass, aggregateFactory, joinClass, joinFactory, projectFactory, true);
+ this.aggregateFactory = aggregateFactory;
+ this.joinFactory = joinFactory;
+ this.projectFactory = projectFactory;
+ this.allowFunctions = allowFunctions;
+ }
+
+ @Override
+ public void onMatch(RelOptRuleCall call) {
+ final Aggregate aggregate = call.rel(0);
+ final Join join = call.rel(1);
+ final RexBuilder rexBuilder = aggregate.getCluster().getRexBuilder();
+
+ // If any aggregate functions do not support splitting, bail out
+ // If any aggregate call has a filter, bail out
+ for (AggregateCall aggregateCall : aggregate.getAggCallList()) {
+ if (aggregateCall.getAggregation().unwrap(SqlSplittableAggFunction.class)
+ == null) {
+ return;
+ }
+ if (aggregateCall.filterArg >= 0) {
+ return;
+ }
+ }
+
+ // If it is not an inner join, we do not push the
+ // aggregate operator
+ if (join.getJoinType() != JoinRelType.INNER) {
+ return;
+ }
+
+ if (!allowFunctions && !aggregate.getAggCallList().isEmpty()) {
+ return;
+ }
+
+ // Do the columns used by the join appear in the output of the aggregate?
+ final ImmutableBitSet aggregateColumns = aggregate.getGroupSet();
+ final ImmutableBitSet keyColumns = keyColumns(aggregateColumns,
+ RelMetadataQuery.getPulledUpPredicates(join).pulledUpPredicates);
+ final ImmutableBitSet joinColumns =
+ RelOptUtil.InputFinder.bits(join.getCondition());
+ final boolean allColumnsInAggregate =
+ keyColumns.contains(joinColumns);
+ final ImmutableBitSet belowAggregateColumns =
+ aggregateColumns.union(joinColumns);
+
+ // Split join condition
+ final List<Integer> leftKeys = Lists.newArrayList();
+ final List<Integer> rightKeys = Lists.newArrayList();
+ RexNode nonEquiConj =
+ RelOptUtil.splitJoinCondition(join.getLeft(), join.getRight(),
+ join.getCondition(), leftKeys, rightKeys);
+ // If it contains non-equi join conditions, we bail out
+ if (!nonEquiConj.isAlwaysTrue()) {
+ return;
+ }
+
+ // Push each aggregate function down to each side that contains all of its
+ // arguments. Note that COUNT(*), because it has no arguments, can go to
+ // both sides.
+ final Map<Integer, Integer> map = new HashMap<>();
+ final List<Side> sides = new ArrayList<>();
+ int uniqueCount = 0;
+ int offset = 0;
+ int belowOffset = 0;
+ for (int s = 0; s < 2; s++) {
+ final Side side = new Side();
+ final RelNode joinInput = join.getInput(s);
+ int fieldCount = joinInput.getRowType().getFieldCount();
+ final ImmutableBitSet fieldSet =
+ ImmutableBitSet.range(offset, offset + fieldCount);
+ final ImmutableBitSet belowAggregateKeyNotShifted =
+ belowAggregateColumns.intersect(fieldSet);
+ for (Ord<Integer> c : Ord.zip(belowAggregateKeyNotShifted)) {
+ map.put(c.e, belowOffset + c.i);
+ }
+ final ImmutableBitSet belowAggregateKey =
+ belowAggregateKeyNotShifted.shift(-offset);
+ final boolean unique;
+ if (!allowFunctions) {
+ assert aggregate.getAggCallList().isEmpty();
+ // If there are no functions, it doesn't matter as much whether we
+ // aggregate the inputs before the join, because there will not be
+ // any functions experiencing a cartesian product effect.
+ //
+ // But finding out whether the input is already unique requires a call
+ // to areColumnsUnique that currently (until [CALCITE-794] "Detect
+ // cycles when computing statistics" is fixed) places a heavy load on
+ // the metadata system.
+ //
+ // So we choose to imagine that the input is already unique, which is
+ // untrue but harmless.
+ //
+ unique = true;
+ } else {
+ final Boolean unique0 =
+ RelMetadataQuery.areColumnsUnique(joinInput, belowAggregateKey);
+ unique = unique0 != null && unique0;
+ }
+ if (unique) {
+ ++uniqueCount;
+ side.newInput = joinInput;
+ } else {
+ List<AggregateCall> belowAggCalls = new ArrayList<>();
+ final SqlSplittableAggFunction.Registry<AggregateCall>
+ belowAggCallRegistry = registry(belowAggCalls);
+ final Mappings.TargetMapping mapping =
+ s == 0
+ ? Mappings.createIdentity(fieldCount)
+ : Mappings.createShiftMapping(fieldCount + offset, 0, offset,
+ fieldCount);
+ for (Ord<AggregateCall> aggCall : Ord.zip(aggregate.getAggCallList())) {
+ final SqlAggFunction aggregation = aggCall.e.getAggregation();
+ final SqlSplittableAggFunction splitter =
+ Preconditions.checkNotNull(
+ aggregation.unwrap(SqlSplittableAggFunction.class));
+ final AggregateCall call1;
+ if (fieldSet.contains(ImmutableBitSet.of(aggCall.e.getArgList()))) {
+ call1 = splitter.split(aggCall.e, mapping);
+ } else {
+ call1 = splitter.other(rexBuilder.getTypeFactory(), aggCall.e);
+ }
+ if (call1 != null) {
+ side.split.put(aggCall.i,
+ belowAggregateKey.cardinality()
+ + belowAggCallRegistry.register(call1));
+ }
+ }
+ side.newInput = aggregateFactory.createAggregate(joinInput, false,
+ belowAggregateKey, null, belowAggCalls);
+ }
+ offset += fieldCount;
+ belowOffset += side.newInput.getRowType().getFieldCount();
+ sides.add(side);
+ }
+
+ if (uniqueCount == 2) {
+ // Both inputs to the join are unique. There is nothing to be gained by
+ // this rule. In fact, this aggregate+join may be the result of a previous
+ // invocation of this rule; if we continue we might loop forever.
+ return;
+ }
+
+ // Update condition
+ final Mapping mapping = (Mapping) Mappings.target(
+ new Function<Integer, Integer>() {
+ @Override
+ public Integer apply(Integer a0) {
+ return map.get(a0);
+ }
+ },
+ join.getRowType().getFieldCount(),
+ belowOffset);
+ final RexNode newCondition =
+ RexUtil.apply(mapping, join.getCondition());
+
+ // Create new join
+ RelNode newJoin = joinFactory.createJoin(sides.get(0).newInput,
+ sides.get(1).newInput, newCondition, join.getJoinType(),
+ join.getVariablesStopped(), join.isSemiJoinDone());
+
+ // Aggregate above to sum up the sub-totals
+ final List<AggregateCall> newAggCalls = new ArrayList<>();
+ final int groupIndicatorCount =
+ aggregate.getGroupCount() + aggregate.getIndicatorCount();
+ final int newLeftWidth = sides.get(0).newInput.getRowType().getFieldCount();
+ final List<RexNode> projects =
+ new ArrayList<>(rexBuilder.identityProjects(newJoin.getRowType()));
+ for (Ord<AggregateCall> aggCall : Ord.zip(aggregate.getAggCallList())) {
+ final SqlAggFunction aggregation = aggCall.e.getAggregation();
+ final SqlSplittableAggFunction splitter =
+ Preconditions.checkNotNull(
+ aggregation.unwrap(SqlSplittableAggFunction.class));
+ final Integer leftSubTotal = sides.get(0).split.get(aggCall.i);
+ final Integer rightSubTotal = sides.get(1).split.get(aggCall.i);
+ newAggCalls.add(
+ splitter.topSplit(rexBuilder, registry(projects),
+ groupIndicatorCount, newJoin.getRowType(), aggCall.e,
+ leftSubTotal == null ? -1 : leftSubTotal,
+ rightSubTotal == null ? -1 : rightSubTotal + newLeftWidth));
+ }
+ RelNode r = newJoin;
+ b:
+ if (allColumnsInAggregate && newAggCalls.isEmpty() &&
+ RelOptUtil.areRowTypesEqual(r.getRowType(), aggregate.getRowType(), false)) {
+ // no need to aggregate
+ } else {
+ r = RelOptUtil.createProject(r, projects, null, true, projectFactory);
+ if (allColumnsInAggregate) {
+ // let's see if we can convert
+ List<RexNode> projects2 = new ArrayList<>();
+ for (int key : Mappings.apply(mapping, aggregate.getGroupSet())) {
+ projects2.add(rexBuilder.makeInputRef(r, key));
+ }
+ for (AggregateCall newAggCall : newAggCalls) {
+ final SqlSplittableAggFunction splitter =
+ newAggCall.getAggregation()
+ .unwrap(SqlSplittableAggFunction.class);
+ if (splitter != null) {
+ projects2.add(
+ splitter.singleton(rexBuilder, r.getRowType(), newAggCall));
+ }
+ }
+ if (projects2.size()
+ == aggregate.getGroupSet().cardinality() + newAggCalls.size()) {
+ // We successfully converted agg calls into projects.
+ r = RelOptUtil.createProject(r, projects2, null, true, projectFactory);
+ break b;
+ }
+ }
+ r = aggregateFactory.createAggregate(r, aggregate.indicator,
+ Mappings.apply(mapping, aggregate.getGroupSet()),
+ Mappings.apply2(mapping, aggregate.getGroupSets()), newAggCalls);
+ }
+ call.transformTo(r);
+ }
+
+ /** Computes the closure of a set of columns according to a given list of
+ * constraints. Each 'x = y' constraint causes bit y to be set if bit x is
+ * set, and vice versa. */
+ private static ImmutableBitSet keyColumns(ImmutableBitSet aggregateColumns,
+ ImmutableList<RexNode> predicates) {
+ SortedMap<Integer, BitSet> equivalence = new TreeMap<>();
+ for (RexNode pred : predicates) {
+ populateEquivalences(equivalence, pred);
+ }
+ ImmutableBitSet keyColumns = aggregateColumns;
+ for (Integer aggregateColumn : aggregateColumns) {
+ final BitSet bitSet = equivalence.get(aggregateColumn);
+ if (bitSet != null) {
+ keyColumns = keyColumns.union(bitSet);
+ }
+ }
+ return keyColumns;
+ }
+
+ private static void populateEquivalences(Map<Integer, BitSet> equivalence,
+ RexNode predicate) {
+ switch (predicate.getKind()) {
+ case EQUALS:
+ RexCall call = (RexCall) predicate;
+ final List<RexNode> operands = call.getOperands();
+ if (operands.get(0) instanceof RexInputRef) {
+ final RexInputRef ref0 = (RexInputRef) operands.get(0);
+ if (operands.get(1) instanceof RexInputRef) {
+ final RexInputRef ref1 = (RexInputRef) operands.get(1);
+ populateEquivalence(equivalence, ref0.getIndex(), ref1.getIndex());
+ populateEquivalence(equivalence, ref1.getIndex(), ref0.getIndex());
+ }
+ }
+ }
+ }
+
+ private static void populateEquivalence(Map<Integer, BitSet> equivalence,
+ int i0, int i1) {
+ BitSet bitSet = equivalence.get(i0);
+ if (bitSet == null) {
+ bitSet = new BitSet();
+ equivalence.put(i0, bitSet);
+ }
+ bitSet.set(i1);
+ }
+
+ /** Creates a {@link org.apache.calcite.sql.SqlSplittableAggFunction.Registry}
+ * that is a view of a list. */
+ private static <E> SqlSplittableAggFunction.Registry<E>
+ registry(final List<E> list) {
+ return new SqlSplittableAggFunction.Registry<E>() {
+ @Override
+ public int register(E e) {
+ int i = list.indexOf(e);
+ if (i < 0) {
+ i = list.size();
+ list.add(e);
+ }
+ return i;
+ }
+ };
+ }
+
+ /** Work space for an input to a join. */
+ private static class Side {
+ final Map<Integer, Integer> split = new HashMap<>();
+ RelNode newInput;
+ }
+}
+
+// End HiveAggregateJoinTransposeRule.java
+
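
The private registry(...) helper above presents a list as a register-with-dedup view, so equal expressions share one projection slot. A standalone sketch of the same contract, with the logic inlined:

    java.util.List<String> exprs = new java.util.ArrayList<>();
    java.util.function.ToIntFunction<String> register = e -> {
      int i = exprs.indexOf(e);
      if (i < 0) { i = exprs.size(); exprs.add(e); }
      return i;
    };
    int a = register.applyAsInt("sum_x"); // 0, newly added
    int b = register.applyAsInt("cnt");   // 1, newly added
    int c = register.applyAsInt("sum_x"); // 0 again; duplicates are shared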
http://git-wip-us.apache.org/repos/asf/hive/blob/68d6cfda/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
index fd78824..d59c6bb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
@@ -45,6 +45,9 @@ import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature;
+import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlCountAggFunction;
+import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlMinMaxAggFunction;
+import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlSumAggFunction;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveBetween;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIn;
import org.apache.hadoop.hive.ql.parse.ASTNode;
@@ -310,6 +313,7 @@ public class SqlFunctionConverter {
registerFunction("in", HiveIn.INSTANCE, hToken(HiveParser.Identifier, "in"));
registerFunction("between", HiveBetween.INSTANCE, hToken(HiveParser.Identifier, "between"));
registerFunction("struct", SqlStdOperatorTable.ROW, hToken(HiveParser.Identifier, "struct"));
+
}
private void registerFunction(String name, SqlOperator calciteFn, HiveToken hiveToken) {
@@ -339,8 +343,7 @@ public class SqlFunctionConverter {
// UDAF is assumed to be deterministic
public static class CalciteUDAF extends SqlAggFunction {
public CalciteUDAF(String opName, SqlReturnTypeInference returnTypeInference,
- SqlOperandTypeInference operandTypeInference, SqlOperandTypeChecker operandTypeChecker,
- ImmutableList<RelDataType> argTypes, RelDataType retType) {
+ SqlOperandTypeInference operandTypeInference, SqlOperandTypeChecker operandTypeChecker) {
super(opName, SqlKind.OTHER_FUNCTION, returnTypeInference, operandTypeInference,
operandTypeChecker, SqlFunctionCategory.USER_DEFINED_FUNCTION);
}
@@ -367,8 +370,6 @@ public class SqlFunctionConverter {
private SqlReturnTypeInference returnTypeInference;
private SqlOperandTypeInference operandTypeInference;
private SqlOperandTypeChecker operandTypeChecker;
- private ImmutableList<RelDataType> argTypes;
- private RelDataType retType;
}
private static CalciteUDFInfo getUDFInfo(String hiveUdfName,
@@ -382,10 +383,6 @@ public class SqlFunctionConverter {
typeFamilyBuilder.add(Util.first(at.getSqlTypeName().getFamily(), SqlTypeFamily.ANY));
}
udfInfo.operandTypeChecker = OperandTypes.family(typeFamilyBuilder.build());
-
- udfInfo.argTypes = ImmutableList.<RelDataType> copyOf(calciteArgTypes);
- udfInfo.retType = calciteRetType;
-
return udfInfo;
}
@@ -413,13 +410,34 @@ public class SqlFunctionConverter {
public static SqlAggFunction getCalciteAggFn(String hiveUdfName,
ImmutableList<RelDataType> calciteArgTypes, RelDataType calciteRetType) {
SqlAggFunction calciteAggFn = (SqlAggFunction) hiveToCalcite.get(hiveUdfName);
+
if (calciteAggFn == null) {
CalciteUDFInfo uInf = getUDFInfo(hiveUdfName, calciteArgTypes, calciteRetType);
- calciteAggFn = new CalciteUDAF(uInf.udfName, uInf.returnTypeInference,
- uInf.operandTypeInference, uInf.operandTypeChecker, uInf.argTypes, uInf.retType);
- }
+ switch (hiveUdfName.toLowerCase()) {
+ case "sum":
+ calciteAggFn = new HiveSqlSumAggFunction(uInf.returnTypeInference,
+ uInf.operandTypeInference, uInf.operandTypeChecker);
+ break;
+ case "count":
+ calciteAggFn = new HiveSqlCountAggFunction(uInf.returnTypeInference,
+ uInf.operandTypeInference, uInf.operandTypeChecker);
+ break;
+ case "min":
+ calciteAggFn = new HiveSqlMinMaxAggFunction(uInf.returnTypeInference,
+ uInf.operandTypeInference, uInf.operandTypeChecker, true);
+ break;
+ case "max":
+ calciteAggFn = new HiveSqlMinMaxAggFunction(uInf.returnTypeInference,
+ uInf.operandTypeInference, uInf.operandTypeChecker, false);
+ break;
+ default:
+ calciteAggFn = new CalciteUDAF(uInf.udfName, uInf.returnTypeInference,
+ uInf.operandTypeInference, uInf.operandTypeChecker);
+ break;
+ }
+ }
return calciteAggFn;
}
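
A hedged sketch of the new dispatch (the type-factory setup is illustrative; any RelDataTypeFactory would do):

    RelDataTypeFactory typeFactory = new org.apache.calcite.jdbc.JavaTypeFactoryImpl();
    RelDataType bigint = typeFactory.createSqlType(SqlTypeName.BIGINT);
    SqlAggFunction fn = SqlFunctionConverter.getCalciteAggFn(
        "sum", ImmutableList.of(bigint), bigint);
    // "sum" now maps to HiveSqlSumAggFunction, so the transpose rule can split it.
    assert fn.unwrap(SqlSplittableAggFunction.class) != null;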
http://git-wip-us.apache.org/repos/asf/hive/blob/68d6cfda/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 0a7ce3a..9c731b8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -63,6 +63,7 @@ import org.apache.calcite.rel.core.Sort;
import org.apache.calcite.rel.metadata.CachingRelMetadataProvider;
import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider;
import org.apache.calcite.rel.metadata.RelMetadataProvider;
+import org.apache.calcite.rel.rules.AggregateJoinTransposeRule;
import org.apache.calcite.rel.rules.FilterAggregateTransposeRule;
import org.apache.calcite.rel.rules.FilterProjectTransposeRule;
import org.apache.calcite.rel.rules.JoinToMultiJoinRule;
@@ -134,6 +135,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateJoinTransposeRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateProjectMergeRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveExpandDistinctAggregatesRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterJoinRule;
@@ -885,6 +887,9 @@ public class CalcitePlanner extends SemanticAnalyzer {
hepPgmBldr.addRuleInstance(UnionMergeRule.INSTANCE);
hepPgmBldr.addRuleInstance(new ProjectMergeRule(false, HiveProject.DEFAULT_PROJECT_FACTORY));
hepPgmBldr.addRuleInstance(HiveAggregateProjectMergeRule.INSTANCE);
+ if (conf.getBoolVar(ConfVars.AGGR_JOIN_TRANSPOSE)) {
+ hepPgmBldr.addRuleInstance(HiveAggregateJoinTransposeRule.INSTANCE);
+ }
hepPgm = hepPgmBldr.build();
HepPlanner hepPlanner = new HepPlanner(hepPgm);
http://git-wip-us.apache.org/repos/asf/hive/blob/68d6cfda/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java
index 5a5846e..c6ffbec 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java
@@ -356,7 +356,7 @@ public class GenericUDAFSum extends AbstractGenericUDAFResolver {
*/
public static class GenericUDAFSumLong extends GenericUDAFEvaluator {
private PrimitiveObjectInspector inputOI;
- private LongWritable result;
+ protected LongWritable result;
@Override
public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
http://git-wip-us.apache.org/repos/asf/hive/blob/68d6cfda/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSumEmptyIsZero.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSumEmptyIsZero.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSumEmptyIsZero.java
new file mode 100644
index 0000000..ab7ab04
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSumEmptyIsZero.java
@@ -0,0 +1,63 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+@Description(name = "$SUM0", value = "_FUNC_(x) - Returns the sum of a set of numbers, zero if empty")
+public class GenericUDAFSumEmptyIsZero extends GenericUDAFSum {
+
+ @Override
+ public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters)
+ throws SemanticException {
+ if (parameters.length != 1) {
+ throw new UDFArgumentTypeException(parameters.length - 1,
+ "Exactly one argument is expected.");
+ }
+
+ if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
+ throw new UDFArgumentTypeException(0,
+ "Only primitive type arguments are accepted but "
+ + parameters[0].getTypeName() + " is passed.");
+ }
+ switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) {
+ case LONG:
+ return new SumZeroIfEmpty();
+ default:
+ throw new UDFArgumentTypeException(0,
+ "Only bigint type arguments are accepted but "
+ + parameters[0].getTypeName() + " is passed.");
+ }
+ }
+
+ public static class SumZeroIfEmpty extends GenericUDAFSumLong {
+
+ @Override
+ public Object terminate(AggregationBuffer agg) throws HiveException {
+ SumLongAgg myagg = (SumLongAgg) agg;
+ result.set(myagg.sum);
+ return result;
+ }
+ }
+}
+
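
The only behavioral difference from GenericUDAFSumLong is the empty-group result: plain SUM returns NULL when no rows were aggregated, while $SUM0 returns 0, which is what a COUNT recombined above a join needs. A plain-Java illustration of the two contracts:

    java.util.List<Long> emptyGroup = java.util.Collections.emptyList();
    // Plain SUM semantics: no input rows means NULL.
    Long plainSum = emptyGroup.isEmpty() ? null
        : emptyGroup.stream().mapToLong(Long::longValue).sum();
    // $SUM0 semantics: terminate() above always emits the accumulated sum, so 0L here.
    long sumZero = emptyGroup.stream().mapToLong(Long::longValue).sum();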
http://git-wip-us.apache.org/repos/asf/hive/blob/68d6cfda/ql/src/test/queries/clientpositive/groupby_join_pushdown.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_join_pushdown.q b/ql/src/test/queries/clientpositive/groupby_join_pushdown.q
new file mode 100644
index 0000000..bf1ae4b
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/groupby_join_pushdown.q
@@ -0,0 +1,55 @@
+set hive.transpose.aggr.join=true;
+EXPLAIN
+SELECT f.key, g.key, count(g.key)
+FROM src f JOIN src g ON(f.key = g.key)
+GROUP BY f.key, g.key;
+
+EXPLAIN
+SELECT f.key, g.key
+FROM src f JOIN src g ON(f.key = g.key)
+GROUP BY f.key, g.key;
+
+EXPLAIN
+SELECT DISTINCT f.value, g.value
+FROM src f JOIN src g ON(f.value = g.value);
+
+EXPLAIN
+SELECT f.key, g.key, COUNT(*)
+FROM src f JOIN src g ON(f.key = g.key)
+GROUP BY f.key, g.key;
+
+EXPLAIN
+SELECT f.ctinyint, g.ctinyint, SUM(f.cbigint)
+FROM alltypesorc f JOIN alltypesorc g ON(f.cint = g.cint)
+GROUP BY f.ctinyint, g.ctinyint ;
+
+EXPLAIN
+SELECT f.cbigint, g.cbigint, MAX(f.cint)
+FROM alltypesorc f JOIN alltypesorc g ON(f.cbigint = g.cbigint)
+GROUP BY f.cbigint, g.cbigint ;
+
+explain
+SELECT f.ctinyint, g.ctinyint, MIN(f.ctinyint)
+FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint)
+GROUP BY f.ctinyint, g.ctinyint;
+
+explain
+SELECT MIN(f.cint)
+FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint)
+GROUP BY f.ctinyint, g.ctinyint;
+
+explain
+SELECT count(f.ctinyint)
+FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint)
+GROUP BY f.ctinyint, g.ctinyint;
+
+explain
+SELECT count(f.cint), f.ctinyint
+FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint)
+GROUP BY f.ctinyint, g.ctinyint;
+
+explain
+SELECT sum(f.cint), f.ctinyint
+FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint)
+GROUP BY f.ctinyint, g.ctinyint;
+
[08/25] hive git commit: HIVE-11468: Vectorize Struct IN() clauses
(Matt McCline, via Gopal V)
Posted by pr...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/7cfe3743/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java.orig
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java.orig b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java.orig
new file mode 100644
index 0000000..0d4c1d8
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java.orig
@@ -0,0 +1,1744 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.physical;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.Stack;
+import java.util.regex.Pattern;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
+import org.apache.hadoop.hive.ql.exec.*;
+import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
+import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey;
+import org.apache.hadoop.hive.ql.exec.spark.SparkTask;
+import org.apache.hadoop.hive.ql.exec.tez.TezTask;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerBigOnlyLongOperator;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerBigOnlyMultiKeyOperator;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerBigOnlyStringOperator;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerLongOperator;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerMultiKeyOperator;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerStringOperator;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLeftSemiLongOperator;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLeftSemiMultiKeyOperator;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLeftSemiStringOperator;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterLongOperator;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterMultiKeyOperator;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterStringOperator;
+import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator;
+import org.apache.hadoop.hive.ql.exec.vector.VectorSMBMapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
+import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
+import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
+import org.apache.hadoop.hive.ql.lib.Dispatcher;
+import org.apache.hadoop.hive.ql.lib.GraphWalker;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.lib.PreOrderOnceWalker;
+import org.apache.hadoop.hive.ql.lib.PreOrderWalker;
+import org.apache.hadoop.hive.ql.lib.Rule;
+import org.apache.hadoop.hive.ql.lib.RuleRegExp;
+import org.apache.hadoop.hive.ql.lib.TaskGraphWalker;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.AbstractOperatorDesc;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.BaseWork;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.GroupByDesc;
+import org.apache.hadoop.hive.ql.plan.JoinDesc;
+import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
+import org.apache.hadoop.hive.ql.plan.MapWork;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+import org.apache.hadoop.hive.ql.plan.ReduceWork;
+import org.apache.hadoop.hive.ql.plan.SMBJoinDesc;
+import org.apache.hadoop.hive.ql.plan.SparkHashTableSinkDesc;
+import org.apache.hadoop.hive.ql.plan.SparkWork;
+import org.apache.hadoop.hive.ql.plan.TableScanDesc;
+import org.apache.hadoop.hive.ql.plan.TezWork;
+import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc;
+import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc;
+import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType;
+import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType;
+import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind;
+import org.apache.hadoop.hive.ql.plan.api.OperatorType;
+import org.apache.hadoop.hive.ql.udf.UDFAcos;
+import org.apache.hadoop.hive.ql.udf.UDFAsin;
+import org.apache.hadoop.hive.ql.udf.UDFAtan;
+import org.apache.hadoop.hive.ql.udf.UDFBin;
+import org.apache.hadoop.hive.ql.udf.UDFConv;
+import org.apache.hadoop.hive.ql.udf.UDFCos;
+import org.apache.hadoop.hive.ql.udf.UDFDayOfMonth;
+import org.apache.hadoop.hive.ql.udf.UDFDegrees;
+import org.apache.hadoop.hive.ql.udf.UDFExp;
+import org.apache.hadoop.hive.ql.udf.UDFHex;
+import org.apache.hadoop.hive.ql.udf.UDFHour;
+import org.apache.hadoop.hive.ql.udf.UDFLength;
+import org.apache.hadoop.hive.ql.udf.UDFLike;
+import org.apache.hadoop.hive.ql.udf.UDFLn;
+import org.apache.hadoop.hive.ql.udf.UDFLog;
+import org.apache.hadoop.hive.ql.udf.UDFLog10;
+import org.apache.hadoop.hive.ql.udf.UDFLog2;
+import org.apache.hadoop.hive.ql.udf.UDFMinute;
+import org.apache.hadoop.hive.ql.udf.UDFMonth;
+import org.apache.hadoop.hive.ql.udf.UDFRadians;
+import org.apache.hadoop.hive.ql.udf.UDFRand;
+import org.apache.hadoop.hive.ql.udf.UDFSecond;
+import org.apache.hadoop.hive.ql.udf.UDFSign;
+import org.apache.hadoop.hive.ql.udf.UDFSin;
+import org.apache.hadoop.hive.ql.udf.UDFSqrt;
+import org.apache.hadoop.hive.ql.udf.UDFSubstr;
+import org.apache.hadoop.hive.ql.udf.UDFTan;
+import org.apache.hadoop.hive.ql.udf.UDFToBoolean;
+import org.apache.hadoop.hive.ql.udf.UDFToByte;
+import org.apache.hadoop.hive.ql.udf.UDFToDouble;
+import org.apache.hadoop.hive.ql.udf.UDFToFloat;
+import org.apache.hadoop.hive.ql.udf.UDFToInteger;
+import org.apache.hadoop.hive.ql.udf.UDFToLong;
+import org.apache.hadoop.hive.ql.udf.UDFToShort;
+import org.apache.hadoop.hive.ql.udf.UDFToString;
+import org.apache.hadoop.hive.ql.udf.UDFWeekOfYear;
+import org.apache.hadoop.hive.ql.udf.UDFYear;
+import org.apache.hadoop.hive.ql.udf.generic.*;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+
+public class Vectorizer implements PhysicalPlanResolver {
+
+ protected static transient final Log LOG = LogFactory.getLog(Vectorizer.class);
+
+ Pattern supportedDataTypesPattern;
+ List<Task<? extends Serializable>> vectorizableTasks =
+ new ArrayList<Task<? extends Serializable>>();
+ Set<Class<?>> supportedGenericUDFs = new HashSet<Class<?>>();
+
+ Set<String> supportedAggregationUdfs = new HashSet<String>();
+
+ private HiveConf hiveConf;
+
+ public Vectorizer() {
+
+ StringBuilder patternBuilder = new StringBuilder();
+ patternBuilder.append("int");
+ patternBuilder.append("|smallint");
+ patternBuilder.append("|tinyint");
+ patternBuilder.append("|bigint");
+ patternBuilder.append("|integer");
+ patternBuilder.append("|long");
+ patternBuilder.append("|short");
+ patternBuilder.append("|timestamp");
+ patternBuilder.append("|" + serdeConstants.INTERVAL_YEAR_MONTH_TYPE_NAME);
+ patternBuilder.append("|" + serdeConstants.INTERVAL_DAY_TIME_TYPE_NAME);
+ patternBuilder.append("|boolean");
+ patternBuilder.append("|binary");
+ patternBuilder.append("|string");
+ patternBuilder.append("|byte");
+ patternBuilder.append("|float");
+ patternBuilder.append("|double");
+ patternBuilder.append("|date");
+ patternBuilder.append("|void");
+
+ // Decimal types can be specified with different precision and scales e.g. decimal(10,5),
+ // as opposed to other data types which can be represented by constant strings.
+ // The regex matches only the "decimal" prefix of the type.
+ patternBuilder.append("|decimal.*");
+
+ // CHAR and VARCHAR types can be specified with maximum length.
+ patternBuilder.append("|char.*");
+ patternBuilder.append("|varchar.*");
+
+ supportedDataTypesPattern = Pattern.compile(patternBuilder.toString());
+
+ supportedGenericUDFs.add(GenericUDFOPPlus.class);
+ supportedGenericUDFs.add(GenericUDFOPMinus.class);
+ supportedGenericUDFs.add(GenericUDFOPMultiply.class);
+ supportedGenericUDFs.add(GenericUDFOPDivide.class);
+ supportedGenericUDFs.add(GenericUDFOPMod.class);
+ supportedGenericUDFs.add(GenericUDFOPNegative.class);
+ supportedGenericUDFs.add(GenericUDFOPPositive.class);
+
+ supportedGenericUDFs.add(GenericUDFOPEqualOrLessThan.class);
+ supportedGenericUDFs.add(GenericUDFOPEqualOrGreaterThan.class);
+ supportedGenericUDFs.add(GenericUDFOPGreaterThan.class);
+ supportedGenericUDFs.add(GenericUDFOPLessThan.class);
+ supportedGenericUDFs.add(GenericUDFOPNot.class);
+ supportedGenericUDFs.add(GenericUDFOPNotEqual.class);
+ supportedGenericUDFs.add(GenericUDFOPNotNull.class);
+ supportedGenericUDFs.add(GenericUDFOPNull.class);
+ supportedGenericUDFs.add(GenericUDFOPOr.class);
+ supportedGenericUDFs.add(GenericUDFOPAnd.class);
+ supportedGenericUDFs.add(GenericUDFOPEqual.class);
+ supportedGenericUDFs.add(UDFLength.class);
+
+ supportedGenericUDFs.add(UDFYear.class);
+ supportedGenericUDFs.add(UDFMonth.class);
+ supportedGenericUDFs.add(UDFDayOfMonth.class);
+ supportedGenericUDFs.add(UDFHour.class);
+ supportedGenericUDFs.add(UDFMinute.class);
+ supportedGenericUDFs.add(UDFSecond.class);
+ supportedGenericUDFs.add(UDFWeekOfYear.class);
+ supportedGenericUDFs.add(GenericUDFToUnixTimeStamp.class);
+
+ supportedGenericUDFs.add(GenericUDFDateAdd.class);
+ supportedGenericUDFs.add(GenericUDFDateSub.class);
+ supportedGenericUDFs.add(GenericUDFDate.class);
+ supportedGenericUDFs.add(GenericUDFDateDiff.class);
+
+ supportedGenericUDFs.add(UDFLike.class);
+ supportedGenericUDFs.add(GenericUDFRegExp.class);
+ supportedGenericUDFs.add(UDFSubstr.class);
+ supportedGenericUDFs.add(GenericUDFLTrim.class);
+ supportedGenericUDFs.add(GenericUDFRTrim.class);
+ supportedGenericUDFs.add(GenericUDFTrim.class);
+
+ supportedGenericUDFs.add(UDFSin.class);
+ supportedGenericUDFs.add(UDFCos.class);
+ supportedGenericUDFs.add(UDFTan.class);
+ supportedGenericUDFs.add(UDFAsin.class);
+ supportedGenericUDFs.add(UDFAcos.class);
+ supportedGenericUDFs.add(UDFAtan.class);
+ supportedGenericUDFs.add(UDFDegrees.class);
+ supportedGenericUDFs.add(UDFRadians.class);
+ supportedGenericUDFs.add(GenericUDFFloor.class);
+ supportedGenericUDFs.add(GenericUDFCeil.class);
+ supportedGenericUDFs.add(UDFExp.class);
+ supportedGenericUDFs.add(UDFLn.class);
+ supportedGenericUDFs.add(UDFLog2.class);
+ supportedGenericUDFs.add(UDFLog10.class);
+ supportedGenericUDFs.add(UDFLog.class);
+ supportedGenericUDFs.add(GenericUDFPower.class);
+ supportedGenericUDFs.add(GenericUDFRound.class);
+ supportedGenericUDFs.add(GenericUDFBRound.class);
+ supportedGenericUDFs.add(GenericUDFPosMod.class);
+ supportedGenericUDFs.add(UDFSqrt.class);
+ supportedGenericUDFs.add(UDFSign.class);
+ supportedGenericUDFs.add(UDFRand.class);
+ supportedGenericUDFs.add(UDFBin.class);
+ supportedGenericUDFs.add(UDFHex.class);
+ supportedGenericUDFs.add(UDFConv.class);
+
+ supportedGenericUDFs.add(GenericUDFLower.class);
+ supportedGenericUDFs.add(GenericUDFUpper.class);
+ supportedGenericUDFs.add(GenericUDFConcat.class);
+ supportedGenericUDFs.add(GenericUDFAbs.class);
+ supportedGenericUDFs.add(GenericUDFBetween.class);
+ supportedGenericUDFs.add(GenericUDFIn.class);
+ supportedGenericUDFs.add(GenericUDFCase.class);
+ supportedGenericUDFs.add(GenericUDFWhen.class);
+ supportedGenericUDFs.add(GenericUDFCoalesce.class);
+ supportedGenericUDFs.add(GenericUDFElt.class);
+ supportedGenericUDFs.add(GenericUDFInitCap.class);
+
+ // For type casts
+ supportedGenericUDFs.add(UDFToLong.class);
+ supportedGenericUDFs.add(UDFToInteger.class);
+ supportedGenericUDFs.add(UDFToShort.class);
+ supportedGenericUDFs.add(UDFToByte.class);
+ supportedGenericUDFs.add(UDFToBoolean.class);
+ supportedGenericUDFs.add(UDFToFloat.class);
+ supportedGenericUDFs.add(UDFToDouble.class);
+ supportedGenericUDFs.add(UDFToString.class);
+ supportedGenericUDFs.add(GenericUDFTimestamp.class);
+ supportedGenericUDFs.add(GenericUDFToDecimal.class);
+ supportedGenericUDFs.add(GenericUDFToDate.class);
+ supportedGenericUDFs.add(GenericUDFToChar.class);
+ supportedGenericUDFs.add(GenericUDFToVarchar.class);
+ supportedGenericUDFs.add(GenericUDFToIntervalYearMonth.class);
+ supportedGenericUDFs.add(GenericUDFToIntervalDayTime.class);
+
+ // For conditional expressions
+ supportedGenericUDFs.add(GenericUDFIf.class);
+
+ supportedAggregationUdfs.add("min");
+ supportedAggregationUdfs.add("max");
+ supportedAggregationUdfs.add("count");
+ supportedAggregationUdfs.add("sum");
+ supportedAggregationUdfs.add("avg");
+ supportedAggregationUdfs.add("variance");
+ supportedAggregationUdfs.add("var_pop");
+ supportedAggregationUdfs.add("var_samp");
+ supportedAggregationUdfs.add("std");
+ supportedAggregationUdfs.add("stddev");
+ supportedAggregationUdfs.add("stddev_pop");
+ supportedAggregationUdfs.add("stddev_samp");
+ }
+
+ class VectorizationDispatcher implements Dispatcher {
+
+ private List<String> reduceColumnNames;
+ private List<TypeInfo> reduceTypeInfos;
+
+ public VectorizationDispatcher(PhysicalContext physicalContext) {
+ reduceColumnNames = null;
+ reduceTypeInfos = null;
+ }
+
+ @Override
+ public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs)
+ throws SemanticException {
+ Task<? extends Serializable> currTask = (Task<? extends Serializable>) nd;
+ if (currTask instanceof MapRedTask) {
+ convertMapWork(((MapRedTask) currTask).getWork().getMapWork(), false);
+ } else if (currTask instanceof TezTask) {
+ TezWork work = ((TezTask) currTask).getWork();
+ for (BaseWork w: work.getAllWork()) {
+ if (w instanceof MapWork) {
+ convertMapWork((MapWork) w, true);
+ } else if (w instanceof ReduceWork) {
+ // We are only vectorizing Reduce under Tez.
+ if (HiveConf.getBoolVar(hiveConf,
+ HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_ENABLED)) {
+ convertReduceWork((ReduceWork) w, true);
+ }
+ }
+ }
+ } else if (currTask instanceof SparkTask) {
+ SparkWork sparkWork = (SparkWork) currTask.getWork();
+ for (BaseWork baseWork : sparkWork.getAllWork()) {
+ if (baseWork instanceof MapWork) {
+ convertMapWork((MapWork) baseWork, false);
+ } else if (baseWork instanceof ReduceWork
+ && HiveConf.getBoolVar(hiveConf,
+ HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_ENABLED)) {
+ convertReduceWork((ReduceWork) baseWork, false);
+ }
+ }
+ }
+ return null;
+ }
+
+ private void convertMapWork(MapWork mapWork, boolean isTez) throws SemanticException {
+ boolean ret = validateMapWork(mapWork, isTez);
+ if (ret) {
+ vectorizeMapWork(mapWork, isTez);
+ }
+ }
+
+ private void addMapWorkRules(Map<Rule, NodeProcessor> opRules, NodeProcessor np) {
+ opRules.put(new RuleRegExp("R1", TableScanOperator.getOperatorName() + ".*"
+ + FileSinkOperator.getOperatorName()), np);
+ opRules.put(new RuleRegExp("R2", TableScanOperator.getOperatorName() + ".*"
+ + ReduceSinkOperator.getOperatorName()), np);
+ }
+
+ private boolean validateMapWork(MapWork mapWork, boolean isTez) throws SemanticException {
+ LOG.info("Validating MapWork...");
+
+ // Eliminate MR plans with more than one TableScanOperator.
+ LinkedHashMap<String, Operator<? extends OperatorDesc>> aliasToWork = mapWork.getAliasToWork();
+ if ((aliasToWork == null) || (aliasToWork.size() == 0)) {
+ return false;
+ }
+ int tableScanCount = 0;
+ for (Operator<?> op : aliasToWork.values()) {
+ if (op == null) {
+ LOG.warn("Map work has invalid aliases to work with. Fail validation!");
+ return false;
+ }
+ if (op instanceof TableScanOperator) {
+ tableScanCount++;
+ }
+ }
+ if (tableScanCount > 1) {
+ LOG.warn("Map work has more than 1 TableScanOperator aliases to work with. Fail validation!");
+ return false;
+ }
+
+ // Validate the input format
+ for (String path : mapWork.getPathToPartitionInfo().keySet()) {
+ PartitionDesc pd = mapWork.getPathToPartitionInfo().get(path);
+ List<Class<?>> interfaceList =
+ Arrays.asList(pd.getInputFileFormatClass().getInterfaces());
+ if (!interfaceList.contains(VectorizedInputFormatInterface.class)) {
+ LOG.info("Input format: " + pd.getInputFileFormatClassName()
+ + ", doesn't provide vectorized input");
+ return false;
+ }
+ }
+ Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
+ MapWorkValidationNodeProcessor vnp = new MapWorkValidationNodeProcessor(mapWork, isTez);
+ addMapWorkRules(opRules, vnp);
+ Dispatcher disp = new DefaultRuleDispatcher(vnp, opRules, null);
+ GraphWalker ogw = new DefaultGraphWalker(disp);
+
+ // iterate over the mapper operator tree
+ ArrayList<Node> topNodes = new ArrayList<Node>();
+ topNodes.addAll(mapWork.getAliasToWork().values());
+ HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
+ ogw.startWalking(topNodes, nodeOutput);
+ for (Node n : nodeOutput.keySet()) {
+ if (nodeOutput.get(n) != null) {
+ if (!((Boolean)nodeOutput.get(n)).booleanValue()) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
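+ // Second pass over the already-validated plan: replace operators with their vectorized
+ // counterparts and record the column name/type maps the task needs at runtime.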
+ private void vectorizeMapWork(MapWork mapWork, boolean isTez) throws SemanticException {
+ LOG.info("Vectorizing MapWork...");
+ mapWork.setVectorMode(true);
+ Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
+ MapWorkVectorizationNodeProcessor vnp = new MapWorkVectorizationNodeProcessor(mapWork, isTez);
+ addMapWorkRules(opRules, vnp);
+ Dispatcher disp = new DefaultRuleDispatcher(vnp, opRules, null);
+ GraphWalker ogw = new PreOrderOnceWalker(disp);
+ // iterate over the mapper operator tree
+ ArrayList<Node> topNodes = new ArrayList<Node>();
+ topNodes.addAll(mapWork.getAliasToWork().values());
+ HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
+ ogw.startWalking(topNodes, nodeOutput);
+
+ mapWork.setVectorColumnNameMap(vnp.getVectorColumnNameMap());
+ mapWork.setVectorColumnTypeMap(vnp.getVectorColumnTypeMap());
+ mapWork.setVectorScratchColumnTypeMap(vnp.getVectorScratchColumnTypeMap());
+
+ if (LOG.isDebugEnabled()) {
+ debugDisplayAllMaps(mapWork);
+ }
+
+ }
+
+ private void convertReduceWork(ReduceWork reduceWork, boolean isTez) throws SemanticException {
+ boolean ret = validateReduceWork(reduceWork);
+ if (ret) {
+ vectorizeReduceWork(reduceWork, isTez);
+ }
+ }
+
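+ // Collect the reduce-shuffle KEY.* and VALUE.* column names and types; returns false
+ // when either ObjectInspector is missing or not a struct, or when tagging is needed.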
+ private boolean getOnlyStructObjectInspectors(ReduceWork reduceWork) throws SemanticException {
+ try {
+ // Check key ObjectInspector.
+ ObjectInspector keyObjectInspector = reduceWork.getKeyObjectInspector();
+ if (keyObjectInspector == null || !(keyObjectInspector instanceof StructObjectInspector)) {
+ return false;
+ }
+ StructObjectInspector keyStructObjectInspector = (StructObjectInspector)keyObjectInspector;
+ List<? extends StructField> keyFields = keyStructObjectInspector.getAllStructFieldRefs();
+
+ // Tez doesn't use tagging...
+ if (reduceWork.getNeedsTagging()) {
+ return false;
+ }
+
+ // Check value ObjectInspector.
+ ObjectInspector valueObjectInspector = reduceWork.getValueObjectInspector();
+ if (valueObjectInspector == null ||
+ !(valueObjectInspector instanceof StructObjectInspector)) {
+ return false;
+ }
+ StructObjectInspector valueStructObjectInspector = (StructObjectInspector)valueObjectInspector;
+ List<? extends StructField> valueFields = valueStructObjectInspector.getAllStructFieldRefs();
+
+ reduceColumnNames = new ArrayList<String>();
+ reduceTypeInfos = new ArrayList<TypeInfo>();
+
+ for (StructField field: keyFields) {
+ reduceColumnNames.add(Utilities.ReduceField.KEY.toString() + "." + field.getFieldName());
+ reduceTypeInfos.add(TypeInfoUtils.getTypeInfoFromTypeString(field.getFieldObjectInspector().getTypeName()));
+ }
+ for (StructField field: valueFields) {
+ reduceColumnNames.add(Utilities.ReduceField.VALUE.toString() + "." + field.getFieldName());
+ reduceTypeInfos.add(TypeInfoUtils.getTypeInfoFromTypeString(field.getFieldObjectInspector().getTypeName()));
+ }
+ } catch (Exception e) {
+ throw new SemanticException(e);
+ }
+ return true;
+ }
+
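+ // On the reduce side, only operator chains rooted at GROUP BY or SELECT are walked.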
+ private void addReduceWorkRules(Map<Rule, NodeProcessor> opRules, NodeProcessor np) {
+ opRules.put(new RuleRegExp("R1", GroupByOperator.getOperatorName() + ".*"), np);
+ opRules.put(new RuleRegExp("R2", SelectOperator.getOperatorName() + ".*"), np);
+ }
+
+ private boolean validateReduceWork(ReduceWork reduceWork) throws SemanticException {
+ LOG.info("Validating ReduceWork...");
+
+ // Validate input to ReduceWork.
+ if (!getOnlyStructObjectInspectors(reduceWork)) {
+ return false;
+ }
+ // Now check the reduce operator tree.
+ Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
+ ReduceWorkValidationNodeProcessor vnp = new ReduceWorkValidationNodeProcessor();
+ addReduceWorkRules(opRules, vnp);
+ Dispatcher disp = new DefaultRuleDispatcher(vnp, opRules, null);
+ GraphWalker ogw = new DefaultGraphWalker(disp);
+ // iterate over the reduce operator tree
+ ArrayList<Node> topNodes = new ArrayList<Node>();
+ topNodes.add(reduceWork.getReducer());
+ HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
+ ogw.startWalking(topNodes, nodeOutput);
+ for (Node n : nodeOutput.keySet()) {
+ if (nodeOutput.get(n) != null) {
+ if (!((Boolean)nodeOutput.get(n)).booleanValue()) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
+ private void vectorizeReduceWork(ReduceWork reduceWork, boolean isTez) throws SemanticException {
+ LOG.info("Vectorizing ReduceWork...");
+ reduceWork.setVectorMode(true);
+
+ // For some reason, the DefaultGraphWalker does not descend down from the reducer Operator as
+ // expected. We need to descend down, otherwise it breaks our algorithm that determines
+ // the VectorizationContext, so we use PreOrderWalker instead of DefaultGraphWalker.
+ Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
+ ReduceWorkVectorizationNodeProcessor vnp =
+ new ReduceWorkVectorizationNodeProcessor(reduceColumnNames, reduceTypeInfos, isTez);
+ addReduceWorkRules(opRules, vnp);
+ Dispatcher disp = new DefaultRuleDispatcher(vnp, opRules, null);
+ GraphWalker ogw = new PreOrderWalker(disp);
+ // iterate over the reduce operator tree
+ ArrayList<Node> topNodes = new ArrayList<Node>();
+ topNodes.add(reduceWork.getReducer());
+ LOG.info("vectorizeReduceWork reducer Operator: " +
+ reduceWork.getReducer().getName() + "...");
+ HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
+ ogw.startWalking(topNodes, nodeOutput);
+
+ // Necessary since we are vectorizing the root operator in reduce.
+ reduceWork.setReducer(vnp.getRootVectorOp());
+
+ reduceWork.setVectorColumnNameMap(vnp.getVectorColumnNameMap());
+ reduceWork.setVectorColumnTypeMap(vnp.getVectorColumnTypeMap());
+ reduceWork.setVectorScratchColumnTypeMap(vnp.getVectorScratchColumnTypeMap());
+
+ if (LOG.isDebugEnabled()) {
+ debugDisplayAllMaps(reduceWork);
+ }
+ }
+ }
+
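+ // Validates every operator on the current walk stack; operators below a row-mode
+ // GROUP BY are accepted as-is since they will simply be left unvectorized.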
+ class MapWorkValidationNodeProcessor implements NodeProcessor {
+
+ private final MapWork mapWork;
+ private final boolean isTez;
+
+ public MapWorkValidationNodeProcessor(MapWork mapWork, boolean isTez) {
+ this.mapWork = mapWork;
+ this.isTez = isTez;
+ }
+
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+ for (Node n : stack) {
+ Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) n;
+ if (nonVectorizableChildOfGroupBy(op)) {
+ return Boolean.TRUE;
+ }
+ boolean ret = validateMapWorkOperator(op, mapWork, isTez);
+ if (!ret) {
+ LOG.info("MapWork Operator: " + op.getName() + " could not be vectorized.");
+ return Boolean.FALSE;
+ }
+ }
+ return Boolean.TRUE;
+ }
+ }
+
+ class ReduceWorkValidationNodeProcessor implements NodeProcessor {
+
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+ for (Node n : stack) {
+ Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) n;
+ if (nonVectorizableChildOfGroupBy(op)) {
+ return Boolean.TRUE;
+ }
+ boolean ret = validateReduceWorkOperator(op);
+ if (!ret) {
+ LOG.info("ReduceWork Operator: " + op.getName() + " could not be vectorized.");
+ return Boolean.FALSE;
+ }
+ }
+ return Boolean.TRUE;
+ }
+ }
+
+ // This class has common code used by both MapWorkVectorizationNodeProcessor and
+ // ReduceWorkVectorizationNodeProcessor.
+ class VectorizationNodeProcessor implements NodeProcessor {
+
+ // The vectorization context for the Map or Reduce task.
+ protected VectorizationContext taskVectorizationContext;
+
+ // The input projection column type name map for the Map or Reduce task.
+ protected Map<Integer, String> taskColumnTypeNameMap;
+
+ VectorizationNodeProcessor() {
+ taskColumnTypeNameMap = new HashMap<Integer, String>();
+ }
+
+ public Map<String, Integer> getVectorColumnNameMap() {
+ return taskVectorizationContext.getProjectionColumnMap();
+ }
+
+ public Map<Integer, String> getVectorColumnTypeMap() {
+ return taskColumnTypeNameMap;
+ }
+
+ public Map<Integer, String> getVectorScratchColumnTypeMap() {
+ return taskVectorizationContext.getScratchColumnTypeMap();
+ }
+
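+ // Operators already vectorized (or deliberately skipped), plus the mapping from each
+ // original operator to its vectorized replacement, consulted when walking parent stacks.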
+ protected final Set<Operator<? extends OperatorDesc>> opsDone =
+ new HashSet<Operator<? extends OperatorDesc>>();
+
+ protected final Map<Operator<? extends OperatorDesc>, Operator<? extends OperatorDesc>> opToVectorOpMap =
+ new HashMap<Operator<? extends OperatorDesc>, Operator<? extends OperatorDesc>>();
+
+ public VectorizationContext walkStackToFindVectorizationContext(Stack<Node> stack,
+ Operator<? extends OperatorDesc> op) throws SemanticException {
+ VectorizationContext vContext = null;
+ if (stack.size() <= 1) {
+ throw new SemanticException(
+ String.format("Expected operator stack for operator %s to have at least 2 operators",
+ op.getName()));
+ }
+ // Walk down the stack of operators until we find one willing to give us a context.
+ // At the bottom will be the root operator, guaranteed to have a context.
+ int i = stack.size() - 2;
+ while (vContext == null) {
+ if (i < 0) {
+ return null;
+ }
+ Operator<? extends OperatorDesc> opParent = (Operator<? extends OperatorDesc>) stack.get(i);
+ Operator<? extends OperatorDesc> vectorOpParent = opToVectorOpMap.get(opParent);
+ if (vectorOpParent != null) {
+ if (vectorOpParent instanceof VectorizationContextRegion) {
+ VectorizationContextRegion vcRegion = (VectorizationContextRegion) vectorOpParent;
+ vContext = vcRegion.getOuputVectorizationContext();
+ LOG.info("walkStackToFindVectorizationContext " + vectorOpParent.getName() + " has new vectorization context " + vContext.toString());
+ } else {
+ LOG.info("walkStackToFindVectorizationContext " + vectorOpParent.getName() + " does not have new vectorization context");
+ }
+ } else {
+ LOG.info("walkStackToFindVectorizationContext " + opParent.getName() + " is not vectorized");
+ }
+ --i;
+ }
+ return vContext;
+ }
+
+ public Operator<? extends OperatorDesc> doVectorize(Operator<? extends OperatorDesc> op,
+ VectorizationContext vContext, boolean isTez) throws SemanticException {
+ Operator<? extends OperatorDesc> vectorOp = op;
+ try {
+ if (!opsDone.contains(op)) {
+ vectorOp = vectorizeOperator(op, vContext, isTez);
+ opsDone.add(op);
+ if (vectorOp != op) {
+ opToVectorOpMap.put(op, vectorOp);
+ opsDone.add(vectorOp);
+ }
+ }
+ } catch (HiveException e) {
+ throw new SemanticException(e);
+ }
+ return vectorOp;
+ }
+
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+ throw new SemanticException("Must be overridden");
+ }
+ }
+
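+ // Map-side vectorization: the TableScan establishes the task-level vectorization
+ // context; descendants inherit it unless a parent operator published a new one.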
+ class MapWorkVectorizationNodeProcessor extends VectorizationNodeProcessor {
+
+ private final boolean isTez;
+
+ public MapWorkVectorizationNodeProcessor(MapWork mWork, boolean isTez) {
+ super();
+ this.isTez = isTez;
+ }
+
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+
+ Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) nd;
+
+ VectorizationContext vContext = null;
+
+ if (op instanceof TableScanOperator) {
+ if (taskVectorizationContext == null) {
+ taskVectorizationContext = getVectorizationContext(op.getSchema(), op.getName(),
+ taskColumnTypeNameMap);
+ }
+ vContext = taskVectorizationContext;
+ } else {
+ LOG.info("MapWorkVectorizationNodeProcessor process going to walk the operator stack to get vectorization context for " + op.getName());
+ vContext = walkStackToFindVectorizationContext(stack, op);
+ if (vContext == null) {
+ // No operator has "pushed" a new context -- so use the task vectorization context.
+ vContext = taskVectorizationContext;
+ }
+ }
+
+ assert vContext != null;
+ LOG.info("MapWorkVectorizationNodeProcessor process operator " + op.getName() + " using vectorization context" + vContext.toString());
+
+ // When a vectorized GROUPBY outputs rows instead of vectorized row batches, we don't
+ // vectorize the operators below it.
+ if (nonVectorizableChildOfGroupBy(op)) {
+ // No need to vectorize
+ if (!opsDone.contains(op)) {
+ opsDone.add(op);
+ }
+ return null;
+ }
+
+ Operator<? extends OperatorDesc> vectorOp = doVectorize(op, vContext, isTez);
+
+ if (LOG.isDebugEnabled()) {
+ if (vectorOp instanceof VectorizationContextRegion) {
+ VectorizationContextRegion vcRegion = (VectorizationContextRegion) vectorOp;
+ VectorizationContext vNewContext = vcRegion.getOuputVectorizationContext();
+ LOG.debug("Vectorized MapWork operator " + vectorOp.getName() + " added vectorization context " + vNewContext.toString());
+ }
+ }
+
+ return null;
+ }
+ }
+
+ class ReduceWorkVectorizationNodeProcessor extends VectorizationNodeProcessor {
+
+ private final List<String> reduceColumnNames;
+ private final List<TypeInfo> reduceTypeInfos;
+
+ private final boolean isTez;
+
+ private Operator<? extends OperatorDesc> rootVectorOp;
+
+ public Operator<? extends OperatorDesc> getRootVectorOp() {
+ return rootVectorOp;
+ }
+
+ public ReduceWorkVectorizationNodeProcessor(List<String> reduceColumnNames,
+ List<TypeInfo> reduceTypeInfos, boolean isTez) {
+ super();
+ this.reduceColumnNames = reduceColumnNames;
+ this.reduceTypeInfos = reduceTypeInfos;
+ rootVectorOp = null;
+ this.isTez = isTez;
+ }
+
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+
+ Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) nd;
+
+ VectorizationContext vContext = null;
+
+ boolean saveRootVectorOp = false;
+
+ if (op.getParentOperators().size() == 0) {
+ LOG.info("ReduceWorkVectorizationNodeProcessor process reduceColumnNames " + reduceColumnNames.toString());
+
+ vContext = new VectorizationContext("__Reduce_Shuffle__", reduceColumnNames);
+ taskVectorizationContext = vContext;
+ int i = 0;
+ for (TypeInfo typeInfo : reduceTypeInfos) {
+ taskColumnTypeNameMap.put(i, typeInfo.getTypeName());
+ i++;
+ }
+ saveRootVectorOp = true;
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Vectorized ReduceWork reduce shuffle vectorization context " + vContext.toString());
+ }
+ } else {
+ LOG.info("ReduceWorkVectorizationNodeProcessor process going to walk the operator stack to get vectorization context for " + op.getName());
+ vContext = walkStackToFindVectorizationContext(stack, op);
+ if (vContext == null) {
+ // If we didn't find a context among the operators, assume the top -- reduce shuffle's
+ // vectorization context.
+ vContext = taskVectorizationContext;
+ }
+ }
+
+ assert vContext != null;
+ LOG.info("ReduceWorkVectorizationNodeProcessor process operator " + op.getName() + " using vectorization context" + vContext.toString());
+
+ // When a vectorized GROUPBY outputs rows instead of vectorized row batches, we don't
+ // vectorize the operators below it.
+ if (nonVectorizableChildOfGroupBy(op)) {
+ // No need to vectorize
+ if (!opsDone.contains(op)) {
+ opsDone.add(op);
+ }
+ return null;
+ }
+
+ Operator<? extends OperatorDesc> vectorOp = doVectorize(op, vContext, isTez);
+
+ if (LOG.isDebugEnabled()) {
+ if (vectorOp instanceof VectorizationContextRegion) {
+ VectorizationContextRegion vcRegion = (VectorizationContextRegion) vectorOp;
+ VectorizationContext vNewContext = vcRegion.getOuputVectorizationContext();
+ LOG.debug("Vectorized ReduceWork operator " + vectorOp.getName() + " added vectorization context " + vNewContext.toString());
+ }
+ }
+ if (saveRootVectorOp && op != vectorOp) {
+ rootVectorOp = vectorOp;
+ }
+
+ return null;
+ }
+ }
+
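+ // A throwaway context used only to test whether expressions can be vectorized;
+ // every column resolves to index 0 because real column mappings do not matter here.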
+ private static class ValidatorVectorizationContext extends VectorizationContext {
+ private ValidatorVectorizationContext() {
+ super("No Name");
+ }
+
+ @Override
+ protected int getInputColumnIndex(String name) {
+ return 0;
+ }
+
+ @Override
+ protected int getInputColumnIndex(ExprNodeColumnDesc colExpr) {
+ return 0;
+ }
+ }
+
+ @Override
+ public PhysicalContext resolve(PhysicalContext physicalContext) throws SemanticException {
+ hiveConf = physicalContext.getConf();
+
+ boolean vectorPath = HiveConf.getBoolVar(hiveConf,
+ HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED);
+ if (!vectorPath) {
+ LOG.info("Vectorization is disabled");
+ return physicalContext;
+ }
+ // create dispatcher and graph walker
+ Dispatcher disp = new VectorizationDispatcher(physicalContext);
+ TaskGraphWalker ogw = new TaskGraphWalker(disp);
+
+ // get all the tasks nodes from root task
+ ArrayList<Node> topNodes = new ArrayList<Node>();
+ topNodes.addAll(physicalContext.getRootTasks());
+
+ // begin to walk through the task tree.
+ ogw.startWalking(topNodes, null);
+ return physicalContext;
+ }
+
+ boolean validateMapWorkOperator(Operator<? extends OperatorDesc> op, MapWork mWork, boolean isTez) {
+ boolean ret = false;
+ switch (op.getType()) {
+ case MAPJOIN:
+ if (op instanceof MapJoinOperator) {
+ ret = validateMapJoinOperator((MapJoinOperator) op);
+ } else if (op instanceof SMBMapJoinOperator) {
+ ret = validateSMBMapJoinOperator((SMBMapJoinOperator) op);
+ }
+ break;
+ case GROUPBY:
+ ret = validateGroupByOperator((GroupByOperator) op, false, isTez);
+ break;
+ case FILTER:
+ ret = validateFilterOperator((FilterOperator) op);
+ break;
+ case SELECT:
+ ret = validateSelectOperator((SelectOperator) op);
+ break;
+ case REDUCESINK:
+ ret = validateReduceSinkOperator((ReduceSinkOperator) op);
+ break;
+ case TABLESCAN:
+ ret = validateTableScanOperator((TableScanOperator) op, mWork);
+ break;
+ case FILESINK:
+ case LIMIT:
+ case EVENT:
+ case SPARKPRUNINGSINK:
+ ret = true;
+ break;
+ case HASHTABLESINK:
+ ret = op instanceof SparkHashTableSinkOperator &&
+ validateSparkHashTableSinkOperator((SparkHashTableSinkOperator) op);
+ break;
+ default:
+ ret = false;
+ break;
+ }
+ return ret;
+ }
+
+ boolean validateReduceWorkOperator(Operator<? extends OperatorDesc> op) {
+ boolean ret = false;
+ switch (op.getType()) {
+ case MAPJOIN:
+ // Does MAPJOIN actually get planned in Reduce?
+ if (op instanceof MapJoinOperator) {
+ ret = validateMapJoinOperator((MapJoinOperator) op);
+ } else if (op instanceof SMBMapJoinOperator) {
+ ret = validateSMBMapJoinOperator((SMBMapJoinOperator) op);
+ }
+ break;
+ case GROUPBY:
+ if (HiveConf.getBoolVar(hiveConf,
+ HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_GROUPBY_ENABLED)) {
+ ret = validateGroupByOperator((GroupByOperator) op, true, true);
+ } else {
+ ret = false;
+ }
+ break;
+ case FILTER:
+ ret = validateFilterOperator((FilterOperator) op);
+ break;
+ case SELECT:
+ ret = validateSelectOperator((SelectOperator) op);
+ break;
+ case REDUCESINK:
+ ret = validateReduceSinkOperator((ReduceSinkOperator) op);
+ break;
+ case FILESINK:
+ ret = validateFileSinkOperator((FileSinkOperator) op);
+ break;
+ case LIMIT:
+ case EVENT:
+ case SPARKPRUNINGSINK:
+ ret = true;
+ break;
+ case HASHTABLESINK:
+ ret = op instanceof SparkHashTableSinkOperator &&
+ validateSparkHashTableSinkOperator((SparkHashTableSinkOperator) op);
+ break;
+ default:
+ ret = false;
+ break;
+ }
+ return ret;
+ }
+
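+ // Walks up the (first-parent) chain; returns true when an ancestor GROUP BY emits
+ // rows rather than vectorized batches, so everything below it must stay in row mode.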
+ public Boolean nonVectorizableChildOfGroupBy(Operator<? extends OperatorDesc> op) {
+ Operator<? extends OperatorDesc> currentOp = op;
+ while (currentOp.getParentOperators().size() > 0) {
+ currentOp = currentOp.getParentOperators().get(0);
+ if (currentOp.getType().equals(OperatorType.GROUPBY)) {
+ GroupByDesc desc = (GroupByDesc)currentOp.getConf();
+ boolean isVectorOutput = desc.getVectorDesc().isVectorOutput();
+ if (isVectorOutput) {
+ // This GROUP BY does vectorize its output.
+ return false;
+ }
+ return true;
+ }
+ }
+ return false;
+ }
+
+ private boolean validateSMBMapJoinOperator(SMBMapJoinOperator op) {
+ SMBJoinDesc desc = op.getConf();
+ // Validation is the same as for map join, since the 'small' tables are not vectorized
+ return validateMapJoinDesc(desc);
+ }
+
+ private boolean validateTableScanOperator(TableScanOperator op, MapWork mWork) {
+ TableScanDesc desc = op.getConf();
+ if (desc.isGatherStats()) {
+ return false;
+ }
+
+ String columns = "";
+ String types = "";
+ String partitionColumns = "";
+ String partitionTypes = "";
+ boolean haveInfo = false;
+
+ // This over-reaches slightly, since we can have > 1 table-scan per map-work.
+ // To be accurate, it would need to follow path-to-partition and path-to-alias and then
+ // check that the alias refers to the same table-scan. That is a TODO item to be fixed
+ // when we support > 1 TableScans per vectorized pipeline later.
+ LinkedHashMap<String, PartitionDesc> partitionDescs = mWork.getPathToPartitionInfo();
+
+ // For vectorization, compare each partition's information against the others.
+ // We assume the table information will be from one of the partitions, so it will
+ // work to focus on the partition information and not compare against the TableScanOperator
+ // columns (in the VectorizationContext).
+ for (Map.Entry<String, PartitionDesc> entry : partitionDescs.entrySet()) {
+ PartitionDesc partDesc = entry.getValue();
+ if (partDesc.getPartSpec() == null || partDesc.getPartSpec().isEmpty()) {
+ // No partition information -- we match because we would default to using the table description.
+ continue;
+ }
+ Properties partProps = partDesc.getProperties();
+ if (!haveInfo) {
+ columns = partProps.getProperty(hive_metastoreConstants.META_TABLE_COLUMNS);
+ types = partProps.getProperty(hive_metastoreConstants.META_TABLE_COLUMN_TYPES);
+ partitionColumns = partProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS);
+ partitionTypes = partProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES);
+ haveInfo = true;
+ } else {
+ String nextColumns = partProps.getProperty(hive_metastoreConstants.META_TABLE_COLUMNS);
+ String nextTypes = partProps.getProperty(hive_metastoreConstants.META_TABLE_COLUMN_TYPES);
+ String nextPartitionColumns = partProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS);
+ String nextPartitionTypes = partProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES);
+ if (!columns.equalsIgnoreCase(nextColumns)) {
+ LOG.info(
+ String.format("Could not vectorize partition %s. Its column names %s do not match the other column names %s",
+ entry.getKey(), nextColumns, columns));
+ return false;
+ }
+ if (!types.equalsIgnoreCase(nextTypes)) {
+ LOG.info(
+ String.format("Could not vectorize partition %s. Its column types %s do not match the other column types %s",
+ entry.getKey(), nextTypes, types));
+ return false;
+ }
+ if (!partitionColumns.equalsIgnoreCase(nextPartitionColumns)) {
+ LOG.info(
+ String.format("Could not vectorize partition %s. Its partition column names %s do not match the other partition column names %s",
+ entry.getKey(), nextPartitionColumns, partitionColumns));
+ return false;
+ }
+ if (!partitionTypes.equalsIgnoreCase(nextPartitionTypes)) {
+ LOG.info(
+ String.format("Could not vectorize partition %s. Its partition column types %s do not match the other partition column types %s",
+ entry.getKey(), nextPartitionTypes, partitionTypes));
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
+ private boolean validateMapJoinOperator(MapJoinOperator op) {
+ MapJoinDesc desc = op.getConf();
+ return validateMapJoinDesc(desc);
+ }
+
+ private boolean validateMapJoinDesc(MapJoinDesc desc) {
+ byte posBigTable = (byte) desc.getPosBigTable();
+ List<ExprNodeDesc> filterExprs = desc.getFilters().get(posBigTable);
+ if (!validateExprNodeDesc(filterExprs, VectorExpressionDescriptor.Mode.FILTER)) {
+ LOG.info("Cannot vectorize map work filter expression");
+ return false;
+ }
+ List<ExprNodeDesc> keyExprs = desc.getKeys().get(posBigTable);
+ if (!validateExprNodeDesc(keyExprs)) {
+ LOG.info("Cannot vectorize map work key expression");
+ return false;
+ }
+ List<ExprNodeDesc> valueExprs = desc.getExprs().get(posBigTable);
+ if (!validateExprNodeDesc(valueExprs)) {
+ LOG.info("Cannot vectorize map work value expression");
+ return false;
+ }
+ return true;
+ }
+
+ private boolean validateSparkHashTableSinkOperator(SparkHashTableSinkOperator op) {
+ SparkHashTableSinkDesc desc = op.getConf();
+ byte tag = desc.getTag();
+ // it's essentially a MapJoinDesc
+ List<ExprNodeDesc> filterExprs = desc.getFilters().get(tag);
+ List<ExprNodeDesc> keyExprs = desc.getKeys().get(tag);
+ List<ExprNodeDesc> valueExprs = desc.getExprs().get(tag);
+ return validateExprNodeDesc(filterExprs, VectorExpressionDescriptor.Mode.FILTER) &&
+ validateExprNodeDesc(keyExprs) && validateExprNodeDesc(valueExprs);
+ }
+
+ private boolean validateReduceSinkOperator(ReduceSinkOperator op) {
+ List<ExprNodeDesc> keyDescs = op.getConf().getKeyCols();
+ List<ExprNodeDesc> partitionDescs = op.getConf().getPartitionCols();
+ List<ExprNodeDesc> valueDesc = op.getConf().getValueCols();
+ return validateExprNodeDesc(keyDescs) && validateExprNodeDesc(partitionDescs) &&
+ validateExprNodeDesc(valueDesc);
+ }
+
+ private boolean validateSelectOperator(SelectOperator op) {
+ List<ExprNodeDesc> descList = op.getConf().getColList();
+ for (ExprNodeDesc desc : descList) {
+ boolean ret = validateExprNodeDesc(desc);
+ if (!ret) {
+ LOG.info("Cannot vectorize select expression: " + desc.toString());
+ return false;
+ }
+ }
+ return true;
+ }
+
+ private boolean validateFilterOperator(FilterOperator op) {
+ ExprNodeDesc desc = op.getConf().getPredicate();
+ return validateExprNodeDesc(desc, VectorExpressionDescriptor.Mode.FILTER);
+ }
+
+ private boolean validateGroupByOperator(GroupByOperator op, boolean isReduce, boolean isTez) {
+ GroupByDesc desc = op.getConf();
+ VectorGroupByDesc vectorDesc = desc.getVectorDesc();
+
+ if (desc.isGroupingSetsPresent()) {
+ LOG.info("Grouping sets not supported in vector mode");
+ return false;
+ }
+ if (desc.pruneGroupingSetId()) {
+ LOG.info("Pruning grouping set id not supported in vector mode");
+ return false;
+ }
+ boolean ret = validateExprNodeDesc(desc.getKeys());
+ if (!ret) {
+ LOG.info("Cannot vectorize groupby key expression");
+ return false;
+ }
+
+ if (!isReduce) {
+
+ // MapWork
+
+ ret = validateHashAggregationDesc(desc.getAggregators());
+ if (!ret) {
+ return false;
+ }
+ } else {
+
+ // ReduceWork
+
+ boolean isComplete = desc.getMode() == GroupByDesc.Mode.COMPLETE;
+ if (desc.getMode() != GroupByDesc.Mode.HASH) {
+
+ // Reduce Merge-Partial GROUP BY.
+
+ // A merge-partial GROUP BY is fed by grouping by keys from reduce-shuffle. It is the
+ // first (or root) operator for its reduce task.
+ // TODO: Technically, we should also handle the FINAL, PARTIAL1, PARTIAL2 and PARTIALS
+ // modes that are neither hash nor complete but are not merge-partial either.
+
+ if (desc.isDistinct()) {
+ LOG.info("Vectorized Reduce MergePartial GROUP BY does not support DISTINCT");
+ return false;
+ }
+
+ boolean hasKeys = (desc.getKeys().size() > 0);
+
+ // Do we support merge-partial aggregation AND the output is primitive?
+ ret = validateReduceMergePartialAggregationDesc(desc.getAggregators(), hasKeys);
+ if (!ret) {
+ return false;
+ }
+
+ if (hasKeys) {
+ if (op.getParentOperators().size() > 0 && !isComplete) {
+ LOG.info("Vectorized Reduce MergePartial GROUP BY keys can only handle a key group when it is fed by reduce-shuffle");
+ return false;
+ }
+
+ LOG.info("Vectorized Reduce MergePartial GROUP BY will process key groups");
+
+ // Primitive output validation above means we can output VectorizedRowBatch to the
+ // children operators.
+ vectorDesc.setVectorOutput(true);
+ } else {
+ LOG.info("Vectorized Reduce MergePartial GROUP BY will do global aggregation");
+ }
+ if (!isComplete) {
+ vectorDesc.setIsReduceMergePartial(true);
+ } else {
+ vectorDesc.setIsReduceStreaming(true);
+ }
+ } else {
+
+ // Reduce Hash GROUP BY or global aggregation.
+
+ ret = validateHashAggregationDesc(desc.getAggregators());
+ if (!ret) {
+ return false;
+ }
+ }
+ }
+
+ return true;
+ }
+
+ private boolean validateFileSinkOperator(FileSinkOperator op) {
+ return true;
+ }
+
+ private boolean validateExprNodeDesc(List<ExprNodeDesc> descs) {
+ return validateExprNodeDesc(descs, VectorExpressionDescriptor.Mode.PROJECTION);
+ }
+
+ private boolean validateExprNodeDesc(List<ExprNodeDesc> descs,
+ VectorExpressionDescriptor.Mode mode) {
+ for (ExprNodeDesc d : descs) {
+ boolean ret = validateExprNodeDesc(d, mode);
+ if (!ret) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ private boolean validateHashAggregationDesc(List<AggregationDesc> descs) {
+ return validateAggregationDesc(descs, /* isReduceMergePartial */ false, false);
+ }
+
+ private boolean validateReduceMergePartialAggregationDesc(List<AggregationDesc> descs, boolean hasKeys) {
+ return validateAggregationDesc(descs, /* isReduceMergePartial */ true, hasKeys);
+ }
+
+ private boolean validateAggregationDesc(List<AggregationDesc> descs, boolean isReduceMergePartial, boolean hasKeys) {
+ for (AggregationDesc d : descs) {
+ boolean ret = validateAggregationDesc(d, isReduceMergePartial, hasKeys);
+ if (!ret) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ private boolean validateExprNodeDescRecursive(ExprNodeDesc desc, VectorExpressionDescriptor.Mode mode) {
+ if (desc instanceof ExprNodeColumnDesc) {
+ ExprNodeColumnDesc c = (ExprNodeColumnDesc) desc;
+ // Currently, we do not support vectorized virtual columns (see HIVE-5570).
+ if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(c.getColumn())) {
+ LOG.info("Cannot vectorize virtual column " + c.getColumn());
+ return false;
+ }
+ }
+ String typeName = desc.getTypeInfo().getTypeName();
+ boolean ret = validateDataType(typeName, mode);
+ if (!ret) {
+ LOG.info("Cannot vectorize " + desc.toString() + " of type " + typeName);
+ return false;
+ }
+ if (desc instanceof ExprNodeGenericFuncDesc) {
+ ExprNodeGenericFuncDesc d = (ExprNodeGenericFuncDesc) desc;
+ boolean r = validateGenericUdf(d);
+ if (!r) {
+ LOG.info("Cannot vectorize UDF " + d);
+ return false;
+ }
+ }
+ if (desc.getChildren() != null) {
+ for (ExprNodeDesc d: desc.getChildren()) {
+ // Don't restrict child expressions for projection. Always use looser FILTER mode.
+ boolean r = validateExprNodeDescRecursive(d, VectorExpressionDescriptor.Mode.FILTER);
+ if (!r) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
+ private boolean validateExprNodeDesc(ExprNodeDesc desc) {
+ return validateExprNodeDesc(desc, VectorExpressionDescriptor.Mode.PROJECTION);
+ }
+
+ boolean validateExprNodeDesc(ExprNodeDesc desc, VectorExpressionDescriptor.Mode mode) {
+ if (!validateExprNodeDescRecursive(desc, mode)) {
+ return false;
+ }
+ try {
+ VectorizationContext vc = new ValidatorVectorizationContext();
+ if (vc.getVectorExpression(desc, mode) == null) {
+ // TODO: this cannot happen - VectorizationContext throws in such cases.
+ LOG.info("getVectorExpression returned null");
+ return false;
+ }
+ } catch (Exception e) {
+ LOG.info("Failed to vectorize", e);
+ return false;
+ }
+ return true;
+ }
+
+ private boolean validateGenericUdf(ExprNodeGenericFuncDesc genericUDFExpr) {
+ if (VectorizationContext.isCustomUDF(genericUDFExpr)) {
+ return true;
+ }
+ GenericUDF genericUDF = genericUDFExpr.getGenericUDF();
+ if (genericUDF instanceof GenericUDFBridge) {
+ Class<? extends UDF> udf = ((GenericUDFBridge) genericUDF).getUdfClass();
+ return supportedGenericUDFs.contains(udf);
+ } else {
+ return supportedGenericUDFs.contains(genericUDF.getClass());
+ }
+ }
+
+ private boolean validateAggregationIsPrimitive(VectorAggregateExpression vectorAggrExpr) {
+ ObjectInspector outputObjInspector = vectorAggrExpr.getOutputObjectInspector();
+ return (outputObjInspector.getCategory() == ObjectInspector.Category.PRIMITIVE);
+ }
+
+ private boolean validateAggregationDesc(AggregationDesc aggDesc, boolean isReduceMergePartial,
+ boolean hasKeys) {
+
+ String udfName = aggDesc.getGenericUDAFName().toLowerCase();
+ if (!supportedAggregationUdfs.contains(udfName)) {
+ LOG.info("Cannot vectorize groupby aggregate expression: UDF " + udfName + " not supported");
+ return false;
+ }
+ if (aggDesc.getParameters() != null && !validateExprNodeDesc(aggDesc.getParameters())) {
+ LOG.info("Cannot vectorize groupby aggregate expression: UDF parameters not supported");
+ return false;
+ }
+
+ // See if we can vectorize the aggregation.
+ VectorizationContext vc = new ValidatorVectorizationContext();
+ VectorAggregateExpression vectorAggrExpr;
+ try {
+ vectorAggrExpr = vc.getAggregatorExpression(aggDesc, isReduceMergePartial);
+ } catch (Exception e) {
+ // The validation above should guarantee that getting the aggregator expression succeeds.
+ LOG.info("Vectorization of aggregation should have succeeded ", e);
+ return false;
+ }
+
+ if (isReduceMergePartial && hasKeys && !validateAggregationIsPrimitive(vectorAggrExpr)) {
+ LOG.info("Vectorized Reduce MergePartial GROUP BY keys can only handle aggregate outputs that are primitive types");
+ return false;
+ }
+
+ return true;
+ }
+
+ private boolean validateDataType(String type, VectorExpressionDescriptor.Mode mode) {
+ type = type.toLowerCase();
+ boolean result = supportedDataTypesPattern.matcher(type).matches();
+ if (result && mode == VectorExpressionDescriptor.Mode.PROJECTION && type.equals("void")) {
+ return false;
+ }
+ return result;
+ }
+
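+ // Build the initial vectorization context from the row schema, skipping virtual
+ // columns and recording each projected column's type by position.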
+ private VectorizationContext getVectorizationContext(RowSchema rowSchema, String contextName,
+ Map<Integer, String> typeNameMap) {
+
+ VectorizationContext vContext = new VectorizationContext(contextName);
+
+ // Add all non-virtual columns to make a vectorization context for
+ // the TableScan operator.
+ int i = 0;
+ for (ColumnInfo c : rowSchema.getSignature()) {
+ // Earlier, validation code should have eliminated virtual columns usage (HIVE-5560).
+ if (!isVirtualColumn(c)) {
+ vContext.addInitialColumn(c.getInternalName());
+ typeNameMap.put(i, c.getTypeName());
+ i++;
+ }
+ }
+ vContext.finishedAddingInitialColumns();
+
+ return vContext;
+ }
+
+ private void fixupParentChildOperators(Operator<? extends OperatorDesc> op,
+ Operator<? extends OperatorDesc> vectorOp) {
+ if (op.getParentOperators() != null) {
+ vectorOp.setParentOperators(op.getParentOperators());
+ for (Operator<? extends OperatorDesc> p : op.getParentOperators()) {
+ p.replaceChild(op, vectorOp);
+ }
+ }
+ if (op.getChildOperators() != null) {
+ vectorOp.setChildOperators(op.getChildOperators());
+ for (Operator<? extends OperatorDesc> c : op.getChildOperators()) {
+ c.replaceParent(op, vectorOp);
+ }
+ }
+ }
+
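+ // True when the join result needs nothing deserialized from the small-table value rows,
+ // which allows the hash multiset/set specializations for inner joins.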
+ private boolean isBigTableOnlyResults(MapJoinDesc desc) {
+ Byte[] order = desc.getTagOrder();
+ byte posBigTable = (byte) desc.getPosBigTable();
+ Byte posSingleVectorMapJoinSmallTable = (order[0] == posBigTable ? order[1] : order[0]);
+
+ int[] smallTableIndices;
+ int smallTableIndicesSize;
+ if (desc.getValueIndices() != null && desc.getValueIndices().get(posSingleVectorMapJoinSmallTable) != null) {
+ smallTableIndices = desc.getValueIndices().get(posSingleVectorMapJoinSmallTable);
+ LOG.info("Vectorizer isBigTableOnlyResults smallTableIndices " + Arrays.toString(smallTableIndices));
+ smallTableIndicesSize = smallTableIndices.length;
+ } else {
+ smallTableIndices = null;
+ LOG.info("Vectorizer isBigTableOnlyResults smallTableIndices EMPTY");
+ smallTableIndicesSize = 0;
+ }
+
+ List<Integer> smallTableRetainList = desc.getRetainList().get(posSingleVectorMapJoinSmallTable);
+ LOG.info("Vectorizer isBigTableOnlyResults smallTableRetainList " + smallTableRetainList);
+ int smallTableRetainSize = smallTableRetainList.size();
+
+ if (smallTableIndicesSize > 0) {
+ // Small table indices has priority over retain.
+ for (int i = 0; i < smallTableIndicesSize; i++) {
+ if (smallTableIndices[i] < 0) {
+ // Negative numbers indicate a column to be (deserialize) read from the small table's
+ // LazyBinary value row.
+ LOG.info("Vectorizer isBigTableOnlyResults smallTableIndices[i] < 0 returning false");
+ return false;
+ }
+ }
+ } else if (smallTableRetainSize > 0) {
+ LOG.info("Vectorizer isBigTableOnlyResults smallTableRetainSize > 0 returning false");
+ return false;
+ }
+
+ LOG.info("Vectorizer isBigTableOnlyResults returning true");
+ return true;
+ }
+
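+ // Pick the native vector map join class from the join type, the (single-column or
+ // multi-key) key type, and whether only big-table columns are produced, then record
+ // the choices in the VectorMapJoinDesc.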
+ Operator<? extends OperatorDesc> specializeMapJoinOperator(Operator<? extends OperatorDesc> op,
+ VectorizationContext vContext, MapJoinDesc desc) throws HiveException {
+ Operator<? extends OperatorDesc> vectorOp = null;
+ Class<? extends Operator<?>> opClass = null;
+
+ VectorMapJoinDesc.HashTableImplementationType hashTableImplementationType = HashTableImplementationType.NONE;
+ VectorMapJoinDesc.HashTableKind hashTableKind = HashTableKind.NONE;
+ VectorMapJoinDesc.HashTableKeyType hashTableKeyType = HashTableKeyType.NONE;
+
+ if (HiveConf.getBoolVar(hiveConf,
+ HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_FAST_HASHTABLE_ENABLED)) {
+ hashTableImplementationType = HashTableImplementationType.FAST;
+ } else {
+ // Restrict to using BytesBytesMultiHashMap via MapJoinBytesTableContainer or
+ // HybridHashTableContainer.
+ hashTableImplementationType = HashTableImplementationType.OPTIMIZED;
+ }
+
+ int joinType = desc.getConds()[0].getType();
+
+ boolean isInnerBigOnly = false;
+ if (joinType == JoinDesc.INNER_JOIN && isBigTableOnlyResults(desc)) {
+ isInnerBigOnly = true;
+ }
+
+ // By default, we can always use the multi-key class.
+ hashTableKeyType = HashTableKeyType.MULTI_KEY;
+
+ if (!HiveConf.getBoolVar(hiveConf,
+ HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_MULTIKEY_ONLY_ENABLED)) {
+
+ // Look for single column optimization.
+ byte posBigTable = (byte) desc.getPosBigTable();
+ Map<Byte, List<ExprNodeDesc>> keyExprs = desc.getKeys();
+ List<ExprNodeDesc> bigTableKeyExprs = keyExprs.get(posBigTable);
+ if (bigTableKeyExprs.size() == 1) {
+ String typeName = bigTableKeyExprs.get(0).getTypeString();
+ LOG.info("Vectorizer vectorizeOperator map join typeName " + typeName);
+ if (typeName.equals("boolean")) {
+ hashTableKeyType = HashTableKeyType.BOOLEAN;
+ } else if (typeName.equals("tinyint")) {
+ hashTableKeyType = HashTableKeyType.BYTE;
+ } else if (typeName.equals("smallint")) {
+ hashTableKeyType = HashTableKeyType.SHORT;
+ } else if (typeName.equals("int")) {
+ hashTableKeyType = HashTableKeyType.INT;
+ } else if (typeName.equals("bigint") || typeName.equals("long")) {
+ hashTableKeyType = HashTableKeyType.LONG;
+ } else if (VectorizationContext.isStringFamily(typeName)) {
+ hashTableKeyType = HashTableKeyType.STRING;
+ }
+ }
+ }
+
+ switch (joinType) {
+ case JoinDesc.INNER_JOIN:
+ if (!isInnerBigOnly) {
+ hashTableKind = HashTableKind.HASH_MAP;
+ } else {
+ hashTableKind = HashTableKind.HASH_MULTISET;
+ }
+ break;
+ case JoinDesc.LEFT_OUTER_JOIN:
+ case JoinDesc.RIGHT_OUTER_JOIN:
+ hashTableKind = HashTableKind.HASH_MAP;
+ break;
+ case JoinDesc.LEFT_SEMI_JOIN:
+ hashTableKind = HashTableKind.HASH_SET;
+ break;
+ default:
+ throw new HiveException("Unknown join type " + joinType);
+ }
+
+ LOG.info("Vectorizer vectorizeOperator map join hashTableKind " + hashTableKind.name() + " hashTableKeyType " + hashTableKeyType.name());
+
+ switch (hashTableKeyType) {
+ case BOOLEAN:
+ case BYTE:
+ case SHORT:
+ case INT:
+ case LONG:
+ switch (joinType) {
+ case JoinDesc.INNER_JOIN:
+ if (!isInnerBigOnly) {
+ opClass = VectorMapJoinInnerLongOperator.class;
+ } else {
+ opClass = VectorMapJoinInnerBigOnlyLongOperator.class;
+ }
+ break;
+ case JoinDesc.LEFT_OUTER_JOIN:
+ case JoinDesc.RIGHT_OUTER_JOIN:
+ opClass = VectorMapJoinOuterLongOperator.class;
+ break;
+ case JoinDesc.LEFT_SEMI_JOIN:
+ opClass = VectorMapJoinLeftSemiLongOperator.class;
+ break;
+ default:
+ throw new HiveException("Unknown join type " + joinType);
+ }
+ break;
+ case STRING:
+ switch (joinType) {
+ case JoinDesc.INNER_JOIN:
+ if (!isInnerBigOnly) {
+ opClass = VectorMapJoinInnerStringOperator.class;
+ } else {
+ opClass = VectorMapJoinInnerBigOnlyStringOperator.class;
+ }
+ break;
+ case JoinDesc.LEFT_OUTER_JOIN:
+ case JoinDesc.RIGHT_OUTER_JOIN:
+ opClass = VectorMapJoinOuterStringOperator.class;
+ break;
+ case JoinDesc.LEFT_SEMI_JOIN:
+ opClass = VectorMapJoinLeftSemiStringOperator.class;
+ break;
+ default:
+ throw new HiveException("Unknown join type " + joinType);
+ }
+ break;
+ case MULTI_KEY:
+ switch (joinType) {
+ case JoinDesc.INNER_JOIN:
+ if (!isInnerBigOnly) {
+ opClass = VectorMapJoinInnerMultiKeyOperator.class;
+ } else {
+ opClass = VectorMapJoinInnerBigOnlyMultiKeyOperator.class;
+ }
+ break;
+ case JoinDesc.LEFT_OUTER_JOIN:
+ case JoinDesc.RIGHT_OUTER_JOIN:
+ opClass = VectorMapJoinOuterMultiKeyOperator.class;
+ break;
+ case JoinDesc.LEFT_SEMI_JOIN:
+ opClass = VectorMapJoinLeftSemiMultiKeyOperator.class;
+ break;
+ default:
+ throw new HiveException("Unknown join type " + joinType);
+ }
+ break;
+ }
+
+ vectorOp = OperatorFactory.getVectorOperator(opClass, op.getConf(), vContext);
+ LOG.info("Vectorizer vectorizeOperator map join class " + vectorOp.getClass().getSimpleName());
+
+ boolean minMaxEnabled = HiveConf.getBoolVar(hiveConf,
+ HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_MINMAX_ENABLED);
+
+ VectorMapJoinDesc vectorDesc = desc.getVectorDesc();
+ vectorDesc.setHashTableImplementationType(hashTableImplementationType);
+ vectorDesc.setHashTableKind(hashTableKind);
+ vectorDesc.setHashTableKeyType(hashTableKeyType);
+ vectorDesc.setMinMaxEnabled(minMaxEnabled);
+ return vectorOp;
+ }
+
+ private boolean onExpressionHasNullSafes(MapJoinDesc desc) {
+ boolean[] nullSafes = desc.getNullSafes();
+ for (boolean nullSafe : nullSafes) {
+ if (nullSafe) {
+ return true;
+ }
+ }
+ return false;
+ }
+
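+ // Native vector map join is currently limited to Tez, single-condition (non-N-way)
+ // joins without null-safe equality, with extra checks per hash table implementation.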
+ private boolean canSpecializeMapJoin(Operator<? extends OperatorDesc> op, MapJoinDesc desc,
+ boolean isTez) {
+
+ boolean specialize = false;
+
+ if (op instanceof MapJoinOperator &&
+ HiveConf.getBoolVar(hiveConf,
+ HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_ENABLED)) {
+
+ // Currently, only under Tez and non-N-way joins.
+ if (isTez && desc.getConds().length == 1 && !onExpressionHasNullSafes(desc)) {
+
+ // Ok, all basic restrictions satisfied so far...
+ specialize = true;
+
+ if (!HiveConf.getBoolVar(hiveConf,
+ HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_FAST_HASHTABLE_ENABLED)) {
+
+ // Since we are using the optimized hash table, we have further
+ // restrictions (the optimized-table setting and the key types).
+
+ if (!HiveConf.getBoolVar(hiveConf,
+ HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE)) {
+ specialize = false;
+ } else {
+ byte posBigTable = (byte) desc.getPosBigTable();
+ Map<Byte, List<ExprNodeDesc>> keyExprs = desc.getKeys();
+ List<ExprNodeDesc> bigTableKeyExprs = keyExprs.get(posBigTable);
+ for (ExprNodeDesc exprNodeDesc : bigTableKeyExprs) {
+ String typeName = exprNodeDesc.getTypeString();
+ if (!MapJoinKey.isSupportedField(typeName)) {
+ specialize = false;
+ break;
+ }
+ }
+ }
+ } else {
+
+ // With the fast hash table implementation, we currently do not support
+ // Hybrid Grace Hash Join.
+
+ if (HiveConf.getBoolVar(hiveConf,
+ HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN)) {
+ specialize = false;
+ }
+ }
+ }
+ }
+ return specialize;
+ }
+
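+ // Replace a validated operator with its vectorized counterpart; MAPJOIN may be
+ // specialized to a native class, other supported types go through OperatorFactory.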
+ Operator<? extends OperatorDesc> vectorizeOperator(Operator<? extends OperatorDesc> op,
+ VectorizationContext vContext, boolean isTez) throws HiveException {
+ Operator<? extends OperatorDesc> vectorOp = null;
+
+ switch (op.getType()) {
+ case MAPJOIN:
+ {
+ MapJoinDesc desc = (MapJoinDesc) op.getConf();
+ boolean specialize = canSpecializeMapJoin(op, desc, isTez);
+
+ if (!specialize) {
+
+ Class<? extends Operator<?>> opClass = null;
+ if (op instanceof MapJoinOperator) {
+
+ // *NON-NATIVE* vector map join: LEFT OUTER JOIN with big-table filters needs its own class...
+
+ List<ExprNodeDesc> bigTableFilters = desc.getFilters().get((byte) desc.getPosBigTable());
+ boolean isOuterAndFiltered = (!desc.isNoOuterJoin() && bigTableFilters.size() > 0);
+ if (!isOuterAndFiltered) {
+ opClass = VectorMapJoinOperator.class;
+ } else {
+ opClass = VectorMapJoinOuterFilteredOperator.class;
+ }
+ } else if (op instanceof SMBMapJoinOperator) {
+ opClass = VectorSMBMapJoinOperator.class;
+ }
+
+ vectorOp = OperatorFactory.getVectorOperator(opClass, op.getConf(), vContext);
+
+ } else {
+
+ // TEMPORARY Until Native Vector Map Join with Hybrid passes tests...
+ // HiveConf.setBoolVar(physicalContext.getConf(),
+ // HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN, false);
+
+ vectorOp = specializeMapJoinOperator(op, vContext, desc);
+ }
+ }
+ break;
+ case GROUPBY:
+ case FILTER:
+ case SELECT:
+ case FILESINK:
+ case REDUCESINK:
+ case LIMIT:
+ case EXTRACT:
+ case EVENT:
+ case HASHTABLESINK:
+ vectorOp = OperatorFactory.getVectorOperator(op.getConf(), vContext);
+ break;
+ default:
+ vectorOp = op;
+ break;
+ }
+
+ LOG.info("vectorizeOperator " + (vectorOp == null ? "NULL" : vectorOp.getClass().getName()));
+ LOG.info("vectorizeOperator " + (vectorOp == null || vectorOp.getConf() == null ? "NULL" : vectorOp.getConf().getClass().getName()));
+
+ if (vectorOp != op) {
+ fixupParentChildOperators(op, vectorOp);
+ ((AbstractOperatorDesc) vectorOp.getConf()).setVectorMode(true);
+ }
+ return vectorOp;
+ }
+
+ private boolean isVirtualColumn(ColumnInfo column) {
+
+ // Not using method column.getIsVirtualCol() because partitioning columns are also
+ // treated as virtual columns in ColumnInfo.
+ if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(column.getInternalName())) {
+ return true;
+ }
+ return false;
+ }
+
+ public void debugDisplayAllMaps(BaseWork work) {
+
+ Map<String, Integer> columnNameMap = work.getVectorColumnNameMap();
+ Map<Integer, String> columnTypeMap = work.getVectorColumnTypeMap();
+ Map<Integer, String> scratchColumnTypeMap = work.getVectorScratchColumnTypeMap();
+
+ LOG.debug("debugDisplayAllMaps columnNameMap " + columnNameMap.toString());
+ LOG.debug("debugDisplayAllMaps columnTypeMap " + columnTypeMap.toString());
+ LOG.debug("debugDisplayAllMaps scratchColumnTypeMap " + scratchColumnTypeMap.toString());
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/7cfe3743/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java.rej
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java.rej b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java.rej
new file mode 100644
index 0000000..5a10b58
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java.rej
@@ -0,0 +1,86 @@
+***************
+*** 1255,1272 ****
+ LOG.info("Cannot vectorize " + desc.toString() + " of type " + typeName);
+ return false;
+ }
+ if (desc instanceof ExprNodeGenericFuncDesc) {
+ ExprNodeGenericFuncDesc d = (ExprNodeGenericFuncDesc) desc;
+ boolean r = validateGenericUdf(d);
+ if (!r) {
+ return false;
+ }
+ }
+ if (desc.getChildren() != null) {
+- for (ExprNodeDesc d: desc.getChildren()) {
+- // Don't restrict child expressions for projection. Always use looser FILTER mode.
+- boolean r = validateExprNodeDescRecursive(d, VectorExpressionDescriptor.Mode.FILTER);
+- if (!r) {
+ return false;
+ }
+ }
+--- 1265,1329 ----
+ LOG.info("Cannot vectorize " + desc.toString() + " of type " + typeName);
+ return false;
+ }
++ boolean isInExpression = false;
+ if (desc instanceof ExprNodeGenericFuncDesc) {
+ ExprNodeGenericFuncDesc d = (ExprNodeGenericFuncDesc) desc;
+ boolean r = validateGenericUdf(d);
+ if (!r) {
+ return false;
+ }
++ GenericUDF genericUDF = d.getGenericUDF();
++ isInExpression = (genericUDF instanceof GenericUDFIn);
+ }
+ if (desc.getChildren() != null) {
++ if (isInExpression &&
++ desc.getChildren().get(0).getTypeInfo().getCategory() == Category.STRUCT) {
++ if (!validateStructInExpression(desc, VectorExpressionDescriptor.Mode.FILTER)) {
++ return false;
++ }
++ } else {
++ for (ExprNodeDesc d: desc.getChildren()) {
++ // Don't restrict child expressions for projection. Always use looser FILTER mode.
++ boolean r = validateExprNodeDescRecursive(d, VectorExpressionDescriptor.Mode.FILTER);
++ if (!r) {
++ return false;
++ }
++ }
++ }
++ }
++ return true;
++ }
++
++ private boolean validateStructInExpression(ExprNodeDesc desc,
++ VectorExpressionDescriptor.Mode mode) {
++
++ for (ExprNodeDesc d: desc.getChildren()) {
++ TypeInfo typeInfo = d.getTypeInfo();
++ if (typeInfo.getCategory() != Category.STRUCT){
++ return false;
++ }
++ StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
++
++ ArrayList<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
++ ArrayList<String> fieldNames = structTypeInfo.getAllStructFieldNames();
++ final int fieldCount = fieldTypeInfos.size();
++ for (int f = 0; f < fieldCount; f++) {
++ TypeInfo fieldTypeInfo = fieldTypeInfos.get(f);
++ Category category = fieldTypeInfo.getCategory();
++ if (category != Category.PRIMITIVE){
++ LOG.info("Cannot vectorize struct field " + fieldNames.get(f) +
++ " of type " + fieldTypeInfo.getTypeName());
++ return false;
++ }
++ PrimitiveTypeInfo fieldPrimitiveTypeInfo = (PrimitiveTypeInfo) fieldTypeInfo;
++ InConstantType inConstantType =
++ VectorizationContext.getInConstantTypeFromPrimitiveCategory(
++ fieldPrimitiveTypeInfo.getPrimitiveCategory());
++
++ // For now, limit the data types we support for Vectorized Struct IN().
++ if (inConstantType != InConstantType.INT_FAMILY &&
++ inConstantType != InConstantType.FLOAT_FAMILY &&
++ inConstantType != InConstantType.STRING_FAMILY) {
++ LOG.info("Cannot vectorize struct field " + fieldNames.get(f) +
++ " of type " + fieldTypeInfo.getTypeName());
+ return false;
+ }
+ }
http://git-wip-us.apache.org/repos/asf/hive/blob/7cfe3743/ql/src/test/queries/clientpositive/vector_struct_in.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_struct_in.q b/ql/src/test/queries/clientpositive/vector_struct_in.q
new file mode 100644
index 0000000..0e3a4ca
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_struct_in.q
@@ -0,0 +1,247 @@
+set hive.cbo.enable=false;
+set hive.tez.dynamic.partition.pruning=false;
+set hive.vectorized.execution.enabled=true;
+SET hive.auto.convert.join=true;
+
+-- SORT_QUERY_RESULTS
+
+-- 2 Strings
+create table test_1 (`id` string, `lineid` string) stored as orc;
+
+insert into table test_1 values ('one','1'), ('seven','1');
+
+explain
+select * from test_1 where struct(`id`, `lineid`)
+IN (
+struct('two','3'),
+struct('three','1'),
+struct('one','1'),
+struct('five','2'),
+struct('six','1'),
+struct('eight','1'),
+struct('seven','1'),
+struct('nine','1'),
+struct('ten','1')
+);
+
+select * from test_1 where struct(`id`, `lineid`)
+IN (
+struct('two','3'),
+struct('three','1'),
+struct('one','1'),
+struct('five','2'),
+struct('six','1'),
+struct('eight','1'),
+struct('seven','1'),
+struct('nine','1'),
+struct('ten','1')
+);
+
+explain
+select `id`, `lineid`, struct(`id`, `lineid`)
+IN (
+struct('two','3'),
+struct('three','1'),
+struct('one','1'),
+struct('five','2'),
+struct('six','1'),
+struct('eight','1'),
+struct('seven','1'),
+struct('nine','1'),
+struct('ten','1')
+) as b from test_1 ;
+
+select `id`, `lineid`, struct(`id`, `lineid`)
+IN (
+struct('two','3'),
+struct('three','1'),
+struct('one','1'),
+struct('five','2'),
+struct('six','1'),
+struct('eight','1'),
+struct('seven','1'),
+struct('nine','1'),
+struct('ten','1')
+) as b from test_1 ;
+
+
+-- 2 Integers
+create table test_2 (`id` int, `lineid` int) stored as orc;
+
+insert into table test_2 values (1,1), (7,1);
+
+explain
+select * from test_2 where struct(`id`, `lineid`)
+IN (
+struct(2,3),
+struct(3,1),
+struct(1,1),
+struct(5,2),
+struct(6,1),
+struct(8,1),
+struct(7,1),
+struct(9,1),
+struct(10,1)
+);
+
+select * from test_2 where struct(`id`, `lineid`)
+IN (
+struct(2,3),
+struct(3,1),
+struct(1,1),
+struct(5,2),
+struct(6,1),
+struct(8,1),
+struct(7,1),
+struct(9,1),
+struct(10,1)
+);
+
+explain
+select `id`, `lineid`, struct(`id`, `lineid`)
+IN (
+struct(2,3),
+struct(3,1),
+struct(1,1),
+struct(5,2),
+struct(6,1),
+struct(8,1),
+struct(7,1),
+struct(9,1),
+struct(10,1)
+) as b from test_2;
+
+select `id`, `lineid`, struct(`id`, `lineid`)
+IN (
+struct(2,3),
+struct(3,1),
+struct(1,1),
+struct(5,2),
+struct(6,1),
+struct(8,1),
+struct(7,1),
+struct(9,1),
+struct(10,1)
+) as b from test_2;
+
+-- 1 String and 1 Integer
+create table test_3 (`id` string, `lineid` int) stored as orc;
+
+insert into table test_3 values ('one',1), ('seven',1);
+
+explain
+select * from test_3 where struct(`id`, `lineid`)
+IN (
+struct('two',3),
+struct('three',1),
+struct('one',1),
+struct('five',2),
+struct('six',1),
+struct('eight',1),
+struct('seven',1),
+struct('nine',1),
+struct('ten',1)
+);
+
+select * from test_3 where struct(`id`, `lineid`)
+IN (
+struct('two',3),
+struct('three',1),
+struct('one',1),
+struct('five',2),
+struct('six',1),
+struct('eight',1),
+struct('seven',1),
+struct('nine',1),
+struct('ten',1)
+);
+
+explain
+select `id`, `lineid`, struct(`id`, `lineid`)
+IN (
+struct('two',3),
+struct('three',1),
+struct('one',1),
+struct('five',2),
+struct('six',1),
+struct('eight',1),
+struct('seven',1),
+struct('nine',1),
+struct('ten',1)
+) as b from test_3;
+
+select `id`, `lineid`, struct(`id`, `lineid`)
+IN (
+struct('two',3),
+struct('three',1),
+struct('one',1),
+struct('five',2),
+struct('six',1),
+struct('eight',1),
+struct('seven',1),
+struct('nine',1),
+struct('ten',1)
+) as b from test_3;
+
+-- 1 Integer and 1 String and 1 Double
+create table test_4 (`my_bigint` bigint, `my_string` string, `my_double` double) stored as orc;
+
+insert into table test_4 values (1, "b", 1.5), (1, "a", 0.5), (2, "b", 1.5);
+
+explain
+select * from test_4 where struct(`my_bigint`, `my_string`, `my_double`)
+IN (
+struct(1L, "a", 1.5),
+struct(1L, "b", -0.5),
+struct(3L, "b", 1.5),
+struct(1L, "d", 1.5),
+struct(1L, "c", 1.5),
+struct(1L, "b", 2.5),
+struct(1L, "b", 0.5),
+struct(5L, "b", 1.5),
+struct(1L, "a", 0.5),
+struct(3L, "b", 1.5)
+);
+
+select * from test_4 where struct(`my_bigint`, `my_string`, `my_double`)
+IN (
+struct(1L, "a", 1.5),
+struct(1L, "b", -0.5),
+struct(3L, "b", 1.5),
+struct(1L, "d", 1.5),
+struct(1L, "c", 1.5),
+struct(1L, "b", 2.5),
+struct(1L, "b", 0.5),
+struct(5L, "b", 1.5),
+struct(1L, "a", 0.5),
+struct(3L, "b", 1.5)
+);
+
+explain
+select `my_bigint`, `my_string`, `my_double`, struct(`my_bigint`, `my_string`, `my_double`)
+IN (
+struct(1L, "a", 1.5),
+struct(1L, "b", -0.5),
+struct(3L, "b", 1.5),
+struct(1L, "d", 1.5),
+struct(1L, "c", 1.5),
+struct(1L, "b", 2.5),
+struct(1L, "b", 0.5),
+struct(5L, "b", 1.5),
+struct(1L, "a", 0.5),
+struct(3L, "b", 1.5)
+) as b from test_4;
+
+select `my_bigint`, `my_string`, `my_double`, struct(`my_bigint`, `my_string`, `my_double`)
+IN (
+struct(1L, "a", 1.5),
+struct(1L, "b", -0.5),
+struct(3L, "b", 1.5),
+struct(1L, "d", 1.5),
+struct(1L, "c", 1.5),
+struct(1L, "b", 2.5),
+struct(1L, "b", 0.5),
+struct(5L, "b", 1.5),
+struct(1L, "a", 0.5),
+struct(3L, "b", 1.5)
+) as b from test_4;
\ No newline at end of file
[12/25] hive git commit: HIVE-11748: HivePreparedStatement's
setTimestamp() does not quote value as required (Angus Smithson,
reviewed by Sergio Pena)
Posted by pr...@apache.org.
HIVE-11748: HivePreparedStatement's setTimestamp() does not quote value as required (Angus Smithson, reviewed by Sergio Pena)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/cdc65dc7
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/cdc65dc7
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/cdc65dc7
Branch: refs/heads/llap
Commit: cdc65dc7c6a0c725054839269a9c04ba02da0f5f
Parents: b98a60d
Author: Sergio Pena <se...@cloudera.com>
Authored: Wed Sep 23 14:18:16 2015 -0500
Committer: Sergio Pena <se...@cloudera.com>
Committed: Wed Sep 23 14:18:16 2015 -0500
----------------------------------------------------------------------
NOTICE | 3 +
.../org/apache/hive/jdbc/TestJdbcDriver2.java | 80 +++++++++++---------
.../apache/hive/jdbc/HivePreparedStatement.java | 4 +-
3 files changed, 48 insertions(+), 39 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/cdc65dc7/NOTICE
----------------------------------------------------------------------
diff --git a/NOTICE b/NOTICE
index 32d89b1..5c862df 100644
--- a/NOTICE
+++ b/NOTICE
@@ -9,3 +9,6 @@ Copyright (c) 2010-2014 Oracle and/or its affiliates.
This project includes software copyrighted by Microsoft Corporation and
licensed under the Apache License, Version 2.0.
+
+This project includes software copyrighted by Dell SecureWorks and
+licensed under the Apache License, Version 2.0.
http://git-wip-us.apache.org/repos/asf/hive/blob/cdc65dc7/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java
----------------------------------------------------------------------
diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java
index b2dd2ab..3aa6bce 100644
--- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java
+++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java
@@ -37,7 +37,9 @@ import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;
+import java.sql.Timestamp;
import java.sql.Types;
+import java.text.ParseException;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
@@ -412,29 +414,28 @@ public class TestJdbcDriver2 {
@Test
public void testPrepareStatement() {
-
- String sql = "from (select count(1) from "
+ String sql = "FROM (SELECT 1 FROM "
+ tableName
+ " where 'not?param?not?param' <> 'not_param??not_param' and ?=? "
+ " and 1=? and 2=? and 3.0=? and 4.0=? and 'test\\'string\"'=? and 5=? and ?=? "
+ " and date '2012-01-01' = date ?"
- + " ) t select '2011-03-25' ddate,'China',true bv, 10 num limit 10";
+ + " and timestamp '2012-04-22 09:00:00.123456789' = timestamp ?"
+ + " ) t SELECT '2011-03-25' ddate,'China',true bv, 10 num LIMIT 1";
///////////////////////////////////////////////
//////////////////// correct testcase
//////////////////// executed twice: once with the typed ps setters, once with the generic setObject
//////////////////////////////////////////////
try {
- PreparedStatement ps = createPreapredStatementUsingSetXXX(sql);
- ResultSet res = ps.executeQuery();
- assertPreparedStatementResultAsExpected(res);
- ps.close();
-
- ps = createPreapredStatementUsingSetObject(sql);
- res = ps.executeQuery();
- assertPreparedStatementResultAsExpected(res);
- ps.close();
+ try (PreparedStatement ps = createPreapredStatementUsingSetXXX(sql);
+ ResultSet res = ps.executeQuery()) {
+ assertPreparedStatementResultAsExpected(res);
+ }
+ try (PreparedStatement ps = createPreapredStatementUsingSetObject(sql);
+ ResultSet res = ps.executeQuery()) {
+ assertPreparedStatementResultAsExpected(res);
+ }
} catch (Exception e) {
e.printStackTrace();
fail(e.toString());
@@ -445,9 +446,8 @@ public class TestJdbcDriver2 {
//////////////////////////////////////////////
// set nothing for prepared sql
Exception expectedException = null;
- try {
- PreparedStatement ps = con.prepareStatement(sql);
- ps.executeQuery();
+ try (PreparedStatement ps = con.prepareStatement(sql);
+ ResultSet ignored = ps.executeQuery()) {
} catch (Exception e) {
expectedException = e;
}
@@ -457,11 +457,10 @@ public class TestJdbcDriver2 {
// set some of parameters for prepared sql, not all of them.
expectedException = null;
- try {
- PreparedStatement ps = con.prepareStatement(sql);
+ try (PreparedStatement ps = con.prepareStatement(sql)) {
ps.setBoolean(1, true);
ps.setBoolean(2, true);
- ps.executeQuery();
+ try (ResultSet ignored = ps.executeQuery()) {}
} catch (Exception e) {
expectedException = e;
}
@@ -471,16 +470,11 @@ public class TestJdbcDriver2 {
// set the wrong type parameters for prepared sql.
expectedException = null;
- try {
- PreparedStatement ps = con.prepareStatement(sql);
-
+ try (PreparedStatement ps = con.prepareStatement(sql)) {
// wrong type here
ps.setString(1, "wrong");
-
- assertTrue(true);
- ResultSet res = ps.executeQuery();
- if (!res.next()) {
- throw new Exception("there must be a empty result set");
+ try (ResultSet res = ps.executeQuery()) {
+ assertFalse("ResultSet was not empty", res.next());
}
} catch (Exception e) {
expectedException = e;
@@ -491,17 +485,15 @@ public class TestJdbcDriver2 {
// setObject to the yet unknown type java.util.Date
expectedException = null;
- try {
- PreparedStatement ps = con.prepareStatement(sql);
+ try (PreparedStatement ps = con.prepareStatement(sql)) {
ps.setObject(1, new Date());
- ps.executeQuery();
+ try (ResultSet ignored = ps.executeQuery()) {}
} catch (Exception e) {
expectedException = e;
}
assertNotNull(
"Setting to an unknown type should throw an exception",
expectedException);
-
}
private PreparedStatement createPreapredStatementUsingSetObject(String sql) throws SQLException {
@@ -509,7 +501,6 @@ public class TestJdbcDriver2 {
ps.setObject(1, true); //setBoolean
ps.setObject(2, true); //setBoolean
-
ps.setObject(3, Short.valueOf("1")); //setShort
ps.setObject(4, 2); //setInt
ps.setObject(5, 3f); //setFloat
@@ -519,6 +510,7 @@ public class TestJdbcDriver2 {
ps.setObject(9, (byte) 1); //setByte
ps.setObject(10, (byte) 1); //setByte
ps.setString(11, "2012-01-01"); //setString
+ ps.setObject(12, Timestamp.valueOf("2012-04-22 09:00:00.123456789")); //setTimestamp
ps.setMaxRows(2);
return ps;
@@ -529,7 +521,6 @@ public class TestJdbcDriver2 {
ps.setBoolean(1, true); //setBoolean
ps.setBoolean(2, true); //setBoolean
-
ps.setShort(3, Short.valueOf("1")); //setShort
ps.setInt(4, 2); //setInt
ps.setFloat(5, 3f); //setFloat
@@ -539,15 +530,17 @@ public class TestJdbcDriver2 {
ps.setByte(9, (byte) 1); //setByte
ps.setByte(10, (byte) 1); //setByte
ps.setString(11, "2012-01-01"); //setString
+ ps.setTimestamp(12, Timestamp.valueOf("2012-04-22 09:00:00.123456789")); //setTimestamp
ps.setMaxRows(2);
return ps;
}
- private void assertPreparedStatementResultAsExpected(ResultSet res ) throws SQLException {
+ private void assertPreparedStatementResultAsExpected(ResultSet res) throws SQLException {
assertNotNull(res);
+ assertTrue("ResultSet contained no rows", res.next());
- while (res.next()) {
+ do {
assertEquals("2011-03-25", res.getString("ddate"));
assertEquals("10", res.getString("num"));
assertEquals((byte) 10, res.getByte("num"));
@@ -561,9 +554,7 @@ public class TestJdbcDriver2 {
assertNotNull(o);
o = res.getObject("num");
assertNotNull(o);
- }
- res.close();
- assertTrue(true);
+ } while (res.next());
}
/**
@@ -2382,4 +2373,19 @@ public void testParseUrlHttpMode() throws SQLException, JdbcUriParseException,
fail(e.toString());
}
}
+
+ @Test
+ public void testPrepareSetTimestamp() throws SQLException, ParseException {
+ String sql = String.format("SELECT * FROM %s WHERE c17 = ?", dataTypeTableName);
+ try (PreparedStatement ps = con.prepareStatement(sql)) {
+ Timestamp timestamp = Timestamp.valueOf("2012-04-22 09:00:00.123456789");
+ ps.setTimestamp(1, timestamp);
+ // Ensure we find the single row which matches our timestamp (where field 1 has value 1)
+ try (ResultSet resultSet = ps.executeQuery()) {
+ assertTrue(resultSet.next());
+ assertEquals(1, resultSet.getInt(1));
+ assertFalse(resultSet.next());
+ }
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/cdc65dc7/jdbc/src/java/org/apache/hive/jdbc/HivePreparedStatement.java
----------------------------------------------------------------------
diff --git a/jdbc/src/java/org/apache/hive/jdbc/HivePreparedStatement.java b/jdbc/src/java/org/apache/hive/jdbc/HivePreparedStatement.java
index 7687537..c28b7d6 100644
--- a/jdbc/src/java/org/apache/hive/jdbc/HivePreparedStatement.java
+++ b/jdbc/src/java/org/apache/hive/jdbc/HivePreparedStatement.java
@@ -607,7 +607,7 @@ public class HivePreparedStatement extends HiveStatement implements PreparedStat
} else if (x instanceof Character) {
setString(parameterIndex, x.toString());
} else if (x instanceof Timestamp) {
- setString(parameterIndex, x.toString());
+ setTimestamp(parameterIndex, (Timestamp) x);
} else if (x instanceof BigDecimal) {
setString(parameterIndex, x.toString());
} else {
@@ -728,7 +728,7 @@ public class HivePreparedStatement extends HiveStatement implements PreparedStat
*/
public void setTimestamp(int parameterIndex, Timestamp x) throws SQLException {
- this.parameters.put(parameterIndex, x.toString());
+ this.parameters.put(parameterIndex, "'" + x.toString() + "'");
}
/*
[06/25] hive git commit: HIVE-11217: CTAS statements throw an error
when the table is stored in ORC file format and the select clause has a
NULL/VOID type column (Yongzhi Chen, reviewed by Prasanth Jayachandran)
Posted by pr...@apache.org.
HIVE-11217: CTAS statements throw an error when the table is stored in ORC file format and the select clause has a NULL/VOID type column (Yongzhi Chen, reviewed by Prasanth Jayachandran)
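[Editor's note] A bare NULL in a CTAS select list has VOID type, which no file format can store as a column type, so the analyzer now rejects it up front instead of failing later. A hedged JDBC sketch of the failure and the workaround (connection URL and table names are placeholders; the error text comes from the ctasnullcol.q.out diff below):

  import java.sql.Connection;
  import java.sql.DriverManager;
  import java.sql.Statement;

  public class CtasNullColumnSketch {
    public static void main(String[] args) throws Exception {
      try (Connection con = DriverManager.getConnection("jdbc:hive2://localhost:10000/default");
           Statement stmt = con.createStatement()) {
        // Rejected after this patch, because the second column has VOID type:
        //   CREATE TABLE orc_table_with_null STORED AS ORC AS SELECT key, null FROM src;
        //   -> SemanticException [Error 10305]: CREATE-TABLE-AS-SELECT creates a VOID type ...
        // Workaround: give the NULL a concrete type with CAST.
        stmt.execute("CREATE TABLE orc_table_with_null STORED AS ORC AS "
            + "SELECT key, CAST(null AS STRING) AS value FROM src");
      }
    }
  }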
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2e8324e4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2e8324e4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2e8324e4
Branch: refs/heads/llap
Commit: 2e8324e439de02c75e173e27147d208720f51964
Parents: 072c5a0
Author: Prasanth Jayachandran <j....@gmail.com>
Authored: Wed Sep 23 00:48:03 2015 -0500
Committer: Prasanth Jayachandran <j....@gmail.com>
Committed: Wed Sep 23 00:48:03 2015 -0500
----------------------------------------------------------------------
ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java | 2 +-
.../org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java | 8 +++++++-
ql/src/test/queries/clientnegative/ctasnullcol.q | 2 ++
ql/src/test/results/clientnegative/ctasnullcol.q.out | 5 +++++
4 files changed, 15 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/2e8324e4/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
index 7abef0b..87c2830 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
@@ -432,7 +432,7 @@ public enum ErrorMsg {
UPDATE_CANNOT_UPDATE_BUCKET_VALUE(10302, "Updating values of bucketing columns is not supported. Column {0}.", true),
IMPORT_INTO_STRICT_REPL_TABLE(10303,"Non-repl import disallowed against table that is a destination of replication."),
CTAS_LOCATION_NONEMPTY(10304, "CREATE-TABLE-AS-SELECT cannot create table with location to a non-empty directory."),
-
+ CTAS_CREATES_VOID_TYPE(10305, "CREATE-TABLE-AS-SELECT creates a VOID type, please use CAST to specify the type, near field: "),
//========================== 20000 range starts here ========================//
SCRIPT_INIT_ERROR(20000, "Unable to initialize custom script."),
SCRIPT_IO_ERROR(20001, "An error occurred while reading or writing to your custom script. "
http://git-wip-us.apache.org/repos/asf/hive/blob/2e8324e4/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 1076dfd..c5f39d3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -6477,7 +6477,13 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
}
colName = fixCtasColumnName(colName);
col.setName(colName);
- col.setType(colInfo.getType().getTypeName());
+ String typeName = colInfo.getType().getTypeName();
+ // CTAS should NOT create a VOID type
+ if (typeName.equals(serdeConstants.VOID_TYPE_NAME)) {
+ throw new SemanticException(ErrorMsg.CTAS_CREATES_VOID_TYPE
+ .getMsg(colName));
+ }
+ col.setType(typeName);
field_schemas.add(col);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/2e8324e4/ql/src/test/queries/clientnegative/ctasnullcol.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientnegative/ctasnullcol.q b/ql/src/test/queries/clientnegative/ctasnullcol.q
new file mode 100644
index 0000000..b03c172
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/ctasnullcol.q
@@ -0,0 +1,2 @@
+drop table if exists orc_table_with_null;
+CREATE TABLE orc_table_with_null STORED AS ORC AS SELECT key, null FROM src;
http://git-wip-us.apache.org/repos/asf/hive/blob/2e8324e4/ql/src/test/results/clientnegative/ctasnullcol.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/ctasnullcol.q.out b/ql/src/test/results/clientnegative/ctasnullcol.q.out
new file mode 100644
index 0000000..6d36bb8
--- /dev/null
+++ b/ql/src/test/results/clientnegative/ctasnullcol.q.out
@@ -0,0 +1,5 @@
+PREHOOK: query: drop table if exists orc_table_with_null
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists orc_table_with_null
+POSTHOOK: type: DROPTABLE
+FAILED: SemanticException [Error 10305]: CREATE-TABLE-AS-SELECT creates a VOID type, please use CAST to specify the type, near field: c1
[21/25] hive git commit: HIVE-11831 : TXN tables in Oracle should be
created with ROWDEPENDENCIES (Sergey Shelukhin, reviewed by Alan Gates)
Posted by pr...@apache.org.
HIVE-11831 : TXN tables in Oracle should be created with ROWDEPENDENCIES (Sergey Shelukhin, reviewed by Alan Gates). With ROWDEPENDENCIES, Oracle tracks commit SCNs per row rather than per block, which avoids spurious ORA-08177 (can't serialize access) failures when concurrent metastore transactions running under SERIALIZABLE isolation touch different rows in the same block.
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/41a12cb2
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/41a12cb2
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/41a12cb2
Branch: refs/heads/llap
Commit: 41a12cb26789c94be22fa2936fc4ca41b3e675ba
Parents: 7b92f44
Author: Sergey Shelukhin <se...@apache.org>
Authored: Thu Sep 24 18:09:23 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Thu Sep 24 18:09:23 2015 -0700
----------------------------------------------------------------------
.../scripts/upgrade/oracle/hive-schema-0.13.0.oracle.sql | 10 +++++-----
.../scripts/upgrade/oracle/hive-schema-0.14.0.oracle.sql | 10 +++++-----
.../upgrade/oracle/hive-txn-schema-0.13.0.oracle.sql | 10 +++++-----
.../upgrade/oracle/hive-txn-schema-0.14.0.oracle.sql | 10 +++++-----
4 files changed, 20 insertions(+), 20 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/41a12cb2/metastore/scripts/upgrade/oracle/hive-schema-0.13.0.oracle.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/oracle/hive-schema-0.13.0.oracle.sql b/metastore/scripts/upgrade/oracle/hive-schema-0.13.0.oracle.sql
index 6bd8df9..014b7c0 100644
--- a/metastore/scripts/upgrade/oracle/hive-schema-0.13.0.oracle.sql
+++ b/metastore/scripts/upgrade/oracle/hive-schema-0.13.0.oracle.sql
@@ -766,21 +766,21 @@ CREATE TABLE TXNS (
TXN_LAST_HEARTBEAT NUMBER(19) NOT NULL,
TXN_USER varchar(128) NOT NULL,
TXN_HOST varchar(128) NOT NULL
-);
+) ROWDEPENDENCIES;
CREATE TABLE TXN_COMPONENTS (
TC_TXNID NUMBER(19) REFERENCES TXNS (TXN_ID),
TC_DATABASE VARCHAR2(128) NOT NULL,
TC_TABLE VARCHAR2(128),
TC_PARTITION VARCHAR2(767) NULL
-);
+) ROWDEPENDENCIES;
CREATE TABLE COMPLETED_TXN_COMPONENTS (
CTC_TXNID NUMBER(19),
CTC_DATABASE varchar(128) NOT NULL,
CTC_TABLE varchar(128),
CTC_PARTITION varchar(767)
-);
+) ROWDEPENDENCIES;
CREATE TABLE NEXT_TXN_ID (
NTXN_NEXT NUMBER(19) NOT NULL
@@ -801,7 +801,7 @@ CREATE TABLE HIVE_LOCKS (
HL_USER varchar(128) NOT NULL,
HL_HOST varchar(128) NOT NULL,
PRIMARY KEY(HL_LOCK_EXT_ID, HL_LOCK_INT_ID)
-);
+) ROWDEPENDENCIES;
CREATE INDEX HL_TXNID_INDEX ON HIVE_LOCKS (HL_TXNID);
@@ -820,7 +820,7 @@ CREATE TABLE COMPACTION_QUEUE (
CQ_WORKER_ID varchar(128),
CQ_START NUMBER(19),
CQ_RUN_AS varchar(128)
-);
+) ROWDEPENDENCIES;
CREATE TABLE NEXT_COMPACTION_QUEUE_ID (
NCQ_NEXT NUMBER(19) NOT NULL
http://git-wip-us.apache.org/repos/asf/hive/blob/41a12cb2/metastore/scripts/upgrade/oracle/hive-schema-0.14.0.oracle.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/oracle/hive-schema-0.14.0.oracle.sql b/metastore/scripts/upgrade/oracle/hive-schema-0.14.0.oracle.sql
index f1f71ce..ec9abba 100644
--- a/metastore/scripts/upgrade/oracle/hive-schema-0.14.0.oracle.sql
+++ b/metastore/scripts/upgrade/oracle/hive-schema-0.14.0.oracle.sql
@@ -766,21 +766,21 @@ CREATE TABLE TXNS (
TXN_LAST_HEARTBEAT NUMBER(19) NOT NULL,
TXN_USER varchar(128) NOT NULL,
TXN_HOST varchar(128) NOT NULL
-);
+) ROWDEPENDENCIES;
CREATE TABLE TXN_COMPONENTS (
TC_TXNID NUMBER(19) REFERENCES TXNS (TXN_ID),
TC_DATABASE VARCHAR2(128) NOT NULL,
TC_TABLE VARCHAR2(128),
TC_PARTITION VARCHAR2(767) NULL
-);
+) ROWDEPENDENCIES;
CREATE TABLE COMPLETED_TXN_COMPONENTS (
CTC_TXNID NUMBER(19),
CTC_DATABASE varchar(128) NOT NULL,
CTC_TABLE varchar(128),
CTC_PARTITION varchar(767)
-);
+) ROWDEPENDENCIES;
CREATE TABLE NEXT_TXN_ID (
NTXN_NEXT NUMBER(19) NOT NULL
@@ -801,7 +801,7 @@ CREATE TABLE HIVE_LOCKS (
HL_USER varchar(128) NOT NULL,
HL_HOST varchar(128) NOT NULL,
PRIMARY KEY(HL_LOCK_EXT_ID, HL_LOCK_INT_ID)
-);
+) ROWDEPENDENCIES;
CREATE INDEX HL_TXNID_INDEX ON HIVE_LOCKS (HL_TXNID);
@@ -820,7 +820,7 @@ CREATE TABLE COMPACTION_QUEUE (
CQ_WORKER_ID varchar(128),
CQ_START NUMBER(19),
CQ_RUN_AS varchar(128)
-);
+) ROWDEPENDENCIES;
CREATE TABLE NEXT_COMPACTION_QUEUE_ID (
NCQ_NEXT NUMBER(19) NOT NULL
http://git-wip-us.apache.org/repos/asf/hive/blob/41a12cb2/metastore/scripts/upgrade/oracle/hive-txn-schema-0.13.0.oracle.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/oracle/hive-txn-schema-0.13.0.oracle.sql b/metastore/scripts/upgrade/oracle/hive-txn-schema-0.13.0.oracle.sql
index 7435ea8..58e53c4 100644
--- a/metastore/scripts/upgrade/oracle/hive-txn-schema-0.13.0.oracle.sql
+++ b/metastore/scripts/upgrade/oracle/hive-txn-schema-0.13.0.oracle.sql
@@ -24,21 +24,21 @@ CREATE TABLE TXNS (
TXN_LAST_HEARTBEAT NUMBER(19) NOT NULL,
TXN_USER varchar(128) NOT NULL,
TXN_HOST varchar(128) NOT NULL
-);
+) ROWDEPENDENCIES;
CREATE TABLE TXN_COMPONENTS (
TC_TXNID NUMBER(19) REFERENCES TXNS (TXN_ID),
TC_DATABASE VARCHAR2(128) NOT NULL,
TC_TABLE VARCHAR2(128),
TC_PARTITION VARCHAR2(767) NULL
-);
+) ROWDEPENDENCIES;
CREATE TABLE COMPLETED_TXN_COMPONENTS (
CTC_TXNID NUMBER(19),
CTC_DATABASE varchar(128) NOT NULL,
CTC_TABLE varchar(128),
CTC_PARTITION varchar(767)
-);
+) ROWDEPENDENCIES;
CREATE TABLE NEXT_TXN_ID (
NTXN_NEXT NUMBER(19) NOT NULL
@@ -59,7 +59,7 @@ CREATE TABLE HIVE_LOCKS (
HL_USER varchar(128) NOT NULL,
HL_HOST varchar(128) NOT NULL,
PRIMARY KEY(HL_LOCK_EXT_ID, HL_LOCK_INT_ID)
-);
+) ROWDEPENDENCIES;
CREATE INDEX HL_TXNID_INDEX ON HIVE_LOCKS (HL_TXNID);
@@ -78,7 +78,7 @@ CREATE TABLE COMPACTION_QUEUE (
CQ_WORKER_ID varchar(128),
CQ_START NUMBER(19),
CQ_RUN_AS varchar(128)
-);
+) ROWDEPENDENCIES;
CREATE TABLE NEXT_COMPACTION_QUEUE_ID (
NCQ_NEXT NUMBER(19) NOT NULL
http://git-wip-us.apache.org/repos/asf/hive/blob/41a12cb2/metastore/scripts/upgrade/oracle/hive-txn-schema-0.14.0.oracle.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/oracle/hive-txn-schema-0.14.0.oracle.sql b/metastore/scripts/upgrade/oracle/hive-txn-schema-0.14.0.oracle.sql
index 7435ea8..58e53c4 100644
--- a/metastore/scripts/upgrade/oracle/hive-txn-schema-0.14.0.oracle.sql
+++ b/metastore/scripts/upgrade/oracle/hive-txn-schema-0.14.0.oracle.sql
@@ -24,21 +24,21 @@ CREATE TABLE TXNS (
TXN_LAST_HEARTBEAT NUMBER(19) NOT NULL,
TXN_USER varchar(128) NOT NULL,
TXN_HOST varchar(128) NOT NULL
-);
+) ROWDEPENDENCIES;
CREATE TABLE TXN_COMPONENTS (
TC_TXNID NUMBER(19) REFERENCES TXNS (TXN_ID),
TC_DATABASE VARCHAR2(128) NOT NULL,
TC_TABLE VARCHAR2(128),
TC_PARTITION VARCHAR2(767) NULL
-);
+) ROWDEPENDENCIES;
CREATE TABLE COMPLETED_TXN_COMPONENTS (
CTC_TXNID NUMBER(19),
CTC_DATABASE varchar(128) NOT NULL,
CTC_TABLE varchar(128),
CTC_PARTITION varchar(767)
-);
+) ROWDEPENDENCIES;
CREATE TABLE NEXT_TXN_ID (
NTXN_NEXT NUMBER(19) NOT NULL
@@ -59,7 +59,7 @@ CREATE TABLE HIVE_LOCKS (
HL_USER varchar(128) NOT NULL,
HL_HOST varchar(128) NOT NULL,
PRIMARY KEY(HL_LOCK_EXT_ID, HL_LOCK_INT_ID)
-);
+) ROWDEPENDENCIES;
CREATE INDEX HL_TXNID_INDEX ON HIVE_LOCKS (HL_TXNID);
@@ -78,7 +78,7 @@ CREATE TABLE COMPACTION_QUEUE (
CQ_WORKER_ID varchar(128),
CQ_START NUMBER(19),
CQ_RUN_AS varchar(128)
-);
+) ROWDEPENDENCIES;
CREATE TABLE NEXT_COMPACTION_QUEUE_ID (
NCQ_NEXT NUMBER(19) NOT NULL
[25/25] hive git commit: HIVE-11967: LLAP: Merge master to branch
(Prasanth Jayachandran)
Posted by pr...@apache.org.
HIVE-11967: LLAP: Merge master to branch (Prasanth Jayachandran)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3c5b4ced
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3c5b4ced
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3c5b4ced
Branch: refs/heads/llap
Commit: 3c5b4cedaa76cd40e177d60a47414d2db150ed0e
Parents: 3b64bd6 6c2d71c
Author: Prasanth Jayachandran <j....@gmail.com>
Authored: Fri Sep 25 13:38:23 2015 -0500
Committer: Prasanth Jayachandran <j....@gmail.com>
Committed: Fri Sep 25 13:38:23 2015 -0500
----------------------------------------------------------------------
NOTICE | 3 +
.../apache/hadoop/hive/ant/GenVectorCode.java | 31 -
bin/hive | 2 +-
.../org/apache/hadoop/hive/conf/HiveConf.java | 2 +-
.../vectorization/VectorizationBench.java | 32 +-
.../org/apache/hive/jdbc/TestJdbcDriver2.java | 80 +-
.../test/resources/testconfiguration.properties | 1 +
.../apache/hive/jdbc/HivePreparedStatement.java | 4 +-
.../hive/jdbc/ZooKeeperHiveClientHelper.java | 2 +-
.../oracle/hive-schema-0.13.0.oracle.sql | 10 +-
.../oracle/hive-schema-0.14.0.oracle.sql | 10 +-
.../oracle/hive-txn-schema-0.13.0.oracle.sql | 10 +-
.../oracle/hive-txn-schema-0.14.0.oracle.sql | 10 +-
.../hadoop/hive/metastore/ObjectStore.java | 16 +-
.../hadoop/hive/metastore/txn/TxnDbUtil.java | 4 +-
.../hadoop/hive/metastore/txn/TxnHandler.java | 5 +-
packaging/src/main/assembly/bin.xml | 1 +
.../ExpressionTemplates/IfExprColumnColumn.txt | 186 --
.../org/apache/hadoop/hive/ql/ErrorMsg.java | 2 +-
.../hadoop/hive/ql/exec/FunctionRegistry.java | 1 +
.../ql/exec/vector/VectorGroupByOperator.java | 5 +-
.../exec/vector/VectorSMBMapJoinOperator.java | 15 +-
.../ql/exec/vector/VectorizationContext.java | 203 +-
.../expressions/FilterStringColumnInList.java | 13 +-
.../expressions/FilterStructColumnInList.java | 178 ++
.../exec/vector/expressions/IStructInExpr.java | 36 +
.../IfExprDoubleColumnDoubleColumn.java | 167 ++
.../expressions/IfExprLongColumnLongColumn.java | 166 ++
.../vector/expressions/StringColumnInList.java | 4 +
.../vector/expressions/StructColumnInList.java | 174 ++
.../hadoop/hive/ql/io/orc/OrcInputFormat.java | 6 +-
.../functions/HiveSqlCountAggFunction.java | 72 +
.../functions/HiveSqlMinMaxAggFunction.java | 49 +
.../functions/HiveSqlSumAggFunction.java | 125 ++
.../rules/HiveAggregateJoinTransposeRule.java | 372 ++++
.../translator/SqlFunctionConverter.java | 40 +-
.../hive/ql/optimizer/physical/Vectorizer.java | 86 +-
.../ql/optimizer/physical/Vectorizer.java.orig | 1744 +++++++++++++++++
.../ql/optimizer/physical/Vectorizer.java.rej | 86 +
.../hive/ql/optimizer/ppr/PartitionPruner.java | 26 +-
.../hadoop/hive/ql/parse/CalcitePlanner.java | 5 +
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 8 +-
.../hadoop/hive/ql/plan/VectorGroupByDesc.java | 10 +
.../hadoop/hive/ql/session/SessionState.java | 3 +
.../apache/hadoop/hive/ql/stats/StatsUtils.java | 182 +-
.../hive/ql/stats/jdbc/JDBCStatsPublisher.java | 13 +-
.../ql/stats/jdbc/JDBCStatsSetupConstants.java | 4 +-
.../hive/ql/udf/generic/GenericUDAFSum.java | 2 +-
.../udf/generic/GenericUDAFSumEmptyIsZero.java | 63 +
.../hive/ql/udf/generic/GenericUDFIf.java | 4 +-
.../exec/vector/TestVectorizationContext.java | 4 +-
.../TestVectorConditionalExpressions.java | 3 +-
.../hive/ql/io/orc/TestInputOutputFormat.java | 19 +
.../TestNegativePartitionPrunerCompactExpr.java | 27 +
.../TestPositivePartitionPrunerCompactExpr.java | 115 ++
.../test/queries/clientnegative/ctasnullcol.q | 2 +
.../queries/clientpositive/avrocountemptytbl.q | 8 +
.../clientpositive/groupby_join_pushdown.q | 55 +
.../clientpositive/vector_auto_smb_mapjoin_14.q | 297 +++
.../clientpositive/vector_groupby_reduce.q | 62 +-
.../queries/clientpositive/vector_struct_in.q | 247 +++
.../clientpositive/vectorization_limit.q | 4 +-
.../results/clientnegative/ctasnullcol.q.out | 5 +
.../clientpositive/avrocountemptytbl.q.out | 58 +
.../clientpositive/groupby_join_pushdown.q.out | 1522 +++++++++++++++
.../results/clientpositive/show_functions.q.out | 1 +
.../tez/vector_auto_smb_mapjoin_14.q.out | 1576 +++++++++++++++
.../tez/vector_groupby_reduce.q.out | 1452 +++++++++++++-
.../tez/vectorization_limit.q.out | 8 +-
.../vector_auto_smb_mapjoin_14.q.out | 1792 ++++++++++++++++++
.../clientpositive/vector_groupby_reduce.q.out | 1466 +++++++++++++-
.../clientpositive/vector_struct_in.q.out | 825 ++++++++
.../clientpositive/vectorization_limit.q.out | 8 +-
.../hadoop/hive/serde2/avro/AvroSerdeUtils.java | 24 +-
shims/0.23/pom.xml | 1 -
.../apache/hadoop/hive/shims/Hadoop23Shims.java | 23 +-
76 files changed, 13371 insertions(+), 506 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/3c5b4ced/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/3c5b4ced/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/3c5b4ced/packaging/src/main/assembly/bin.xml
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/3c5b4ced/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/3c5b4ced/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/3c5b4ced/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/3c5b4ced/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/3c5b4ced/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/3c5b4ced/ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/3c5b4ced/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/3c5b4ced/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
----------------------------------------------------------------------
[05/25] hive git commit: HIVE-11794 : GBY vectorization appears to
process COMPLETE reduce-side GBY incorrectly (Sergey Shelukhin,
reviewed by Matt McCline)
Posted by pr...@apache.org.
HIVE-11794 : GBY vectorization appears to process COMPLETE reduce-side GBY incorrectly (Sergey Shelukhin, reviewed by Matt McCline)
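[Editor's note] As I read the Vectorizer hunk below, a reduce-side COMPLETE group-by can be fed by another operator (e.g. a preceding group-by), so its input batches are not guaranteed to each hold a single key group; the merge-partial path assumes they are, so the patch routes COMPLETE to the unsorted streaming mode instead. A rough standalone sketch of the resulting dispatch -- the mode names come from the VectorGroupByOperator hunk below, but the real code constructs inner classes rather than returning strings:

  public class GroupByModeDispatchSketch {
    static String chooseMode(boolean reduceMergePartial, boolean reduceStreaming) {
      if (reduceMergePartial) {
        // Sorted reduce input where every batch shares one group key.
        return "ProcessingModeReduceMergePartialKeys";
      }
      if (reduceStreaming) {
        // New case: reduce-side COMPLETE GBY streams batches whose keys
        // are grouped by shuffle order but not pre-merged per batch.
        return "ProcessingModeUnsortedStreaming";
      }
      // Default: hash aggregation, possibly switching to streaming dynamically.
      return "ProcessingModeHashAggregate";
    }

    public static void main(String[] args) {
      System.out.println(chooseMode(false, true)); // -> ProcessingModeUnsortedStreaming
    }
  }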
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/072c5a0b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/072c5a0b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/072c5a0b
Branch: refs/heads/llap
Commit: 072c5a0bce78cde0124d98a1243392cdee2f2f3e
Parents: e82bf25
Author: Sergey Shelukhin <se...@apache.org>
Authored: Tue Sep 22 18:13:15 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Tue Sep 22 18:13:15 2015 -0700
----------------------------------------------------------------------
.../ql/exec/vector/VectorGroupByOperator.java | 5 +-
.../hive/ql/optimizer/physical/Vectorizer.java | 16 +-
.../hive/ql/plan/ExprNodeGenericFuncDesc.java | 10 +-
.../hadoop/hive/ql/plan/VectorGroupByDesc.java | 10 +
.../clientpositive/vector_groupby_reduce.q | 62 +-
.../clientpositive/vectorization_limit.q | 4 +-
.../tez/vector_groupby_reduce.q.out | 1452 ++++++++++++++++-
.../tez/vectorization_limit.q.out | 8 +-
.../clientpositive/vector_groupby_reduce.q.out | 1466 +++++++++++++++++-
.../clientpositive/vectorization_limit.q.out | 8 +-
10 files changed, 2958 insertions(+), 83 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/072c5a0b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
index 917f406..7a552b8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
@@ -625,8 +625,7 @@ public class VectorGroupByOperator extends Operator<GroupByDesc> implements
rowsToFlush[flushMark] = currentStreamingAggregators;
if (keysToFlush[flushMark] == null) {
keysToFlush[flushMark] = (VectorHashKeyWrapper) streamingKey.copyKey();
- }
- else {
+ } else {
streamingKey.duplicateTo(keysToFlush[flushMark]);
}
@@ -836,6 +835,8 @@ public class VectorGroupByOperator extends Operator<GroupByDesc> implements
} else if (conf.getVectorDesc().isReduceMergePartial()) {
// Sorted GroupBy of vector batches where an individual batch has the same group key (e.g. reduce).
processingMode = this.new ProcessingModeReduceMergePartialKeys();
+ } else if (conf.getVectorDesc().isReduceStreaming()) {
+ processingMode = this.new ProcessingModeUnsortedStreaming();
} else {
// We start in hash mode and may dynamically switch to unsorted stream mode.
processingMode = this.new ProcessingModeHashAggregate();
http://git-wip-us.apache.org/repos/asf/hive/blob/072c5a0b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 2e3bd76..0d4c1d8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -1139,8 +1139,6 @@ public class Vectorizer implements PhysicalPlanResolver {
return false;
}
- boolean isMergePartial = (desc.getMode() != GroupByDesc.Mode.HASH);
-
if (!isReduce) {
// MapWork
@@ -1153,12 +1151,15 @@ public class Vectorizer implements PhysicalPlanResolver {
// ReduceWork
- if (isMergePartial) {
+ boolean isComplete = desc.getMode() == GroupByDesc.Mode.COMPLETE;
+ if (desc.getMode() != GroupByDesc.Mode.HASH) {
// Reduce Merge-Partial GROUP BY.
// A merge-partial GROUP BY is fed by grouping by keys from reduce-shuffle. It is the
// first (or root) operator for its reduce task.
+ // TODO: Technically, we should also handle FINAL, PARTIAL1, PARTIAL2 and PARTIALS
+ // that are not hash or complete, but aren't merge-partial, somehow.
if (desc.isDistinct()) {
LOG.info("Vectorized Reduce MergePartial GROUP BY does not support DISTINCT");
@@ -1174,7 +1175,7 @@ public class Vectorizer implements PhysicalPlanResolver {
}
if (hasKeys) {
- if (op.getParentOperators().size() > 0) {
+ if (op.getParentOperators().size() > 0 && !isComplete) {
LOG.info("Vectorized Reduce MergePartial GROUP BY keys can only handle a key group when it is fed by reduce-shuffle");
return false;
}
@@ -1187,7 +1188,11 @@ public class Vectorizer implements PhysicalPlanResolver {
} else {
LOG.info("Vectorized Reduce MergePartial GROUP BY will do global aggregation");
}
- vectorDesc.setIsReduceMergePartial(true);
+ if (!isComplete) {
+ vectorDesc.setIsReduceMergePartial(true);
+ } else {
+ vectorDesc.setIsReduceStreaming(true);
+ }
} else {
// Reduce Hash GROUP BY or global aggregation.
@@ -1259,6 +1264,7 @@ public class Vectorizer implements PhysicalPlanResolver {
ExprNodeGenericFuncDesc d = (ExprNodeGenericFuncDesc) desc;
boolean r = validateGenericUdf(d);
if (!r) {
+ LOG.info("Cannot vectorize UDF " + d);
return false;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/072c5a0b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java
index 4b2c1ad..b5d2ddf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java
@@ -136,11 +136,13 @@ public class ExprNodeGenericFuncDesc extends ExprNodeDesc implements
StringBuilder sb = new StringBuilder();
sb.append(genericUDF.getClass().getSimpleName());
sb.append("(");
- for (int i = 0; i < chidren.size(); i++) {
- if (i > 0) {
- sb.append(", ");
+ if (chidren != null) {
+ for (int i = 0; i < chidren.size(); i++) {
+ if (i > 0) {
+ sb.append(", ");
+ }
+ sb.append(chidren.get(i));
}
- sb.append(chidren.get(i).toString());
}
sb.append(")");
return sb.toString();
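[Editor's note] The hunk above is a null guard: toString() previously iterated the (misspelled) chidren list unconditionally, so a function descriptor with no children could throw NullPointerException while being logged, e.g. from the new "Cannot vectorize UDF" message added in the Vectorizer hunk above. A tiny illustrative sketch of the behavior change (class and field names are mine):

  import java.util.List;

  public class FuncDescToStringSketch {
    final String name;
    final List<String> children; // may be null for some descriptors

    FuncDescToStringSketch(String name, List<String> children) {
      this.name = name;
      this.children = children;
    }

    @Override
    public String toString() {
      StringBuilder sb = new StringBuilder(name).append("(");
      if (children != null) { // the guard this patch adds
        for (int i = 0; i < children.size(); i++) {
          if (i > 0) {
            sb.append(", ");
          }
          sb.append(children.get(i));
        }
      }
      return sb.append(")").toString();
    }

    public static void main(String[] args) {
      // Without the null check this would throw NPE instead of printing "GenericUDFIf()".
      System.out.println(new FuncDescToStringSketch("GenericUDFIf", null));
    }
  }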
http://git-wip-us.apache.org/repos/asf/hive/blob/072c5a0b/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java
index 7e791f2..e613a4e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java
@@ -34,6 +34,8 @@ public class VectorGroupByDesc extends AbstractVectorDesc {
private boolean isVectorOutput;
+ private boolean isReduceStreaming;
+
public VectorGroupByDesc() {
this.isReduceMergePartial = false;
this.isVectorOutput = false;
@@ -54,4 +56,12 @@ public class VectorGroupByDesc extends AbstractVectorDesc {
public void setVectorOutput(boolean isVectorOutput) {
this.isVectorOutput = isVectorOutput;
}
+
+ public void setIsReduceStreaming(boolean isReduceStreaming) {
+ this.isReduceStreaming = isReduceStreaming;
+ }
+
+ public boolean isReduceStreaming() {
+ return isReduceStreaming;
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/072c5a0b/ql/src/test/queries/clientpositive/vector_groupby_reduce.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_groupby_reduce.q b/ql/src/test/queries/clientpositive/vector_groupby_reduce.q
index 1438c29..8fe6b7e 100644
--- a/ql/src/test/queries/clientpositive/vector_groupby_reduce.q
+++ b/ql/src/test/queries/clientpositive/vector_groupby_reduce.q
@@ -105,12 +105,11 @@ from
group by ss_ticket_number
limit 20;
--- The Reduce task has 2 MergePartial GROUP BY operators in a row. Currently,
--- we don't issue startGroup with keys out of the 1st vectorized GROUP BY, so we can't
--- vectorize the 2nd GROUP BY...
+
+
explain
select
- min(ss_ticket_number)
+ min(ss_ticket_number) m
from
(select
ss_ticket_number
@@ -118,10 +117,10 @@ from
store_sales
group by ss_ticket_number) a
group by ss_ticket_number
-limit 20;
+order by m;
select
- min(ss_ticket_number)
+ min(ss_ticket_number) m
from
(select
ss_ticket_number
@@ -129,5 +128,54 @@ from
store_sales
group by ss_ticket_number) a
group by ss_ticket_number
-limit 20;
+order by m;
+
+
+
+explain
+select
+ ss_ticket_number, sum(ss_item_sk), sum(q)
+from
+ (select
+ ss_ticket_number, ss_item_sk, min(ss_quantity) q
+ from
+ store_sales
+ group by ss_ticket_number, ss_item_sk) a
+group by ss_ticket_number
+order by ss_ticket_number;
+
+select
+ ss_ticket_number, sum(ss_item_sk), sum(q)
+from
+ (select
+ ss_ticket_number, ss_item_sk, min(ss_quantity) q
+ from
+ store_sales
+ group by ss_ticket_number, ss_item_sk) a
+group by ss_ticket_number
+order by ss_ticket_number;
+
+
+explain
+select
+ ss_ticket_number, ss_item_sk, sum(q)
+from
+ (select
+ ss_ticket_number, ss_item_sk, min(ss_quantity) q
+ from
+ store_sales
+ group by ss_ticket_number, ss_item_sk) a
+group by ss_ticket_number, ss_item_sk
+order by ss_ticket_number, ss_item_sk;
+
+select
+ ss_ticket_number, ss_item_sk, sum(q)
+from
+ (select
+ ss_ticket_number, ss_item_sk, min(ss_quantity) q
+ from
+ store_sales
+ group by ss_ticket_number, ss_item_sk) a
+group by ss_ticket_number, ss_item_sk
+order by ss_ticket_number, ss_item_sk;
http://git-wip-us.apache.org/repos/asf/hive/blob/072c5a0b/ql/src/test/queries/clientpositive/vectorization_limit.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vectorization_limit.q b/ql/src/test/queries/clientpositive/vectorization_limit.q
index 8799087..f261a36 100644
--- a/ql/src/test/queries/clientpositive/vectorization_limit.q
+++ b/ql/src/test/queries/clientpositive/vectorization_limit.q
@@ -23,8 +23,8 @@ select distinct(ctinyint) from alltypesorc limit 20;
select distinct(ctinyint) from alltypesorc limit 20;
explain
-select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint limit 20;
-select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint limit 20;
+select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20;
+select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20;
-- limit zero
explain
http://git-wip-us.apache.org/repos/asf/hive/blob/072c5a0b/ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out b/ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out
index 814ee39..fe7e829 100644
--- a/ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out
@@ -321,12 +321,9 @@ POSTHOOK: Input: default@store_sales
18
19
20
-PREHOOK: query: -- The Reduce task has 2 MergePartial GROUP BY operators in a row. Currently,
--- we don't issue startGroup with keys out of the 1st vectorized GROUP BY, so we can't
--- vectorize the 2nd GROUP BY...
-explain
+PREHOOK: query: explain
select
- min(ss_ticket_number)
+ min(ss_ticket_number) m
from
(select
ss_ticket_number
@@ -334,14 +331,11 @@ from
store_sales
group by ss_ticket_number) a
group by ss_ticket_number
-limit 20
+order by m
PREHOOK: type: QUERY
-POSTHOOK: query: -- The Reduce task has 2 MergePartial GROUP BY operators in a row. Currently,
--- we don't issue startGroup with keys out of the 1st vectorized GROUP BY, so we can't
--- vectorize the 2nd GROUP BY...
-explain
+POSTHOOK: query: explain
select
- min(ss_ticket_number)
+ min(ss_ticket_number) m
from
(select
ss_ticket_number
@@ -349,7 +343,7 @@ from
store_sales
group by ss_ticket_number) a
group by ss_ticket_number
-limit 20
+order by m
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -360,6 +354,7 @@ STAGE PLANS:
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -399,25 +394,34 @@ STAGE PLANS:
expressions: _col1 (type: int)
outputColumnNames: _col0
Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
- Limit
- Number of rows: 20
- Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Reducer 3
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
Stage: Stage-0
Fetch Operator
- limit: 20
+ limit: -1
Processor Tree:
ListSink
PREHOOK: query: select
- min(ss_ticket_number)
+ min(ss_ticket_number) m
from
(select
ss_ticket_number
@@ -425,12 +429,12 @@ from
store_sales
group by ss_ticket_number) a
group by ss_ticket_number
-limit 20
+order by m
PREHOOK: type: QUERY
PREHOOK: Input: default@store_sales
#### A masked pattern was here ####
POSTHOOK: query: select
- min(ss_ticket_number)
+ min(ss_ticket_number) m
from
(select
ss_ticket_number
@@ -438,7 +442,7 @@ from
store_sales
group by ss_ticket_number) a
group by ss_ticket_number
-limit 20
+order by m
POSTHOOK: type: QUERY
POSTHOOK: Input: default@store_sales
#### A masked pattern was here ####
@@ -462,3 +466,1397 @@ POSTHOOK: Input: default@store_sales
18
19
20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
+33
+34
+35
+36
+37
+38
+39
+40
+41
+42
+43
+44
+45
+46
+47
+48
+49
+50
+51
+52
+53
+54
+55
+56
+57
+58
+59
+60
+61
+62
+63
+64
+65
+66
+67
+68
+69
+70
+71
+72
+73
+74
+75
+76
+77
+78
+79
+80
+81
+82
+PREHOOK: query: explain
+select
+ ss_ticket_number, sum(ss_item_sk), sum(q)
+from
+ (select
+ ss_ticket_number, ss_item_sk, min(ss_quantity) q
+ from
+ store_sales
+ group by ss_ticket_number, ss_item_sk) a
+group by ss_ticket_number
+order by ss_ticket_number
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+ ss_ticket_number, sum(ss_item_sk), sum(q)
+from
+ (select
+ ss_ticket_number, ss_item_sk, min(ss_quantity) q
+ from
+ store_sales
+ group by ss_ticket_number, ss_item_sk) a
+group by ss_ticket_number
+order by ss_ticket_number
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_ticket_number (type: int), ss_item_sk (type: int), ss_quantity (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(_col2)
+ keys: _col0 (type: int), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int)
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col1), sum(_col2)
+ keys: _col0 (type: int)
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
+ Execution mode: vectorized
+ Reducer 3
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select
+ ss_ticket_number, sum(ss_item_sk), sum(q)
+from
+ (select
+ ss_ticket_number, ss_item_sk, min(ss_quantity) q
+ from
+ store_sales
+ group by ss_ticket_number, ss_item_sk) a
+group by ss_ticket_number
+order by ss_ticket_number
+PREHOOK: type: QUERY
+PREHOOK: Input: default@store_sales
+#### A masked pattern was here ####
+POSTHOOK: query: select
+ ss_ticket_number, sum(ss_item_sk), sum(q)
+from
+ (select
+ ss_ticket_number, ss_item_sk, min(ss_quantity) q
+ from
+ store_sales
+ group by ss_ticket_number, ss_item_sk) a
+group by ss_ticket_number
+order by ss_ticket_number
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@store_sales
+#### A masked pattern was here ####
+1 85411 816
+2 157365 812
+3 147948 710
+4 69545 411
+5 163232 840
+6 86307 627
+7 114874 563
+8 117953 662
+9 173250 690
+10 60338 602
+11 138545 657
+12 97181 586
+13 109484 555
+14 137333 442
+15 176829 652
+16 115004 654
+17 105008 460
+18 165135 738
+19 128252 831
+20 104789 374
+21 72771 469
+22 128153 449
+23 110253 603
+24 100662 1029
+25 118714 760
+26 81596 502
+27 164068 871
+28 58632 409
+29 133777 417
+30 130451 772
+31 114967 586
+32 142021 592
+33 151818 691
+34 112559 662
+35 137027 780
+36 118285 538
+37 94528 401
+38 81368 521
+39 101064 937
+40 84435 480
+41 112444 688
+42 95731 840
+43 57298 410
+44 159880 839
+45 68919 474
+46 111212 374
+47 78210 416
+48 94459 445
+49 90879 589
+50 37821 407
+51 124927 612
+52 98099 489
+53 138706 609
+54 87478 354
+55 90290 406
+56 78812 372
+57 101175 597
+58 88044 202
+59 104582 753
+60 99218 900
+61 66514 392
+62 126713 527
+63 98778 648
+64 131659 380
+65 86990 494
+66 108808 492
+67 75250 711
+68 91671 548
+69 92821 405
+70 75021 319
+71 124484 748
+72 161470 744
+73 104358 621
+74 88609 688
+75 92940 649
+76 75853 580
+77 124755 873
+78 98285 573
+79 160595 581
+80 151471 704
+81 105109 429
+82 55611 254
+PREHOOK: query: explain
+select
+ ss_ticket_number, ss_item_sk, sum(q)
+from
+ (select
+ ss_ticket_number, ss_item_sk, min(ss_quantity) q
+ from
+ store_sales
+ group by ss_ticket_number, ss_item_sk) a
+group by ss_ticket_number, ss_item_sk
+order by ss_ticket_number, ss_item_sk
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+ ss_ticket_number, ss_item_sk, sum(q)
+from
+ (select
+ ss_ticket_number, ss_item_sk, min(ss_quantity) q
+ from
+ store_sales
+ group by ss_ticket_number, ss_item_sk) a
+group by ss_ticket_number, ss_item_sk
+order by ss_ticket_number, ss_item_sk
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_ticket_number (type: int), ss_item_sk (type: int), ss_quantity (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(_col2)
+ keys: _col0 (type: int), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int)
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2)
+ keys: _col0 (type: int), _col1 (type: int)
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: bigint)
+ Execution mode: vectorized
+ Reducer 3
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select
+ ss_ticket_number, ss_item_sk, sum(q)
+from
+ (select
+ ss_ticket_number, ss_item_sk, min(ss_quantity) q
+ from
+ store_sales
+ group by ss_ticket_number, ss_item_sk) a
+group by ss_ticket_number, ss_item_sk
+order by ss_ticket_number, ss_item_sk
+PREHOOK: type: QUERY
+PREHOOK: Input: default@store_sales
+#### A masked pattern was here ####
+POSTHOOK: query: select
+ ss_ticket_number, ss_item_sk, sum(q)
+from
+ (select
+ ss_ticket_number, ss_item_sk, min(ss_quantity) q
+ from
+ store_sales
+ group by ss_ticket_number, ss_item_sk) a
+group by ss_ticket_number, ss_item_sk
+order by ss_ticket_number, ss_item_sk
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@store_sales
+#### A masked pattern was here ####
+1 49 5
+1 173 65
+1 1553 50
+1 3248 58
+1 3617 79
+1 4553 100
+1 4583 72
+1 4682 44
+1 5527 88
+1 5981 14
+1 10993 91
+1 13283 37
+1 13538 14
+1 13631 99
+2 1363 4
+2 2930 36
+2 3740 49
+2 6928 65
+2 7654 25
+2 9436 79
+2 10768 30
+2 12068 74
+2 12223 78
+2 13340 71
+2 13927 93
+2 14701 58
+2 15085 88
+2 15782 62
+2 17420 NULL
+3 246 96
+3 1531 NULL
+3 3525 42
+3 4698 98
+3 5355 53
+3 10693 27
+3 12447 82
+3 13021 64
+3 14100 79
+3 14443 4
+3 15786 56
+3 16869 4
+3 17263 17
+3 17971 88
+4 163 17
+4 1576 74
+4 5350 86
+4 5515 23
+4 6988 23
+4 7990 56
+4 8452 27
+4 9685 21
+4 11036 41
+4 12790 43
+5 1808 NULL
+5 1940 60
+5 5842 50
+5 6068 76
+5 6466 36
+5 11324 52
+5 11590 15
+5 12650 66
+5 13562 64
+5 13958 60
+5 14599 83
+5 14686 91
+5 15752 66
+5 16195 50
+5 16792 71
+6 2549 62
+6 2647 100
+6 3049 31
+6 3291 100
+6 6437 72
+6 8621 NULL
+6 10355 94
+6 10895 1
+6 11705 61
+6 13245 64
+6 13513 42
+7 4627 9
+7 4795 73
+7 4833 88
+7 5183 51
+7 5905 69
+7 8955 54
+7 9751 4
+7 10487 52
+7 12571 82
+7 15179 12
+7 15333 NULL
+7 17255 69
+8 665 31
+8 4183 90
+8 5929 83
+8 7115 54
+8 11365 7
+8 11893 95
+8 12041 95
+8 13427 87
+8 16671 20
+8 17119 51
+8 17545 49
+9 69 11
+9 889 6
+9 1185 62
+9 4623 34
+9 7945 83
+9 8334 71
+9 12027 27
+9 12969 59
+9 13483 NULL
+9 13717 53
+9 15133 15
+9 16083 32
+9 16363 54
+9 16461 66
+9 16659 84
+9 17310 33
+10 755 74
+10 1425 92
+10 1511 76
+10 3433 83
+10 3933 52
+10 4357 17
+10 5863 47
+10 9811 28
+10 13803 66
+10 15447 67
+11 157 84
+11 1315 70
+11 7519 68
+11 7608 66
+11 9901 57
+11 10699 33
+11 11490 NULL
+11 11991 38
+11 12438 16
+11 15157 96
+11 15649 33
+11 17226 11
+11 17395 85
+12 373 57
+12 1591 82
+12 4888 56
+12 6148 36
+12 6248 36
+12 9616 66
+12 9788 73
+12 13399 46
+12 14746 26
+12 14944 9
+12 15440 99
+13 868 NULL
+13 1760 12
+13 1898 NULL
+13 2108 9
+13 2191 NULL
+13 4430 73
+13 5971 80
+13 6085 58
+13 6140 15
+13 6682 80
+13 7640 48
+13 7723 27
+13 10096 12
+13 11758 34
+13 16894 87
+13 17240 20
+14 177 41
+14 769 20
+14 4507 4
+14 10175 19
+14 11549 6
+14 11653 60
+14 11817 81
+14 12587 NULL
+14 13069 77
+14 13515 57
+14 13845 17
+14 16741 46
+14 16929 14
+15 4241 21
+15 4505 59
+15 4777 28
+15 7391 98
+15 8336 15
+15 8353 NULL
+15 8690 32
+15 8707 21
+15 10361 39
+15 11659 80
+15 13172 25
+15 16619 81
+15 17267 7
+15 17330 82
+15 17564 26
+15 17857 38
+16 457 60
+16 1888 4
+16 4144 94
+16 6008 59
+16 7504 51
+16 8887 35
+16 9769 42
+16 9790 17
+16 9997 94
+16 11168 86
+16 11920 29
+16 16226 13
+16 17246 70
+17 2092 37
+17 4678 34
+17 6811 70
+17 9214 57
+17 10543 54
+17 11203 21
+17 13177 45
+17 13826 32
+17 15781 76
+17 17683 34
+18 2440 40
+18 5251 41
+18 7378 94
+18 8779 9
+18 8884 18
+18 9886 62
+18 11584 76
+18 11890 7
+18 12602 81
+18 12826 93
+18 12860 18
+18 14011 95
+18 14372 76
+18 14377 15
+18 17995 13
+19 1094 48
+19 3133 96
+19 3376 84
+19 4882 84
+19 6772 97
+19 7087 1
+19 7814 29
+19 8662 97
+19 9094 49
+19 9346 39
+19 10558 82
+19 10651 46
+19 11914 59
+19 16330 NULL
+19 17539 20
+20 1451 89
+20 2618 4
+20 5312 9
+20 5425 15
+20 5483 8
+20 6026 21
+20 7207 90
+20 8714 NULL
+20 9086 4
+20 9800 32
+20 13601 17
+20 14935 NULL
+20 15131 85
+21 230 48
+21 1810 59
+21 2870 50
+21 5170 45
+21 5998 51
+21 6476 49
+21 9187 14
+21 12266 47
+21 14368 18
+21 14396 88
+22 9985 70
+22 10474 31
+22 11599 66
+22 12415 10
+22 15310 15
+22 16396 85
+22 16922 88
+22 17392 14
+22 17660 70
+23 319 86
+23 7242 37
+23 8181 13
+23 8413 1
+23 9093 38
+23 9097 81
+23 11220 91
+23 11257 64
+23 12397 80
+23 15403 96
+23 17631 16
+24 407 53
+24 1389 72
+24 1795 21
+24 2497 85
+24 3103 73
+24 4425 57
+24 4749 28
+24 4873 41
+24 5653 92
+24 6043 1
+24 6751 82
+24 7375 97
+24 10265 93
+24 11551 48
+24 13303 97
+24 16483 89
+25 1333 55
+25 2150 100
+25 2608 76
+25 3454 100
+25 4880 29
+25 5954 34
+25 6955 40
+25 7874 65
+25 9472 48
+25 10159 24
+25 14488 26
+25 14635 68
+25 17000 40
+25 17752 55
+26 1989 26
+26 5053 4
+26 5385 97
+26 5721 81
+26 6647 64
+26 7337 45
+26 9679 18
+26 11895 77
+26 12851 56
+26 15039 34
+27 1305 44
+27 2137 96
+27 2671 92
+27 5831 61
+27 7139 59
+27 8167 28
+27 10757 15
+27 11441 15
+27 11509 65
+27 12237 89
+27 12749 31
+27 13885 66
+27 15025 26
+27 16029 59
+27 16419 65
+27 16767 60
+28 1807 98
+28 2817 8
+28 2967 29
+28 4483 78
+28 5437 15
+28 6411 3
+28 7965 93
+28 8043 58
+28 8407 14
+28 10295 13
+29 20 18
+29 1363 75
+29 2930 23
+29 3740 5
+29 7654 20
+29 9458 33
+29 10795 33
+29 12068 37
+29 12223 59
+29 13340 21
+29 13693 NULL
+29 15085 40
+29 15626 NULL
+29 15782 53
+30 217 91
+30 1951 59
+30 3238 16
+30 3506 15
+30 3928 87
+30 5431 77
+30 6752 69
+30 7870 7
+30 8666 21
+30 12572 33
+30 12670 20
+30 13579 75
+30 14848 62
+30 17348 62
+30 17875 78
+31 913 54
+31 4963 67
+31 6617 11
+31 6917 4
+31 7513 82
+31 11739 95
+31 14575 97
+31 14727 41
+31 15341 31
+31 15411 53
+31 16251 51
+32 1115 61
+32 2095 34
+32 2887 8
+32 4339 6
+32 4537 22
+32 4808 NULL
+32 5798 87
+32 7547 24
+32 9683 26
+32 11005 46
+32 11348 41
+32 12134 21
+32 15001 57
+32 15644 34
+32 16421 74
+32 17659 51
+33 4798 27
+33 7300 3
+33 9649 36
+33 10376 21
+33 11119 92
+33 11756 26
+33 12643 89
+33 12760 54
+33 12964 80
+33 14125 66
+33 14158 82
+33 14692 93
+33 15478 22
+34 1526 91
+34 1717 53
+34 2312 6
+34 4118 88
+34 5197 63
+34 5449 9
+34 6193 61
+34 9325 3
+34 9766 83
+34 12016 42
+34 12290 53
+34 12512 60
+34 13814 20
+34 16324 30
+35 411 51
+35 2377 52
+35 3667 97
+35 4325 56
+35 5179 83
+35 11635 87
+35 11661 81
+35 14239 55
+35 15619 45
+35 15757 9
+35 17341 92
+35 17365 65
+35 17451 7
+36 1115 80
+36 2095 43
+36 2887 31
+36 7547 46
+36 11005 49
+36 11349 80
+36 15001 54
+36 15645 23
+36 16421 25
+36 17561 16
+36 17659 91
+37 2997 94
+37 7283 87
+37 10715 52
+37 10929 88
+37 13171 6
+37 15337 62
+37 16971 12
+37 17125 NULL
+38 757 2
+38 2164 17
+38 3439 84
+38 4154 35
+38 5113 73
+38 6220 98
+38 7018 15
+38 7784 56
+38 8870 15
+38 9710 7
+38 10441 62
+38 15698 57
+39 386 89
+39 1598 64
+39 3476 73
+39 3943 64
+39 4190 86
+39 4957 24
+39 5393 98
+39 7097 78
+39 7118 67
+39 7604 49
+39 7697 24
+39 8078 54
+39 8411 96
+39 15491 54
+39 15625 17
+40 2854 71
+40 3490 65
+40 3985 63
+40 5098 35
+40 5318 87
+40 10094 80
+40 10912 23
+40 12050 NULL
+40 13658 53
+40 16976 3
+41 10 50
+41 64 29
+41 3380 88
+41 5566 11
+41 6310 90
+41 7402 69
+41 7603 94
+41 9322 8
+41 10915 81
+41 14788 15
+41 15242 87
+41 15328 46
+41 16514 20
+42 619 69
+42 976 100
+42 1436 94
+42 2314 74
+42 2392 14
+42 2602 30
+42 3346 74
+42 3613 30
+42 6058 30
+42 6134 92
+42 8462 23
+42 9740 52
+42 10016 57
+42 10471 19
+42 12550 41
+42 15002 41
+43 2923 16
+43 3344 22
+43 3911 26
+43 4364 77
+43 4691 41
+43 5773 85
+43 5852 16
+43 11771 30
+43 14669 97
+44 2351 56
+44 2623 18
+44 7303 14
+44 7527 67
+44 9059 68
+44 11707 83
+44 12341 20
+44 13331 98
+44 13449 45
+44 14149 80
+44 15803 81
+44 16491 56
+44 16837 92
+44 16909 61
+45 811 62
+45 1479 49
+45 3265 98
+45 5309 18
+45 7363 87
+45 10115 68
+45 11095 40
+45 13133 46
+45 16349 6
+46 1960 12
+46 3010 67
+46 7040 33
+46 8065 NULL
+46 11426 72
+46 13042 58
+46 15595 32
+46 16540 30
+46 17150 57
+46 17384 13
+47 254 NULL
+47 481 30
+47 1132 66
+47 1916 71
+47 3085 51
+47 3202 7
+47 3878 NULL
+47 4774 11
+47 5008 82
+47 5305 NULL
+47 5468 7
+47 7214 1
+47 9770 33
+47 13246 47
+47 13477 10
+48 1761 22
+48 2820 4
+48 2829 65
+48 4431 39
+48 5971 29
+48 6085 1
+48 6684 44
+48 9199 88
+48 11259 NULL
+48 12468 62
+48 13153 74
+48 17799 17
+49 749 60
+49 2135 4
+49 5342 69
+49 5852 47
+49 6805 40
+49 7141 94
+49 9049 68
+49 9553 71
+49 12737 48
+49 15155 84
+49 16361 4
+50 1280 69
+50 1312 30
+50 1909 53
+50 1984 40
+50 3097 64
+50 5023 NULL
+50 7135 69
+50 16081 82
+51 422 21
+51 3091 28
+51 4687 6
+51 5029 12
+51 5059 51
+51 6565 33
+51 8384 79
+51 9311 90
+51 10133 54
+51 11234 NULL
+51 12625 53
+51 13199 97
+51 17483 22
+51 17705 66
+52 2420 90
+52 3334 73
+52 6098 NULL
+52 7606 45
+52 11488 76
+52 15649 29
+52 16646 48
+52 17402 91
+52 17456 37
+53 1114 40
+53 2095 62
+53 2786 70
+53 2887 39
+53 7546 58
+53 11348 38
+53 13220 76
+53 13795 38
+53 15991 37
+53 16420 14
+53 16648 79
+53 17296 43
+53 17560 15
+54 702 40
+54 825 50
+54 1165 62
+54 3861 NULL
+54 6517 40
+54 9159 75
+54 14737 38
+54 16059 15
+54 16974 NULL
+54 17479 34
+55 1339 16
+55 3001 7
+55 5137 33
+55 9703 44
+55 12170 92
+55 12205 90
+55 14135 36
+55 14923 71
+55 17677 17
+56 4242 2
+56 4506 57
+56 8353 35
+56 8691 59
+56 8707 68
+56 10362 54
+56 16620 23
+56 17331 74
+57 3253 71
+57 4028 88
+57 4933 22
+57 12596 91
+57 12721 62
+57 12740 52
+57 15182 86
+57 17729 26
+57 17993 99
+58 1829 52
+58 3848 6
+58 5117 2
+58 7649 19
+58 9743 62
+58 10802 14
+58 15635 6
+58 16472 6
+58 16949 35
+59 3133 92
+59 3546 22
+59 5772 70
+59 7087 80
+59 8010 46
+59 8335 36
+59 9348 62
+59 9397 92
+59 10651 100
+59 11916 19
+59 12858 90
+59 14529 44
+60 97 50
+60 555 62
+60 633 71
+60 999 43
+60 1117 78
+60 1573 90
+60 4041 25
+60 4235 28
+60 4513 72
+60 4937 22
+60 7231 95
+60 10277 62
+60 10393 75
+60 13975 14
+60 16887 25
+60 17755 88
+61 1106 4
+61 2264 36
+61 3362 48
+61 4567 26
+61 5528 78
+61 6380 77
+61 7591 78
+61 8924 11
+61 10330 8
+61 16462 26
+62 4093 94
+62 6403 NULL
+62 8457 37
+62 10149 75
+62 12163 29
+62 12199 5
+62 12407 NULL
+62 13559 80
+62 15399 74
+62 15733 40
+62 16151 93
+63 4488 73
+63 5079 79
+63 5217 66
+63 5658 99
+63 9319 80
+63 11370 38
+63 11946 85
+63 13339 19
+63 15793 40
+63 16569 69
+64 1213 NULL
+64 3090 87
+64 3963 NULL
+64 11835 82
+64 13224 NULL
+64 14407 8
+64 15867 59
+64 15936 30
+64 16921 19
+64 17586 78
+64 17617 17
+65 2287 100
+65 4227 42
+65 9625 51
+65 9847 54
+65 13897 40
+65 14905 85
+65 15177 55
+65 17025 67
+66 6507 76
+66 7033 65
+66 7227 66
+66 8197 41
+66 9237 29
+66 10019 10
+66 11419 66
+66 15629 20
+66 16745 91
+66 16795 28
+67 757 77
+67 2133 74
+67 3439 73
+67 4155 87
+67 5113 NULL
+67 7020 79
+67 7507 77
+67 8469 59
+67 8871 71
+67 12087 70
+67 15699 44
+68 1387 74
+68 1603 57
+68 1820 54
+68 2035 22
+68 2296 52
+68 2564 83
+68 5162 23
+68 6763 77
+68 7765 NULL
+68 12526 3
+68 12724 88
+68 17426 2
+68 17600 13
+69 322 45
+69 337 34
+69 4208 9
+69 4267 10
+69 6136 7
+69 7264 67
+69 7822 30
+69 8599 53
+69 11137 68
+69 13489 66
+69 13792 NULL
+69 15448 16
+70 1592 53
+70 2462 NULL
+70 3296 48
+70 3947 NULL
+70 6185 82
+70 6425 NULL
+70 8893 17
+70 9857 20
+70 14549 4
+70 17815 95
+71 457 75
+71 1888 4
+71 2098 51
+71 4144 49
+71 5858 NULL
+71 6008 54
+71 7504 3
+71 8887 10
+71 9274 36
+71 9769 79
+71 9790 96
+71 9997 26
+71 10108 66
+71 10288 30
+71 11168 79
+71 17246 90
+72 1535 9
+72 5917 85
+72 6113 45
+72 6671 13
+72 9860 26
+72 10427 66
+72 10753 16
+72 11741 62
+72 12788 29
+72 12901 57
+72 13085 94
+72 13423 62
+72 13904 37
+72 15587 87
+72 16765 56
+73 247 53
+73 1063 37
+73 3205 82
+73 4946 54
+73 6862 58
+73 10051 49
+73 12502 75
+73 15109 38
+73 16519 97
+73 16585 38
+73 17269 40
+74 326 29
+74 3104 78
+74 3175 23
+74 3278 NULL
+74 3542 96
+74 3754 26
+74 5492 54
+74 7694 17
+74 8653 12
+74 9620 95
+74 10069 99
+74 13208 87
+74 16694 72
+75 607 20
+75 2948 25
+75 4625 73
+75 6938 89
+75 6953 71
+75 8726 6
+75 9905 54
+75 10217 85
+75 11039 70
+75 14186 63
+75 16796 93
+76 257 5
+76 465 2
+76 1107 16
+76 1503 97
+76 2265 98
+76 2869 32
+76 3363 25
+76 4237 48
+76 4567 40
+76 5529 78
+76 6381 50
+76 7591 27
+76 8925 6
+76 10331 3
+76 16463 53
+77 992 62
+77 1399 34
+77 2713 85
+77 3868 89
+77 6289 30
+77 7339 88
+77 7448 95
+77 7486 49
+77 8686 38
+77 9220 90
+77 11918 36
+77 12439 95
+77 13456 48
+77 14815 18
+77 16687 16
+78 901 3
+78 3304 50
+78 3856 27
+78 5965 78
+78 6044 59
+78 6110 43
+78 6500 76
+78 7576 87
+78 8611 79
+78 10507 6
+78 11209 7
+78 12706 19
+78 14996 39
+79 247 NULL
+79 1063 85
+79 3205 48
+79 4947 35
+79 6864 1
+79 10051 10
+79 10524 36
+79 12504 81
+79 14322 41
+79 15109 NULL
+79 15498 3
+79 15888 58
+79 16519 9
+79 16585 93
+79 17269 81
+80 998 93
+80 1519 25
+80 1573 40
+80 4040 66
+80 4513 NULL
+80 4622 1
+80 7231 49
+80 7610 37
+80 10393 5
+80 12968 NULL
+80 13717 91
+80 13975 13
+80 16363 84
+80 16886 77
+80 17308 29
+80 17755 94
+81 4486 31
+81 5078 75
+81 5216 64
+81 5656 24
+81 7166 7
+81 7663 79
+81 8918 37
+81 9319 36
+81 11107 36
+81 11368 26
+81 13339 6
+81 15793 8
+82 2572 53
+82 7862 75
+82 13138 59
+82 14998 49
+82 17041 18
http://git-wip-us.apache.org/repos/asf/hive/blob/072c5a0b/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out b/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out
index 33f7ed9..fec2d2c 100644
--- a/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out
+++ b/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out
@@ -319,10 +319,10 @@ NULL
-47
-46
PREHOOK: query: explain
-select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint limit 20
+select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20
PREHOOK: type: QUERY
POSTHOOK: query: explain
-select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint limit 20
+select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -385,11 +385,11 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint limit 20
+PREHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20
PREHOOK: type: QUERY
PREHOOK: Input: default@alltypesorc
#### A masked pattern was here ####
-POSTHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint limit 20
+POSTHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20
POSTHOOK: type: QUERY
POSTHOOK: Input: default@alltypesorc
#### A masked pattern was here ####
http://git-wip-us.apache.org/repos/asf/hive/blob/072c5a0b/ql/src/test/results/clientpositive/vector_groupby_reduce.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_groupby_reduce.q.out b/ql/src/test/results/clientpositive/vector_groupby_reduce.q.out
index 331ba4f..fc1997c 100644
--- a/ql/src/test/results/clientpositive/vector_groupby_reduce.q.out
+++ b/ql/src/test/results/clientpositive/vector_groupby_reduce.q.out
@@ -314,12 +314,9 @@ POSTHOOK: Input: default@store_sales
18
19
20
-PREHOOK: query: -- The Reduce task has 2 MergePartial GROUP BY operators in a row. Currently,
--- we don't issue startGroup with keys out of the 1st vectorized GROUP BY, so we can't
--- vectorize the 2nd GROUP BY...
-explain
+PREHOOK: query: explain
select
- min(ss_ticket_number)
+ min(ss_ticket_number) m
from
(select
ss_ticket_number
@@ -327,14 +324,11 @@ from
store_sales
group by ss_ticket_number) a
group by ss_ticket_number
-limit 20
+order by m
PREHOOK: type: QUERY
-POSTHOOK: query: -- The Reduce task has 2 MergePartial GROUP BY operators in a row. Currently,
--- we don't issue startGroup with keys out of the 1st vectorized GROUP BY, so we can't
--- vectorize the 2nd GROUP BY...
-explain
+POSTHOOK: query: explain
select
- min(ss_ticket_number)
+ min(ss_ticket_number) m
from
(select
ss_ticket_number
@@ -342,11 +336,12 @@ from
store_sales
group by ss_ticket_number) a
group by ss_ticket_number
-limit 20
+order by m
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
STAGE PLANS:
Stage: Stage-1
@@ -386,25 +381,42 @@ STAGE PLANS:
expressions: _col1 (type: int)
outputColumnNames: _col0
Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
- Limit
- Number of rows: 20
- Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
- limit: 20
+ limit: -1
Processor Tree:
ListSink
PREHOOK: query: select
- min(ss_ticket_number)
+ min(ss_ticket_number) m
from
(select
ss_ticket_number
@@ -412,12 +424,12 @@ from
store_sales
group by ss_ticket_number) a
group by ss_ticket_number
-limit 20
+order by m
PREHOOK: type: QUERY
PREHOOK: Input: default@store_sales
#### A masked pattern was here ####
POSTHOOK: query: select
- min(ss_ticket_number)
+ min(ss_ticket_number) m
from
(select
ss_ticket_number
@@ -425,7 +437,7 @@ from
store_sales
group by ss_ticket_number) a
group by ss_ticket_number
-limit 20
+order by m
POSTHOOK: type: QUERY
POSTHOOK: Input: default@store_sales
#### A masked pattern was here ####
@@ -449,3 +461,1401 @@ POSTHOOK: Input: default@store_sales
18
19
20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
+33
+34
+35
+36
+37
+38
+39
+40
+41
+42
+43
+44
+45
+46
+47
+48
+49
+50
+51
+52
+53
+54
+55
+56
+57
+58
+59
+60
+61
+62
+63
+64
+65
+66
+67
+68
+69
+70
+71
+72
+73
+74
+75
+76
+77
+78
+79
+80
+81
+82
+PREHOOK: query: explain
+select
+ ss_ticket_number, sum(ss_item_sk), sum(q)
+from
+ (select
+ ss_ticket_number, ss_item_sk, min(ss_quantity) q
+ from
+ store_sales
+ group by ss_ticket_number, ss_item_sk) a
+group by ss_ticket_number
+order by ss_ticket_number
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+ ss_ticket_number, sum(ss_item_sk), sum(q)
+from
+ (select
+ ss_ticket_number, ss_item_sk, min(ss_quantity) q
+ from
+ store_sales
+ group by ss_ticket_number, ss_item_sk) a
+group by ss_ticket_number
+order by ss_ticket_number
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_ticket_number (type: int), ss_item_sk (type: int), ss_quantity (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(_col2)
+ keys: _col0 (type: int), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col1), sum(_col2)
+ keys: _col0 (type: int)
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select
+ ss_ticket_number, sum(ss_item_sk), sum(q)
+from
+ (select
+ ss_ticket_number, ss_item_sk, min(ss_quantity) q
+ from
+ store_sales
+ group by ss_ticket_number, ss_item_sk) a
+group by ss_ticket_number
+order by ss_ticket_number
+PREHOOK: type: QUERY
+PREHOOK: Input: default@store_sales
+#### A masked pattern was here ####
+POSTHOOK: query: select
+ ss_ticket_number, sum(ss_item_sk), sum(q)
+from
+ (select
+ ss_ticket_number, ss_item_sk, min(ss_quantity) q
+ from
+ store_sales
+ group by ss_ticket_number, ss_item_sk) a
+group by ss_ticket_number
+order by ss_ticket_number
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@store_sales
+#### A masked pattern was here ####
+1 85411 816
+2 157365 812
+3 147948 710
+4 69545 411
+5 163232 840
+6 86307 627
+7 114874 563
+8 117953 662
+9 173250 690
+10 60338 602
+11 138545 657
+12 97181 586
+13 109484 555
+14 137333 442
+15 176829 652
+16 115004 654
+17 105008 460
+18 165135 738
+19 128252 831
+20 104789 374
+21 72771 469
+22 128153 449
+23 110253 603
+24 100662 1029
+25 118714 760
+26 81596 502
+27 164068 871
+28 58632 409
+29 133777 417
+30 130451 772
+31 114967 586
+32 142021 592
+33 151818 691
+34 112559 662
+35 137027 780
+36 118285 538
+37 94528 401
+38 81368 521
+39 101064 937
+40 84435 480
+41 112444 688
+42 95731 840
+43 57298 410
+44 159880 839
+45 68919 474
+46 111212 374
+47 78210 416
+48 94459 445
+49 90879 589
+50 37821 407
+51 124927 612
+52 98099 489
+53 138706 609
+54 87478 354
+55 90290 406
+56 78812 372
+57 101175 597
+58 88044 202
+59 104582 753
+60 99218 900
+61 66514 392
+62 126713 527
+63 98778 648
+64 131659 380
+65 86990 494
+66 108808 492
+67 75250 711
+68 91671 548
+69 92821 405
+70 75021 319
+71 124484 748
+72 161470 744
+73 104358 621
+74 88609 688
+75 92940 649
+76 75853 580
+77 124755 873
+78 98285 573
+79 160595 581
+80 151471 704
+81 105109 429
+82 55611 254
+PREHOOK: query: explain
+select
+ ss_ticket_number, ss_item_sk, sum(q)
+from
+ (select
+ ss_ticket_number, ss_item_sk, min(ss_quantity) q
+ from
+ store_sales
+ group by ss_ticket_number, ss_item_sk) a
+group by ss_ticket_number, ss_item_sk
+order by ss_ticket_number, ss_item_sk
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+ ss_ticket_number, ss_item_sk, sum(q)
+from
+ (select
+ ss_ticket_number, ss_item_sk, min(ss_quantity) q
+ from
+ store_sales
+ group by ss_ticket_number, ss_item_sk) a
+group by ss_ticket_number, ss_item_sk
+order by ss_ticket_number, ss_item_sk
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ss_ticket_number (type: int), ss_item_sk (type: int), ss_quantity (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(_col2)
+ keys: _col0 (type: int), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2)
+ keys: _col0 (type: int), _col1 (type: int)
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: bigint)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select
+ ss_ticket_number, ss_item_sk, sum(q)
+from
+ (select
+ ss_ticket_number, ss_item_sk, min(ss_quantity) q
+ from
+ store_sales
+ group by ss_ticket_number, ss_item_sk) a
+group by ss_ticket_number, ss_item_sk
+order by ss_ticket_number, ss_item_sk
+PREHOOK: type: QUERY
+PREHOOK: Input: default@store_sales
+#### A masked pattern was here ####
+POSTHOOK: query: select
+ ss_ticket_number, ss_item_sk, sum(q)
+from
+ (select
+ ss_ticket_number, ss_item_sk, min(ss_quantity) q
+ from
+ store_sales
+ group by ss_ticket_number, ss_item_sk) a
+group by ss_ticket_number, ss_item_sk
+order by ss_ticket_number, ss_item_sk
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@store_sales
+#### A masked pattern was here ####
+1 49 5
+1 173 65
+1 1553 50
+1 3248 58
+1 3617 79
+1 4553 100
+1 4583 72
+1 4682 44
+1 5527 88
+1 5981 14
+1 10993 91
+1 13283 37
+1 13538 14
+1 13631 99
+2 1363 4
+2 2930 36
+2 3740 49
+2 6928 65
+2 7654 25
+2 9436 79
+2 10768 30
+2 12068 74
+2 12223 78
+2 13340 71
+2 13927 93
+2 14701 58
+2 15085 88
+2 15782 62
+2 17420 NULL
+3 246 96
+3 1531 NULL
+3 3525 42
+3 4698 98
+3 5355 53
+3 10693 27
+3 12447 82
+3 13021 64
+3 14100 79
+3 14443 4
+3 15786 56
+3 16869 4
+3 17263 17
+3 17971 88
+4 163 17
+4 1576 74
+4 5350 86
+4 5515 23
+4 6988 23
+4 7990 56
+4 8452 27
+4 9685 21
+4 11036 41
+4 12790 43
+5 1808 NULL
+5 1940 60
+5 5842 50
+5 6068 76
+5 6466 36
+5 11324 52
+5 11590 15
+5 12650 66
+5 13562 64
+5 13958 60
+5 14599 83
+5 14686 91
+5 15752 66
+5 16195 50
+5 16792 71
+6 2549 62
+6 2647 100
+6 3049 31
+6 3291 100
+6 6437 72
+6 8621 NULL
+6 10355 94
+6 10895 1
+6 11705 61
+6 13245 64
+6 13513 42
+7 4627 9
+7 4795 73
+7 4833 88
+7 5183 51
+7 5905 69
+7 8955 54
+7 9751 4
+7 10487 52
+7 12571 82
+7 15179 12
+7 15333 NULL
+7 17255 69
+8 665 31
+8 4183 90
+8 5929 83
+8 7115 54
+8 11365 7
+8 11893 95
+8 12041 95
+8 13427 87
+8 16671 20
+8 17119 51
+8 17545 49
+9 69 11
+9 889 6
+9 1185 62
+9 4623 34
+9 7945 83
+9 8334 71
+9 12027 27
+9 12969 59
+9 13483 NULL
+9 13717 53
+9 15133 15
+9 16083 32
+9 16363 54
+9 16461 66
+9 16659 84
+9 17310 33
+10 755 74
+10 1425 92
+10 1511 76
+10 3433 83
+10 3933 52
+10 4357 17
+10 5863 47
+10 9811 28
+10 13803 66
+10 15447 67
+11 157 84
+11 1315 70
+11 7519 68
+11 7608 66
+11 9901 57
+11 10699 33
+11 11490 NULL
+11 11991 38
+11 12438 16
+11 15157 96
+11 15649 33
+11 17226 11
+11 17395 85
+12 373 57
+12 1591 82
+12 4888 56
+12 6148 36
+12 6248 36
+12 9616 66
+12 9788 73
+12 13399 46
+12 14746 26
+12 14944 9
+12 15440 99
+13 868 NULL
+13 1760 12
+13 1898 NULL
+13 2108 9
+13 2191 NULL
+13 4430 73
+13 5971 80
+13 6085 58
+13 6140 15
+13 6682 80
+13 7640 48
+13 7723 27
+13 10096 12
+13 11758 34
+13 16894 87
+13 17240 20
+14 177 41
+14 769 20
+14 4507 4
+14 10175 19
+14 11549 6
+14 11653 60
+14 11817 81
+14 12587 NULL
+14 13069 77
+14 13515 57
+14 13845 17
+14 16741 46
+14 16929 14
+15 4241 21
+15 4505 59
+15 4777 28
+15 7391 98
+15 8336 15
+15 8353 NULL
+15 8690 32
+15 8707 21
+15 10361 39
+15 11659 80
+15 13172 25
+15 16619 81
+15 17267 7
+15 17330 82
+15 17564 26
+15 17857 38
+16 457 60
+16 1888 4
+16 4144 94
+16 6008 59
+16 7504 51
+16 8887 35
+16 9769 42
+16 9790 17
+16 9997 94
+16 11168 86
+16 11920 29
+16 16226 13
+16 17246 70
+17 2092 37
+17 4678 34
+17 6811 70
+17 9214 57
+17 10543 54
+17 11203 21
+17 13177 45
+17 13826 32
+17 15781 76
+17 17683 34
+18 2440 40
+18 5251 41
+18 7378 94
+18 8779 9
+18 8884 18
+18 9886 62
+18 11584 76
+18 11890 7
+18 12602 81
+18 12826 93
+18 12860 18
+18 14011 95
+18 14372 76
+18 14377 15
+18 17995 13
+19 1094 48
+19 3133 96
+19 3376 84
+19 4882 84
+19 6772 97
+19 7087 1
+19 7814 29
+19 8662 97
+19 9094 49
+19 9346 39
+19 10558 82
+19 10651 46
+19 11914 59
+19 16330 NULL
+19 17539 20
+20 1451 89
+20 2618 4
+20 5312 9
+20 5425 15
+20 5483 8
+20 6026 21
+20 7207 90
+20 8714 NULL
+20 9086 4
+20 9800 32
+20 13601 17
+20 14935 NULL
+20 15131 85
+21 230 48
+21 1810 59
+21 2870 50
+21 5170 45
+21 5998 51
+21 6476 49
+21 9187 14
+21 12266 47
+21 14368 18
+21 14396 88
+22 9985 70
+22 10474 31
+22 11599 66
+22 12415 10
+22 15310 15
+22 16396 85
+22 16922 88
+22 17392 14
+22 17660 70
+23 319 86
+23 7242 37
+23 8181 13
+23 8413 1
+23 9093 38
+23 9097 81
+23 11220 91
+23 11257 64
+23 12397 80
+23 15403 96
+23 17631 16
+24 407 53
+24 1389 72
+24 1795 21
+24 2497 85
+24 3103 73
+24 4425 57
+24 4749 28
+24 4873 41
+24 5653 92
+24 6043 1
+24 6751 82
+24 7375 97
+24 10265 93
+24 11551 48
+24 13303 97
+24 16483 89
+25 1333 55
+25 2150 100
+25 2608 76
+25 3454 100
+25 4880 29
+25 5954 34
+25 6955 40
+25 7874 65
+25 9472 48
+25 10159 24
+25 14488 26
+25 14635 68
+25 17000 40
+25 17752 55
+26 1989 26
+26 5053 4
+26 5385 97
+26 5721 81
+26 6647 64
+26 7337 45
+26 9679 18
+26 11895 77
+26 12851 56
+26 15039 34
+27 1305 44
+27 2137 96
+27 2671 92
+27 5831 61
+27 7139 59
+27 8167 28
+27 10757 15
+27 11441 15
+27 11509 65
+27 12237 89
+27 12749 31
+27 13885 66
+27 15025 26
+27 16029 59
+27 16419 65
+27 16767 60
+28 1807 98
+28 2817 8
+28 2967 29
+28 4483 78
+28 5437 15
+28 6411 3
+28 7965 93
+28 8043 58
+28 8407 14
+28 10295 13
+29 20 18
+29 1363 75
+29 2930 23
+29 3740 5
+29 7654 20
+29 9458 33
+29 10795 33
+29 12068 37
+29 12223 59
+29 13340 21
+29 13693 NULL
+29 15085 40
+29 15626 NULL
+29 15782 53
+30 217 91
+30 1951 59
+30 3238 16
+30 3506 15
+30 3928 87
+30 5431 77
+30 6752 69
+30 7870 7
+30 8666 21
+30 12572 33
+30 12670 20
+30 13579 75
+30 14848 62
+30 17348 62
+30 17875 78
+31 913 54
+31 4963 67
+31 6617 11
+31 6917 4
+31 7513 82
+31 11739 95
+31 14575 97
+31 14727 41
+31 15341 31
+31 15411 53
+31 16251 51
+32 1115 61
+32 2095 34
+32 2887 8
+32 4339 6
+32 4537 22
+32 4808 NULL
+32 5798 87
+32 7547 24
+32 9683 26
+32 11005 46
+32 11348 41
+32 12134 21
+32 15001 57
+32 15644 34
+32 16421 74
+32 17659 51
+33 4798 27
+33 7300 3
+33 9649 36
+33 10376 21
+33 11119 92
+33 11756 26
+33 12643 89
+33 12760 54
+33 12964 80
+33 14125 66
+33 14158 82
+33 14692 93
+33 15478 22
+34 1526 91
+34 1717 53
+34 2312 6
+34 4118 88
+34 5197 63
+34 5449 9
+34 6193 61
+34 9325 3
+34 9766 83
+34 12016 42
+34 12290 53
+34 12512 60
+34 13814 20
+34 16324 30
+35 411 51
+35 2377 52
+35 3667 97
+35 4325 56
+35 5179 83
+35 11635 87
+35 11661 81
+35 14239 55
+35 15619 45
+35 15757 9
+35 17341 92
+35 17365 65
+35 17451 7
+36 1115 80
+36 2095 43
+36 2887 31
+36 7547 46
+36 11005 49
+36 11349 80
+36 15001 54
+36 15645 23
+36 16421 25
+36 17561 16
+36 17659 91
+37 2997 94
+37 7283 87
+37 10715 52
+37 10929 88
+37 13171 6
+37 15337 62
+37 16971 12
+37 17125 NULL
+38 757 2
+38 2164 17
+38 3439 84
+38 4154 35
+38 5113 73
+38 6220 98
+38 7018 15
+38 7784 56
+38 8870 15
+38 9710 7
+38 10441 62
+38 15698 57
+39 386 89
+39 1598 64
+39 3476 73
+39 3943 64
+39 4190 86
+39 4957 24
+39 5393 98
+39 7097 78
+39 7118 67
+39 7604 49
+39 7697 24
+39 8078 54
+39 8411 96
+39 15491 54
+39 15625 17
+40 2854 71
+40 3490 65
+40 3985 63
+40 5098 35
+40 5318 87
+40 10094 80
+40 10912 23
+40 12050 NULL
+40 13658 53
+40 16976 3
+41 10 50
+41 64 29
+41 3380 88
+41 5566 11
+41 6310 90
+41 7402 69
+41 7603 94
+41 9322 8
+41 10915 81
+41 14788 15
+41 15242 87
+41 15328 46
+41 16514 20
+42 619 69
+42 976 100
+42 1436 94
+42 2314 74
+42 2392 14
+42 2602 30
+42 3346 74
+42 3613 30
+42 6058 30
+42 6134 92
+42 8462 23
+42 9740 52
+42 10016 57
+42 10471 19
+42 12550 41
+42 15002 41
+43 2923 16
+43 3344 22
+43 3911 26
+43 4364 77
+43 4691 41
+43 5773 85
+43 5852 16
+43 11771 30
+43 14669 97
+44 2351 56
+44 2623 18
+44 7303 14
+44 7527 67
+44 9059 68
+44 11707 83
+44 12341 20
+44 13331 98
+44 13449 45
+44 14149 80
+44 15803 81
+44 16491 56
+44 16837 92
+44 16909 61
+45 811 62
+45 1479 49
+45 3265 98
+45 5309 18
+45 7363 87
+45 10115 68
+45 11095 40
+45 13133 46
+45 16349 6
+46 1960 12
+46 3010 67
+46 7040 33
+46 8065 NULL
+46 11426 72
+46 13042 58
+46 15595 32
+46 16540 30
+46 17150 57
+46 17384 13
+47 254 NULL
+47 481 30
+47 1132 66
+47 1916 71
+47 3085 51
+47 3202 7
+47 3878 NULL
+47 4774 11
+47 5008 82
+47 5305 NULL
+47 5468 7
+47 7214 1
+47 9770 33
+47 13246 47
+47 13477 10
+48 1761 22
+48 2820 4
+48 2829 65
+48 4431 39
+48 5971 29
+48 6085 1
+48 6684 44
+48 9199 88
+48 11259 NULL
+48 12468 62
+48 13153 74
+48 17799 17
+49 749 60
+49 2135 4
+49 5342 69
+49 5852 47
+49 6805 40
+49 7141 94
+49 9049 68
+49 9553 71
+49 12737 48
+49 15155 84
+49 16361 4
+50 1280 69
+50 1312 30
+50 1909 53
+50 1984 40
+50 3097 64
+50 5023 NULL
+50 7135 69
+50 16081 82
+51 422 21
+51 3091 28
+51 4687 6
+51 5029 12
+51 5059 51
+51 6565 33
+51 8384 79
+51 9311 90
+51 10133 54
+51 11234 NULL
+51 12625 53
+51 13199 97
+51 17483 22
+51 17705 66
+52 2420 90
+52 3334 73
+52 6098 NULL
+52 7606 45
+52 11488 76
+52 15649 29
+52 16646 48
+52 17402 91
+52 17456 37
+53 1114 40
+53 2095 62
+53 2786 70
+53 2887 39
+53 7546 58
+53 11348 38
+53 13220 76
+53 13795 38
+53 15991 37
+53 16420 14
+53 16648 79
+53 17296 43
+53 17560 15
+54 702 40
+54 825 50
+54 1165 62
+54 3861 NULL
+54 6517 40
+54 9159 75
+54 14737 38
+54 16059 15
+54 16974 NULL
+54 17479 34
+55 1339 16
+55 3001 7
+55 5137 33
+55 9703 44
+55 12170 92
+55 12205 90
+55 14135 36
+55 14923 71
+55 17677 17
+56 4242 2
+56 4506 57
+56 8353 35
+56 8691 59
+56 8707 68
+56 10362 54
+56 16620 23
+56 17331 74
+57 3253 71
+57 4028 88
+57 4933 22
+57 12596 91
+57 12721 62
+57 12740 52
+57 15182 86
+57 17729 26
+57 17993 99
+58 1829 52
+58 3848 6
+58 5117 2
+58 7649 19
+58 9743 62
+58 10802 14
+58 15635 6
+58 16472 6
+58 16949 35
+59 3133 92
+59 3546 22
+59 5772 70
+59 7087 80
+59 8010 46
+59 8335 36
+59 9348 62
+59 9397 92
+59 10651 100
+59 11916 19
+59 12858 90
+59 14529 44
+60 97 50
+60 555 62
+60 633 71
+60 999 43
+60 1117 78
+60 1573 90
+60 4041 25
+60 4235 28
+60 4513 72
+60 4937 22
+60 7231 95
+60 10277 62
+60 10393 75
+60 13975 14
+60 16887 25
+60 17755 88
+61 1106 4
+61 2264 36
+61 3362 48
+61 4567 26
+61 5528 78
+61 6380 77
+61 7591 78
+61 8924 11
+61 10330 8
+61 16462 26
+62 4093 94
+62 6403 NULL
+62 8457 37
+62 10149 75
+62 12163 29
+62 12199 5
+62 12407 NULL
+62 13559 80
+62 15399 74
+62 15733 40
+62 16151 93
+63 4488 73
+63 5079 79
+63 5217 66
+63 5658 99
+63 9319 80
+63 11370 38
+63 11946 85
+63 13339 19
+63 15793 40
+63 16569 69
+64 1213 NULL
+64 3090 87
+64 3963 NULL
+64 11835 82
+64 13224 NULL
+64 14407 8
+64 15867 59
+64 15936 30
+64 16921 19
+64 17586 78
+64 17617 17
+65 2287 100
+65 4227 42
+65 9625 51
+65 9847 54
+65 13897 40
+65 14905 85
+65 15177 55
+65 17025 67
+66 6507 76
+66 7033 65
+66 7227 66
+66 8197 41
+66 9237 29
+66 10019 10
+66 11419 66
+66 15629 20
+66 16745 91
+66 16795 28
+67 757 77
+67 2133 74
+67 3439 73
+67 4155 87
+67 5113 NULL
+67 7020 79
+67 7507 77
+67 8469 59
+67 8871 71
+67 12087 70
+67 15699 44
+68 1387 74
+68 1603 57
+68 1820 54
+68 2035 22
+68 2296 52
+68 2564 83
+68 5162 23
+68 6763 77
+68 7765 NULL
+68 12526 3
+68 12724 88
+68 17426 2
+68 17600 13
+69 322 45
+69 337 34
+69 4208 9
+69 4267 10
+69 6136 7
+69 7264 67
+69 7822 30
+69 8599 53
+69 11137 68
+69 13489 66
+69 13792 NULL
+69 15448 16
+70 1592 53
+70 2462 NULL
+70 3296 48
+70 3947 NULL
+70 6185 82
+70 6425 NULL
+70 8893 17
+70 9857 20
+70 14549 4
+70 17815 95
+71 457 75
+71 1888 4
+71 2098 51
+71 4144 49
+71 5858 NULL
+71 6008 54
+71 7504 3
+71 8887 10
+71 9274 36
+71 9769 79
+71 9790 96
+71 9997 26
+71 10108 66
+71 10288 30
+71 11168 79
+71 17246 90
+72 1535 9
+72 5917 85
+72 6113 45
+72 6671 13
+72 9860 26
+72 10427 66
+72 10753 16
+72 11741 62
+72 12788 29
+72 12901 57
+72 13085 94
+72 13423 62
+72 13904 37
+72 15587 87
+72 16765 56
+73 247 53
+73 1063 37
+73 3205 82
+73 4946 54
+73 6862 58
+73 10051 49
+73 12502 75
+73 15109 38
+73 16519 97
+73 16585 38
+73 17269 40
+74 326 29
+74 3104 78
+74 3175 23
+74 3278 NULL
+74 3542 96
+74 3754 26
+74 5492 54
+74 7694 17
+74 8653 12
+74 9620 95
+74 10069 99
+74 13208 87
+74 16694 72
+75 607 20
+75 2948 25
+75 4625 73
+75 6938 89
+75 6953 71
+75 8726 6
+75 9905 54
+75 10217 85
+75 11039 70
+75 14186 63
+75 16796 93
+76 257 5
+76 465 2
+76 1107 16
+76 1503 97
+76 2265 98
+76 2869 32
+76 3363 25
+76 4237 48
+76 4567 40
+76 5529 78
+76 6381 50
+76 7591 27
+76 8925 6
+76 10331 3
+76 16463 53
+77 992 62
+77 1399 34
+77 2713 85
+77 3868 89
+77 6289 30
+77 7339 88
+77 7448 95
+77 7486 49
+77 8686 38
+77 9220 90
+77 11918 36
+77 12439 95
+77 13456 48
+77 14815 18
+77 16687 16
+78 901 3
+78 3304 50
+78 3856 27
+78 5965 78
+78 6044 59
+78 6110 43
+78 6500 76
+78 7576 87
+78 8611 79
+78 10507 6
+78 11209 7
+78 12706 19
+78 14996 39
+79 247 NULL
+79 1063 85
+79 3205 48
+79 4947 35
+79 6864 1
+79 10051 10
+79 10524 36
+79 12504 81
+79 14322 41
+79 15109 NULL
+79 15498 3
+79 15888 58
+79 16519 9
+79 16585 93
+79 17269 81
+80 998 93
+80 1519 25
+80 1573 40
+80 4040 66
+80 4513 NULL
+80 4622 1
+80 7231 49
+80 7610 37
+80 10393 5
+80 12968 NULL
+80 13717 91
+80 13975 13
+80 16363 84
+80 16886 77
+80 17308 29
+80 17755 94
+81 4486 31
+81 5078 75
+81 5216 64
+81 5656 24
+81 7166 7
+81 7663 79
+81 8918 37
+81 9319 36
+81 11107 36
+81 11368 26
+81 13339 6
+81 15793 8
+82 2572 53
+82 7862 75
+82 13138 59
+82 14998 49
+82 17041 18
http://git-wip-us.apache.org/repos/asf/hive/blob/072c5a0b/ql/src/test/results/clientpositive/vectorization_limit.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vectorization_limit.q.out b/ql/src/test/results/clientpositive/vectorization_limit.q.out
index 9ff888c..2400baa 100644
--- a/ql/src/test/results/clientpositive/vectorization_limit.q.out
+++ b/ql/src/test/results/clientpositive/vectorization_limit.q.out
@@ -316,10 +316,10 @@ NULL
-47
-46
PREHOOK: query: explain
-select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint limit 20
+select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20
PREHOOK: type: QUERY
POSTHOOK: query: explain
-select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint limit 20
+select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -373,11 +373,11 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint limit 20
+PREHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20
PREHOOK: type: QUERY
PREHOOK: Input: default@alltypesorc
#### A masked pattern was here ####
-POSTHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint limit 20
+POSTHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20
POSTHOOK: type: QUERY
POSTHOOK: Input: default@alltypesorc
#### A masked pattern was here ####
[20/25] hive git commit: HIVE-11932: JDBC Driver appends an extra / when configuring connection by reading httpPath from ZooKeeper (Vaibhav Gumashta, reviewed by Thejas Nair)
Posted by pr...@apache.org.
HIVE-11932: JDBC Driver appends an extra / when configuring connection by reading httpPath from ZooKeeper (Vaibhav Gumashta, reviewed by Thejas Nair)
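The sketch below is illustrative only, not Hive code: it assumes the driver inserts its own "/" separator when it assembles the final HTTP endpoint, which is one way the slash prepended in ZooKeeperHiveClientHelper (removed in the diff below) would produce a double slash in the URL. Class, host, and value names are made up.

  public class HttpPathJoinSketch {
    // Assumption: the driver joins host:port and the HTTP path with its own "/".
    static String endpoint(String hostPort, String httpPath) {
      return "http://" + hostPort + "/" + httpPath;
    }
    public static void main(String[] args) {
      String fromZk = "cliservice"; // hypothetical value read from ZooKeeper
      // Prepending "/" before the join yields a malformed path:
      System.out.println(endpoint("hs2.example.com:10001", "/" + fromZk)); // ...:10001//cliservice
      // Passing the value through unchanged yields the intended path:
      System.out.println(endpoint("hs2.example.com:10001", fromZk));       // ...:10001/cliservice
    }
  }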
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7b92f44b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7b92f44b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7b92f44b
Branch: refs/heads/llap
Commit: 7b92f44b674c5455eb3629b75037531efca43126
Parents: 461e38e
Author: Gunther Hagleitner <gu...@apache.org>
Authored: Thu Sep 24 15:20:26 2015 -0700
Committer: Gunther Hagleitner <gu...@apache.org>
Committed: Thu Sep 24 15:24:42 2015 -0700
----------------------------------------------------------------------
jdbc/src/java/org/apache/hive/jdbc/ZooKeeperHiveClientHelper.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/7b92f44b/jdbc/src/java/org/apache/hive/jdbc/ZooKeeperHiveClientHelper.java
----------------------------------------------------------------------
diff --git a/jdbc/src/java/org/apache/hive/jdbc/ZooKeeperHiveClientHelper.java b/jdbc/src/java/org/apache/hive/jdbc/ZooKeeperHiveClientHelper.java
index 4712d2e..6c21423 100644
--- a/jdbc/src/java/org/apache/hive/jdbc/ZooKeeperHiveClientHelper.java
+++ b/jdbc/src/java/org/apache/hive/jdbc/ZooKeeperHiveClientHelper.java
@@ -130,7 +130,7 @@ class ZooKeeperHiveClientHelper {
// Set http path
if ((matcher.group(1).equals("hive.server2.thrift.http.path"))
&& !(connParams.getSessionVars().containsKey(JdbcConnectionParams.HTTP_PATH))) {
- connParams.getSessionVars().put(JdbcConnectionParams.HTTP_PATH, "/" + matcher.group(2));
+ connParams.getSessionVars().put(JdbcConnectionParams.HTTP_PATH, matcher.group(2));
}
// Set SSL
if ((matcher.group(1) != null) && (matcher.group(1).equals("hive.server2.use.SSL"))
[14/25] hive git commit: HIVE-11922: Better error message when ORC split generation fails (Prasanth Jayachandran reviewed by Sergey Shelukhin)
Posted by pr...@apache.org.
HIVE-11922: Better error message when ORC split generation fails (Prasanth Jayachandran reviewed by Sergey Shelukhin)
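As a minimal standalone sketch of the pattern the fix applies (illustrative class name and failure message only): wrap the low-level exception so the error is self-describing, and pass the original as the cause so the full stack trace is preserved.

  import java.io.IOException;

  public class WrapCauseSketch {
    // Stand-in for the split-generation work; always fails here for the demo.
    static void generateSplits() throws IOException {
      throw new IOException("Permission denied: /tmp/orc/file_0");
    }
    public static void main(String[] args) {
      try {
        generateSplits();
      } catch (Exception e) {
        // Surface the underlying message, keep the original as the cause.
        throw new RuntimeException(
            "ORC split generation failed with exception: " + e.getMessage(), e);
      }
    }
  }

Running it prints the improved top-level message followed by a "Caused by:" section for the original IOException, which matches what the new test below checks the message for.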
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/648f2c6b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/648f2c6b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/648f2c6b
Branch: refs/heads/llap
Commit: 648f2c6bd47c9fcb555fcaea64c15f8b03a48ab4
Parents: f73157f
Author: Prasanth Jayachandran <j....@gmail.com>
Authored: Wed Sep 23 20:02:00 2015 -0500
Committer: Prasanth Jayachandran <j....@gmail.com>
Committed: Wed Sep 23 20:02:00 2015 -0500
----------------------------------------------------------------------
.../hadoop/hive/ql/io/orc/OrcInputFormat.java | 6 +-----
.../hive/ql/io/orc/TestInputOutputFormat.java | 19 +++++++++++++++++++
2 files changed, 20 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/648f2c6b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 2500fb6..52e1b06 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -21,7 +21,6 @@ package org.apache.hadoop.hive.ql.io.orc;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -50,7 +49,6 @@ import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
import org.apache.hadoop.hive.ql.io.AcidInputFormat;
-import org.apache.hadoop.hive.ql.io.AcidInputFormat.DeltaMetaData;
import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.io.AcidUtils.Directory;
@@ -59,12 +57,10 @@ import org.apache.hadoop.hive.ql.io.InputFormatChecker;
import org.apache.hadoop.hive.ql.io.RecordIdentifier;
import org.apache.hadoop.hive.ql.io.StatsProvidingRecordReader;
import org.apache.hadoop.hive.ql.io.orc.OrcFile.WriterVersion;
-import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.Context;
import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg;
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
-import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -1060,7 +1056,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
} catch (Exception e) {
cancelFutures(pathFutures);
cancelFutures(splitFutures);
- throw new RuntimeException("serious problem", e);
+ throw new RuntimeException("ORC split generation failed with exception: " + e.getMessage(), e);
}
if (context.cacheStripeDetails) {
http://git-wip-us.apache.org/repos/asf/hive/blob/648f2c6b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
index 8ba4d2e..f451fce 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
@@ -1311,6 +1311,25 @@ public class TestInputOutputFormat {
assertEquals(null, serde.getSerDeStats());
}
+ @Test(expected = RuntimeException.class)
+ public void testSplitGenFailure() throws IOException {
+ Properties properties = new Properties();
+ HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
+ org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter writer =
+ outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true,
+ properties, Reporter.NULL);
+ writer.close(true);
+ InputFormat<?,?> in = new OrcInputFormat();
+ fs.setPermission(testFilePath, FsPermission.createImmutable((short) 0333));
+ FileInputFormat.setInputPaths(conf, testFilePath.toString());
+ try {
+ in.getSplits(conf, 1);
+ } catch (RuntimeException e) {
+ assertEquals(true, e.getMessage().contains("Permission denied"));
+ throw e;
+ }
+ }
+
static class StringRow implements Writable {
String str;
String str2;
[15/25] hive git commit: HIVE-11926: Stats annotation might not extract stats for varchar/decimal columns (Chaoyu Tang, reviewed by Xuefu Zhang)
Posted by pr...@apache.org.
HIVE-11926: Stats annotation might not extract stats for varchar/decimal columns (Chaoyu Tang, reviewed by Xuefu Zhang)
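A likely reading of the bug, inferred from the fix below rather than stated in it: exact type names were compared with the case-insensitive equalsIgnoreCase, but parameterized types such as varchar(n) and decimal(p,s) were matched with the case-sensitive String.startsWith, so upper- or mixed-case type strings fell through to the unsupported branch and produced no stats. A two-line sketch of the underlying String behavior:

  public class CaseSensitiveStartsWithSketch {
    public static void main(String[] args) {
      String colType = "VARCHAR(10)"; // hypothetical type string from metadata
      System.out.println(colType.startsWith("varchar"));               // false -> stats silently skipped
      System.out.println(colType.toLowerCase().startsWith("varchar")); // true  -> stats extracted
    }
  }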
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/15281351
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/15281351
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/15281351
Branch: refs/heads/llap
Commit: 1528135176df0bb30351471eb05b919d706669b8
Parents: 648f2c6
Author: ctang <ct...@gmail.com>
Authored: Thu Sep 24 12:20:42 2015 -0400
Committer: ctang <ct...@gmail.com>
Committed: Thu Sep 24 14:43:32 2015 -0400
----------------------------------------------------------------------
.../apache/hadoop/hive/ql/stats/StatsUtils.java | 182 ++++++++++---------
1 file changed, 94 insertions(+), 88 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/15281351/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index 2c970bd..cc8c9e8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -429,10 +429,11 @@ public class StatsUtils {
String colType, String defaultPartName) {
Range range = null;
String partVal;
- if (colType.equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.SMALLINT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.INT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)) {
+ String colTypeLowerCase = colType.toLowerCase();
+ if (colTypeLowerCase.equals(serdeConstants.TINYINT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.INT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.BIGINT_TYPE_NAME)) {
long min = Long.MAX_VALUE;
long max = Long.MIN_VALUE;
for (Partition partition : partitions) {
@@ -447,8 +448,8 @@ public class StatsUtils {
}
}
range = new Range(min, max);
- } else if (colType.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.FLOAT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.DOUBLE_TYPE_NAME)) {
double min = Double.MAX_VALUE;
double max = Double.MIN_VALUE;
for (Partition partition : partitions) {
@@ -463,7 +464,7 @@ public class StatsUtils {
}
}
range = new Range(min, max);
- } else if (colType.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
+ } else if (colTypeLowerCase.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
double min = Double.MAX_VALUE;
double max = Double.MIN_VALUE;
for (Partition partition : partitions) {
@@ -515,18 +516,18 @@ public class StatsUtils {
continue;
}
ObjectInspector oi = ci.getObjectInspector();
- String colType = ci.getTypeName();
- if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)
- || colType.startsWith(serdeConstants.VARCHAR_TYPE_NAME)
- || colType.startsWith(serdeConstants.CHAR_TYPE_NAME)
- || colType.startsWith(serdeConstants.LIST_TYPE_NAME)
- || colType.startsWith(serdeConstants.MAP_TYPE_NAME)
- || colType.startsWith(serdeConstants.STRUCT_TYPE_NAME)
- || colType.startsWith(serdeConstants.UNION_TYPE_NAME)) {
- avgRowSize += getAvgColLenOfVariableLengthTypes(conf, oi, colType);
+ String colTypeLowerCase = ci.getTypeName().toLowerCase();
+ if (colTypeLowerCase.equals(serdeConstants.STRING_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.BINARY_TYPE_NAME)
+ || colTypeLowerCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME)
+ || colTypeLowerCase.startsWith(serdeConstants.CHAR_TYPE_NAME)
+ || colTypeLowerCase.startsWith(serdeConstants.LIST_TYPE_NAME)
+ || colTypeLowerCase.startsWith(serdeConstants.MAP_TYPE_NAME)
+ || colTypeLowerCase.startsWith(serdeConstants.STRUCT_TYPE_NAME)
+ || colTypeLowerCase.startsWith(serdeConstants.UNION_TYPE_NAME)) {
+ avgRowSize += getAvgColLenOfVariableLengthTypes(conf, oi, colTypeLowerCase);
} else {
- avgRowSize += getAvgColLenOfFixedLengthTypes(colType);
+ avgRowSize += getAvgColLenOfFixedLengthTypes(colTypeLowerCase);
}
}
return avgRowSize;
@@ -640,38 +641,38 @@ public class StatsUtils {
*/
public static ColStatistics getColStatistics(ColumnStatisticsObj cso, String tabName,
String colName) {
- ColStatistics cs = new ColStatistics(colName, cso.getColType());
- String colType = cso.getColType();
+ String colTypeLowerCase = cso.getColType().toLowerCase();
+ ColStatistics cs = new ColStatistics(colName, colTypeLowerCase);
ColumnStatisticsData csd = cso.getStatsData();
- if (colType.equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.SMALLINT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.INT_TYPE_NAME)) {
+ if (colTypeLowerCase.equals(serdeConstants.TINYINT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.INT_TYPE_NAME)) {
cs.setCountDistint(csd.getLongStats().getNumDVs());
cs.setNumNulls(csd.getLongStats().getNumNulls());
cs.setAvgColLen(JavaDataModel.get().primitive1());
cs.setRange(csd.getLongStats().getLowValue(), csd.getLongStats().getHighValue());
- } else if (colType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.BIGINT_TYPE_NAME)) {
cs.setCountDistint(csd.getLongStats().getNumDVs());
cs.setNumNulls(csd.getLongStats().getNumNulls());
cs.setAvgColLen(JavaDataModel.get().primitive2());
cs.setRange(csd.getLongStats().getLowValue(), csd.getLongStats().getHighValue());
- } else if (colType.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.FLOAT_TYPE_NAME)) {
cs.setCountDistint(csd.getDoubleStats().getNumDVs());
cs.setNumNulls(csd.getDoubleStats().getNumNulls());
cs.setAvgColLen(JavaDataModel.get().primitive1());
cs.setRange(csd.getDoubleStats().getLowValue(), csd.getDoubleStats().getHighValue());
- } else if (colType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.DOUBLE_TYPE_NAME)) {
cs.setCountDistint(csd.getDoubleStats().getNumDVs());
cs.setNumNulls(csd.getDoubleStats().getNumNulls());
cs.setAvgColLen(JavaDataModel.get().primitive2());
cs.setRange(csd.getDoubleStats().getLowValue(), csd.getDoubleStats().getHighValue());
- } else if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)
- || colType.startsWith(serdeConstants.CHAR_TYPE_NAME)
- || colType.startsWith(serdeConstants.VARCHAR_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.STRING_TYPE_NAME)
+ || colTypeLowerCase.startsWith(serdeConstants.CHAR_TYPE_NAME)
+ || colTypeLowerCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME)) {
cs.setCountDistint(csd.getStringStats().getNumDVs());
cs.setNumNulls(csd.getStringStats().getNumNulls());
cs.setAvgColLen(csd.getStringStats().getAvgColLen());
- } else if (colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.BOOLEAN_TYPE_NAME)) {
if (csd.getBooleanStats().getNumFalses() > 0 && csd.getBooleanStats().getNumTrues() > 0) {
cs.setCountDistint(2);
} else {
@@ -681,12 +682,12 @@ public class StatsUtils {
cs.setNumFalses(csd.getBooleanStats().getNumFalses());
cs.setNumNulls(csd.getBooleanStats().getNumNulls());
cs.setAvgColLen(JavaDataModel.get().primitive1());
- } else if (colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.BINARY_TYPE_NAME)) {
cs.setAvgColLen(csd.getBinaryStats().getAvgColLen());
cs.setNumNulls(csd.getBinaryStats().getNumNulls());
- } else if (colType.equalsIgnoreCase(serdeConstants.TIMESTAMP_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) {
cs.setAvgColLen(JavaDataModel.get().lengthOfTimestamp());
- } else if (colType.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
+ } else if (colTypeLowerCase.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
cs.setAvgColLen(JavaDataModel.get().lengthOfDecimal());
cs.setCountDistint(csd.getDecimalStats().getNumDVs());
cs.setNumNulls(csd.getDecimalStats().getNumNulls());
@@ -697,7 +698,7 @@ public class StatsUtils {
BigDecimal minVal = HiveDecimal.
create(new BigInteger(val.getUnscaled()), val.getScale()).bigDecimalValue();
cs.setRange(minVal, maxVal);
- } else if (colType.equalsIgnoreCase(serdeConstants.DATE_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) {
cs.setAvgColLen(JavaDataModel.get().lengthOfDate());
} else {
// Columns statistics for complex datatypes are not supported yet
@@ -741,7 +742,9 @@ public class StatsUtils {
List<ColStatistics> stats = new ArrayList<ColStatistics>(colStats.size());
for (ColumnStatisticsObj statObj : colStats) {
ColStatistics cs = getColStatistics(statObj, tabName, statObj.getColName());
- stats.add(cs);
+ if (cs != null) {
+ stats.add(cs);
+ }
}
return stats;
}
@@ -776,8 +779,8 @@ public class StatsUtils {
String colType) {
long configVarLen = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_STATS_MAX_VARIABLE_LENGTH);
-
- if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)) {
+ String colTypeLowCase = colType.toLowerCase();
+ if (colTypeLowCase.equals(serdeConstants.STRING_TYPE_NAME)) {
// constant string projection Ex: select "hello" from table
if (oi instanceof ConstantObjectInspector) {
@@ -793,7 +796,7 @@ public class StatsUtils {
// return the variable length from config
return configVarLen;
}
- } else if (colType.startsWith(serdeConstants.VARCHAR_TYPE_NAME)) {
+ } else if (colTypeLowCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME)) {
// constant varchar projection
if (oi instanceof ConstantObjectInspector) {
@@ -806,7 +809,7 @@ public class StatsUtils {
VarcharTypeInfo type = (VarcharTypeInfo) ((HiveVarcharObjectInspector) oi).getTypeInfo();
return type.getLength();
}
- } else if (colType.startsWith(serdeConstants.CHAR_TYPE_NAME)) {
+ } else if (colTypeLowCase.startsWith(serdeConstants.CHAR_TYPE_NAME)) {
// constant char projection
if (oi instanceof ConstantObjectInspector) {
@@ -819,7 +822,7 @@ public class StatsUtils {
CharTypeInfo type = (CharTypeInfo) ((HiveCharObjectInspector) oi).getTypeInfo();
return type.getLength();
}
- } else if (colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)) {
+ } else if (colTypeLowCase.equals(serdeConstants.BINARY_TYPE_NAME)) {
// constant byte arrays
if (oi instanceof ConstantObjectInspector) {
@@ -858,17 +861,17 @@ public class StatsUtils {
switch (oi.getCategory()) {
case PRIMITIVE:
- String colType = oi.getTypeName();
- if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)
- || colType.startsWith(serdeConstants.VARCHAR_TYPE_NAME)
- || colType.startsWith(serdeConstants.CHAR_TYPE_NAME)) {
- int avgColLen = (int) getAvgColLenOfVariableLengthTypes(conf, oi, colType);
+ String colTypeLowerCase = oi.getTypeName().toLowerCase();
+ if (colTypeLowerCase.equals(serdeConstants.STRING_TYPE_NAME)
+ || colTypeLowerCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME)
+ || colTypeLowerCase.startsWith(serdeConstants.CHAR_TYPE_NAME)) {
+ int avgColLen = (int) getAvgColLenOfVariableLengthTypes(conf, oi, colTypeLowerCase);
result += JavaDataModel.get().lengthForStringOfLength(avgColLen);
- } else if (colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)) {
- int avgColLen = (int) getAvgColLenOfVariableLengthTypes(conf, oi, colType);
+ } else if (colTypeLowerCase.equals(serdeConstants.BINARY_TYPE_NAME)) {
+ int avgColLen = (int) getAvgColLenOfVariableLengthTypes(conf, oi, colTypeLowerCase);
result += JavaDataModel.get().lengthForByteArrayOfSize(avgColLen);
} else {
- result += getAvgColLenOfFixedLengthTypes(colType);
+ result += getAvgColLenOfFixedLengthTypes(colTypeLowerCase);
}
break;
case LIST:
@@ -952,21 +955,22 @@ public class StatsUtils {
* @return raw data size
*/
public static long getAvgColLenOfFixedLengthTypes(String colType) {
- if (colType.equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.SMALLINT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.INT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME)) {
+ String colTypeLowerCase = colType.toLowerCase();
+ if (colTypeLowerCase.equals(serdeConstants.TINYINT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.INT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.BOOLEAN_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.FLOAT_TYPE_NAME)) {
return JavaDataModel.get().primitive1();
- } else if (colType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)
- || colType.equalsIgnoreCase("long")) {
+ } else if (colTypeLowerCase.equals(serdeConstants.DOUBLE_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.BIGINT_TYPE_NAME)
+ || colTypeLowerCase.equals("long")) {
return JavaDataModel.get().primitive2();
- } else if (colType.equalsIgnoreCase(serdeConstants.TIMESTAMP_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) {
return JavaDataModel.get().lengthOfTimestamp();
- } else if (colType.equalsIgnoreCase(serdeConstants.DATE_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) {
return JavaDataModel.get().lengthOfDate();
- } else if (colType.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
+ } else if (colTypeLowerCase.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
return JavaDataModel.get().lengthOfDecimal();
} else {
return 0;
@@ -982,25 +986,26 @@ public class StatsUtils {
* @return raw data size
*/
public static long getSizeOfPrimitiveTypeArraysFromType(String colType, int length) {
- if (colType.equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.SMALLINT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.INT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME)) {
+ String colTypeLowerCase = colType.toLowerCase();
+ if (colTypeLowerCase.equals(serdeConstants.TINYINT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.INT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.FLOAT_TYPE_NAME)) {
return JavaDataModel.get().lengthForIntArrayOfSize(length);
- } else if (colType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.DOUBLE_TYPE_NAME)) {
return JavaDataModel.get().lengthForDoubleArrayOfSize(length);
- } else if (colType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)
- || colType.equalsIgnoreCase("long")) {
+ } else if (colTypeLowerCase.equals(serdeConstants.BIGINT_TYPE_NAME)
+ || colTypeLowerCase.equals("long")) {
return JavaDataModel.get().lengthForLongArrayOfSize(length);
- } else if (colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.BINARY_TYPE_NAME)) {
return JavaDataModel.get().lengthForByteArrayOfSize(length);
- } else if (colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.BOOLEAN_TYPE_NAME)) {
return JavaDataModel.get().lengthForBooleanArrayOfSize(length);
- } else if (colType.equalsIgnoreCase(serdeConstants.TIMESTAMP_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) {
return JavaDataModel.get().lengthForTimestampArrayOfSize(length);
- } else if (colType.equalsIgnoreCase(serdeConstants.DATE_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) {
return JavaDataModel.get().lengthForDateArrayOfSize(length);
- } else if (colType.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
+ } else if (colTypeLowerCase.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
return JavaDataModel.get().lengthForDecimalArrayOfSize(length);
} else {
return 0;
@@ -1267,8 +1272,9 @@ public class StatsUtils {
throw new IllegalArgumentException("not supported expr type " + end.getClass());
}
- if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)
+ colType = colType.toLowerCase();
+ if (colType.equals(serdeConstants.STRING_TYPE_NAME)
+ || colType.equals(serdeConstants.BINARY_TYPE_NAME)
|| colType.startsWith(serdeConstants.VARCHAR_TYPE_NAME)
|| colType.startsWith(serdeConstants.CHAR_TYPE_NAME)
|| colType.startsWith(serdeConstants.LIST_TYPE_NAME)
@@ -1380,30 +1386,30 @@ public class StatsUtils {
for (ColStatistics cs : colStats) {
if (cs != null) {
- String colType = cs.getColumnType();
+ String colTypeLowerCase = cs.getColumnType().toLowerCase();
long nonNullCount = numRows - cs.getNumNulls();
double sizeOf = 0;
- if (colType.equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.SMALLINT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.INT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME)) {
+ if (colTypeLowerCase.equals(serdeConstants.TINYINT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.INT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.BIGINT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.BOOLEAN_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.FLOAT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.DOUBLE_TYPE_NAME)) {
sizeOf = cs.getAvgColLen();
- } else if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)
- || colType.startsWith(serdeConstants.VARCHAR_TYPE_NAME)
- || colType.startsWith(serdeConstants.CHAR_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.STRING_TYPE_NAME)
+ || colTypeLowerCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME)
+ || colTypeLowerCase.startsWith(serdeConstants.CHAR_TYPE_NAME)) {
int acl = (int) Math.round(cs.getAvgColLen());
sizeOf = JavaDataModel.get().lengthForStringOfLength(acl);
- } else if (colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.BINARY_TYPE_NAME)) {
int acl = (int) Math.round(cs.getAvgColLen());
sizeOf = JavaDataModel.get().lengthForByteArrayOfSize(acl);
- } else if (colType.equalsIgnoreCase(serdeConstants.TIMESTAMP_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) {
sizeOf = JavaDataModel.get().lengthOfTimestamp();
- } else if (colType.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
+ } else if (colTypeLowerCase.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
sizeOf = JavaDataModel.get().lengthOfDecimal();
- } else if (colType.equalsIgnoreCase(serdeConstants.DATE_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) {
sizeOf = JavaDataModel.get().lengthOfDate();
} else {
sizeOf = cs.getAvgColLen();
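The pattern in the hunks above is easy to miss in the noise of the diff: every equalsIgnoreCase() call is replaced by a single toLowerCase() normalization followed by plain equals()/startsWith() checks. String.startsWith() has no case-insensitive variant, so parameterized type names such as varchar(20), char(10) or decimal(10,2) were matched only when the stored type string happened to be lower case already, and otherwise fell through to the default branch (the methods above return 0 for unrecognized types); normalizing once also avoids repeating the case-folding work in every branch. A minimal, self-contained sketch of the technique (not code from the patch; the literal "varchar" stands in for serdeConstants.VARCHAR_TYPE_NAME, which holds that value):

public class TypeNameMatchSketch {
  // The serde type-name constants are lower-case strings,
  // e.g. serdeConstants.VARCHAR_TYPE_NAME == "varchar".
  static final String VARCHAR_TYPE_NAME = "varchar";

  public static void main(String[] args) {
    String colType = "VARCHAR(20)"; // hypothetical mixed-case type name

    // startsWith() is case-sensitive, so the pre-patch check misses this
    // column and the stats code falls through to its default branch:
    System.out.println(colType.startsWith(VARCHAR_TYPE_NAME));          // false

    // Normalizing once up front, as the patch does, makes every later
    // equals()/startsWith() comparison uniform and case-insensitive:
    String colTypeLowerCase = colType.toLowerCase();
    System.out.println(colTypeLowerCase.startsWith(VARCHAR_TYPE_NAME)); // true
  }
}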
[18/25] hive git commit: HIVE-11517 Vectorized auto_smb_mapjoin_14.q
produces different results (Matt McCline, reviewed by Vikram Dixit K)
Posted by pr...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/461e38ec/ql/src/test/results/clientpositive/vector_auto_smb_mapjoin_14.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_auto_smb_mapjoin_14.q.out b/ql/src/test/results/clientpositive/vector_auto_smb_mapjoin_14.q.out
new file mode 100644
index 0000000..827e6b5
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_auto_smb_mapjoin_14.q.out
@@ -0,0 +1,1792 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl1
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl1
+PREHOOK: query: CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl2
+POSTHOOK: query: CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl2
+PREHOOK: query: insert overwrite table tbl1
+select * from src where key < 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@tbl1
+POSTHOOK: query: insert overwrite table tbl1
+select * from src where key < 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@tbl1
+POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table tbl2
+select * from src where key < 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@tbl2
+POSTHOOK: query: insert overwrite table tbl2
+select * from src where key < 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@tbl2
+POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- The join is being performed as part of a sub-query. It should be converted to a sort-merge join
+explain
+select count(*) from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The join is being performed as part of a sub-query. It should be converted to a sort-merge join
+explain
+select count(*) from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+22
+PREHOOK: query: -- The join is being performed as part of more than one sub-query. It should be converted to a sort-merge join
+explain
+select count(*) from
+(
+ select key, count(*) from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1
+ group by key
+) subq2
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The join is being performed as part of more than one sub-query. It should be converted to a sort-merge join
+explain
+select count(*) from
+(
+ select key, count(*) from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1
+ group by key
+) subq2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ outputColumnNames: _col0
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from
+(
+ select key, count(*) from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1
+ group by key
+) subq2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from
+(
+ select key, count(*) from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1
+ group by key
+) subq2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+6
+PREHOOK: query: -- A join is being performed across different sub-queries, each of which itself performs a join.
+-- Each sub-query should be converted to a sort-merge join.
+explain
+select src1.key, src1.cnt1, src2.cnt1 from
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1 group by key
+) src1
+join
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq2 group by key
+) src2
+on src1.key = src2.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- A join is being performed across different sub-queries, each of which itself performs a join.
+-- Each sub-query should be converted to a sort-merge join.
+explain
+select src1.key, src1.cnt1, src2.cnt1 from
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1 group by key
+) src1
+join
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq2 group by key
+) src2
+on src1.key = src2.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-4
+ Stage-4 is a root stage
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ outputColumnNames: _col0
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ value expressions: _col1 (type: bigint)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ value expressions: _col1 (type: bigint)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col3
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ outputColumnNames: _col0
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select src1.key, src1.cnt1, src2.cnt1 from
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1 group by key
+) src1
+join
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq2 group by key
+) src2
+on src1.key = src2.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select src1.key, src1.cnt1, src2.cnt1 from
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1 group by key
+) src1
+join
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq2 group by key
+) src2
+on src1.key = src2.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+0 9 9
+2 1 1
+4 1 1
+5 9 9
+8 1 1
+9 1 1
+PREHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should
+-- be converted to a sort-merge join.
+explain
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on subq1.key = subq2.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should
+-- be converted to a sort-merge join.
+explain
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on subq1.key = subq2.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key < 6) (type: boolean)
+ Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on subq1.key = subq2.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on subq1.key = subq2.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+20
+PREHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should
+-- be converted to a sort-merge join, although there is more than one level of sub-query
+explain
+select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join tbl2 b
+ on subq2.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should
+-- be converted to a sort-merge join, although there is more than one level of sub-query
+explain
+select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join tbl2 b
+ on subq2.key = b.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean)
+ Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 key (type: int)
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join tbl2 b
+ on subq2.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join tbl2 b
+ on subq2.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+20
+PREHOOK: query: -- Both the tables are nested sub-queries, i.e. more than 1 level of sub-query.
+-- The join should be converted to a sort-merge join
+explain
+select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq3
+ where key < 6
+ ) subq4
+ on subq2.key = subq4.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Both the tables are nested sub-queries, i.e. more than 1 level of sub-query.
+-- The join should be converted to a sort-merge join
+explain
+select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq3
+ where key < 6
+ ) subq4
+ on subq2.key = subq4.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean)
+ Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq3
+ where key < 6
+ ) subq4
+ on subq2.key = subq4.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq3
+ where key < 6
+ ) subq4
+ on subq2.key = subq4.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+#### A masked pattern was here ####
+20
+PREHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters and the join key
+-- is not getting modified, it should be converted to a sort-merge join. Note that the sub-query modifies one
+-- item, but that is not part of the join key.
+explain
+select count(*) from
+ (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1
+ join
+ (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2
+ on subq1.key = subq2.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters and the join key
+-- is not getting modified, it should be converted to a sort-merge join. Note that the sub-query modifies one
+-- item, but that is not part of the join key.
+explain
+select count(*) from
+ (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1
+ join
+ (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2
+ on subq1.key = subq2.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key < 8) (type: boolean)
+ Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from
+ (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1
+ join
+ (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2
+ on subq1.key = subq2.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from
+ (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1
+ join
+ (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2
+ on subq1.key = subq2.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+20
+PREHOOK: query: -- Since the join key is modified by the sub-query, neither sort-merge join nor bucketized map-side
+-- join should be performed
+explain
+select count(*) from
+ (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1
+ join
+ (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2
+ on subq1.key = subq2.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Since the join key is modified by the sub-query, neither sort-merge join nor bucketized map-side
+-- join should be performed
+explain
+select count(*) from
+ (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1
+ join
+ (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2
+ on subq1.key = subq2.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: (key + 1) (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col0 is not null (type: boolean)
+ Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: (key + 1) (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col0 is not null (type: boolean)
+ Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Statistics: Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from
+ (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1
+ join
+ (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2
+ on subq1.key = subq2.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from
+ (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1
+ join
+ (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2
+ on subq1.key = subq2.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+22
+PREHOOK: query: -- One of the tables is a sub-query and the other is not.
+-- It should be converted to a sort-merge join.
+explain
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join tbl2 a on subq1.key = a.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- One of the tables is a sub-query and the other is not.
+-- It should be converted to a sort-merge join.
+explain
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join tbl2 a on subq1.key = a.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key < 6) (type: boolean)
+ Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 key (type: int)
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join tbl2 a on subq1.key = a.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join tbl2 a on subq1.key = a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+20
+PREHOOK: query: -- There are more than 2 inputs to the join, all of them being sub-queries.
+-- It should be converted to a sort-merge join
+explain
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on (subq1.key = subq2.key)
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq3
+ on (subq1.key = subq3.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- There are more than 2 inputs to the join, all of them being sub-queries.
+-- It should be converted to a sort-merge join
+explain
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on (subq1.key = subq2.key)
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq3
+ on (subq1.key = subq3.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key < 6) (type: boolean)
+ Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ 2 _col0 (type: int)
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on subq1.key = subq2.key
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq3
+ on (subq1.key = subq3.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on subq1.key = subq2.key
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq3
+ on (subq1.key = subq3.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+56
+PREHOOK: query: -- The join is being performed on a nested sub-query, and an aggregation is performed after that.
+-- The join should be converted to a sort-merge join
+explain
+select count(*) from (
+ select subq2.key as key, subq2.value as value1, b.value as value2 from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+join tbl2 b
+on subq2.key = b.key) a
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The join is being performed on a nested sub-query, and an aggregation is performed after that.
+-- The join should be converted to a sort-merge join
+explain
+select count(*) from (
+ select subq2.key as key, subq2.value as value1, b.value as value2 from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+join tbl2 b
+on subq2.key = b.key) a
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean)
+ Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 key (type: int)
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from (
+ select subq2.key as key, subq2.value as value1, b.value as value2 from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+join tbl2 b
+on subq2.key = b.key) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (
+ select subq2.key as key, subq2.value as value1, b.value as value2 from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+join tbl2 b
+on subq2.key = b.key) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+20
+PREHOOK: query: CREATE TABLE dest1(key int, value string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest1
+POSTHOOK: query: CREATE TABLE dest1(key int, value string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest1
+PREHOOK: query: CREATE TABLE dest2(key int, val1 string, val2 string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest2
+POSTHOOK: query: CREATE TABLE dest2(key int, val1 string, val2 string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest2
+PREHOOK: query: -- The join is followed by a multi-table insert. It should be converted to
+-- a sort-merge join
+explain
+from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+insert overwrite table dest1 select key, val1
+insert overwrite table dest2 select key, val1, val2
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The join is followed by a multi-table insert. It should be converted to
+-- a sort-merge join
+explain
+from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+insert overwrite table dest1 select key, val1
+insert overwrite table dest2 select key, val1, val2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6
+ Stage-5
+ Stage-0 depends on stages: Stage-5, Stage-4, Stage-7
+ Stage-3 depends on stages: Stage-0
+ Stage-4
+ Stage-6
+ Stage-7 depends on stages: Stage-6
+ Stage-14 depends on stages: Stage-2 , consists of Stage-11, Stage-10, Stage-12
+ Stage-11
+ Stage-1 depends on stages: Stage-11, Stage-10, Stage-13
+ Stage-9 depends on stages: Stage-1
+ Stage-10
+ Stage-12
+ Stage-13 depends on stages: Stage-12
+
+STAGE PLANS:
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ outputColumnNames: _col0, _col1, _col6
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest2
+ Execution mode: vectorized
+
+ Stage: Stage-8
+ Conditional Operator
+
+ Stage: Stage-5
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+
+ Stage: Stage-6
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+
+ Stage: Stage-7
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-14
+ Conditional Operator
+
+ Stage: Stage-11
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-1
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest2
+
+ Stage: Stage-9
+ Stats-Aggr Operator
+
+ Stage: Stage-10
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest2
+
+ Stage: Stage-12
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest2
+
+ Stage: Stage-13
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+PREHOOK: query: from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+insert overwrite table dest1 select key, val1
+insert overwrite table dest2 select key, val1, val2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+PREHOOK: Output: default@dest1
+PREHOOK: Output: default@dest2
+POSTHOOK: query: from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+insert overwrite table dest1 select key, val1
+insert overwrite table dest2 select key, val1, val2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+POSTHOOK: Output: default@dest1
+POSTHOOK: Output: default@dest2
+POSTHOOK: Lineage: dest1.key SIMPLE [(tbl1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dest1.value SIMPLE [(tbl1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: dest2.key SIMPLE [(tbl1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dest2.val1 SIMPLE [(tbl1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: dest2.val2 SIMPLE [(tbl2)b.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select * from dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from dest1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+2 val_2
+4 val_4
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+8 val_8
+9 val_9
+PREHOOK: query: select * from dest2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from dest2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest2
+#### A masked pattern was here ####
+0 val_0 val_0
+0 val_0 val_0
+0 val_0 val_0
+0 val_0 val_0
+0 val_0 val_0
+0 val_0 val_0
+0 val_0 val_0
+0 val_0 val_0
+0 val_0 val_0
+2 val_2 val_2
+4 val_4 val_4
+5 val_5 val_5
+5 val_5 val_5
+5 val_5 val_5
+5 val_5 val_5
+5 val_5 val_5
+5 val_5 val_5
+5 val_5 val_5
+5 val_5 val_5
+5 val_5 val_5
+8 val_8 val_8
+9 val_9 val_9
+PREHOOK: query: DROP TABLE dest2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@dest2
+PREHOOK: Output: default@dest2
+POSTHOOK: query: DROP TABLE dest2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@dest2
+POSTHOOK: Output: default@dest2
+PREHOOK: query: CREATE TABLE dest2(key int, cnt int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest2
+POSTHOOK: query: CREATE TABLE dest2(key int, cnt int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest2
+PREHOOK: query: -- The join is followed by a multi-table insert, and one of the inserts involves a reducer.
+-- It should be converted to a sort-merge join
+explain
+from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+insert overwrite table dest1 select key, val1
+insert overwrite table dest2 select key, count(*) group by key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The join is followed by a multi-table insert, and one of the inserts involves a reducer.
+-- It should be converted to a sort-merge join
+explain
+from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+insert overwrite table dest1 select key, val1
+insert overwrite table dest2 select key, count(*) group by key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6
+ Stage-5
+ Stage-0 depends on stages: Stage-5, Stage-4, Stage-7
+ Stage-3 depends on stages: Stage-0
+ Stage-4
+ Stage-6
+ Stage-7 depends on stages: Stage-6
+ Stage-1 depends on stages: Stage-2
+ Stage-9 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: _col0
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: _col0 (type: int), UDFToInteger(_col1) (type: int)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest2
+
+ Stage: Stage-8
+ Conditional Operator
+
+ Stage: Stage-5
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+
+ Stage: Stage-6
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+
+ Stage: Stage-7
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-1
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest2
+
+ Stage: Stage-9
+ Stats-Aggr Operator
+
+PREHOOK: query: from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+insert overwrite table dest1 select key, val1
+insert overwrite table dest2 select key, count(*) group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+PREHOOK: Output: default@dest1
+PREHOOK: Output: default@dest2
+POSTHOOK: query: from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+insert overwrite table dest1 select key, val1
+insert overwrite table dest2 select key, count(*) group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+POSTHOOK: Output: default@dest1
+POSTHOOK: Output: default@dest2
+POSTHOOK: Lineage: dest1.key SIMPLE [(tbl1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dest1.value SIMPLE [(tbl1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: dest2.cnt EXPRESSION [(tbl1)a.null, (tbl2)b.null, ]
+POSTHOOK: Lineage: dest2.key SIMPLE [(tbl1)a.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: select * from dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from dest1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+2 val_2
+4 val_4
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+8 val_8
+9 val_9
+PREHOOK: query: select * from dest2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from dest2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest2
+#### A masked pattern was here ####
+0 9
+2 1
+4 1
+5 9
+8 1
+9 1
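A quick sanity check on the dest1/dest2 results above: a sorted merge bucket map join walks both sorted inputs in a single forward pass and emits the cross product of each equal-key block, which is why keys 0 and 5 each produce 9 joined rows (cnt = 9 in dest2) and dest1 holds 22 rows in total. The sketch below reproduces that count under the assumption (the table contents are not shown in this output) that tbl1 and tbl2 each hold the sorted keys 0,0,0,2,4,5,5,5,8,9; rows are reduced to their keys for brevity.

import java.util.ArrayList;
import java.util.List;

// Minimal sketch of the merge step behind the Sorted Merge Bucket Map Join
// operator in the plan above: both inputs arrive sorted on the join key, so
// matches are found in one pass with no hash table.
public class SortMergeJoinSketch {
  public static List<int[]> join(int[] left, int[] right) {
    List<int[]> out = new ArrayList<>();
    int i = 0, j = 0;
    while (i < left.length && j < right.length) {
      if (left[i] < right[j]) {
        i++;                                   // advance the smaller side
      } else if (left[i] > right[j]) {
        j++;
      } else {
        int key = left[i];
        int iEnd = i, jEnd = j;
        while (iEnd < left.length && left[iEnd] == key) iEnd++;
        while (jEnd < right.length && right[jEnd] == key) jEnd++;
        for (int a = i; a < iEnd; a++) {       // cross product of the
          for (int b = j; b < jEnd; b++) {     // equal-key blocks
            out.add(new int[] {key, key});
          }
        }
        i = iEnd;
        j = jEnd;
      }
    }
    return out;
  }

  public static void main(String[] args) {
    int[] tbl1 = {0, 0, 0, 2, 4, 5, 5, 5, 8, 9};  // assumed sorted bucket
    int[] tbl2 = {0, 0, 0, 2, 4, 5, 5, 5, 8, 9};  // assumed identical copy
    System.out.println(join(tbl1, tbl2).size());   // prints 22, matching dest1
  }
}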
[09/25] hive git commit: HIVE-11468: Vectorize Struct IN() clauses
(Matt McCline, via Gopal V)
Posted by pr...@apache.org.
HIVE-11468: Vectorize Struct IN() clauses (Matt McCline, via Gopal V)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7cfe3743
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7cfe3743
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7cfe3743
Branch: refs/heads/llap
Commit: 7cfe3743ff583386653bdd32c79f2c44ffe734ba
Parents: 2e8324e
Author: Gopal V <go...@apache.org>
Authored: Tue Sep 22 19:39:49 2015 -0700
Committer: Gopal V <go...@apache.org>
Committed: Tue Sep 22 23:24:14 2015 -0700
----------------------------------------------------------------------
.../ql/exec/vector/VectorizationContext.java | 203 +-
.../expressions/FilterStringColumnInList.java | 13 +-
.../expressions/FilterStructColumnInList.java | 178 ++
.../exec/vector/expressions/IStructInExpr.java | 36 +
.../vector/expressions/StringColumnInList.java | 4 +
.../vector/expressions/StructColumnInList.java | 174 ++
.../hive/ql/optimizer/physical/Vectorizer.java | 71 +-
.../ql/optimizer/physical/Vectorizer.java.orig | 1744 ++++++++++++++++++
.../ql/optimizer/physical/Vectorizer.java.rej | 86 +
.../queries/clientpositive/vector_struct_in.q | 247 +++
.../clientpositive/vector_struct_in.q.out | 825 +++++++++
11 files changed, 3566 insertions(+), 15 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/7cfe3743/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 2483196..46c2a78 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -104,20 +104,30 @@ import org.apache.hadoop.hive.ql.udf.UDFToLong;
import org.apache.hadoop.hive.ql.udf.UDFToShort;
import org.apache.hadoop.hive.ql.udf.UDFToString;
import org.apache.hadoop.hive.ql.udf.generic.*;
+import org.apache.hadoop.hive.serde2.ByteStream.Output;
+import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;
import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.StringUtils;
import org.apache.hive.common.util.DateUtils;
+
/**
* Context class for vectorization execution.
* Main role is to map column names to column indices and serves as a
@@ -1273,17 +1283,208 @@ public class VectorizationContext {
}
}
+ public enum InConstantType {
+ INT_FAMILY,
+ TIMESTAMP,
+ DATE,
+ FLOAT_FAMILY,
+ STRING_FAMILY,
+ DECIMAL
+ }
+
+ public static InConstantType getInConstantTypeFromPrimitiveCategory(PrimitiveCategory primitiveCategory) {
+
+ switch (primitiveCategory) {
+ case BOOLEAN:
+ case BYTE:
+ case SHORT:
+ case INT:
+ case LONG:
+ return InConstantType.INT_FAMILY;
+
+ case DATE:
+ return InConstantType.DATE;
+
+ case TIMESTAMP:
+ return InConstantType.TIMESTAMP;
+
+ case FLOAT:
+ case DOUBLE:
+ return InConstantType.FLOAT_FAMILY;
+
+ case STRING:
+ case CHAR:
+ case VARCHAR:
+ case BINARY:
+ return InConstantType.STRING_FAMILY;
+
+ case DECIMAL:
+ return InConstantType.DECIMAL;
+
+
+ case INTERVAL_YEAR_MONTH:
+ case INTERVAL_DAY_TIME:
+ // UNDONE: Fall through for these... they don't appear to be supported yet.
+ default:
+ throw new RuntimeException("Unexpected primitive type category " + primitiveCategory);
+ }
+ }
+
+ private VectorExpression getStructInExpression(List<ExprNodeDesc> childExpr, ExprNodeDesc colExpr,
+ TypeInfo colTypeInfo, List<ExprNodeDesc> inChildren, Mode mode, TypeInfo returnType)
+ throws HiveException {
+
+ VectorExpression expr = null;
+
+ StructTypeInfo structTypeInfo = (StructTypeInfo) colTypeInfo;
+
+ ArrayList<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
+ final int fieldCount = fieldTypeInfos.size();
+ ColumnVector.Type[] fieldVectorColumnTypes = new ColumnVector.Type[fieldCount];
+ InConstantType[] fieldInConstantTypes = new InConstantType[fieldCount];
+ for (int f = 0; f < fieldCount; f++) {
+ TypeInfo fieldTypeInfo = fieldTypeInfos.get(f);
+ // Only primitive fields are supported for now.
+ if (fieldTypeInfo.getCategory() != Category.PRIMITIVE) {
+ return null;
+ }
+
+ // We are going to serialize using the 4 basic types.
+ ColumnVector.Type fieldVectorColumnType = getColumnVectorTypeFromTypeInfo(fieldTypeInfo);
+ fieldVectorColumnTypes[f] = fieldVectorColumnType;
+
+ // We currently evaluate the IN (..) constants in special ways.
+ PrimitiveCategory fieldPrimitiveCategory =
+ ((PrimitiveTypeInfo) fieldTypeInfo).getPrimitiveCategory();
+ InConstantType inConstantType = getInConstantTypeFromPrimitiveCategory(fieldPrimitiveCategory);
+ fieldInConstantTypes[f] = inConstantType;
+ }
+
+ Output buffer = new Output();
+ BinarySortableSerializeWrite binarySortableSerializeWrite =
+ new BinarySortableSerializeWrite(fieldCount);
+
+ final int inChildrenCount = inChildren.size();
+ byte[][] serializedInChildren = new byte[inChildrenCount][];
+ try {
+ for (int i = 0; i < inChildrenCount; i++) {
+ final ExprNodeDesc node = inChildren.get(i);
+ final Object[] constants;
+
+ if (node instanceof ExprNodeConstantDesc) {
+ ExprNodeConstantDesc constNode = (ExprNodeConstantDesc) node;
+ ConstantObjectInspector output = constNode.getWritableObjectInspector();
+ constants = ((List<?>) output.getWritableConstantValue()).toArray();
+ } else {
+ ExprNodeGenericFuncDesc exprNode = (ExprNodeGenericFuncDesc) node;
+ ExprNodeEvaluator<?> evaluator = ExprNodeEvaluatorFactory
+ .get(exprNode);
+ ObjectInspector output = evaluator.initialize(exprNode
+ .getWritableObjectInspector());
+ constants = (Object[]) evaluator.evaluate(null);
+ }
+
+ binarySortableSerializeWrite.set(buffer);
+ for (int f = 0; f < fieldCount; f++) {
+ Object constant = constants[f];
+ if (constant == null) {
+ binarySortableSerializeWrite.writeNull();
+ } else {
+ InConstantType inConstantType = fieldInConstantTypes[f];
+ switch (inConstantType) {
+ case STRING_FAMILY:
+ {
+ byte[] bytes;
+ if (constant instanceof Text) {
+ Text text = (Text) constant;
+ bytes = text.getBytes();
+ binarySortableSerializeWrite.writeString(bytes, 0, text.getLength());
+ } else {
+ throw new HiveException("Unexpected constant String type " +
+ constant.getClass().getSimpleName());
+ }
+ }
+ break;
+ case INT_FAMILY:
+ {
+ long value;
+ if (constant instanceof IntWritable) {
+ value = ((IntWritable) constant).get();
+ } else if (constant instanceof LongWritable) {
+ value = ((LongWritable) constant).get();
+ } else {
+ throw new HiveException("Unexpected constant Long type " +
+ constant.getClass().getSimpleName());
+ }
+ binarySortableSerializeWrite.writeLong(value);
+ }
+ break;
+
+ case FLOAT_FAMILY:
+ {
+ double value;
+ if (constant instanceof DoubleWritable) {
+ value = ((DoubleWritable) constant).get();
+ } else {
+ throw new HiveException("Unexpected constant Double type " +
+ constant.getClass().getSimpleName());
+ }
+ binarySortableSerializeWrite.writeDouble(value);
+ }
+ break;
+
+ // UNDONE...
+ case DATE:
+ case TIMESTAMP:
+ case DECIMAL:
+ default:
+ throw new RuntimeException("Unexpected IN constant type " + inConstantType.name());
+ }
+ }
+ }
+ serializedInChildren[i] = Arrays.copyOfRange(buffer.getData(), 0, buffer.getLength());
+ }
+ } catch (Exception e) {
+ throw new HiveException(e);
+ }
+
+ // Create a single child representing the scratch column where we will
+ // generate the serialized keys of the batch.
+ int scratchBytesCol = ocm.allocateOutputColumn("string");
+
+ Class<?> cl = (mode == Mode.FILTER ? FilterStructColumnInList.class : StructColumnInList.class);
+
+ expr = createVectorExpression(cl, null, Mode.PROJECTION, returnType);
+
+ ((IStringInExpr) expr).setInListValues(serializedInChildren);
+
+ ((IStructInExpr) expr).setScratchBytesColumn(scratchBytesCol);
+ ((IStructInExpr) expr).setStructColumnExprs(this, colExpr.getChildren(),
+ fieldVectorColumnTypes);
+
+ return expr;
+ }
+
/**
* Create a filter or boolean-valued expression for column IN ( <list-of-constants> )
*/
private VectorExpression getInExpression(List<ExprNodeDesc> childExpr, Mode mode, TypeInfo returnType)
throws HiveException {
ExprNodeDesc colExpr = childExpr.get(0);
+ List<ExprNodeDesc> inChildren = childExpr.subList(1, childExpr.size());
String colType = colExpr.getTypeString();
+ colType = VectorizationContext.mapTypeNameSynonyms(colType);
+ TypeInfo colTypeInfo = TypeInfoUtils.getTypeInfoFromTypeString(colType);
+ Category category = colTypeInfo.getCategory();
+ if (category == Category.STRUCT) {
+ return getStructInExpression(childExpr, colExpr, colTypeInfo, inChildren, mode, returnType);
+ } else if (category != Category.PRIMITIVE) {
+ return null;
+ }
// prepare arguments for createVectorExpression
- List<ExprNodeDesc> childrenForInList = evaluateCastOnConstants(childExpr.subList(1, childExpr.size()));
+ List<ExprNodeDesc> childrenForInList = evaluateCastOnConstants(inChildren);
/* This method assumes that the IN list has no NULL entries. That is enforced elsewhere,
* in the Vectorizer class. If NULL is passed in as a list entry, behavior is not defined.
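The serialization path in getStructInExpression() above turns each struct IN-list constant into a single binary-sortable byte key, so that rows serialized with the same writer can later be compared by byte equality. Below is a minimal sketch, using only the serde calls that appear in this diff (BinarySortableSerializeWrite, ByteStream.Output), of how one hypothetical (int, string) constant such as (42, "val_42") becomes such a key; the values are illustrative, not from the patch.

import java.util.Arrays;

import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;
import org.apache.hadoop.io.Text;

// Sketch: serialize one struct(int, string) IN-list constant the way
// getStructInExpression() does, yielding a byte key comparable against
// rows serialized with an identical writer.
public class StructInKeySketch {
  public static void main(String[] args) throws Exception {
    Output buffer = new Output();
    BinarySortableSerializeWrite writer = new BinarySortableSerializeWrite(2); // 2 fields

    writer.set(buffer);                        // reset the scratch buffer
    writer.writeLong(42L);                     // INT_FAMILY field
    Text text = new Text("val_42");
    writer.writeString(text.getBytes(), 0, text.getLength()); // STRING_FAMILY field

    byte[] key = Arrays.copyOfRange(buffer.getData(), 0, buffer.getLength());
    System.out.println("serialized key length: " + key.length);
  }
}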
http://git-wip-us.apache.org/repos/asf/hive/blob/7cfe3743/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColumnInList.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColumnInList.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColumnInList.java
index 2434e90..e34ec75 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColumnInList.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColumnInList.java
@@ -20,16 +20,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.udf.UDFLike;
-import org.apache.hadoop.io.Text;
-
-import java.util.Arrays;
-import java.util.List;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
/**
* Evaluate an IN filter on a batch for a vector of strings.
@@ -165,6 +156,10 @@ public class FilterStringColumnInList extends VectorExpression implements IStrin
return "boolean";
}
+ public void setInputColumn(int inputCol) {
+ this.inputCol = inputCol;
+ }
+
@Override
public int getOutputColumn() {
return -1;
http://git-wip-us.apache.org/repos/asf/hive/blob/7cfe3743/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStructColumnInList.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStructColumnInList.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStructColumnInList.java
new file mode 100644
index 0000000..00f22bb
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStructColumnInList.java
@@ -0,0 +1,178 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.serde2.ByteStream.Output;
+import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;
+
+/**
+ * Evaluate an IN filter on a batch for a vector of structs.
+ * This is optimized so that no objects have to be created in
+ * the inner loop, and there is a hash table implemented
+ * with Cuckoo hashing that has fast lookup to do the IN test.
+ */
+public class FilterStructColumnInList extends FilterStringColumnInList implements IStructInExpr {
+ private static final long serialVersionUID = 1L;
+ private VectorExpression[] structExpressions;
+ private ColumnVector.Type[] fieldVectorColumnTypes;
+ private int[] structColumnMap;
+ private int scratchBytesColumn;
+
+ private transient Output buffer;
+ private transient BinarySortableSerializeWrite binarySortableSerializeWrite;
+
+ /**
+ * After construction you must call setInListValues() to add the values to the IN set
+ * (on the IStringInExpr interface).
+ *
+ * And, call setScratchBytesColumn() and setStructColumnExprs() on the IStructInExpr interface.
+ */
+ public FilterStructColumnInList() {
+ super(-1);
+ }
+
+ @Override
+ public void evaluate(VectorizedRowBatch batch) {
+
+ final int logicalSize = batch.size;
+ if (logicalSize == 0) {
+ return;
+ }
+
+ if (buffer == null) {
+ buffer = new Output();
+ binarySortableSerializeWrite = new BinarySortableSerializeWrite(structColumnMap.length);
+ }
+
+ for (VectorExpression ve : structExpressions) {
+ ve.evaluate(batch);
+ }
+
+ BytesColumnVector scratchBytesColumnVector = (BytesColumnVector) batch.cols[scratchBytesColumn];
+
+ try {
+ boolean selectedInUse = batch.selectedInUse;
+ int[] selected = batch.selected;
+ for (int logical = 0; logical < logicalSize; logical++) {
+ int batchIndex = (selectedInUse ? selected[logical] : logical);
+
+ binarySortableSerializeWrite.set(buffer);
+ for (int f = 0; f < structColumnMap.length; f++) {
+ int fieldColumn = structColumnMap[f];
+ ColumnVector colVec = batch.cols[fieldColumn];
+ int adjustedIndex = (colVec.isRepeating ? 0 : batchIndex);
+ if (colVec.noNulls || !colVec.isNull[adjustedIndex]) {
+ switch (fieldVectorColumnTypes[f]) {
+ case BYTES:
+ {
+ BytesColumnVector bytesColVec = (BytesColumnVector) colVec;
+ byte[] bytes = bytesColVec.vector[adjustedIndex];
+ int start = bytesColVec.start[adjustedIndex];
+ int length = bytesColVec.length[adjustedIndex];
+ binarySortableSerializeWrite.writeString(bytes, start, length);
+ }
+ break;
+
+ case LONG:
+ binarySortableSerializeWrite.writeLong(((LongColumnVector) colVec).vector[adjustedIndex]);
+ break;
+
+ case DOUBLE:
+ binarySortableSerializeWrite.writeDouble(((DoubleColumnVector) colVec).vector[adjustedIndex]);
+ break;
+
+ case DECIMAL:
+ binarySortableSerializeWrite.writeHiveDecimal(
+ ((DecimalColumnVector) colVec).vector[adjustedIndex].getHiveDecimal());
+ break;
+
+ default:
+ throw new RuntimeException("Unexpected vector column type " +
+ fieldVectorColumnTypes[f].name());
+ }
+ } else {
+ binarySortableSerializeWrite.writeNull();
+ }
+ }
+ scratchBytesColumnVector.setVal(batchIndex, buffer.getData(), 0, buffer.getLength());
+ }
+
+ // Now, take the serialized keys we just wrote into our scratch column and look them
+ // up in the IN list.
+ super.evaluate(batch);
+
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+
+ }
+
+
+ @Override
+ public String getOutputType() {
+ return "boolean";
+ }
+
+ @Override
+ public int getOutputColumn() {
+ return -1;
+ }
+
+ @Override
+ public Descriptor getDescriptor() {
+
+ // This VectorExpression (IN) is a special case, so don't return a descriptor.
+ return null;
+ }
+
+ @Override
+ public void setScratchBytesColumn(int scratchBytesColumn) {
+
+ // Tell our super class FilterStringColumnInList it will be evaluating our scratch
+ // BytesColumnVector.
+ super.setInputColumn(scratchBytesColumn);
+ this.scratchBytesColumn = scratchBytesColumn;
+ }
+
+ @Override
+ public void setStructColumnExprs(VectorizationContext vContext,
+ List<ExprNodeDesc> structColumnExprs, ColumnVector.Type[] fieldVectorColumnTypes)
+ throws HiveException {
+
+ structExpressions = vContext.getVectorExpressions(structColumnExprs);
+ structColumnMap = new int[structExpressions.length];
+ for (int i = 0; i < structColumnMap.length; i++) {
+ VectorExpression ve = structExpressions[i];
+ structColumnMap[i] = ve.getOutputColumn();
+ }
+ this.fieldVectorColumnTypes = fieldVectorColumnTypes;
+ }
+}
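The class javadoc above notes that the actual IN test is a Cuckoo-hashed set lookup inherited from the string IN expression: rows and IN-list constants are serialized with the same writer, so membership reduces to byte-key equality. The stand-in below shows that two-phase idea with a plain java.util.HashSet in place of the Cuckoo set, and with hypothetical byte arrays standing in for serialized keys; it is a simplification, not the Hive implementation.

import java.nio.ByteBuffer;
import java.util.HashSet;
import java.util.Set;

// Simplified stand-in for the evaluate() flow above: serialize the IN-list
// constants once, serialize each row's struct fields the same way, then do a
// set-membership test on the resulting byte keys.
public class StructInLookupSketch {
  private final Set<ByteBuffer> inSet = new HashSet<>();

  public void setInListValues(byte[][] serializedInChildren) {
    for (byte[] key : serializedInChildren) {
      inSet.add(ByteBuffer.wrap(key));         // wrap for value-based equals/hashCode
    }
  }

  public boolean contains(byte[] serializedRowKey) {
    return inSet.contains(ByteBuffer.wrap(serializedRowKey));
  }

  public static void main(String[] args) {
    StructInLookupSketch sketch = new StructInLookupSketch();
    sketch.setInListValues(new byte[][] { {1, 2}, {3, 4} });   // hypothetical keys
    System.out.println(sketch.contains(new byte[] {1, 2}));    // true
    System.out.println(sketch.contains(new byte[] {9, 9}));    // false
  }
}

In the real classes the same setup contract applies: setInListValues() loads the serialized constants, while setScratchBytesColumn() and setStructColumnExprs() wire up where the per-row keys are written before the inherited string IN lookup runs.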
http://git-wip-us.apache.org/repos/asf/hive/blob/7cfe3743/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IStructInExpr.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IStructInExpr.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IStructInExpr.java
new file mode 100644
index 0000000..3b25255
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IStructInExpr.java
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+
+/**
+ * Interface used for both filter and non-filter versions of IN to simplify
+ * VectorizationContext code.
+ */
+public interface IStructInExpr {
+ void setScratchBytesColumn(int scratchBytesColumn);
+ void setStructColumnExprs(VectorizationContext vContext, List<ExprNodeDesc> structColumnExprs,
+ ColumnVector.Type[] fieldVectorColumnTypes) throws HiveException;
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/7cfe3743/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java
index 03833a2..b90e3c0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java
@@ -140,6 +140,10 @@ public class StringColumnInList extends VectorExpression implements IStringInExp
return "boolean";
}
+ public void setInputColumn(int inputCol) {
+ this.inputCol = inputCol;
+ }
+
@Override
public int getOutputColumn() {
return this.outputColumn;
http://git-wip-us.apache.org/repos/asf/hive/blob/7cfe3743/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StructColumnInList.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StructColumnInList.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StructColumnInList.java
new file mode 100644
index 0000000..724497a
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StructColumnInList.java
@@ -0,0 +1,174 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.serde2.ByteStream.Output;
+import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;
+
+/**
+ * Evaluate an IN boolean expression (not a filter) on a batch for a vector of structs.
+ * This is optimized so that no objects have to be created in
+ * the inner loop, and there is a hash table implemented
+ * with Cuckoo hashing that has fast lookup to do the IN test.
+ */
+public class StructColumnInList extends StringColumnInList implements IStructInExpr {
+ private static final long serialVersionUID = 1L;
+ private VectorExpression[] structExpressions;
+ private ColumnVector.Type[] fieldVectorColumnTypes;
+ private int[] structColumnMap;
+ private int scratchBytesColumn;
+
+ private transient Output buffer;
+ private transient BinarySortableSerializeWrite binarySortableSerializeWrite;
+
+ public StructColumnInList() {
+ super();
+ }
+
+ /**
+ * After construction you must call setInListValues() to add the values to the IN set.
+ */
+ public StructColumnInList(int outputColumn) {
+ super(-1, outputColumn);
+ }
+
+ @Override
+ public void evaluate(VectorizedRowBatch batch) {
+
+ final int logicalSize = batch.size;
+ if (logicalSize == 0) {
+ return;
+ }
+
+ if (buffer == null) {
+ buffer = new Output();
+ binarySortableSerializeWrite = new BinarySortableSerializeWrite(structColumnMap.length);
+ }
+
+ for (VectorExpression ve : structExpressions) {
+ ve.evaluate(batch);
+ }
+
+ BytesColumnVector scratchBytesColumnVector = (BytesColumnVector) batch.cols[scratchBytesColumn];
+
+ try {
+ boolean selectedInUse = batch.selectedInUse;
+ int[] selected = batch.selected;
+ for (int logical = 0; logical < logicalSize; logical++) {
+ int batchIndex = (selectedInUse ? selected[logical] : logical);
+
+ binarySortableSerializeWrite.set(buffer);
+ for (int f = 0; f < structColumnMap.length; f++) {
+ int fieldColumn = structColumnMap[f];
+ ColumnVector colVec = batch.cols[fieldColumn];
+ int adjustedIndex = (colVec.isRepeating ? 0 : batchIndex);
+ if (colVec.noNulls || !colVec.isNull[adjustedIndex]) {
+ switch (fieldVectorColumnTypes[f]) {
+ case BYTES:
+ {
+ BytesColumnVector bytesColVec = (BytesColumnVector) colVec;
+ byte[] bytes = bytesColVec.vector[adjustedIndex];
+ int start = bytesColVec.start[adjustedIndex];
+ int length = bytesColVec.length[adjustedIndex];
+ binarySortableSerializeWrite.writeString(bytes, start, length);
+ }
+ break;
+
+ case LONG:
+ binarySortableSerializeWrite.writeLong(((LongColumnVector) colVec).vector[adjustedIndex]);
+ break;
+
+ case DOUBLE:
+ binarySortableSerializeWrite.writeDouble(((DoubleColumnVector) colVec).vector[adjustedIndex]);
+ break;
+
+ case DECIMAL:
+ binarySortableSerializeWrite.writeHiveDecimal(
+ ((DecimalColumnVector) colVec).vector[adjustedIndex].getHiveDecimal());
+ break;
+
+ default:
+ throw new RuntimeException("Unexpected vector column type " +
+ fieldVectorColumnTypes[f].name());
+ }
+ } else {
+ binarySortableSerializeWrite.writeNull();
+ }
+ }
+ scratchBytesColumnVector.setVal(batchIndex, buffer.getData(), 0, buffer.getLength());
+ }
+
+ // Now, take the serialized keys we just wrote into our scratch column and look them
+ // up in the IN list.
+ super.evaluate(batch);
+
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+
+ @Override
+ public String getOutputType() {
+ return "boolean";
+ }
+
+ @Override
+ public Descriptor getDescriptor() {
+
+ // This VectorExpression (IN) is a special case, so don't return a descriptor.
+ return null;
+ }
+
+
+ @Override
+ public void setScratchBytesColumn(int scratchBytesColumn) {
+
+ // Tell our super class StringColumnInList it will be evaluating our scratch
+ // BytesColumnVector.
+ super.setInputColumn(scratchBytesColumn);
+ this.scratchBytesColumn = scratchBytesColumn;
+ }
+
+ @Override
+ public void setStructColumnExprs(VectorizationContext vContext,
+ List<ExprNodeDesc> structColumnExprs, ColumnVector.Type[] fieldVectorColumnTypes)
+ throws HiveException {
+
+ structExpressions = vContext.getVectorExpressions(structColumnExprs);
+ structColumnMap = new int[structExpressions.length];
+ for (int i = 0; i < structColumnMap.length; i++) {
+ VectorExpression ve = structExpressions[i];
+ structColumnMap[i] = ve.getOutputColumn();
+ }
+ this.fieldVectorColumnTypes = fieldVectorColumnTypes;
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/7cfe3743/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 0d4c1d8..da1d9eb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -53,10 +53,12 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLeftSemiString
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterLongOperator;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterMultiKeyOperator;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterStringOperator;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator;
import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator;
import org.apache.hadoop.hive.ql.exec.vector.VectorSMBMapJoinOperator;
import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.InConstantType;
import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
@@ -139,8 +141,11 @@ import org.apache.hadoop.hive.ql.udf.UDFYear;
import org.apache.hadoop.hive.ql.udf.generic.*;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
@@ -575,7 +580,12 @@ public class Vectorizer implements PhysicalPlanResolver {
if (nonVectorizableChildOfGroupBy(op)) {
return new Boolean(true);
}
- boolean ret = validateMapWorkOperator(op, mapWork, isTez);
+ boolean ret;
+ try {
+ ret = validateMapWorkOperator(op, mapWork, isTez);
+ } catch (Exception e) {
+ throw new SemanticException(e);
+ }
if (!ret) {
LOG.info("MapWork Operator: " + op.getName() + " could not be vectorized.");
return new Boolean(false);
@@ -1260,6 +1270,7 @@ public class Vectorizer implements PhysicalPlanResolver {
LOG.info("Cannot vectorize " + desc.toString() + " of type " + typeName);
return false;
}
+ boolean isInExpression = false;
if (desc instanceof ExprNodeGenericFuncDesc) {
ExprNodeGenericFuncDesc d = (ExprNodeGenericFuncDesc) desc;
boolean r = validateGenericUdf(d);
@@ -1267,12 +1278,62 @@ public class Vectorizer implements PhysicalPlanResolver {
LOG.info("Cannot vectorize UDF " + d);
return false;
}
+ GenericUDF genericUDF = d.getGenericUDF();
+ isInExpression = (genericUDF instanceof GenericUDFIn);
}
if (desc.getChildren() != null) {
- for (ExprNodeDesc d: desc.getChildren()) {
- // Don't restrict child expressions for projection. Always use looser FILTER mode.
- boolean r = validateExprNodeDescRecursive(d, VectorExpressionDescriptor.Mode.FILTER);
- if (!r) {
+ if (isInExpression
+ && desc.getChildren().get(0).getTypeInfo().getCategory() == Category.STRUCT) {
+ // Don't restrict child expressions for projection.
+ // Always use loose FILTER mode.
+ if (!validateStructInExpression(desc, VectorExpressionDescriptor.Mode.FILTER)) {
+ return false;
+ }
+ } else {
+ for (ExprNodeDesc d : desc.getChildren()) {
+ // Don't restrict child expressions for projection.
+ // Always use loose FILTER mode.
+ if (!validateExprNodeDescRecursive(d, VectorExpressionDescriptor.Mode.FILTER)) {
+ return false;
+ }
+ }
+ }
+ }
+ return true;
+ }
+
+ private boolean validateStructInExpression(ExprNodeDesc desc,
+ VectorExpressionDescriptor.Mode mode) {
+ for (ExprNodeDesc d : desc.getChildren()) {
+ TypeInfo typeInfo = d.getTypeInfo();
+ if (typeInfo.getCategory() != Category.STRUCT) {
+ return false;
+ }
+ StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
+
+ ArrayList<TypeInfo> fieldTypeInfos = structTypeInfo
+ .getAllStructFieldTypeInfos();
+ ArrayList<String> fieldNames = structTypeInfo.getAllStructFieldNames();
+ final int fieldCount = fieldTypeInfos.size();
+ for (int f = 0; f < fieldCount; f++) {
+ TypeInfo fieldTypeInfo = fieldTypeInfos.get(f);
+ Category category = fieldTypeInfo.getCategory();
+ if (category != Category.PRIMITIVE) {
+ LOG.info("Cannot vectorize struct field " + fieldNames.get(f)
+ + " of type " + fieldTypeInfo.getTypeName());
+ return false;
+ }
+ PrimitiveTypeInfo fieldPrimitiveTypeInfo = (PrimitiveTypeInfo) fieldTypeInfo;
+ InConstantType inConstantType = VectorizationContext
+ .getInConstantTypeFromPrimitiveCategory(fieldPrimitiveTypeInfo
+ .getPrimitiveCategory());
+
+ // For now, limit the data types we support for Vectorized Struct IN().
+ if (inConstantType != InConstantType.INT_FAMILY
+ && inConstantType != InConstantType.FLOAT_FAMILY
+ && inConstantType != InConstantType.STRING_FAMILY) {
+ LOG.info("Cannot vectorize struct field " + fieldNames.get(f)
+ + " of type " + fieldTypeInfo.getTypeName());
return false;
}
}
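The validateStructInExpression() hunk above gates vectorization on the constant-type family of every struct field: only the INT, FLOAT, and STRING families are accepted for now. The standalone restatement below mirrors that gate under assumed plain type-name strings; the enum and mapping here are an illustration, not Hive API (real Hive type names carry parameters such as varchar(10)).

import java.util.Arrays;
import java.util.List;

// Sketch of the vectorization gate: a struct IN() vectorizes only if every
// field maps to the INT, FLOAT, or STRING constant family.
public class StructInTypeGateSketch {
  enum Family { INT_FAMILY, FLOAT_FAMILY, STRING_FAMILY, DECIMAL, TIMESTAMP, DATE }

  static Family familyOf(String typeName) {
    switch (typeName) {
      case "boolean": case "tinyint": case "smallint": case "int": case "bigint":
        return Family.INT_FAMILY;
      case "float": case "double":
        return Family.FLOAT_FAMILY;
      case "string": case "char": case "varchar": case "binary":
        return Family.STRING_FAMILY;
      case "decimal":
        return Family.DECIMAL;
      case "date":
        return Family.DATE;
      case "timestamp":
        return Family.TIMESTAMP;
      default:
        throw new IllegalArgumentException("Unexpected type " + typeName);
    }
  }

  static boolean canVectorizeStructIn(List<String> fieldTypes) {
    for (String t : fieldTypes) {
      Family f = familyOf(t);
      if (f != Family.INT_FAMILY && f != Family.FLOAT_FAMILY
          && f != Family.STRING_FAMILY) {
        return false;                          // e.g. decimal fields fall back
      }
    }
    return true;
  }

  public static void main(String[] args) {
    System.out.println(canVectorizeStructIn(Arrays.asList("int", "string")));   // true
    System.out.println(canVectorizeStructIn(Arrays.asList("int", "decimal")));  // false
  }
}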
[16/25] hive git commit: HIVE-10785 : Support aggregate push down
through joins (Ashutosh Chauhan via Jesus Camacho Rodriguez)
Posted by pr...@apache.org.
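The plans below show the rewrite this patch enables: count(*) grouped on the join key is computed on each input before the join (Stage-1 and Stage-3), and the partial counts are combined after the join with a multiply, visible as (_col1 * _col3) in Stage-2. A minimal sketch verifying that identity on hypothetical inputs (the src contents here are stand-ins, not the test data):

import java.util.HashMap;
import java.util.Map;

// Sketch of aggregate push down through an inner join: for each key, the
// joined count(*) equals the product of the per-side counts, so the group-by
// can run before the join and the partials are merged with a multiply.
public class AggPushdownSketch {
  public static void main(String[] args) {
    String[] f = {"a", "a", "b"};
    String[] g = {"a", "b", "b"};

    // Naive plan: join first, then count per key.
    Map<String, Long> joined = new HashMap<>();
    for (String fk : f) {
      for (String gk : g) {
        if (fk.equals(gk)) {
          joined.merge(fk, 1L, Long::sum);
        }
      }
    }

    // Pushed-down plan: count per side, join the partials, multiply.
    Map<String, Long> cf = counts(f), cg = counts(g);
    for (Map.Entry<String, Long> e : cf.entrySet()) {
      Long right = cg.get(e.getKey());
      if (right != null) {
        long pushed = e.getValue() * right;            // (_col1 * _col3)
        System.out.println(e.getKey() + ": joined=" + joined.get(e.getKey())
            + " pushed=" + pushed);                    // the two values agree
      }
    }
  }

  private static Map<String, Long> counts(String[] keys) {
    Map<String, Long> h = new HashMap<>();
    for (String k : keys) {
      h.merge(k, 1L, Long::sum);
    }
    return h;
  }
}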
http://git-wip-us.apache.org/repos/asf/hive/blob/68d6cfda/ql/src/test/results/clientpositive/groupby_join_pushdown.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby_join_pushdown.q.out b/ql/src/test/results/clientpositive/groupby_join_pushdown.q.out
new file mode 100644
index 0000000..17df98f
--- /dev/null
+++ b/ql/src/test/results/clientpositive/groupby_join_pushdown.q.out
@@ -0,0 +1,1522 @@
+PREHOOK: query: EXPLAIN
+SELECT f.key, g.key, count(g.key)
+FROM src f JOIN src g ON(f.key = g.key)
+GROUP BY f.key, g.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT f.key, g.key, count(g.key)
+FROM src f JOIN src g ON(f.key = g.key)
+GROUP BY f.key, g.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-3
+ Stage-3 is a root stage
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: string), (_col1 * _col3) (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col0)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT f.key, g.key
+FROM src f JOIN src g ON(f.key = g.key)
+GROUP BY f.key, g.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT f.key, g.key
+FROM src f JOIN src g ON(f.key = g.key)
+GROUP BY f.key, g.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-3
+ Stage-3 is a root stage
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT DISTINCT f.value, g.value
+FROM src f JOIN src g ON(f.value = g.value)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT DISTINCT f.value, g.value
+FROM src f JOIN src g ON(f.value = g.value)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-3
+ Stage-3 is a root stage
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: value is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: value (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: value is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: value (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT f.key, g.key, COUNT(*)
+FROM src f JOIN src g ON(f.key = g.key)
+GROUP BY f.key, g.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT f.key, g.key, COUNT(*)
+FROM src f JOIN src g ON(f.key = g.key)
+GROUP BY f.key, g.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-3
+ Stage-3 is a root stage
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: string), (_col1 * _col3) (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT f.ctinyint, g.ctinyint, SUM(f.cbigint)
+FROM alltypesorc f JOIN alltypesorc g ON(f.cint = g.cint)
+GROUP BY f.ctinyint, g.ctinyint
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT f.ctinyint, g.ctinyint, SUM(f.cbigint)
+FROM alltypesorc f JOIN alltypesorc g ON(f.cint = g.cint)
+GROUP BY f.ctinyint, g.ctinyint
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-4
+ Stage-3 depends on stages: Stage-2
+ Stage-4 is a root stage
+ Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cint is not null (type: boolean)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint), cint (type: int), cbigint (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2)
+ keys: _col0 (type: tinyint), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: int)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: tinyint), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: tinyint), _col2 (type: bigint)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: tinyint), _col2 (type: bigint)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col1 (type: int)
+ outputColumnNames: _col0, _col2, _col3, _col5
+ Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: tinyint), _col3 (type: tinyint), (_col2 * _col5) (type: bigint)
+ outputColumnNames: _col0, _col3, _col6
+ Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col6)
+ keys: _col0 (type: tinyint), _col3 (type: tinyint)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint), _col1 (type: tinyint)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: tinyint)
+ Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: tinyint), KEY._col1 (type: tinyint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1689 Data size: 363162 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1689 Data size: 363162 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cint is not null (type: boolean)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint), cint (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: tinyint), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: int)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: tinyint), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT f.cbigint, g.cbigint, MAX(f.cint)
+FROM alltypesorc f JOIN alltypesorc g ON(f.cbigint = g.cbigint)
+GROUP BY f.cbigint, g.cbigint
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT f.cbigint, g.cbigint, MAX(f.cint)
+FROM alltypesorc f JOIN alltypesorc g ON(f.cbigint = g.cbigint)
+GROUP BY f.cbigint, g.cbigint
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-3
+ Stage-3 is a root stage
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cbigint is not null (type: boolean)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cint (type: int), cbigint (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: max(_col0)
+ keys: _col1 (type: bigint)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: max(VALUE._col0)
+ keys: KEY._col0 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: bigint)
+ 1 _col0 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: bigint), _col2 (type: bigint), _col1 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cbigint is not null (type: boolean)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cbigint (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: bigint)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+SELECT f.ctinyint, g.ctinyint, MIN(f.ctinyint)
+FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint)
+GROUP BY f.ctinyint, g.ctinyint
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+SELECT f.ctinyint, g.ctinyint, MIN(f.ctinyint)
+FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint)
+GROUP BY f.ctinyint, g.ctinyint
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-3
+ Stage-3 is a root stage
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ctinyint is not null (type: boolean)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(_col0)
+ keys: _col0 (type: tinyint)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: tinyint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0)
+ keys: KEY._col0 (type: tinyint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: tinyint)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: tinyint)
+ 1 _col0 (type: tinyint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: tinyint), _col2 (type: tinyint), _col1 (type: tinyint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ctinyint is not null (type: boolean)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: tinyint)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: tinyint)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+SELECT MIN(f.cint)
+FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint)
+GROUP BY f.ctinyint, g.ctinyint
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+SELECT MIN(f.cint)
+FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint)
+GROUP BY f.ctinyint, g.ctinyint
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-3
+ Stage-3 is a root stage
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ctinyint is not null (type: boolean)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint), cint (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(_col1)
+ keys: _col0 (type: tinyint)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0)
+ keys: KEY._col0 (type: tinyint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: tinyint)
+ 1 _col0 (type: tinyint)
+ outputColumnNames: _col1
+ Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ctinyint is not null (type: boolean)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: tinyint)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: tinyint)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+SELECT count(f.ctinyint)
+FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint)
+GROUP BY f.ctinyint, g.ctinyint
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+SELECT count(f.ctinyint)
+FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint)
+GROUP BY f.ctinyint, g.ctinyint
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-3
+ Stage-3 is a root stage
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ctinyint is not null (type: boolean)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col0)
+ keys: _col0 (type: tinyint)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: tinyint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: tinyint)
+ 1 _col0 (type: tinyint)
+ outputColumnNames: _col1, _col3
+ Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: (_col1 * _col3) (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ctinyint is not null (type: boolean)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: tinyint)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: tinyint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+SELECT count(f.cint), f.ctinyint
+FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint)
+GROUP BY f.ctinyint, g.ctinyint
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+SELECT count(f.cint), f.ctinyint
+FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint)
+GROUP BY f.ctinyint, g.ctinyint
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-3
+ Stage-3 is a root stage
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ctinyint is not null (type: boolean)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint), cint (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col1)
+ keys: _col0 (type: tinyint)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: tinyint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: tinyint)
+ 1 _col0 (type: tinyint)
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: (_col1 * _col3) (type: bigint), _col0 (type: tinyint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ctinyint is not null (type: boolean)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: tinyint)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: tinyint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+SELECT sum(f.cint), f.ctinyint
+FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint)
+GROUP BY f.ctinyint, g.ctinyint
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+SELECT sum(f.cint), f.ctinyint
+FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint)
+GROUP BY f.ctinyint, g.ctinyint
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-3
+ Stage-3 is a root stage
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ctinyint is not null (type: boolean)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint), cint (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col1)
+ keys: _col0 (type: tinyint)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: tinyint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: tinyint)
+ 1 _col0 (type: tinyint)
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: (_col1 * _col3) (type: bigint), _col0 (type: tinyint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ctinyint is not null (type: boolean)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: tinyint)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: tinyint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
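All of the plans above share one rewrite: each side of the join is pre-aggregated on the join key, and the final COUNT/SUM is reconstructed from the per-side partials, which is where the (_col1 * _col3) and (_col2 * _col5) expressions in the Select Operators come from. The sketch below is illustrative Java, not Hive code (all names are invented); it checks that arithmetic against a brute-force join: for a key with countF and sumF on one side and countG on the other, the joined group has countF * countG rows, and SUM(f.x) over it equals sumF * countG.

    import java.util.HashMap;
    import java.util.Map;

    public class AggPushdownSketch {
      public static void main(String[] args) {
        long[] fKey = {1, 1, 2};
        long[] fVal = {10, 20, 30};
        long[] gKey = {1, 1, 1, 2};

        // Brute force: COUNT(*) and SUM(f.x) per key over the inner join.
        Map<Long, long[]> brute = new HashMap<>(); // key -> {count, sum}
        for (int i = 0; i < fKey.length; i++) {
          for (int j = 0; j < gKey.length; j++) {
            if (fKey[i] == gKey[j]) {
              long[] acc = brute.computeIfAbsent(fKey[i], k -> new long[2]);
              acc[0]++;
              acc[1] += fVal[i];
            }
          }
        }

        // Pushed-down form: pre-aggregate each side, then multiply partials.
        Map<Long, long[]> fAgg = new HashMap<>(); // key -> {countF, sumF}
        for (int i = 0; i < fKey.length; i++) {
          long[] acc = fAgg.computeIfAbsent(fKey[i], k -> new long[2]);
          acc[0]++;
          acc[1] += fVal[i];
        }
        Map<Long, Long> gCnt = new HashMap<>(); // key -> countG
        for (long k : gKey) {
          gCnt.merge(k, 1L, Long::sum);
        }

        for (Map.Entry<Long, long[]> e : brute.entrySet()) {
          long countG = gCnt.get(e.getKey());
          long count = fAgg.get(e.getKey())[0] * countG; // _col1 * _col3
          long sum = fAgg.get(e.getKey())[1] * countG;   // _col2 * _col5
          if (count != e.getValue()[0] || sum != e.getValue()[1]) {
            throw new AssertionError("mismatch for key " + e.getKey());
          }
        }
        System.out.println("pre-aggregated partials match the brute-force join");
      }
    }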
http://git-wip-us.apache.org/repos/asf/hive/blob/68d6cfda/ql/src/test/results/clientpositive/show_functions.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/show_functions.q.out b/ql/src/test/results/clientpositive/show_functions.q.out
index fbcd86a..789bedf 100644
--- a/ql/src/test/results/clientpositive/show_functions.q.out
+++ b/ql/src/test/results/clientpositive/show_functions.q.out
@@ -4,6 +4,7 @@ POSTHOOK: query: SHOW FUNCTIONS
POSTHOOK: type: SHOWFUNCTIONS
!
!=
+$sum0
%
&
*
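The only visible change here is that $sum0 is now registered and shows up in SHOW FUNCTIONS. Assuming the name follows the usual SUM0 convention (as in Calcite), it is an internal SUM variant that returns 0 instead of NULL over an empty group, which is exactly what rewrites like the count reconstruction above need. A minimal sketch of the difference, in plain Java (illustrative only, not the Hive UDAF):

    import java.util.Arrays;

    public class Sum0Sketch {
      // SQL SUM semantics: an empty group yields NULL.
      static Long sum(long[] v) {
        return v.length == 0 ? null : Arrays.stream(v).sum();
      }

      // Assumed $sum0 semantics, by analogy with Calcite's SUM0: empty yields 0.
      static long sum0(long[] v) {
        return Arrays.stream(v).sum();
      }

      public static void main(String[] args) {
        System.out.println(sum(new long[0]));  // null
        System.out.println(sum0(new long[0])); // 0
      }
    }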
[10/25] hive git commit: HIVE-6091 : Empty pipeout files are created
for connection create/close (Thiruvel Thirumoolan,
Bing Li via Ashutosh Chauhan)
Posted by pr...@apache.org.
HIVE-6091 : Empty pipeout files are created for connection create/close (Thiruvel Thirumoolan, Bing Li via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6e8eeb74
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6e8eeb74
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6e8eeb74
Branch: refs/heads/llap
Commit: 6e8eeb7439d44b2e37f70a77f2abc27d59ef8993
Parents: 7cfe374
Author: Thiruvel Thirumoolan <th...@yahoo-inc.com>
Authored: Fri Dec 20 14:09:00 2013 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Wed Sep 23 08:34:21 2015 -0700
----------------------------------------------------------------------
ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java | 3 +++
1 file changed, 3 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/6e8eeb74/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
index 5f528167..014941e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
@@ -677,6 +677,9 @@ public class SessionState {
if (localSessionPath != null) {
FileSystem.getLocal(conf).delete(localSessionPath, true);
}
+ if (this.getTmpOutputFile().exists()) {
+ this.getTmpOutputFile().delete();
+ }
}
/**
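The fix is the three guarded lines above: the per-session .pipeout scratch file is created eagerly, so a connection that opened and closed without producing output used to leave an empty file behind. The same cleanup pattern in isolation (hypothetical path and class name; the real code goes through SessionState's accessors):

    import java.io.File;

    public class PipeoutCleanupSketch {
      public static void main(String[] args) {
        // Hypothetical per-session scratch file; SessionState derives the real path.
        File tmpOutputFile =
            new File(System.getProperty("java.io.tmpdir"), "session-1234.pipeout");
        // Delete only if the file was actually materialized, so sessions that
        // never wrote output do not leave empty files around.
        if (tmpOutputFile.exists() && !tmpOutputFile.delete()) {
          System.err.println("could not delete " + tmpOutputFile);
        }
      }
    }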
[04/25] hive git commit: HIVE-11572: Datanucleus loads Log4j1.x
Logger from AppClassLoader (Prasanth Jayachandran reviewed by Gopal V)
Posted by pr...@apache.org.
HIVE-11572: Datanucleus loads Log4j1.x Logger from AppClassLoader (Prasanth Jayachandran reviewed by Gopal V)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e82bf253
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e82bf253
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e82bf253
Branch: refs/heads/llap
Commit: e82bf253fa62881f6d976e97d1bf4646ad4187c6
Parents: 1c52a7e
Author: Prasanth Jayachandran <j....@gmail.com>
Authored: Tue Sep 22 19:06:51 2015 -0500
Committer: Prasanth Jayachandran <j....@gmail.com>
Committed: Tue Sep 22 19:06:51 2015 -0500
----------------------------------------------------------------------
bin/hive | 2 +-
packaging/src/main/assembly/bin.xml | 1 +
2 files changed, 2 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/e82bf253/bin/hive
----------------------------------------------------------------------
diff --git a/bin/hive b/bin/hive
index ad7139e..505aee0 100755
--- a/bin/hive
+++ b/bin/hive
@@ -171,7 +171,7 @@ export HADOOP_HOME_WARN_SUPPRESS=true
# pass classpath to hadoop
if [ "$HADOOP_CLASSPATH" != "" ]; then
- export HADOOP_CLASSPATH="${HADOOP_CLASSPATH}:${CLASSPATH}"
+ export HADOOP_CLASSPATH="${CLASSPATH}:${HADOOP_CLASSPATH}"
else
export HADOOP_CLASSPATH="$CLASSPATH"
fi
http://git-wip-us.apache.org/repos/asf/hive/blob/e82bf253/packaging/src/main/assembly/bin.xml
----------------------------------------------------------------------
diff --git a/packaging/src/main/assembly/bin.xml b/packaging/src/main/assembly/bin.xml
index 0fa6af8..b21732b 100644
--- a/packaging/src/main/assembly/bin.xml
+++ b/packaging/src/main/assembly/bin.xml
@@ -42,6 +42,7 @@
<exclude>org.apache.hadoop:*</exclude>
<exclude>org.apache.hive.hcatalog:*</exclude>
<exclude>org.slf4j:*</exclude>
+ <exclude>log4j:*</exclude>
<exclude>commons-configuration:commons-configuration</exclude>
</excludes>
</dependencySet>
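The ordering swap in bin/hive is the substance of the fix: with Hive's CLASSPATH first, a class present on both classpaths (here, the Log4j Logger that DataNucleus looks up) resolves from Hive's lib directory, which carries the Log4j2 bridge, rather than from a Log4j 1.x jar contributed via HADOOP_CLASSPATH; the bin.xml exclude additionally keeps log4j 1.x jars out of the packaging. To check which jar wins on a given installation, a small diagnostic like the following can help (not part of the patch):

    public class WhichJarSketch {
      public static void main(String[] args) throws Exception {
        // Prints the location the JVM actually loaded the class from
        // (getCodeSource() can be null for bootstrap classes).
        Class<?> c = Class.forName("org.apache.log4j.Logger");
        System.out.println(c.getProtectionDomain().getCodeSource().getLocation());
      }
    }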
[11/25] hive git commit: HIVE-10328 : Loop optimization for SIMD in
IfExprColumnColumn.txt (Teddy Choi via Ashutosh Chauhan)
Posted by pr...@apache.org.
HIVE-10328 : Loop optimization for SIMD in IfExprColumnColumn.txt (Teddy Choi via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b98a60df
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b98a60df
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b98a60df
Branch: refs/heads/llap
Commit: b98a60df2f3779acf82d94965d11ed951b618fad
Parents: 6e8eeb7
Author: Teddy Choi <tc...@hortonworks.com>
Authored: Tue Aug 11 16:26:00 2015 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Wed Sep 23 10:45:02 2015 -0700
----------------------------------------------------------------------
.../apache/hadoop/hive/ant/GenVectorCode.java | 31 ----
.../vectorization/VectorizationBench.java | 32 +++-
.../ExpressionTemplates/IfExprColumnColumn.txt | 186 -------------------
.../IfExprDoubleColumnDoubleColumn.java | 167 +++++++++++++++++
.../expressions/IfExprLongColumnLongColumn.java | 166 +++++++++++++++++
.../hive/ql/udf/generic/GenericUDFIf.java | 4 +-
.../exec/vector/TestVectorizationContext.java | 4 +-
.../TestVectorConditionalExpressions.java | 3 +-
8 files changed, 369 insertions(+), 224 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/b98a60df/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java
----------------------------------------------------------------------
diff --git a/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java b/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java
index fede273..ba7648c 100644
--- a/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java
+++ b/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java
@@ -788,8 +788,6 @@ public class GenVectorCode extends Task {
// IF conditional expression
// fileHeader, resultType, arg2Type, arg3Type
- {"IfExprColumnColumn", "long"},
- {"IfExprColumnColumn", "double"},
{"IfExprColumnScalar", "long", "long"},
{"IfExprColumnScalar", "double", "long"},
{"IfExprColumnScalar", "long", "double"},
@@ -1051,8 +1049,6 @@ public class GenVectorCode extends Task {
generateFilterStringGroupColumnCompareStringGroupColumn(tdesc);
} else if (tdesc[0].equals("StringGroupColumnCompareStringGroupColumn")) {
generateStringGroupColumnCompareStringGroupColumn(tdesc);
- } else if (tdesc[0].equals("IfExprColumnColumn")) {
- generateIfExprColumnColumn(tdesc);
} else if (tdesc[0].equals("IfExprColumnScalar")) {
generateIfExprColumnScalar(tdesc);
} else if (tdesc[0].equals("IfExprScalarColumn")) {
@@ -1644,33 +1640,6 @@ public class GenVectorCode extends Task {
className, templateString);
}
- private void generateIfExprColumnColumn(String[] tdesc) throws Exception {
- String operandType = tdesc[1];
- String inputColumnVectorType = this.getColumnVectorType(operandType);
- String outputColumnVectorType = inputColumnVectorType;
- String returnType = operandType;
- String className = "IfExpr" + getCamelCaseType(operandType) + "Column"
- + getCamelCaseType(operandType) + "Column";
- String outputFile = joinPath(this.expressionOutputDirectory, className + ".java");
- File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt"));
- String templateString = readFile(templateFile);
- // Expand, and write result
- templateString = templateString.replaceAll("<ClassName>", className);
- templateString = templateString.replaceAll("<InputColumnVectorType>", inputColumnVectorType);
- templateString = templateString.replaceAll("<OperandType>", operandType);
- String vectorExprArgType = operandType;
-
- // Toss in timestamp and date.
- if (operandType.equals("long")) {
- // Let comparisons occur for DATE and TIMESTAMP, too.
- vectorExprArgType = "int_datetime_interval_family";
- }
- templateString = templateString.replaceAll("<VectorExprArgType>", vectorExprArgType);
-
- writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory,
- className, templateString);
- }
-
private void generateIfExprColumnScalar(String[] tdesc) throws Exception {
String operandType2 = tdesc[1];
String operandType3 = tdesc[2];
http://git-wip-us.apache.org/repos/asf/hive/blob/b98a60df/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizationBench.java
----------------------------------------------------------------------
diff --git a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizationBench.java b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizationBench.java
index 0e880c6..dcd9501 100644
--- a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizationBench.java
+++ b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizationBench.java
@@ -17,6 +17,8 @@ import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprLongColumnLongColumn;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleColumnDoubleColumn;
import org.apache.hadoop.hive.ql.exec.vector.expressions.ColAndCol;
import org.apache.hadoop.hive.ql.exec.vector.expressions.ColOrCol;
import org.apache.hadoop.hive.ql.exec.vector.expressions.LongColDivideLongColumn;
@@ -40,6 +42,7 @@ import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;
+import java.lang.Override;
import java.util.Random;
import java.util.concurrent.TimeUnit;
@@ -302,9 +305,36 @@ public class VectorizationBench {
}
}
+ public static class IfExprLongColumnLongColumnBench extends AbstractExpression {
+ @Override
+ public void setup() {
+ rowBatch = buildRowBatch(new LongColumnVector(), 3, getBooleanLongColumnVector(),
+ getLongColumnVector(), getLongColumnVector());
+ expression = new IfExprLongColumnLongColumn(0, 1, 2, 3);
+ }
+ }
+
+ public static class IfExprRepeatingLongColumnLongColumnBench extends AbstractExpression {
+ @Override
+ public void setup() {
+ rowBatch = buildRowBatch(new LongColumnVector(), 3, getBooleanLongColumnVector(),
+ getRepeatingLongColumnVector(), getLongColumnVector());
+ expression = new IfExprLongColumnLongColumn(0, 1, 2, 3);
+ }
+ }
+
+ public static class IfExprLongColumnRepeatingLongColumnBench extends AbstractExpression {
+ @Override
+ public void setup() {
+ rowBatch = buildRowBatch(new LongColumnVector(), 3, getBooleanLongColumnVector(),
+ getLongColumnVector(), getRepeatingLongColumnVector());
+ expression = new IfExprLongColumnLongColumn(0, 1, 2, 3);
+ }
+ }
+
public static void main(String[] args) throws RunnerException {
Options opt = new OptionsBuilder().include(".*" + VectorizationBench.class.getSimpleName() +
".*").build();
new Runner(opt).run();
}
-}
\ No newline at end of file
+}
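The three new benchmark cases exercise the hand-written replacement classes, including the fast paths for repeating condition and branch vectors. A minimal usage sketch of the long variant (values invented; column layout mirrors the benchmarks: column 0 is the boolean condition, 1 and 2 are the branches, 3 is the output):

    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
    import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprLongColumnLongColumn;

    public class IfExprSketch {
      public static void main(String[] args) {
        VectorizedRowBatch batch = new VectorizedRowBatch(4);
        LongColumnVector cond = new LongColumnVector();
        LongColumnVector thenCol = new LongColumnVector();
        LongColumnVector elseCol = new LongColumnVector();
        batch.cols[0] = cond;
        batch.cols[1] = thenCol;
        batch.cols[2] = elseCol;
        batch.cols[3] = new LongColumnVector();
        batch.size = 3;

        cond.vector[0] = 1; cond.vector[1] = 0; cond.vector[2] = 1;
        thenCol.vector[0] = 10; thenCol.vector[1] = 20; thenCol.vector[2] = 30;
        elseCol.isRepeating = true; // exercises the repeating-branch path
        elseCol.vector[0] = -1;

        new IfExprLongColumnLongColumn(0, 1, 2, 3).evaluate(batch);
        // Output column 3 now holds: 10, -1, 30.
      }
    }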
http://git-wip-us.apache.org/repos/asf/hive/blob/b98a60df/ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnColumn.txt
----------------------------------------------------------------------
diff --git a/ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnColumn.txt b/ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnColumn.txt
deleted file mode 100644
index 27d769c..0000000
--- a/ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnColumn.txt
+++ /dev/null
@@ -1,186 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
-
-import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
-
-/**
- * Compute IF(expr1, expr2, expr3) for 3 input column expressions.
- * The first is always a boolean (LongColumnVector).
- * The second and third are long columns or long expression results.
- */
-public class <ClassName> extends VectorExpression {
-
- private static final long serialVersionUID = 1L;
-
- private int arg1Column, arg2Column, arg3Column;
- private int outputColumn;
-
- public <ClassName>(int arg1Column, int arg2Column, int arg3Column, int outputColumn) {
- this.arg1Column = arg1Column;
- this.arg2Column = arg2Column;
- this.arg3Column = arg3Column;
- this.outputColumn = outputColumn;
- }
-
- public <ClassName>() {
- }
-
- @Override
- public void evaluate(VectorizedRowBatch batch) {
-
- if (childExpressions != null) {
- super.evaluateChildren(batch);
- }
-
- LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
- <InputColumnVectorType> arg2ColVector = (<InputColumnVectorType>) batch.cols[arg2Column];
- <InputColumnVectorType> arg3ColVector = (<InputColumnVectorType>) batch.cols[arg3Column];
- <InputColumnVectorType> outputColVector = (<InputColumnVectorType>) batch.cols[outputColumn];
- int[] sel = batch.selected;
- boolean[] outputIsNull = outputColVector.isNull;
- outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls;
- outputColVector.isRepeating = false; // may override later
- int n = batch.size;
- long[] vector1 = arg1ColVector.vector;
- <OperandType>[] vector2 = arg2ColVector.vector;
- <OperandType>[] vector3 = arg3ColVector.vector;
- <OperandType>[] outputVector = outputColVector.vector;
-
- // return immediately if batch is empty
- if (n == 0) {
- return;
- }
-
- /* All the code paths below propagate nulls even if neither arg2 nor arg3
- * have nulls. This is to reduce the number of code paths and shorten the
- * code, at the expense of maybe doing unnecessary work if neither input
- * has nulls. This could be improved in the future by expanding the number
- * of code paths.
- */
- if (arg1ColVector.isRepeating) {
- if (vector1[0] == 1) {
- arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
- } else {
- arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
- }
- return;
- }
-
- // extend any repeating values and noNulls indicator in the inputs
- arg2ColVector.flatten(batch.selectedInUse, sel, n);
- arg3ColVector.flatten(batch.selectedInUse, sel, n);
-
- if (arg1ColVector.noNulls) {
- if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- outputVector[i] = (vector1[i] == 1 ? vector2[i] : vector3[i]);
- outputIsNull[i] = (vector1[i] == 1 ?
- arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
- }
- } else {
- for(int i = 0; i != n; i++) {
- outputVector[i] = (vector1[i] == 1 ? vector2[i] : vector3[i]);
- outputIsNull[i] = (vector1[i] == 1 ?
- arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
- }
- }
- } else /* there are nulls */ {
- if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
- vector2[i] : vector3[i]);
- outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
- arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
- }
- } else {
- for(int i = 0; i != n; i++) {
- outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
- vector2[i] : vector3[i]);
- outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
- arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
- }
- }
- }
-
- // restore repeating and no nulls indicators
- arg2ColVector.unFlatten();
- arg3ColVector.unFlatten();
- }
-
- @Override
- public int getOutputColumn() {
- return outputColumn;
- }
-
- @Override
- public String getOutputType() {
- return "<OperandType>";
- }
-
- public int getArg1Column() {
- return arg1Column;
- }
-
- public void setArg1Column(int colNum) {
- this.arg1Column = colNum;
- }
-
- public int getArg2Column() {
- return arg2Column;
- }
-
- public void setArg2Column(int colNum) {
- this.arg2Column = colNum;
- }
-
- public int getArg3Column() {
- return arg3Column;
- }
-
- public void setArg3Column(int colNum) {
- this.arg3Column = colNum;
- }
-
- public void setOutputColumn(int outputColumn) {
- this.outputColumn = outputColumn;
- }
-
- @Override
- public VectorExpressionDescriptor.Descriptor getDescriptor() {
- return (new VectorExpressionDescriptor.Builder())
- .setMode(
- VectorExpressionDescriptor.Mode.PROJECTION)
- .setNumArguments(3)
- .setArgumentTypes(
- VectorExpressionDescriptor.ArgumentType.getType("long"),
- VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType>"),
- VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType>"))
- .setInputExpressionTypes(
- VectorExpressionDescriptor.InputExpressionType.COLUMN,
- VectorExpressionDescriptor.InputExpressionType.COLUMN,
- VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
- }
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/b98a60df/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java
new file mode 100644
index 0000000..71c99f6
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java
@@ -0,0 +1,167 @@
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * Compute IF(expr1, expr2, expr3) for 3 input column expressions.
+ * The first is always a boolean (LongColumnVector).
+ * The second and third are double columns or double expression results.
+ */
+public class IfExprDoubleColumnDoubleColumn extends VectorExpression {
+
+ private static final long serialVersionUID = 1L;
+
+ private int arg1Column, arg2Column, arg3Column;
+ private int outputColumn;
+
+ public IfExprDoubleColumnDoubleColumn(int arg1Column, int arg2Column, int arg3Column, int outputColumn) {
+ this.arg1Column = arg1Column;
+ this.arg2Column = arg2Column;
+ this.arg3Column = arg3Column;
+ this.outputColumn = outputColumn;
+ }
+
+ public IfExprDoubleColumnDoubleColumn() {
+ }
+
+ @Override
+ public void evaluate(VectorizedRowBatch batch) {
+
+ if (childExpressions != null) {
+ super.evaluateChildren(batch);
+ }
+
+ LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
+ DoubleColumnVector arg2ColVector = (DoubleColumnVector) batch.cols[arg2Column];
+ DoubleColumnVector arg3ColVector = (DoubleColumnVector) batch.cols[arg3Column];
+ DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumn];
+ int[] sel = batch.selected;
+ boolean[] outputIsNull = outputColVector.isNull;
+ outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls;
+ outputColVector.isRepeating = false; // may override later
+ int n = batch.size;
+ long[] vector1 = arg1ColVector.vector;
+ double[] vector2 = arg2ColVector.vector;
+ double[] vector3 = arg3ColVector.vector;
+ double[] outputVector = outputColVector.vector;
+
+ // return immediately if batch is empty
+ if (n == 0) {
+ return;
+ }
+
+ /* All the code paths below propagate nulls even if neither arg2 nor arg3
+ * have nulls. This is to reduce the number of code paths and shorten the
+ * code, at the expense of maybe doing unnecessary work if neither input
+ * has nulls. This could be improved in the future by expanding the number
+ * of code paths.
+ */
+ if (arg1ColVector.isRepeating) {
+ if (vector1[0] == 1) {
+ arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
+ } else {
+ arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
+ }
+ return;
+ }
+
+ // extend any repeating values and noNulls indicator in the inputs
+ arg2ColVector.flatten(batch.selectedInUse, sel, n);
+ arg3ColVector.flatten(batch.selectedInUse, sel, n);
+
+ if (arg1ColVector.noNulls) {
+ if (batch.selectedInUse) {
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ outputVector[i] = (vector1[i] == 1 ? vector2[i] : vector3[i]);
+ outputIsNull[i] = (vector1[i] == 1 ?
+ arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
+ }
+ } else {
+ for(int i = 0; i != n; i++) {
+ outputVector[i] = (vector1[i] == 1 ? vector2[i] : vector3[i]);
+ outputIsNull[i] = (vector1[i] == 1 ?
+ arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
+ }
+ }
+ } else /* there are nulls */ {
+ if (batch.selectedInUse) {
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
+ vector2[i] : vector3[i]);
+ outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
+ arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
+ }
+ } else {
+ for(int i = 0; i != n; i++) {
+ outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
+ vector2[i] : vector3[i]);
+ outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
+ arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
+ }
+ }
+ }
+
+ // restore repeating and no nulls indicators
+ arg2ColVector.unFlatten();
+ arg3ColVector.unFlatten();
+ }
+
+ @Override
+ public int getOutputColumn() {
+ return outputColumn;
+ }
+
+ @Override
+ public String getOutputType() {
+ return "double";
+ }
+
+ public int getArg1Column() {
+ return arg1Column;
+ }
+
+ public void setArg1Column(int colNum) {
+ this.arg1Column = colNum;
+ }
+
+ public int getArg2Column() {
+ return arg2Column;
+ }
+
+ public void setArg2Column(int colNum) {
+ this.arg2Column = colNum;
+ }
+
+ public int getArg3Column() {
+ return arg3Column;
+ }
+
+ public void setArg3Column(int colNum) {
+ this.arg3Column = colNum;
+ }
+
+ public void setOutputColumn(int outputColumn) {
+ this.outputColumn = outputColumn;
+ }
+
+ @Override
+ public VectorExpressionDescriptor.Descriptor getDescriptor() {
+ return (new VectorExpressionDescriptor.Builder())
+ .setMode(
+ VectorExpressionDescriptor.Mode.PROJECTION)
+ .setNumArguments(3)
+ .setArgumentTypes(
+ VectorExpressionDescriptor.ArgumentType.getType("long"),
+ VectorExpressionDescriptor.ArgumentType.getType("double"),
+ VectorExpressionDescriptor.ArgumentType.getType("double"))
+ .setInputExpressionTypes(
+ VectorExpressionDescriptor.InputExpressionType.COLUMN,
+ VectorExpressionDescriptor.InputExpressionType.COLUMN,
+ VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
+ }
+}
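For readers skimming the patch, here is a minimal usage sketch of the new expression. The 4-column batch layout (column 0 = boolean selector, columns 1 and 2 = double branches, column 3 = output) is an assumption for illustration only, not something the patch prescribes.

import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleColumnDoubleColumn;

public class IfExprDoubleSketch {
  public static void main(String[] args) {
    VectorizedRowBatch batch = new VectorizedRowBatch(4);
    batch.cols[0] = new LongColumnVector();   // boolean selector
    batch.cols[1] = new DoubleColumnVector(); // "then" branch
    batch.cols[2] = new DoubleColumnVector(); // "else" branch
    batch.cols[3] = new DoubleColumnVector(); // output
    batch.size = 2;

    LongColumnVector cond = (LongColumnVector) batch.cols[0];
    DoubleColumnVector thenCol = (DoubleColumnVector) batch.cols[1];
    DoubleColumnVector elseCol = (DoubleColumnVector) batch.cols[2];
    cond.vector[0] = 1; cond.vector[1] = 0;           // row 0 true, row 1 false
    thenCol.vector[0] = 1.5; thenCol.vector[1] = 2.5;
    elseCol.vector[0] = -1.5; elseCol.vector[1] = -2.5;

    // Projects IF(col0, col1, col2) into column 3.
    new IfExprDoubleColumnDoubleColumn(0, 1, 2, 3).evaluate(batch);

    DoubleColumnVector out = (DoubleColumnVector) batch.cols[3];
    System.out.println(out.vector[0] + ", " + out.vector[1]); // 1.5, -2.5
  }
}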
http://git-wip-us.apache.org/repos/asf/hive/blob/b98a60df/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java
new file mode 100644
index 0000000..00485a2
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java
@@ -0,0 +1,166 @@
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * Compute IF(expr1, expr2, expr3) for 3 input column expressions.
+ * The first is always a boolean (LongColumnVector).
+ * The second and third are long columns or long expression results.
+ */
+public class IfExprLongColumnLongColumn extends VectorExpression {
+
+ private static final long serialVersionUID = 1L;
+
+ private int arg1Column, arg2Column, arg3Column;
+ private int outputColumn;
+
+ public IfExprLongColumnLongColumn(int arg1Column, int arg2Column, int arg3Column, int outputColumn) {
+ this.arg1Column = arg1Column;
+ this.arg2Column = arg2Column;
+ this.arg3Column = arg3Column;
+ this.outputColumn = outputColumn;
+ }
+
+ public IfExprLongColumnLongColumn() {
+ }
+
+ @Override
+ public void evaluate(VectorizedRowBatch batch) {
+
+ if (childExpressions != null) {
+ super.evaluateChildren(batch);
+ }
+
+ LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
+ LongColumnVector arg2ColVector = (LongColumnVector) batch.cols[arg2Column];
+ LongColumnVector arg3ColVector = (LongColumnVector) batch.cols[arg3Column];
+ LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn];
+ int[] sel = batch.selected;
+ boolean[] outputIsNull = outputColVector.isNull;
+ outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls;
+ outputColVector.isRepeating = false; // may override later
+ int n = batch.size;
+ long[] vector1 = arg1ColVector.vector;
+ long[] vector2 = arg2ColVector.vector;
+ long[] vector3 = arg3ColVector.vector;
+ long[] outputVector = outputColVector.vector;
+
+ // return immediately if batch is empty
+ if (n == 0) {
+ return;
+ }
+
+ /* All the code paths below propagate nulls even if neither arg2 nor arg3
+ * have nulls. This is to reduce the number of code paths and shorten the
+ * code, at the expense of maybe doing unnecessary work if neither input
+ * has nulls. This could be improved in the future by expanding the number
+ * of code paths.
+ */
+ if (arg1ColVector.isRepeating) {
+ if (vector1[0] == 1) {
+ arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
+ } else {
+ arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
+ }
+ return;
+ }
+
+ // extend any repeating values and noNulls indicator in the inputs
+ arg2ColVector.flatten(batch.selectedInUse, sel, n);
+ arg3ColVector.flatten(batch.selectedInUse, sel, n);
+
+ if (arg1ColVector.noNulls) {
+ if (batch.selectedInUse) {
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ outputVector[i] = (~(vector1[i] - 1) & vector2[i]) | ((vector1[i] - 1) & vector3[i]);
+ outputIsNull[i] = (vector1[i] == 1 ?
+ arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
+ }
+ } else {
+ for(int i = 0; i != n; i++) {
+ outputVector[i] = (~(vector1[i] - 1) & vector2[i]) | ((vector1[i] - 1) & vector3[i]);
+ outputIsNull[i] = (vector1[i] == 1 ?
+ arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
+ }
+ }
+ } else /* there are nulls */ {
+ if (batch.selectedInUse) {
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
+ vector2[i] : vector3[i]);
+ outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
+ arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
+ }
+ } else {
+ for(int i = 0; i != n; i++) {
+ outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
+ vector2[i] : vector3[i]);
+ outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
+ arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
+ }
+ }
+ }
+
+ // restore repeating and no nulls indicators
+ arg2ColVector.unFlatten();
+ arg3ColVector.unFlatten();
+ }
+
+ @Override
+ public int getOutputColumn() {
+ return outputColumn;
+ }
+
+ @Override
+ public String getOutputType() {
+ return "long";
+ }
+
+ public int getArg1Column() {
+ return arg1Column;
+ }
+
+ public void setArg1Column(int colNum) {
+ this.arg1Column = colNum;
+ }
+
+ public int getArg2Column() {
+ return arg2Column;
+ }
+
+ public void setArg2Column(int colNum) {
+ this.arg2Column = colNum;
+ }
+
+ public int getArg3Column() {
+ return arg3Column;
+ }
+
+ public void setArg3Column(int colNum) {
+ this.arg3Column = colNum;
+ }
+
+ public void setOutputColumn(int outputColumn) {
+ this.outputColumn = outputColumn;
+ }
+
+ @Override
+ public VectorExpressionDescriptor.Descriptor getDescriptor() {
+ return (new VectorExpressionDescriptor.Builder())
+ .setMode(
+ VectorExpressionDescriptor.Mode.PROJECTION)
+ .setNumArguments(3)
+ .setArgumentTypes(
+ VectorExpressionDescriptor.ArgumentType.getType("long"),
+ VectorExpressionDescriptor.ArgumentType.getType("int_datetime_interval_family"),
+ VectorExpressionDescriptor.ArgumentType.getType("int_datetime_interval_family"))
+ .setInputExpressionTypes(
+ VectorExpressionDescriptor.InputExpressionType.COLUMN,
+ VectorExpressionDescriptor.InputExpressionType.COLUMN,
+ VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
+ }
+}
\ No newline at end of file
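A note on the no-null fast path in the long variant above: rather than a per-row ternary, it selects between branches with bit arithmetic. Because the boolean vector holds only 0 or 1, (v - 1) is all zero bits when v == 1 and all one bits when v == 0, so exactly one branch is masked in and the loop stays branch-free. A standalone sketch with assumed values:

public class BitSelectSketch {
  public static void main(String[] args) {
    long cond = 1L;                   // entries in the boolean vector are 0 or 1
    long thenVal = 42L, elseVal = 7L;
    // cond == 1: cond - 1 == 0,  ~(cond - 1) == all ones -> keeps thenVal
    // cond == 0: cond - 1 == -1 (all ones)               -> keeps elseVal
    long picked = (~(cond - 1) & thenVal) | ((cond - 1) & elseVal);
    System.out.println(picked);       // 42
  }
}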
http://git-wip-us.apache.org/repos/asf/hive/blob/b98a60df/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java
index 568fd46..b5e2837 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java
@@ -28,8 +28,6 @@ import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongColumnLongColumn;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleColumnDoubleColumn;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongColumnLongScalar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleColumnDoubleScalar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleColumnLongScalar;
@@ -42,6 +40,8 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongScalarLon
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleScalarDoubleScalar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleScalarLongScalar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongScalarDoubleScalar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleColumnDoubleColumn;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprLongColumnLongColumn;
import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumnStringGroupColumn;
import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumnStringScalar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumnCharScalar;
http://git-wip-us.apache.org/repos/asf/hive/blob/b98a60df/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
index 8470c47..704c654 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
@@ -42,6 +42,8 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.FuncLogWithBaseDoubleTo
import org.apache.hadoop.hive.ql.exec.vector.expressions.FuncLogWithBaseLongToDouble;
import org.apache.hadoop.hive.ql.exec.vector.expressions.FuncPowerDoubleToDouble;
import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprCharScalarStringGroupColumn;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleColumnDoubleColumn;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprLongColumnLongColumn;
import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumnCharScalar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumnStringGroupColumn;
import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumnStringScalar;
@@ -68,11 +70,9 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFYearLong;
import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterStringColumnInList;
import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterLongColumnInList;
import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterDoubleColumnInList;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongColumnLongColumn;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongColumnLongScalar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongScalarLongScalar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongScalarLongColumn;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleColumnDoubleColumn;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleColumnDoubleScalar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleScalarDoubleColumn;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleScalarDoubleScalar;
http://git-wip-us.apache.org/repos/asf/hive/blob/b98a60df/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorConditionalExpressions.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorConditionalExpressions.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorConditionalExpressions.java
index a711b55..47ebe57 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorConditionalExpressions.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorConditionalExpressions.java
@@ -24,8 +24,7 @@ import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongColumnLongColumn;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleColumnDoubleColumn;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleColumnDoubleColumn;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongColumnLongScalar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongScalarLongColumn;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongScalarLongScalar;
[13/25] hive git commit: HIVE-11911 : The stats table limits are too
large for innodb (Sergey Shelukhin, reviewed by Ashutosh Chauhan)
Posted by pr...@apache.org.
HIVE-11911 : The stats table limits are too large for innodb (Sergey Shelukhin, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f73157fe
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f73157fe
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f73157fe
Branch: refs/heads/llap
Commit: f73157fe45a0c9ea7efeef11ca1c02e47136a63c
Parents: cdc65dc
Author: Sergey Shelukhin <se...@apache.org>
Authored: Wed Sep 23 14:39:23 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Wed Sep 23 14:39:23 2015 -0700
----------------------------------------------------------------------
.../hadoop/hive/ql/stats/jdbc/JDBCStatsPublisher.java | 13 +++++++++++--
.../hive/ql/stats/jdbc/JDBCStatsSetupConstants.java | 4 ++--
2 files changed, 13 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/f73157fe/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsPublisher.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsPublisher.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsPublisher.java
index 4228957..aeb3d27 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsPublisher.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsPublisher.java
@@ -289,7 +289,16 @@ public class JDBCStatsPublisher implements StatsPublisher {
boolean tblExists = rs.next();
if (!tblExists) { // Table does not exist, create it
String createTable = JDBCStatsUtils.getCreate("");
- stmt.executeUpdate(createTable);
+ try {
+ stmt.executeUpdate(createTable);
+ } catch (SQLException ex) {
+ String msg = ex.getMessage();
+ if (msg != null && msg.contains("Specified key was too long")) {
+ throw new RuntimeException(msg + "; try using innodb with "
+ + "Barracuda file format and innodb_large_prefix", ex);
+ }
+ throw ex;
+ }
} else {
// Upgrade column name to allow for longer paths.
String idColName = JDBCStatsUtils.getIdColumnName();
@@ -301,7 +310,7 @@ public class JDBCStatsPublisher implements StatsPublisher {
colSize = rs.getInt("COLUMN_SIZE");
if (colSize < JDBCStatsSetupConstants.ID_COLUMN_VARCHAR_SIZE) {
String alterTable = JDBCStatsUtils.getAlterIdColumn();
- stmt.executeUpdate(alterTable);
+ stmt.executeUpdate(alterTable);
}
} else {
LOG.warn("Failed to update " + idColName + " - column not found");
http://git-wip-us.apache.org/repos/asf/hive/blob/f73157fe/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsSetupConstants.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsSetupConstants.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsSetupConstants.java
index 17e109a..e39fc5b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsSetupConstants.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsSetupConstants.java
@@ -34,6 +34,6 @@ public final class JDBCStatsSetupConstants {
public static final String PART_STAT_RAW_DATA_SIZE_COLUMN_NAME = "RAW_DATA_SIZE";
- // MySQL - 65535, SQL Server - 8000, Oracle - 4000, Derby - 32762, Postgres - large.
- public static final int ID_COLUMN_VARCHAR_SIZE = 4000;
+ // MySQL - 3072/3 (innodb+utf8), SQL Server - 8000, Oracle - 4000, Derby - 32762, Postgres - large.
+ public static final int ID_COLUMN_VARCHAR_SIZE = 1000;
}
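For context on the new constant: assuming InnoDB's 3072-byte index-key cap (available with innodb_large_prefix, as the error hint in JDBCStatsPublisher above suggests) and MySQL's 3-byte utf8 encoding, the largest safely indexable varchar is 1024 characters, so 1000 leaves a small margin. A trivial sanity check:

public class KeySizeSketch {
  public static void main(String[] args) {
    int maxKeyBytes = 3072;                    // InnoDB index-key limit with large prefixes
    int bytesPerChar = 3;                      // MySQL utf8 stores up to 3 bytes per char
    int maxChars = maxKeyBytes / bytesPerChar; // 1024
    System.out.println(1000 <= maxChars);      // true: the new size fits under the cap
  }
}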
[22/25] hive git commit: HIVE-11791 : Add unit test for HIVE-10122
(Illya Yalovyy via Ashutosh Chauhan, Gopal V)
Posted by pr...@apache.org.
HIVE-11791 : Add unit test for HIVE-10122 (Illya Yalovyy via Ashutosh Chauhan, Gopal V)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3eefcb54
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3eefcb54
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3eefcb54
Branch: refs/heads/llap
Commit: 3eefcb54f21222e5f3d3a1d097497a7e82429572
Parents: 41a12cb
Author: Illya Yalovyy <ya...@amazon.com>
Authored: Wed Sep 23 10:50:00 2015 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Fri Sep 25 07:43:23 2015 -0700
----------------------------------------------------------------------
.../hive/ql/optimizer/ppr/PartitionPruner.java | 26 +++--
.../TestNegativePartitionPrunerCompactExpr.java | 27 +++++
.../TestPositivePartitionPrunerCompactExpr.java | 115 +++++++++++++++++++
3 files changed, 161 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/3eefcb54/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
index 8eab603..5644662 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
@@ -27,6 +27,8 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
+import com.google.common.annotations.VisibleForTesting;
+
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.common.ObjectPair;
@@ -262,7 +264,8 @@ public class PartitionPruner implements Transform {
* @param expr original partition pruning expression.
* @return partition pruning expression that only contains partition columns.
*/
- static private ExprNodeDesc compactExpr(ExprNodeDesc expr) {
+ @VisibleForTesting
+ static ExprNodeDesc compactExpr(ExprNodeDesc expr) {
// If this is a constant boolean expression, return the value.
if (expr == null) {
return null;
@@ -298,40 +301,49 @@ public class PartitionPruner implements Transform {
allTrue = false;
}
}
-
+
+ if (allTrue) {
+ return new ExprNodeConstantDesc(Boolean.TRUE);
+ }
if (newChildren.size() == 0) {
return null;
}
if (newChildren.size() == 1) {
return newChildren.get(0);
}
- if (allTrue) {
- return new ExprNodeConstantDesc(Boolean.TRUE);
- }
+
// Nothing to compact, update expr with compacted children.
((ExprNodeGenericFuncDesc) expr).setChildren(newChildren);
} else if (isOr) {
// Non-partition expressions are converted to nulls.
List<ExprNodeDesc> newChildren = new ArrayList<ExprNodeDesc>();
boolean allFalse = true;
+ boolean isNull = false;
for (ExprNodeDesc child : children) {
ExprNodeDesc compactChild = compactExpr(child);
if (compactChild != null) {
if (isTrueExpr(compactChild)) {
return new ExprNodeConstantDesc(Boolean.TRUE);
}
- if (!isFalseExpr(compactChild)) {
+ if (!isNull && !isFalseExpr(compactChild)) {
newChildren.add(compactChild);
allFalse = false;
}
} else {
- return null;
+ isNull = true;
}
}
+ if (isNull) {
+ return null;
+ }
if (allFalse) {
return new ExprNodeConstantDesc(Boolean.FALSE);
}
+ if (newChildren.size() == 1) {
+ return newChildren.get(0);
+ }
+
// Nothing to compact, update expr with compacted children.
((ExprNodeGenericFuncDesc) expr).setChildren(newChildren);
}
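The reordered logic above is the heart of the fix: a non-partition child compacts to null; under AND a null child is simply dropped, while under OR a single null child (tracked by the new isNull flag) now forces the whole disjunction to null, since pruning on a disjunction with an unknown branch would be unsound. A minimal sketch mirroring the test fixtures below; it must live in the same package because compactExpr is package-private:

package org.apache.hadoop.hive.ql.optimizer.ppr;

import java.util.Arrays;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class CompactExprSketch {
  public static void main(String[] args) {
    ExprNodeDesc partPred = new ExprNodeGenericFuncDesc(
        TypeInfoFactory.booleanTypeInfo, new GenericUDFOPNull(),
        Arrays.<ExprNodeDesc>asList(
            new ExprNodeColumnDesc(TypeInfoFactory.booleanTypeInfo, "col1", "t1", true)));
    // The null child stands for a non-partition predicate already compacted away.
    ExprNodeDesc disjunction = new ExprNodeGenericFuncDesc(
        TypeInfoFactory.booleanTypeInfo, new GenericUDFOPOr(),
        Arrays.<ExprNodeDesc>asList(partPred, null));
    System.out.println(PartitionPruner.compactExpr(disjunction)); // null: cannot prune
  }
}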
http://git-wip-us.apache.org/repos/asf/hive/blob/3eefcb54/ql/src/test/org/apache/hadoop/hive/ql/optimizer/ppr/TestNegativePartitionPrunerCompactExpr.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/ppr/TestNegativePartitionPrunerCompactExpr.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/ppr/TestNegativePartitionPrunerCompactExpr.java
new file mode 100644
index 0000000..36a8e63
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/ppr/TestNegativePartitionPrunerCompactExpr.java
@@ -0,0 +1,27 @@
+/*
+ * Copyright 2015 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.ppr;
+
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.junit.Test;
+
+public final class TestNegativePartitionPrunerCompactExpr {
+
+ @Test(expected = IllegalStateException.class)
+ public void testCompactExprWhenConstNonBooleanThenException() {
+ PartitionPruner.compactExpr(new ExprNodeConstantDesc("Some String"));
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/3eefcb54/ql/src/test/org/apache/hadoop/hive/ql/optimizer/ppr/TestPositivePartitionPrunerCompactExpr.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/ppr/TestPositivePartitionPrunerCompactExpr.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/ppr/TestPositivePartitionPrunerCompactExpr.java
new file mode 100644
index 0000000..6830ea4
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/ppr/TestPositivePartitionPrunerCompactExpr.java
@@ -0,0 +1,115 @@
+/*
+ * Copyright 2015 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.ppr;
+
+import java.util.Arrays;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import static org.junit.Assert.*;
+
+@RunWith(Parameterized.class)
+public final class TestPositivePartitionPrunerCompactExpr {
+
+ private final ExprNodeDesc expression;
+ private final String expected;
+
+ public TestPositivePartitionPrunerCompactExpr(ExprNodeDesc expression, String expected) {
+ this.expression = expression;
+ this.expected = expected;
+ }
+
+ @Parameterized.Parameters(name = "{index}: {0} => {1}")
+ public static Iterable<Object[]> data() {
+ ExprNodeDesc trueExpr = new ExprNodeConstantDesc(Boolean.TRUE);
+ ExprNodeDesc falseExpr = new ExprNodeConstantDesc(Boolean.FALSE);
+ ExprNodeDesc col1Expr = new ExprNodeColumnDesc(TypeInfoFactory.booleanTypeInfo, "col1", "t1", true);
+ ExprNodeDesc col2Expr = new ExprNodeColumnDesc(TypeInfoFactory.booleanTypeInfo, "col2", "t1", true);
+ ExprNodeDesc udf1Expr = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
+ new GenericUDFOPNull(), Arrays.<ExprNodeDesc>asList(col1Expr));
+ ExprNodeDesc udf2Expr = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
+ new GenericUDFOPNull(), Arrays.<ExprNodeDesc>asList(col2Expr));
+
+ return Arrays.asList(new Object[][]{
+ {null, null},
+ {and(null, null), null},
+ {and(falseExpr, null), "false"},
+ {and(null, falseExpr), "false"},
+ {and(trueExpr, null), null},
+ {and(null, trueExpr), null},
+ {and(udf1Expr, null), "col1 is null"},
+ {and(null, udf2Expr), "col2 is null"},
+ {and(udf1Expr, udf2Expr), "(col1 is null and col2 is null)"},
+ {and(falseExpr, falseExpr), "false"},
+ {and(trueExpr, falseExpr), "false"},
+ {and(falseExpr, trueExpr), "false"},
+ {and(udf1Expr, falseExpr), "false"},
+ {and(falseExpr, udf2Expr), "false"},
+ {and(trueExpr, trueExpr), "true"},
+ {and(udf1Expr, trueExpr), "col1 is null"},
+ {and(trueExpr, udf2Expr), "col2 is null"},
+ {or(null, null), null},
+ {or(falseExpr, null), null},
+ {or(null, falseExpr), null},
+ {or(trueExpr, null), "true"},
+ {or(null, trueExpr), "true"},
+ {or(udf1Expr, null), null},
+ {or(null, udf2Expr), null},
+ {or(udf1Expr, udf2Expr), "(col1 is null or col2 is null)"},
+ {or(falseExpr, falseExpr), "false"},
+ {or(trueExpr, falseExpr), "true"},
+ {or(falseExpr, trueExpr), "true"},
+ {or(udf1Expr, falseExpr), "col1 is null"},
+ {or(falseExpr, udf2Expr), "col2 is null"},
+ {or(trueExpr, trueExpr), "true"},
+ {or(udf1Expr, trueExpr), "true"},
+ {or(trueExpr, udf2Expr), "true"},
+ {or(and(udf1Expr, udf2Expr), udf2Expr), "((col1 is null and col2 is null) or col2 is null)"},
+ {and(or(udf1Expr, udf2Expr), udf2Expr), "((col1 is null or col2 is null) and col2 is null)"},
+ });
+ }
+
+ @Test
+ public void testCompactExpr() {
+ ExprNodeDesc actual = PartitionPruner.compactExpr(expression);
+ if (expected == null) {
+ assertNull(actual);
+ } else {
+ assertNotNull("Expected not NULL expression", actual);
+ assertNotNull("Expected not NULL expression string", actual.getExprString());
+ assertEquals(expected, actual.getExprString());
+ }
+ }
+
+ private static ExprNodeDesc or(ExprNodeDesc left, ExprNodeDesc right) {
+ return new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
+ new GenericUDFOPOr(), Arrays.<ExprNodeDesc>asList(left, right));
+ }
+
+ private static ExprNodeDesc and(ExprNodeDesc left, ExprNodeDesc right) {
+ return new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
+ new GenericUDFOPAnd(), Arrays.<ExprNodeDesc>asList(left, right));
+ }
+}
[02/25] hive git commit: HIVE-11902 - Abort txn cleanup thread throws
SyntaxErrorException (Deepesh Khandelwal via Eugene Koifman)
Posted by pr...@apache.org.
HIVE-11902 - Abort txn cleanup thread throws SyntaxErrorException (Deepesh Khandelwal via Eugene Koifman)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5a5539c3
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5a5539c3
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5a5539c3
Branch: refs/heads/llap
Commit: 5a5539c36ef2e473edb143dc4320f33e7f380891
Parents: 44741da
Author: Eugene Koifman <ek...@hortonworks.com>
Authored: Tue Sep 22 15:44:16 2015 -0700
Committer: Eugene Koifman <ek...@hortonworks.com>
Committed: Tue Sep 22 15:44:32 2015 -0700
----------------------------------------------------------------------
.../java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/5a5539c3/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
index 9ecb82a..8597d9f 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
@@ -1302,6 +1302,9 @@ public class TxnHandler {
private int abortTxns(Connection dbConn, List<Long> txnids) throws SQLException {
Statement stmt = null;
int updateCnt = 0;
+ if (txnids.isEmpty()) {
+ return 0;
+ }
try {
stmt = dbConn.createStatement();
@@ -1921,7 +1924,7 @@ public class TxnHandler {
abortTxns(dbConn, batchToAbort);
dbConn.commit();
//todo: add TXNS.COMMENT field and set it to 'aborted by system due to timeout'
- LOG.info("Aborted the following transactions due to timeout: " + timedOutTxns.toString());
+ LOG.info("Aborted the following transactions due to timeout: " + batchToAbort.toString());
}
int numTxnsAborted = (timedOutTxns.size() - 1) * TIMED_OUT_TXN_ABORT_BATCH_SIZE +
timedOutTxns.get(timedOutTxns.size() - 1).size();
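The early return added above guards against generating a malformed IN clause: with an empty id list the statement would end in "in ()", which MySQL rejects with a SyntaxErrorException. A hedged sketch of the failure mode; the SQL here is a simplified stand-in for what TxnHandler actually builds:

import java.sql.Connection;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.List;

public class EmptyInGuardSketch {
  static int abortTxns(Connection dbConn, List<Long> txnids) throws SQLException {
    if (txnids.isEmpty()) {
      return 0; // nothing to abort; "where txn_id in ()" would be a syntax error
    }
    StringBuilder sql = new StringBuilder("update TXNS set txn_state = 'a' where txn_id in (");
    for (int i = 0; i < txnids.size(); i++) {
      if (i > 0) sql.append(',');
      sql.append(txnids.get(i));
    }
    sql.append(')');
    Statement stmt = dbConn.createStatement();
    try {
      return stmt.executeUpdate(sql.toString());
    } finally {
      stmt.close();
    }
  }
}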
[23/25] hive git commit: HIVE-11939: TxnDbUtil should turn off jdbc
auto commit (Jimmy, reviewed by Alan)
Posted by pr...@apache.org.
HIVE-11939: TxnDbUtil should turn off jdbc auto commit (Jimmy, reviewed by Alan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1c0a314b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1c0a314b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1c0a314b
Branch: refs/heads/llap
Commit: 1c0a314be99cdd300c836fa80803f885b4954fca
Parents: 3eefcb5
Author: Jimmy Xiang <jx...@cloudera.com>
Authored: Wed Sep 23 13:41:49 2015 -0700
Committer: Jimmy Xiang <jx...@cloudera.com>
Committed: Fri Sep 25 08:04:11 2015 -0700
----------------------------------------------------------------------
.../src/java/org/apache/hadoop/hive/metastore/txn/TxnDbUtil.java | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/1c0a314b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnDbUtil.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnDbUtil.java b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnDbUtil.java
index 37808ce..c465c84 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnDbUtil.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnDbUtil.java
@@ -229,7 +229,9 @@ public final class TxnDbUtil {
ShimLoader.getHadoopShims().getPassword(conf, HiveConf.ConfVars.METASTOREPWD.varname);
prop.setProperty("user", user);
prop.setProperty("password", passwd);
- return driver.connect(driverUrl, prop);
+ Connection conn = driver.connect(driverUrl, prop);
+ conn.setAutoCommit(false);
+ return conn;
}
private static void closeResources(Connection conn, Statement stmt, ResultSet rs) {
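The auto-commit change above matters because TxnDbUtil's helpers execute several statements and then call commit(); per the JDBC spec, Connection.commit() throws if the connection is still in auto-commit mode. A minimal sketch of the resulting usage pattern, using an illustrative in-memory Derby URL rather than the metastore's configured one:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.sql.Statement;

public class AutoCommitSketch {
  public static void main(String[] args) throws SQLException {
    Connection conn = DriverManager.getConnection("jdbc:derby:memory:txnSketch;create=true");
    conn.setAutoCommit(false);  // as in the patch: explicit transaction control
    Statement stmt = conn.createStatement();
    try {
      stmt.executeUpdate("create table t (i int)");
      stmt.executeUpdate("insert into t values (1)");
      conn.commit();            // legal only because auto-commit is off
    } catch (SQLException e) {
      conn.rollback();          // undo the partial batch on failure
      throw e;
    } finally {
      stmt.close();
      conn.close();
    }
  }
}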
[03/25] hive git commit: HIVE-11762: TestHCatLoaderEncryption
failures when using Hadoop 2.7 (Jason Dere, reviewed by Sergio Pena)
Posted by pr...@apache.org.
HIVE-11762: TestHCatLoaderEncryption failures when using Hadoop 2.7 (Jason Dere, reviewed by Sergio Pena)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1c52a7e7
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1c52a7e7
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1c52a7e7
Branch: refs/heads/llap
Commit: 1c52a7e72ab9c2a27902592599bc588e9e3d8be8
Parents: 5a5539c
Author: Jason Dere <jd...@hortonworks.com>
Authored: Tue Sep 22 16:31:07 2015 -0700
Committer: Jason Dere <jd...@hortonworks.com>
Committed: Tue Sep 22 16:31:07 2015 -0700
----------------------------------------------------------------------
shims/0.23/pom.xml | 1 -
.../apache/hadoop/hive/shims/Hadoop23Shims.java | 23 +++++++++++++++++++-
2 files changed, 22 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/1c52a7e7/shims/0.23/pom.xml
----------------------------------------------------------------------
diff --git a/shims/0.23/pom.xml b/shims/0.23/pom.xml
index 2e16956..3b1fb97 100644
--- a/shims/0.23/pom.xml
+++ b/shims/0.23/pom.xml
@@ -61,7 +61,6 @@
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>${hadoop-23.version}</version>
- <optional>true</optional>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
http://git-wip-us.apache.org/repos/asf/hive/blob/1c52a7e7/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
----------------------------------------------------------------------
diff --git a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
index 83369ee..c08e76d 100644
--- a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
+++ b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
@@ -532,13 +532,34 @@ public class Hadoop23Shims extends HadoopShimsSecure {
// else the updates do not get flushed properly
KeyProviderCryptoExtension keyProvider = miniDFSCluster.getNameNode().getNamesystem().getProvider();
if (keyProvider != null) {
- miniDFSCluster.getFileSystem().getClient().setKeyProvider(keyProvider);
+ try {
+ setKeyProvider(miniDFSCluster.getFileSystem().getClient(), keyProvider);
+ } catch (Exception err) {
+ throw new IOException(err);
+ }
}
cluster = new MiniDFSShim(miniDFSCluster);
return cluster;
}
+ private static void setKeyProvider(DFSClient dfsClient, KeyProviderCryptoExtension provider)
+ throws Exception {
+ Method setKeyProviderHadoop27Method = null;
+ try {
+ setKeyProviderHadoop27Method = DFSClient.class.getMethod("setKeyProvider", KeyProvider.class);
+ } catch (NoSuchMethodException err) {
+ // We can just use setKeyProvider() as it is
+ }
+
+ if (setKeyProviderHadoop27Method != null) {
+ // Method signature changed in Hadoop 2.7. Cast provider to KeyProvider
+ setKeyProviderHadoop27Method.invoke(dfsClient, (KeyProvider) provider);
+ } else {
+ dfsClient.setKeyProvider(provider);
+ }
+ }
+
/**
* MiniDFSShim.
*