Posted to commits@hive.apache.org by se...@apache.org on 2015/09/30 21:29:13 UTC
[01/14] hive git commit: HIVE-11134 - HS2 should log open session failure (Thejas Nair via Vaibhav Gumashta)
Repository: hive
Updated Branches:
refs/heads/llap 7273a4c44 -> f272ccb25
HIVE-11134 - HS2 should log open session failure (Thejas Nair via Vaibhav Gumashta)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/39214581
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/39214581
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/39214581
Branch: refs/heads/llap
Commit: 39214581297d4df20599a36afdcc84dca70caacb
Parents: 1cf7e25
Author: Thejas Nair <th...@hortonworks.com>
Authored: Sat Jun 27 07:04:40 2015 -0700
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Mon Sep 28 13:44:36 2015 -0700
----------------------------------------------------------------------
.../org/apache/hive/service/cli/session/SessionManager.java | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/39214581/service/src/java/org/apache/hive/service/cli/session/SessionManager.java
----------------------------------------------------------------------
diff --git a/service/src/java/org/apache/hive/service/cli/session/SessionManager.java b/service/src/java/org/apache/hive/service/cli/session/SessionManager.java
index 77c5e66..1119fd3 100644
--- a/service/src/java/org/apache/hive/service/cli/session/SessionManager.java
+++ b/service/src/java/org/apache/hive/service/cli/session/SessionManager.java
@@ -289,13 +289,14 @@ public class SessionManager extends CompositeService {
try {
session.open(sessionConf);
} catch (Exception e) {
+ LOG.warn("Failed to open session", e);
try {
session.close();
} catch (Throwable t) {
LOG.warn("Error closing session", t);
}
session = null;
- throw new HiveSQLException("Failed to open new session: " + e, e);
+ throw new HiveSQLException("Failed to open new session: " + e.getMessage(), e);
}
if (isOperationLogEnabled) {
session.setOperationLogSessionDir(operationLogRootDir);
@@ -303,13 +304,14 @@ public class SessionManager extends CompositeService {
try {
executeSessionHooks(session);
} catch (Exception e) {
+ LOG.warn("Failed to execute session hooks", e);
try {
session.close();
} catch (Throwable t) {
LOG.warn("Error closing session", t);
}
session = null;
- throw new HiveSQLException("Failed to execute session hooks", e);
+ throw new HiveSQLException("Failed to execute session hooks: " + e.getMessage(), e);
}
handleToSession.put(session.getSessionHandle(), session);
return session.getSessionHandle();
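The fix above applies a common log-and-rethrow pattern: record the full stack trace server-side before wrapping, since the client (over Thrift) may only ever see the flattened message, and build the wrapper message from e.getMessage() rather than string-concatenating e, whose toString() would duplicate the exception class name already carried by the cause. A minimal, self-contained sketch of that pattern (java.util.logging stands in for Hive's logger; Resource and openResource are illustrative placeholders, not Hive API):

import java.util.logging.Level;
import java.util.logging.Logger;

public class OpenWithLogging {
    private static final Logger LOG = Logger.getLogger(OpenWithLogging.class.getName());

    /** Placeholder for a session-like resource that can fail on open. */
    static class Resource {
        void open() throws Exception { throw new Exception("backing store unavailable"); }
        void close() { /* release whatever open() acquired */ }
    }

    static Resource openResource() throws Exception {
        Resource r = new Resource();
        try {
            r.open();
        } catch (Exception e) {
            // Log the full stack trace *before* rethrowing: the wrapped
            // exception may be flattened on its way to the client.
            LOG.log(Level.WARNING, "Failed to open resource", e);
            try {
                r.close();                       // best-effort cleanup
            } catch (Throwable t) {
                LOG.log(Level.WARNING, "Error closing resource", t);
            }
            // Use e.getMessage() in the wrapper; the cause already carries
            // the class name and stack trace.
            throw new Exception("Failed to open resource: " + e.getMessage(), e);
        }
        return r;
    }

    public static void main(String[] args) {
        try {
            openResource();
        } catch (Exception e) {
            System.out.println("caller saw: " + e.getMessage());
        }
    }
}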
[04/14] hive git commit: HIVE-11468: (addendum) Vectorize Struct IN() clauses (Matt McCline, via Gopal V)
Posted by se...@apache.org.
HIVE-11468: (addendum) Vectorize Struct IN() clauses (Matt McCline, via Gopal V)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b801d12c
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b801d12c
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b801d12c
Branch: refs/heads/llap
Commit: b801d12cb43c48d45731aaafccc06f14484fc6ab
Parents: a5ffa71
Author: Gopal V <go...@apache.org>
Authored: Tue Sep 29 14:57:54 2015 -0700
Committer: Gopal V <go...@apache.org>
Committed: Tue Sep 29 14:57:54 2015 -0700
----------------------------------------------------------------------
.../ql/optimizer/physical/Vectorizer.java.rej | 86 --------------------
1 file changed, 86 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/b801d12c/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java.rej
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java.rej b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java.rej
deleted file mode 100644
index 5a10b58..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java.rej
+++ /dev/null
@@ -1,86 +0,0 @@
-***************
-*** 1255,1272 ****
- LOG.info("Cannot vectorize " + desc.toString() + " of type " + typeName);
- return false;
- }
- if (desc instanceof ExprNodeGenericFuncDesc) {
- ExprNodeGenericFuncDesc d = (ExprNodeGenericFuncDesc) desc;
- boolean r = validateGenericUdf(d);
- if (!r) {
- return false;
- }
- }
- if (desc.getChildren() != null) {
-- for (ExprNodeDesc d: desc.getChildren()) {
-- // Don't restrict child expressions for projection. Always use looser FILTER mode.
-- boolean r = validateExprNodeDescRecursive(d, VectorExpressionDescriptor.Mode.FILTER);
-- if (!r) {
- return false;
- }
- }
---- 1265,1329 ----
- LOG.info("Cannot vectorize " + desc.toString() + " of type " + typeName);
- return false;
- }
-+ boolean isInExpression = false;
- if (desc instanceof ExprNodeGenericFuncDesc) {
- ExprNodeGenericFuncDesc d = (ExprNodeGenericFuncDesc) desc;
- boolean r = validateGenericUdf(d);
- if (!r) {
- return false;
- }
-+ GenericUDF genericUDF = d.getGenericUDF();
-+ isInExpression = (genericUDF instanceof GenericUDFIn);
- }
- if (desc.getChildren() != null) {
-+ if (isInExpression &&
-+ desc.getChildren().get(0).getTypeInfo().getCategory() == Category.STRUCT) {
-+ boolean r = validateStructInExpression(desc, VectorExpressionDescriptor.Mode.FILTER);
-+ } else {
-+ for (ExprNodeDesc d: desc.getChildren()) {
-+ // Don't restrict child expressions for projection. Always use looser FILTER mode.
-+ boolean r = validateExprNodeDescRecursive(d, VectorExpressionDescriptor.Mode.FILTER);
-+ if (!r) {
-+ return false;
-+ }
-+ }
-+ }
-+ }
-+ return true;
-+ }
-+
-+ private boolean validateStructInExpression(ExprNodeDesc desc,
-+ VectorExpressionDescriptor.Mode mode) {
-+
-+ for (ExprNodeDesc d: desc.getChildren()) {
-+ TypeInfo typeInfo = d.getTypeInfo();
-+ if (typeInfo.getCategory() != Category.STRUCT){
-+ return false;
-+ }
-+ StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
-+
-+ ArrayList<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
-+ ArrayList<String> fieldNames = structTypeInfo.getAllStructFieldNames();
-+ final int fieldCount = fieldTypeInfos.size();
-+ for (int f = 0; f < fieldCount; f++) {
-+ TypeInfo fieldTypeInfo = fieldTypeInfos.get(f);
-+ Category category = fieldTypeInfo.getCategory();
-+ if (category != Category.PRIMITIVE){
-+ LOG.info("Cannot vectorize struct field " + fieldNames.get(f) +
-+ " of type " + fieldTypeInfo.getTypeName());
-+ return false;
-+ }
-+ PrimitiveTypeInfo fieldPrimitiveTypeInfo = (PrimitiveTypeInfo) fieldTypeInfo;
-+ InConstantType inConstantType =
-+ VectorizationContext.getInConstantTypeFromPrimitiveCategory(
-+ fieldPrimitiveTypeInfo.getPrimitiveCategory());
-+
-+ // For now, limit the data types we support for Vectorized Struct IN().
-+ if (inConstantType != InConstantType.INT_FAMILY &&
-+ inConstantType != InConstantType.FLOAT_FAMILY &&
-+ inConstantType != InConstantType.STRING_FAMILY) {
-+ LOG.info("Cannot vectorize struct field " + fieldNames.get(f) +
-+ " of type " + fieldTypeInfo.getTypeName());
- return false;
- }
- }
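The deleted file above is apparently a leftover reject artifact (.rej, as written by patch(1) for hunks that fail to apply) from the original HIVE-11468 commit; its contents mirror the validation that patch added: a struct-typed IN() key vectorizes only when every struct field is a primitive in the INT, FLOAT, or STRING constant-type families. A self-contained sketch of that per-field whitelist check (plain Java enums stand in for Hive's TypeInfo machinery; all names here are illustrative; Java 16+ for the record):

import java.util.List;

public class StructInValidation {
    // Simplified stand-ins for Hive's type metadata (not Hive's actual API).
    enum Family { INT_FAMILY, FLOAT_FAMILY, STRING_FAMILY, OTHER }

    record Field(String name, Family family) {}

    /**
     * Mirrors the intent of validateStructInExpression(): a struct IN() key
     * is vectorizable only if every field falls in a supported family.
     */
    static boolean isVectorizableStructKey(List<Field> fields) {
        for (Field f : fields) {
            switch (f.family()) {
                case INT_FAMILY, FLOAT_FAMILY, STRING_FAMILY -> { /* supported */ }
                default -> {
                    System.out.println("Cannot vectorize struct field " + f.name());
                    return false;
                }
            }
        }
        return true;
    }

    public static void main(String[] args) {
        System.out.println(isVectorizableStructKey(List.of(
                new Field("id", Family.INT_FAMILY),
                new Field("name", Family.STRING_FAMILY))));   // true
        System.out.println(isVectorizableStructKey(List.of(
                new Field("tags", Family.OTHER))));           // false
    }
}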
[12/14] hive git commit: HIVE-11920: ADD JAR failing with URL schemes other than file/ivy/hdfs (Jason Dere, reviewed by Hari Subramaniyan)
Posted by se...@apache.org.
HIVE-11920: ADD JAR failing with URL schemes other than file/ivy/hdfs (Jason Dere, reviewed by Hari Subramaniyan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8c8cc19f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8c8cc19f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8c8cc19f
Branch: refs/heads/llap
Commit: 8c8cc19fd4b2ddfc616905a021fd0588878ab121
Parents: 265e42c
Author: Jason Dere <jd...@hortonworks.com>
Authored: Wed Sep 30 11:21:31 2015 -0700
Committer: Jason Dere <jd...@hortonworks.com>
Committed: Wed Sep 30 11:21:31 2015 -0700
----------------------------------------------------------------------
.../org/apache/hadoop/hive/ql/session/SessionState.java | 9 ++-------
ql/src/test/queries/clientpositive/add_jar_pfile.q | 8 ++++++++
ql/src/test/results/clientpositive/add_jar_pfile.q.out | 12 ++++++++++++
3 files changed, 22 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/8c8cc19f/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
index 0bd347c..dc8c336 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
@@ -1252,11 +1252,8 @@ public class SessionState {
String scheme = uri.getScheme() == null ? null : uri.getScheme().toLowerCase();
if (scheme == null || scheme.equals("file")) {
return "file";
- } else if (scheme.equals("hdfs") || scheme.equals("ivy")) {
- return scheme;
- } else {
- throw new RuntimeException("invalid url: " + uri + ", expecting ( file | hdfs | ivy) as url scheme. ");
}
+ return scheme;
}
List<URI> resolveAndDownload(ResourceType t, String value, boolean convertToUnix) throws URISyntaxException,
@@ -1266,10 +1263,8 @@ public class SessionState {
return Arrays.asList(uri);
} else if (getURLType(value).equals("ivy")) {
return dependencyResolver.downloadDependencies(uri);
- } else if (getURLType(value).equals("hdfs")) {
- return Arrays.asList(createURI(downloadResource(value, convertToUnix)));
} else {
- throw new RuntimeException("Invalid url " + uri);
+ return Arrays.asList(createURI(downloadResource(value, convertToUnix)));
}
}
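After this change, getURLType() no longer whitelists schemes: scheme-less and file: URIs are treated as local files, "ivy" URIs go through dependency resolution, and every other scheme (hdfs, the pfile scheme exercised by the new test, and so on) falls through to the generic download path instead of raising "invalid url". A self-contained sketch of the resulting dispatch (the two download helpers are illustrative stubs, not Hive's implementations):

import java.net.URI;
import java.net.URISyntaxException;
import java.util.Locale;

public class ResourceSchemeDispatch {
    // Stand-ins for Hive's ivy resolution and filesystem download paths.
    static String downloadDependencies(URI uri) { return "resolved-via-ivy:" + uri; }
    static String downloadResource(URI uri)     { return "downloaded:" + uri; }

    /** Scheme-less URIs and file: URIs are treated as local files. */
    static String getUrlType(URI uri) {
        String scheme = uri.getScheme();
        return scheme == null ? "file" : scheme.toLowerCase(Locale.ROOT);
    }

    /**
     * Post-HIVE-11920 behavior: "file" is used in place, "ivy" gets
     * dependency resolution, and *any* other scheme is handed to the
     * generic download path instead of being rejected.
     */
    static String resolve(String value) throws URISyntaxException {
        URI uri = new URI(value);
        String type = getUrlType(uri);
        if (type.equals("file")) {
            return uri.toString();
        } else if (type.equals("ivy")) {
            return downloadDependencies(uri);
        } else {
            return downloadResource(uri);   // no more "invalid url" RuntimeException
        }
    }

    public static void main(String[] args) throws URISyntaxException {
        System.out.println(resolve("/tmp/udfs.jar"));
        System.out.println(resolve("pfile:///tmp/hive-contrib.jar"));
        System.out.println(resolve("ivy://org.apache.hive:hive-contrib:2.0"));
    }
}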
http://git-wip-us.apache.org/repos/asf/hive/blob/8c8cc19f/ql/src/test/queries/clientpositive/add_jar_pfile.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/add_jar_pfile.q b/ql/src/test/queries/clientpositive/add_jar_pfile.q
new file mode 100644
index 0000000..ed55518
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/add_jar_pfile.q
@@ -0,0 +1,8 @@
+
+dfs -copyFromLocal ${system:maven.local.repository}/org/apache/hive/hive-contrib/${system:hive.version}/hive-contrib-${system:hive.version}.jar pfile://${system:test.tmp.dir}/hive-contrib-${system:hive.version}.jar;
+
+add jar pfile://${system:test.tmp.dir}/hive-contrib-${system:hive.version}.jar;
+
+CREATE TEMPORARY FUNCTION example_add AS 'org.apache.hadoop.hive.contrib.udf.example.UDFExampleAdd';
+
+DROP TEMPORARY FUNCTION example_add;
http://git-wip-us.apache.org/repos/asf/hive/blob/8c8cc19f/ql/src/test/results/clientpositive/add_jar_pfile.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/add_jar_pfile.q.out b/ql/src/test/results/clientpositive/add_jar_pfile.q.out
new file mode 100644
index 0000000..60c65cc
--- /dev/null
+++ b/ql/src/test/results/clientpositive/add_jar_pfile.q.out
@@ -0,0 +1,12 @@
+PREHOOK: query: CREATE TEMPORARY FUNCTION example_add AS 'org.apache.hadoop.hive.contrib.udf.example.UDFExampleAdd'
+PREHOOK: type: CREATEFUNCTION
+PREHOOK: Output: example_add
+POSTHOOK: query: CREATE TEMPORARY FUNCTION example_add AS 'org.apache.hadoop.hive.contrib.udf.example.UDFExampleAdd'
+POSTHOOK: type: CREATEFUNCTION
+POSTHOOK: Output: example_add
+PREHOOK: query: DROP TEMPORARY FUNCTION example_add
+PREHOOK: type: DROPFUNCTION
+PREHOOK: Output: example_add
+POSTHOOK: query: DROP TEMPORARY FUNCTION example_add
+POSTHOOK: type: DROPFUNCTION
+POSTHOOK: Output: example_add
[14/14] hive git commit: HIVE-12000 : LLAP: Merge master into branch (Sergey Shelukhin)
Posted by se...@apache.org.
HIVE-12000 : LLAP: Merge master into branch (Sergey Shelukhin)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f272ccb2
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f272ccb2
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f272ccb2
Branch: refs/heads/llap
Commit: f272ccb25bc495d600a713058f6c2082ebd6d966
Parents: 7273a4c 064e37c
Author: Sergey Shelukhin <se...@apache.org>
Authored: Wed Sep 30 12:22:32 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Wed Sep 30 12:22:32 2015 -0700
----------------------------------------------------------------------
.../common/metrics/common/MetricsConstant.java | 1 +
.../hive/hcatalog/templeton/AppConfig.java | 21 +++
.../apache/hive/hcatalog/templeton/Server.java | 12 +-
itests/qtest/pom.xml | 2 +-
.../test/resources/testconfiguration.properties | 27 ++-
.../hadoop/hive/ql/io/orc/MetadataReader.java | 2 +-
.../hive/ql/io/orc/MetadataReaderImpl.java | 20 ++-
.../hadoop/hive/ql/io/orc/OrcInputFormat.java | 49 ++++++
.../hadoop/hive/ql/io/orc/ReaderImpl.java | 12 +-
.../hadoop/hive/ql/io/orc/RecordReaderImpl.java | 35 ++++
.../hive/ql/io/orc/RecordReaderUtils.java | 5 +-
.../hive/ql/optimizer/StatsOptimizer.java | 46 ++++-
.../ql/optimizer/calcite/RelOptHiveTable.java | 15 ++
.../ql/optimizer/physical/Vectorizer.java.rej | 86 ----------
.../hadoop/hive/ql/session/SessionState.java | 9 +-
.../test/queries/clientpositive/add_jar_pfile.q | 8 +
.../clientpositive/metadata_only_queries.q | 15 ++
.../results/clientpositive/add_jar_pfile.q.out | 12 ++
.../clientpositive/metadata_only_queries.q.out | 158 +++++++++++++++++
.../spark/metadata_only_queries.q.out | 170 +++++++++++++++++++
.../tez/metadata_only_queries.q.out | 170 +++++++++++++++++++
.../tez/vector_groupby_reduce.q.out | 70 +++++---
.../clientpositive/vector_groupby_reduce.q.out | 69 +++++---
.../hive/service/cli/operation/Operation.java | 11 ++
.../service/cli/operation/OperationManager.java | 11 ++
.../service/cli/session/HiveSessionProxy.java | 6 +
.../service/cli/session/SessionManager.java | 6 +-
.../thrift/EmbeddedThriftBinaryCLIService.java | 2 +-
.../thrift/ThreadPoolExecutorWithOomHook.java | 55 ++++++
.../cli/thrift/ThriftBinaryCLIService.java | 12 +-
.../service/cli/thrift/ThriftCLIService.java | 3 +
.../cli/thrift/ThriftHttpCLIService.java | 10 +-
.../apache/hive/service/server/HiveServer2.java | 12 +-
.../hive/service/auth/TestPlainSaslHelper.java | 2 +-
.../session/TestPluggableHiveSessionImpl.java | 2 +-
.../cli/session/TestSessionGlobalInitFile.java | 2 +-
36 files changed, 957 insertions(+), 191 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/f272ccb2/itests/qtest/pom.xml
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/f272ccb2/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/f272ccb2/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReader.java
----------------------------------------------------------------------
diff --cc ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReader.java
index 35cc05f,43d2933..cea324c
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReader.java
@@@ -18,17 -18,102 +18,17 @@@
package org.apache.hadoop.hive.ql.io.orc;
import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.List;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.common.DiskRange;
-import org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl.BufferChunk;
+import org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilterIndex;
+import org.apache.hadoop.hive.ql.io.orc.OrcProto.RowIndex;
+import org.apache.hadoop.hive.ql.io.orc.OrcProto.StripeFooter;
-import com.google.common.collect.Lists;
+public interface MetadataReader {
+ RecordReaderImpl.Index readRowIndex(StripeInformation stripe, StripeFooter footer,
+ boolean[] included, RowIndex[] indexes, boolean[] sargColumns,
+ BloomFilterIndex[] bloomFilterIndices) throws IOException;
-public class MetadataReader {
- private final FSDataInputStream file;
- private final CompressionCodec codec;
- private final int bufferSize;
- private final int typeCount;
+ StripeFooter readStripeFooter(StripeInformation stripe) throws IOException;
- public MetadataReader(FileSystem fileSystem, Path path,
- CompressionCodec codec, int bufferSize, int typeCount) throws IOException {
- this(fileSystem.open(path), codec, bufferSize, typeCount);
- }
-
- public MetadataReader(FSDataInputStream file,
- CompressionCodec codec, int bufferSize, int typeCount) {
- this.file = file;
- this.codec = codec;
- this.bufferSize = bufferSize;
- this.typeCount = typeCount;
- }
-
- public RecordReaderImpl.Index readRowIndex(StripeInformation stripe, OrcProto.StripeFooter footer,
- boolean[] included, OrcProto.RowIndex[] indexes, boolean[] sargColumns,
- OrcProto.BloomFilterIndex[] bloomFilterIndices) throws IOException {
- if (footer == null) {
- footer = readStripeFooter(stripe);
- }
- if (indexes == null) {
- indexes = new OrcProto.RowIndex[typeCount];
- }
- if (bloomFilterIndices == null) {
- bloomFilterIndices = new OrcProto.BloomFilterIndex[typeCount];
- }
- long offset = stripe.getOffset();
- List<OrcProto.Stream> streams = footer.getStreamsList();
- for (int i = 0; i < streams.size(); i++) {
- OrcProto.Stream stream = streams.get(i);
- OrcProto.Stream nextStream = null;
- if (i < streams.size() - 1) {
- nextStream = streams.get(i+1);
- }
- int col = stream.getColumn();
- int len = (int) stream.getLength();
- // row index stream and bloom filter are interlaced, check if the sarg column contains bloom
- // filter and combine the io to read row index and bloom filters for that column together
- if (stream.hasKind() && (stream.getKind() == OrcProto.Stream.Kind.ROW_INDEX)) {
- boolean readBloomFilter = false;
- if (sargColumns != null && sargColumns[col] &&
- nextStream.getKind() == OrcProto.Stream.Kind.BLOOM_FILTER) {
- len += nextStream.getLength();
- i += 1;
- readBloomFilter = true;
- }
- if ((included == null || included[col]) && indexes[col] == null) {
- byte[] buffer = new byte[len];
- file.readFully(offset, buffer, 0, buffer.length);
- ByteBuffer[] bb = new ByteBuffer[] {ByteBuffer.wrap(buffer)};
- indexes[col] = OrcProto.RowIndex.parseFrom(InStream.create("index",
- bb, new long[]{0}, stream.getLength(), codec, bufferSize));
- if (readBloomFilter) {
- bb[0].position((int) stream.getLength());
- bloomFilterIndices[col] = OrcProto.BloomFilterIndex.parseFrom(
- InStream.create("bloom_filter", bb, new long[]{0}, nextStream.getLength(),
- codec, bufferSize));
- }
- }
- }
- offset += len;
- }
-
- RecordReaderImpl.Index index = new RecordReaderImpl.Index(indexes, bloomFilterIndices);
- return index;
- }
-
- public OrcProto.StripeFooter readStripeFooter(StripeInformation stripe) throws IOException {
- long offset = stripe.getOffset() + stripe.getIndexLength() + stripe.getDataLength();
- int tailLength = (int) stripe.getFooterLength();
-
- // read the footer
- ByteBuffer tailBuf = ByteBuffer.allocate(tailLength);
- file.readFully(offset, tailBuf.array(), tailBuf.arrayOffset(), tailLength);
- return OrcProto.StripeFooter.parseFrom(InStream.create("footer",
- Lists.<DiskRange>newArrayList(new BufferChunk(tailBuf, 0)),
- tailLength, codec, bufferSize));
- }
-
- public void close() throws IOException {
- file.close();
- }
+ void close() throws IOException;
- }
+ }
http://git-wip-us.apache.org/repos/asf/hive/blob/f272ccb2/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReaderImpl.java
----------------------------------------------------------------------
diff --cc ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReaderImpl.java
index 83594f7,0000000..1456df3
mode 100644,000000..100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReaderImpl.java
@@@ -1,119 -1,0 +1,123 @@@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.List;
+
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.io.DiskRange;
+import org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl.BufferChunk;
+
+import com.google.common.collect.Lists;
+
+public class MetadataReaderImpl implements MetadataReader {
+ private final FSDataInputStream file;
+ private final CompressionCodec codec;
+ private final int bufferSize;
+ private final int typeCount;
+
- public MetadataReaderImpl(FileSystem fileSystem, Path path, CompressionCodec codec,
- int bufferSize, int typeCount) throws IOException {
- this.file = fileSystem.open(path);
++ public MetadataReaderImpl(FileSystem fileSystem, Path path,
++ CompressionCodec codec, int bufferSize, int typeCount) throws IOException {
++ this(fileSystem.open(path), codec, bufferSize, typeCount);
++ }
++
++ public MetadataReaderImpl(FSDataInputStream file,
++ CompressionCodec codec, int bufferSize, int typeCount) {
++ this.file = file;
+ this.codec = codec;
+ this.bufferSize = bufferSize;
+ this.typeCount = typeCount;
+ }
+
+ @Override
+ public RecordReaderImpl.Index readRowIndex(StripeInformation stripe,
+ OrcProto.StripeFooter footer, boolean[] included, OrcProto.RowIndex[] indexes,
+ boolean[] sargColumns, OrcProto.BloomFilterIndex[] bloomFilterIndices) throws IOException {
+ if (footer == null) {
+ footer = readStripeFooter(stripe);
+ }
+ if (indexes == null) {
+ indexes = new OrcProto.RowIndex[typeCount];
+ }
+ if (bloomFilterIndices == null) {
+ bloomFilterIndices = new OrcProto.BloomFilterIndex[typeCount];
+ }
+ long offset = stripe.getOffset();
+ List<OrcProto.Stream> streams = footer.getStreamsList();
+ for (int i = 0; i < streams.size(); i++) {
+ OrcProto.Stream stream = streams.get(i);
+ OrcProto.Stream nextStream = null;
+ if (i < streams.size() - 1) {
+ nextStream = streams.get(i+1);
+ }
+ int col = stream.getColumn();
+ int len = (int) stream.getLength();
+ // row index stream and bloom filter are interlaced, check if the sarg column contains bloom
+ // filter and combine the io to read row index and bloom filters for that column together
+ if (stream.hasKind() && (stream.getKind() == OrcProto.Stream.Kind.ROW_INDEX)) {
+ boolean readBloomFilter = false;
+ if (sargColumns != null && sargColumns[col] &&
+ nextStream.getKind() == OrcProto.Stream.Kind.BLOOM_FILTER) {
+ len += nextStream.getLength();
+ i += 1;
+ readBloomFilter = true;
+ }
+ if ((included == null || included[col]) && indexes[col] == null) {
+ byte[] buffer = new byte[len];
++ file.readFully(offset, buffer, 0, buffer.length);
+ ByteBuffer bb = ByteBuffer.wrap(buffer);
- file.seek(offset);
- file.readFully(buffer);
+ indexes[col] = OrcProto.RowIndex.parseFrom(InStream.create(null, "index",
+ Lists.<DiskRange>newArrayList(new BufferChunk(bb, 0)), stream.getLength(),
- codec, bufferSize));
++ codec, bufferSize));
+ if (readBloomFilter) {
+ bb.position((int) stream.getLength());
+ bloomFilterIndices[col] = OrcProto.BloomFilterIndex.parseFrom(InStream.create(
+ null, "bloom_filter", Lists.<DiskRange>newArrayList(new BufferChunk(bb, 0)),
+ nextStream.getLength(), codec, bufferSize));
+ }
+ }
+ }
+ offset += len;
+ }
+
+ RecordReaderImpl.Index index = new RecordReaderImpl.Index(indexes, bloomFilterIndices);
+ return index;
+ }
+
+ @Override
+ public OrcProto.StripeFooter readStripeFooter(StripeInformation stripe) throws IOException {
+ long offset = stripe.getOffset() + stripe.getIndexLength() + stripe.getDataLength();
+ int tailLength = (int) stripe.getFooterLength();
++
+ // read the footer
+ ByteBuffer tailBuf = ByteBuffer.allocate(tailLength);
- file.seek(offset);
- file.readFully(tailBuf.array(), tailBuf.arrayOffset(), tailLength);
++ file.readFully(offset, tailBuf.array(), tailBuf.arrayOffset(), tailLength);
+ return OrcProto.StripeFooter.parseFrom(InStream.create(null, "footer",
+ Lists.<DiskRange>newArrayList(new BufferChunk(tailBuf, 0)),
+ tailLength, codec, bufferSize));
+ }
+
+ @Override
+ public void close() throws IOException {
+ file.close();
+ }
+}
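A detail worth noting in the merged MetadataReaderImpl: the branch's seek()-then-readFully(buffer) pairs are replaced by the positioned readFully(offset, buffer, off, len) overload, which reads at an absolute offset without moving the stream's file pointer, so readers sharing the stream do not race on seek position. A small JDK-only demonstration of the same property using FileChannel.read(dst, position), offered as an assumed stand-in for FSDataInputStream's positioned read (the file layout below is fabricated for the demo):

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;

public class PositionedReadDemo {
    /**
     * Positioned read: FileChannel.read(dst, position) never moves the
     * channel's own position, so two threads can read different offsets of
     * the same open file without coordinating seeks.
     */
    static byte[] readAt(FileChannel ch, long offset, int len) throws IOException {
        ByteBuffer buf = ByteBuffer.allocate(len);
        while (buf.hasRemaining()) {
            int n = ch.read(buf, offset + buf.position());
            if (n < 0) throw new IOException("EOF before " + len + " bytes");
        }
        return buf.array();
    }

    public static void main(String[] args) throws IOException {
        Path p = Files.createTempFile("orc-demo", ".bin");
        Files.write(p, "stripe-index|stripe-data|stripe-footer".getBytes(StandardCharsets.UTF_8));
        try (FileChannel ch = FileChannel.open(p, StandardOpenOption.READ)) {
            // Read the "footer" region without disturbing any other reader.
            System.out.println(new String(readAt(ch, 25, 13), StandardCharsets.UTF_8));
        }
        Files.delete(p);
    }
}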
http://git-wip-us.apache.org/repos/asf/hive/blob/f272ccb2/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/f272ccb2/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/f272ccb2/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/f272ccb2/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderUtils.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/f272ccb2/ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out
----------------------------------------------------------------------
[07/14] hive git commit: HIVE-11937: Improve StatsOptimizer to deal with query with additional constant columns (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Posted by se...@apache.org.
HIVE-11937: Improve StatsOptimizer to deal with query with additional constant columns (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/cdaf3567
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/cdaf3567
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/cdaf3567
Branch: refs/heads/llap
Commit: cdaf356740195cde6f5b6bfdade2f614e1c618d3
Parents: 6a8d7e4
Author: Pengcheng Xiong <px...@apache.org>
Authored: Tue Sep 29 17:47:39 2015 -0700
Committer: Pengcheng Xiong <px...@apache.org>
Committed: Tue Sep 29 17:47:39 2015 -0700
----------------------------------------------------------------------
.../hive/ql/optimizer/StatsOptimizer.java | 46 ++++-
.../clientpositive/metadata_only_queries.q | 15 ++
.../clientpositive/metadata_only_queries.q.out | 158 +++++++++++++++++
.../spark/metadata_only_queries.q.out | 170 +++++++++++++++++++
.../tez/metadata_only_queries.q.out | 170 +++++++++++++++++++
5 files changed, 552 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/cdaf3567/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
index bc8d8f7..5a21e6b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
@@ -19,6 +19,8 @@ package org.apache.hadoop.hive.ql.optimizer;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
@@ -235,10 +237,23 @@ public class StatsOptimizer implements Transform {
return null;
}
Operator<?> last = (Operator<?>) stack.get(5);
+ SelectOperator cselOp = null;
+ Map<Integer,Object> posToConstant = new HashMap<>();
if (last instanceof SelectOperator) {
- SelectOperator cselOp = (SelectOperator) last;
+ cselOp = (SelectOperator) last;
if (!cselOp.isIdentitySelect()) {
- return null; // todo we can do further by providing operator to fetch task
+ for (int pos = 0; pos < cselOp.getConf().getColList().size(); pos++) {
+ ExprNodeDesc desc = cselOp.getConf().getColList().get(pos);
+ if (desc instanceof ExprNodeConstantDesc) {
+ //We store the position to the constant value for later use.
+ posToConstant.put(pos, ((ExprNodeConstantDesc)desc).getValue());
+ } else {
+ if (!(desc instanceof ExprNodeColumnDesc)) {
+ // Probably an expression, cant handle that
+ return null;
+ }
+ }
+ }
}
last = (Operator<?>) stack.get(6);
}
@@ -588,13 +603,30 @@ public class StatsOptimizer implements Transform {
List<List<Object>> allRows = new ArrayList<List<Object>>();
- allRows.add(oneRow);
-
List<String> colNames = new ArrayList<String>();
List<ObjectInspector> ois = new ArrayList<ObjectInspector>();
- for (ColumnInfo colInfo: cgbyOp.getSchema().getSignature()) {
- colNames.add(colInfo.getInternalName());
- ois.add(TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(colInfo.getType()));
+ if (cselOp == null) {
+ allRows.add(oneRow);
+ for (ColumnInfo colInfo : cgbyOp.getSchema().getSignature()) {
+ colNames.add(colInfo.getInternalName());
+ ois.add(TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(colInfo.getType()));
+ }
+ } else {
+ int aggrPos = 0;
+ List<Object> oneRowWithConstant = new ArrayList<>();
+ for (int pos = 0; pos < cselOp.getSchema().getSignature().size(); pos++) {
+ if (posToConstant.containsKey(pos)) {
+ // This position is a constant.
+ oneRowWithConstant.add(posToConstant.get(pos));
+ } else {
+ // This position is an aggregation.
+ oneRowWithConstant.add(oneRow.get(aggrPos++));
+ }
+ ColumnInfo colInfo = cselOp.getSchema().getSignature().get(pos);
+ colNames.add(colInfo.getInternalName());
+ ois.add(TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(colInfo.getType()));
+ }
+ allRows.add(oneRowWithConstant);
}
StandardStructObjectInspector sOI = ObjectInspectorFactory.
getStandardStructObjectInspector(colNames, ois);
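The core of the change is visible in the two hunks above: the positions of constant SELECT columns are remembered in posToConstant, and the single metadata-derived answer row is then rebuilt by interleaving those constants with the aggregate values in order. A minimal sketch of that interleaving with plain Java collections (method and names are illustrative, not Hive's):

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

public class ConstantInterleave {
    /**
     * Rebuilds the one-row result of a metadata-only query: positions that
     * held literal constants get the remembered constant; every other
     * position consumes the next aggregate value in order (the posToConstant
     * idea from the patch).
     */
    static List<Object> interleave(int width, Map<Integer, Object> posToConstant,
                                   List<Object> aggregates) {
        List<Object> row = new ArrayList<>(width);
        int aggrPos = 0;
        for (int pos = 0; pos < width; pos++) {
            if (posToConstant.containsKey(pos)) {
                row.add(posToConstant.get(pos));        // constant column
            } else {
                row.add(aggregates.get(aggrPos++));     // next aggregation result
            }
        }
        return row;
    }

    public static void main(String[] args) {
        // select min(i), '1' as one, max(i), 3+4.0 as three
        //   -> constants at positions 1 and 3, aggregates fill the rest
        List<Object> row = interleave(4,
                Map.of(1, "1", 3, 7.0),
                List.of(65536, 65791));
        System.out.println(row);   // [65536, 1, 65791, 7.0]
    }
}

This matches the shape of the q.out results further down, where the constant columns ('1', 7.0) appear in place among the stats-derived mins and maxes.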
http://git-wip-us.apache.org/repos/asf/hive/blob/cdaf3567/ql/src/test/queries/clientpositive/metadata_only_queries.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/metadata_only_queries.q b/ql/src/test/queries/clientpositive/metadata_only_queries.q
index 56f3a78..70fac92 100644
--- a/ql/src/test/queries/clientpositive/metadata_only_queries.q
+++ b/ql/src/test/queries/clientpositive/metadata_only_queries.q
@@ -57,6 +57,11 @@ select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), co
explain
select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part;
+explain
+select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl;
+explain
+select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part;
+
analyze table stats_tbl compute statistics for columns t,si,i,b,f,d,bo,s,bin;
analyze table stats_tbl_part partition(dt='2010') compute statistics for columns t,si,i,b,f,d,bo,s,bin;
analyze table stats_tbl_part partition(dt='2011') compute statistics for columns t,si,i,b,f,d,bo,s,bin;
@@ -69,6 +74,12 @@ explain
select min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) from stats_tbl;
select min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) from stats_tbl;
+explain
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl;
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl;
+
+
+
explain
select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si) from stats_tbl_part;
select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si) from stats_tbl_part;
@@ -76,6 +87,10 @@ explain
select min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) from stats_tbl_part;
select min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) from stats_tbl_part;
+explain
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part;
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part;
+
explain select count(ts) from stats_tbl_part;
drop table stats_tbl;
http://git-wip-us.apache.org/repos/asf/hive/blob/cdaf3567/ql/src/test/results/clientpositive/metadata_only_queries.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/metadata_only_queries.q.out
index 2dcd437..65a4dfa 100644
--- a/ql/src/test/results/clientpositive/metadata_only_queries.q.out
+++ b/ql/src/test/results/clientpositive/metadata_only_queries.q.out
@@ -276,6 +276,114 @@ STAGE PLANS:
Processor Tree:
ListSink
+PREHOOK: query: explain
+select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: stats_tbl
+ Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint)
+ outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: double), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7.0 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: stats_tbl_part
+ Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint)
+ outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: double), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7.0 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
PREHOOK: query: analyze table stats_tbl compute statistics for columns t,si,i,b,f,d,bo,s,bin
PREHOOK: type: QUERY
PREHOOK: Input: default@stats_tbl
@@ -364,6 +472,31 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@stats_tbl
#### A masked pattern was here ####
65536 65791 4294967296 4294967551 0.01 99.98 0.01 50.0
+PREHOOK: query: explain
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@stats_tbl
+#### A masked pattern was here ####
+POSTHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@stats_tbl
+#### A masked pattern was here ####
+65536 1 65791 4294967296 4294967551 0.01 99.98 7.0 0.01 50.0
PREHOOK: query: explain
select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si) from stats_tbl_part
PREHOOK: type: QUERY
@@ -414,6 +547,31 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@stats_tbl_part
#### A masked pattern was here ####
65536 65791 4294967296 4294967551 0.01 99.98 0.01 50.0
+PREHOOK: query: explain
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@stats_tbl_part
+#### A masked pattern was here ####
+POSTHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@stats_tbl_part
+#### A masked pattern was here ####
+65536 1 65791 4294967296 4294967551 0.01 99.98 7.0 0.01 50.0
PREHOOK: query: explain select count(ts) from stats_tbl_part
PREHOOK: type: QUERY
POSTHOOK: query: explain select count(ts) from stats_tbl_part
http://git-wip-us.apache.org/repos/asf/hive/blob/cdaf3567/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out
index b2221fc..0d85f4e 100644
--- a/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out
+++ b/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out
@@ -288,6 +288,126 @@ STAGE PLANS:
Processor Tree:
ListSink
+PREHOOK: query: explain
+select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: stats_tbl
+ Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint)
+ outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: double), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7.0 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: stats_tbl_part
+ Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint)
+ outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: double), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7.0 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
PREHOOK: query: analyze table stats_tbl compute statistics for columns t,si,i,b,f,d,bo,s,bin
PREHOOK: type: QUERY
PREHOOK: Input: default@stats_tbl
@@ -376,6 +496,31 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@stats_tbl
#### A masked pattern was here ####
65536 65791 4294967296 4294967551 0.01 99.98 0.01 50.0
+PREHOOK: query: explain
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@stats_tbl
+#### A masked pattern was here ####
+POSTHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@stats_tbl
+#### A masked pattern was here ####
+65536 1 65791 4294967296 4294967551 0.01 99.98 7.0 0.01 50.0
PREHOOK: query: explain
select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si) from stats_tbl_part
PREHOOK: type: QUERY
@@ -426,6 +571,31 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@stats_tbl_part
#### A masked pattern was here ####
65536 65791 4294967296 4294967551 0.01 99.98 0.01 50.0
+PREHOOK: query: explain
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@stats_tbl_part
+#### A masked pattern was here ####
+POSTHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@stats_tbl_part
+#### A masked pattern was here ####
+65536 1 65791 4294967296 4294967551 0.01 99.98 7.0 0.01 50.0
PREHOOK: query: explain select count(ts) from stats_tbl_part
PREHOOK: type: QUERY
POSTHOOK: query: explain select count(ts) from stats_tbl_part
http://git-wip-us.apache.org/repos/asf/hive/blob/cdaf3567/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out
index f43440e..ab86ab0 100644
--- a/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out
+++ b/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out
@@ -288,6 +288,126 @@ STAGE PLANS:
Processor Tree:
ListSink
+PREHOOK: query: explain
+select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: stats_tbl
+ Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint)
+ outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: double), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7.0 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: stats_tbl_part
+ Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint)
+ outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: double), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7.0 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
PREHOOK: query: analyze table stats_tbl compute statistics for columns t,si,i,b,f,d,bo,s,bin
PREHOOK: type: QUERY
PREHOOK: Input: default@stats_tbl
@@ -376,6 +496,31 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@stats_tbl
#### A masked pattern was here ####
65536 65791 4294967296 4294967551 0.01 99.98 0.01 50.0
+PREHOOK: query: explain
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@stats_tbl
+#### A masked pattern was here ####
+POSTHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@stats_tbl
+#### A masked pattern was here ####
+65536 1 65791 4294967296 4294967551 0.01 99.98 7.0 0.01 50.0
PREHOOK: query: explain
select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si) from stats_tbl_part
PREHOOK: type: QUERY
@@ -426,6 +571,31 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@stats_tbl_part
#### A masked pattern was here ####
65536 65791 4294967296 4294967551 0.01 99.98 0.01 50.0
+PREHOOK: query: explain
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@stats_tbl_part
+#### A masked pattern was here ####
+POSTHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@stats_tbl_part
+#### A masked pattern was here ####
+65536 1 65791 4294967296 4294967551 0.01 99.98 7.0 0.01 50.0
PREHOOK: query: explain select count(ts) from stats_tbl_part
PREHOOK: type: QUERY
POSTHOOK: query: explain select count(ts) from stats_tbl_part
[08/14] hive git commit: HIVE-11984: Add HS2 open operation metrics (Jimmy, reviewed by Szehon)
Posted by se...@apache.org.
HIVE-11984: Add HS2 open operation metrics (Jimmy, reviewed by Szehon)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1cb30733
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1cb30733
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1cb30733
Branch: refs/heads/llap
Commit: 1cb307336c32104f00351a7aa1282c3fdc7f12d0
Parents: cdaf356
Author: Jimmy Xiang <jx...@cloudera.com>
Authored: Mon Sep 28 15:24:44 2015 -0700
Committer: Jimmy Xiang <jx...@cloudera.com>
Committed: Wed Sep 30 07:49:02 2015 -0700
----------------------------------------------------------------------
.../hive/common/metrics/common/MetricsConstant.java | 1 +
.../apache/hive/service/cli/operation/Operation.java | 13 ++++++++++++-
.../hive/service/cli/operation/OperationManager.java | 11 +++++++++++
3 files changed, 24 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/1cb30733/common/src/java/org/apache/hadoop/hive/common/metrics/common/MetricsConstant.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/common/metrics/common/MetricsConstant.java b/common/src/java/org/apache/hadoop/hive/common/metrics/common/MetricsConstant.java
index d1ebe12..13c3cf9 100644
--- a/common/src/java/org/apache/hadoop/hive/common/metrics/common/MetricsConstant.java
+++ b/common/src/java/org/apache/hadoop/hive/common/metrics/common/MetricsConstant.java
@@ -27,6 +27,7 @@ public class MetricsConstant {
public static String JVM_EXTRA_SLEEP = "jvm.pause.extraSleepTime";
public static String OPEN_CONNECTIONS = "open_connections";
+ public static String OPEN_OPERATIONS = "open_operations";
public static String JDO_ACTIVE_TRANSACTIONS = "active_jdo_transactions";
public static String JDO_ROLLBACK_TRANSACTIONS = "rollbacked_jdo_transactions";
http://git-wip-us.apache.org/repos/asf/hive/blob/1cb30733/service/src/java/org/apache/hive/service/cli/operation/Operation.java
----------------------------------------------------------------------
diff --git a/service/src/java/org/apache/hive/service/cli/operation/Operation.java b/service/src/java/org/apache/hive/service/cli/operation/Operation.java
index 19153b6..0ab38c9 100644
--- a/service/src/java/org/apache/hive/service/cli/operation/Operation.java
+++ b/service/src/java/org/apache/hive/service/cli/operation/Operation.java
@@ -25,6 +25,9 @@ import java.util.concurrent.TimeUnit;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.common.metrics.common.Metrics;
+import org.apache.hadoop.hive.common.metrics.common.MetricsConstant;
+import org.apache.hadoop.hive.common.metrics.common.MetricsFactory;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
import org.apache.hadoop.hive.ql.session.OperationLog;
@@ -251,9 +254,17 @@ public abstract class Operation {
*/
protected abstract void runInternal() throws HiveSQLException;
- public void run() throws HiveSQLException {
+ public final void run() throws HiveSQLException {
beforeRun();
try {
+ Metrics metrics = MetricsFactory.getInstance();
+ if (metrics != null) {
+ try {
+ metrics.incrementCounter(MetricsConstant.OPEN_OPERATIONS);
+ } catch (Exception e) {
+ LOG.warn("Error Reporting open operation to Metrics system", e);
+ }
+ }
runInternal();
} finally {
afterRun();
http://git-wip-us.apache.org/repos/asf/hive/blob/1cb30733/service/src/java/org/apache/hive/service/cli/operation/OperationManager.java
----------------------------------------------------------------------
diff --git a/service/src/java/org/apache/hive/service/cli/operation/OperationManager.java b/service/src/java/org/apache/hive/service/cli/operation/OperationManager.java
index 304a525..e29b4b6 100644
--- a/service/src/java/org/apache/hive/service/cli/operation/OperationManager.java
+++ b/service/src/java/org/apache/hive/service/cli/operation/OperationManager.java
@@ -26,6 +26,9 @@ import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.common.metrics.common.Metrics;
+import org.apache.hadoop.hive.common.metrics.common.MetricsConstant;
+import org.apache.hadoop.hive.common.metrics.common.MetricsFactory;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
@@ -209,6 +212,14 @@ public class OperationManager extends AbstractService {
if (operation == null) {
throw new HiveSQLException("Operation does not exist!");
}
+ Metrics metrics = MetricsFactory.getInstance();
+ if (metrics != null) {
+ try {
+ metrics.decrementCounter(MetricsConstant.OPEN_OPERATIONS);
+ } catch (Exception e) {
+ LOG.warn("Error Reporting close operation to Metrics system", e);
+ }
+ }
operation.close();
}
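
Taken together, the two hunks bracket an operation's lifetime: run() increments OPEN_OPERATIONS before runInternal(), and closeOperation() decrements it, so the counter reads as a live gauge of in-flight operations. Below is a minimal self-contained sketch of that pattern, with a plain AtomicLong standing in for Hive's Metrics facade; the committed code additionally wraps each metrics call in try/catch so that a metrics failure can never fail the operation itself.

    import java.util.concurrent.atomic.AtomicLong;

    public class OpenOperationsGauge {
      private static final AtomicLong OPEN_OPERATIONS = new AtomicLong();

      // Mirrors Operation.run(): count the operation in before running it.
      static void run(Runnable operation) {
        OPEN_OPERATIONS.incrementAndGet();
        operation.run();
      }

      // Mirrors OperationManager.closeOperation(): count it back out on close.
      // Note the asymmetry: a failed run stays counted until it is closed.
      static void close() {
        OPEN_OPERATIONS.decrementAndGet();
      }

      public static void main(String[] args) {
        run(() -> System.out.println("open ops while running: " + OPEN_OPERATIONS.get())); // 1
        close();
        System.out.println("open ops after close: " + OPEN_OPERATIONS.get()); // 0
      }
    }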
[11/14] hive git commit: HIVE-11984: Add HS2 open operation metrics (addendum) (Jimmy, reviewed by Szehon)
Posted by se...@apache.org.
HIVE-11984: Add HS2 open operation metrics (addendum) (Jimmy, reviewed by Szehon)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/265e42c5
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/265e42c5
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/265e42c5
Branch: refs/heads/llap
Commit: 265e42c5f45f7c160861132bbfa6ab78a85830e9
Parents: e9b4d7e
Author: Jimmy Xiang <jx...@cloudera.com>
Authored: Wed Sep 30 10:39:08 2015 -0700
Committer: Jimmy Xiang <jx...@cloudera.com>
Committed: Wed Sep 30 10:40:46 2015 -0700
----------------------------------------------------------------------
.../src/java/org/apache/hive/service/cli/operation/Operation.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/265e42c5/service/src/java/org/apache/hive/service/cli/operation/Operation.java
----------------------------------------------------------------------
diff --git a/service/src/java/org/apache/hive/service/cli/operation/Operation.java b/service/src/java/org/apache/hive/service/cli/operation/Operation.java
index 0ab38c9..515299c 100644
--- a/service/src/java/org/apache/hive/service/cli/operation/Operation.java
+++ b/service/src/java/org/apache/hive/service/cli/operation/Operation.java
@@ -254,7 +254,7 @@ public abstract class Operation {
*/
protected abstract void runInternal() throws HiveSQLException;
- public final void run() throws HiveSQLException {
+ public void run() throws HiveSQLException {
beforeRun();
try {
Metrics metrics = MetricsFactory.getInstance();
[02/14] hive git commit: HIVE-11945: ORC with non-local reads may not be reusing connection to DN (Rajesh Balamohan reviewed by Sergey Shelukhin, Prasanth Jayachandran)
Posted by se...@apache.org.
HIVE-11945: ORC with non-local reads may not be reusing connection to DN (Rajesh Balamohan reviewed by Sergey Shelukhin, Prasanth Jayachandran)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a4c43f03
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a4c43f03
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a4c43f03
Branch: refs/heads/llap
Commit: a4c43f0335b33a75d2e9f3dc53b3cd33f8f115cf
Parents: 3921458
Author: Prasanth Jayachandran <j....@gmail.com>
Authored: Mon Sep 28 20:10:50 2015 -0500
Committer: Prasanth Jayachandran <j....@gmail.com>
Committed: Mon Sep 28 20:10:50 2015 -0500
----------------------------------------------------------------------
.../apache/hadoop/hive/ql/io/orc/MetadataReader.java | 6 ++----
.../org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java | 12 +++++-------
.../apache/hadoop/hive/ql/io/orc/RecordReaderUtils.java | 5 +++--
3 files changed, 10 insertions(+), 13 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/a4c43f03/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReader.java
index cdc0372..43d2933 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReader.java
@@ -82,8 +82,7 @@ public class MetadataReader {
}
if ((included == null || included[col]) && indexes[col] == null) {
byte[] buffer = new byte[len];
- file.seek(offset);
- file.readFully(buffer);
+ file.readFully(offset, buffer, 0, buffer.length);
ByteBuffer[] bb = new ByteBuffer[] {ByteBuffer.wrap(buffer)};
indexes[col] = OrcProto.RowIndex.parseFrom(InStream.create("index",
bb, new long[]{0}, stream.getLength(), codec, bufferSize));
@@ -108,8 +107,7 @@ public class MetadataReader {
// read the footer
ByteBuffer tailBuf = ByteBuffer.allocate(tailLength);
- file.seek(offset);
- file.readFully(tailBuf.array(), tailBuf.arrayOffset(), tailLength);
+ file.readFully(offset, tailBuf.array(), tailBuf.arrayOffset(), tailLength);
return OrcProto.StripeFooter.parseFrom(InStream.create("footer",
Lists.<DiskRange>newArrayList(new BufferChunk(tailBuf, 0)),
tailLength, codec, bufferSize));
http://git-wip-us.apache.org/repos/asf/hive/blob/a4c43f03/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
index ab539c4..23b3b55 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
@@ -244,9 +244,8 @@ public class ReaderImpl implements Reader {
if (!Text.decode(array, offset, len).equals(OrcFile.MAGIC)) {
// If it isn't there, this may be the 0.11.0 version of ORC.
// Read the first 3 bytes of the file to check for the header
- in.seek(0);
byte[] header = new byte[len];
- in.readFully(header, 0, len);
+ in.readFully(0, header, 0, len);
// if it isn't there, this isn't an ORC file
if (!Text.decode(header, 0 , len).equals(OrcFile.MAGIC)) {
throw new FileFormatException("Malformed ORC file " + path +
@@ -472,10 +471,10 @@ public class ReaderImpl implements Reader {
//read last bytes into buffer to get PostScript
int readSize = (int) Math.min(size, DIRECTORY_SIZE_GUESS);
- file.seek(size - readSize);
ByteBuffer buffer = ByteBuffer.allocate(readSize);
assert buffer.position() == 0;
- file.readFully(buffer.array(), buffer.arrayOffset(), readSize);
+ file.readFully((size - readSize),
+ buffer.array(), buffer.arrayOffset(), readSize);
buffer.position(0);
//read the PostScript
@@ -495,10 +494,9 @@ public class ReaderImpl implements Reader {
int extra = Math.max(0, psLen + 1 + footerSize + metadataSize - readSize);
if (extra > 0) {
//more bytes need to be read, seek back to the right place and read extra bytes
- file.seek(size - readSize - extra);
ByteBuffer extraBuf = ByteBuffer.allocate(extra + readSize);
- file.readFully(extraBuf.array(),
- extraBuf.arrayOffset() + extraBuf.position(), extra);
+ file.readFully((size - readSize - extra), extraBuf.array(),
+ extraBuf.arrayOffset() + extraBuf.position(), extra);
extraBuf.position(extra);
//append with already read bytes
extraBuf.put(buffer);
http://git-wip-us.apache.org/repos/asf/hive/blob/a4c43f03/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderUtils.java
index ded3979..9c9a1c0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderUtils.java
@@ -246,8 +246,8 @@ public class RecordReaderUtils {
}
int len = (int) (range.getEnd() - range.getOffset());
long off = range.getOffset();
- file.seek(base + off);
if (zcr != null) {
+ file.seek(base + off);
boolean hasReplaced = false;
while (len > 0) {
ByteBuffer partial = zcr.readBuffer(len, false);
@@ -264,12 +264,13 @@ public class RecordReaderUtils {
off += read;
}
} else if (doForceDirect) {
+ file.seek(base + off);
ByteBuffer directBuf = ByteBuffer.allocateDirect(len);
readDirect(file, len, directBuf);
range = range.replaceSelfWith(new BufferChunk(directBuf, range.getOffset()));
} else {
byte[] buffer = new byte[len];
- file.readFully(buffer, 0, buffer.length);
+ file.readFully((base + off), buffer, 0, buffer.length);
range = range.replaceSelfWith(new BufferChunk(ByteBuffer.wrap(buffer), range.getOffset()));
}
range = range.next;
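
The pattern of the change is the same in all three files: a stateful seek() followed by a relative readFully() is replaced with the positional readFully(position, buffer, offset, length) overload. On HDFS the positional "pread" path does not disturb the stream's state, which is what lets the client keep reusing its DataNode connection on non-local reads. A hedged sketch of the two call shapes on FSDataInputStream (the path and lengths here are illustrative):

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class PositionalReadSketch {
      static byte[] readAt(FileSystem fs, Path path, long offset, int len) throws IOException {
        byte[] buffer = new byte[len];
        try (FSDataInputStream in = fs.open(path)) {
          // Before HIVE-11945: stateful read, which moves the stream position:
          //   in.seek(offset);
          //   in.readFully(buffer, 0, buffer.length);
          // After: positional read (pread) leaves the stream state untouched.
          in.readFully(offset, buffer, 0, buffer.length);
        }
        return buffer;
      }

      public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(new Configuration());
        byte[] header = readAt(fs, new Path(args[0]), 0L, 3);
        System.out.println(new String(header)); // "ORC" for an ORC file
      }
    }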
[13/14] hive git commit: HIVE-11823 : create a self-contained translation for SARG to be used by metastore (Sergey Shelukhin, reviewed by Prasanth Jayachandran)
Posted by se...@apache.org.
HIVE-11823 : create a self-contained translation for SARG to be used by metastore (Sergey Shelukhin, reviewed by Prasanth Jayachandran)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/064e37c4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/064e37c4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/064e37c4
Branch: refs/heads/llap
Commit: 064e37c460d1c464431f740e480a6f08353d69e6
Parents: 8c8cc19
Author: Sergey Shelukhin <se...@apache.org>
Authored: Wed Sep 30 11:35:36 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Wed Sep 30 11:35:47 2015 -0700
----------------------------------------------------------------------
.../hadoop/hive/ql/io/orc/OrcInputFormat.java | 49 ++++++++++++++++++++
.../hadoop/hive/ql/io/orc/RecordReaderImpl.java | 35 ++++++++++++++
2 files changed, 84 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/064e37c4/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 52e1b06..c45b6e6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -61,6 +61,7 @@ import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg;
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -254,6 +255,40 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
}
}
+ /**
+ * Modifies the SARG, replacing column names with column indexes in target table schema. This
+ * basically does the same thing as all the shennannigans with included columns, except for the
+ * last step where ORC gets direct subtypes of root column and uses the ordered match to map
+ * table columns to file columns. The numbers put into predicate leaf should allow to go into
+ * said subtypes directly by index to get the proper index in the file.
+ * This won't work with schema evolution, although it's probably much easier to reason about
+ * if schema evolution was to be supported, because this is a clear boundary between table
+ * schema columns and all things ORC. None of the ORC stuff is used here and none of the
+ * table schema stuff is used after that - ORC doesn't need a bunch of extra crap to apply
+ * the SARG thus modified.
+ */
+ public static void translateSargToTableColIndexes(
+ SearchArgument sarg, Configuration conf, int rootColumn) {
+ String nameStr = getNeededColumnNamesString(conf), idStr = getSargColumnIDsString(conf);
+ String[] knownNames = nameStr.split(",");
+ String[] idStrs = (idStr == null) ? null : idStr.split(",");
+ assert idStrs == null || knownNames.length == idStrs.length;
+ HashMap<String, Integer> nameIdMap = new HashMap<>();
+ for (int i = 0; i < knownNames.length; ++i) {
+ nameIdMap.put(knownNames[i], idStrs != null ? Integer.parseInt(idStrs[i]) : i);
+ }
+ List<PredicateLeaf> leaves = sarg.getLeaves();
+ for (int i = 0; i < leaves.size(); ++i) {
+ PredicateLeaf pl = leaves.get(i);
+ Integer colId = nameIdMap.get(pl.getColumnName());
+ String newColName = RecordReaderImpl.encodeTranslatedSargColumn(rootColumn, colId);
+ SearchArgumentFactory.setPredicateLeafColumn(pl, newColName);
+ }
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("SARG translated into " + sarg);
+ }
+ }
+
public static boolean[] genIncludedColumns(
List<OrcProto.Type> types, List<Integer> included, boolean isOriginal) {
int rootColumn = getRootColumn(isOriginal);
@@ -1342,6 +1377,20 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
directory);
}
+ public static boolean[] pickStripesViaTranslatedSarg(SearchArgument sarg,
+ WriterVersion writerVersion, List<OrcProto.Type> types,
+ List<StripeStatistics> stripeStats, int stripeCount) {
+ LOG.info("Translated ORC pushdown predicate: " + sarg);
+ assert sarg != null;
+ if (stripeStats == null || writerVersion == OrcFile.WriterVersion.ORIGINAL) {
+ return null; // only do split pruning if HIVE-8732 has been fixed in the writer
+ }
+ // eliminate stripes that doesn't satisfy the predicate condition
+ List<PredicateLeaf> sargLeaves = sarg.getLeaves();
+ int[] filterColumns = RecordReaderImpl.mapTranslatedSargColumns(types, sargLeaves);
+ return pickStripesInternal(sarg, filterColumns, stripeStats, stripeCount, null);
+ }
+
private static boolean[] pickStripes(SearchArgument sarg, String[] sargColNames,
WriterVersion writerVersion, boolean isOriginal, List<StripeStatistics> stripeStats,
int stripeCount, Path filePath) {
http://git-wip-us.apache.org/repos/asf/hive/blob/064e37c4/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
index ba304ba..77d2cc6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
@@ -1221,4 +1221,39 @@ class RecordReaderImpl implements RecordReader {
// if we aren't to the right row yet, advance in the stripe.
advanceToNextRow(reader, rowNumber, true);
}
+
+ private static final String TRANSLATED_SARG_SEPARATOR = "_";
+ public static String encodeTranslatedSargColumn(int rootColumn, Integer indexInSourceTable) {
+ return rootColumn + TRANSLATED_SARG_SEPARATOR
+ + ((indexInSourceTable == null) ? -1 : indexInSourceTable);
+ }
+
+ public static int[] mapTranslatedSargColumns(
+ List<OrcProto.Type> types, List<PredicateLeaf> sargLeaves) {
+ int[] result = new int[sargLeaves.size()];
+ OrcProto.Type lastRoot = null; // Root will be the same for everyone as of now.
+ String lastRootStr = null;
+ for (int i = 0; i < result.length; ++i) {
+ String[] rootAndIndex = sargLeaves.get(i).getColumnName().split(TRANSLATED_SARG_SEPARATOR);
+ assert rootAndIndex.length == 2;
+ String rootStr = rootAndIndex[0], indexStr = rootAndIndex[1];
+ int index = Integer.parseInt(indexStr);
+ // First, check if the column even maps to anything.
+ if (index == -1) {
+ result[i] = -1;
+ continue;
+ }
+ assert index >= 0;
+ // Then, find the root type if needed.
+ if (!rootStr.equals(lastRootStr)) {
+ lastRoot = types.get(Integer.parseInt(rootStr));
+ lastRootStr = rootStr;
+ }
+ // Subtypes of the root types correspond, in order, to the columns in the table schema
+ // (disregarding schema evolution that doesn't presently work). Get the index for the
+ // corresponding subtype.
+ result[i] = lastRoot.getSubtypes(index);
+ }
+ return result;
+ }
}
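
The two new methods define a small string protocol between the translation step and the reader: encodeTranslatedSargColumn() rewrites each predicate leaf's column name as "<rootColumn>_<tableColumnIndex>" (with -1 for a column that maps to nothing), and mapTranslatedSargColumns() splits that back apart and resolves the table index through the root type's subtypes. A hedged round-trip sketch of the string format alone, without the ORC type list:

    public class TranslatedSargColumnDemo {
      private static final String SEP = "_"; // same separator as RecordReaderImpl

      // Same encoding as RecordReaderImpl.encodeTranslatedSargColumn().
      static String encode(int rootColumn, Integer indexInSourceTable) {
        return rootColumn + SEP + ((indexInSourceTable == null) ? -1 : indexInSourceTable);
      }

      public static void main(String[] args) {
        String encoded = encode(0, 5);     // "0_5"
        String missing = encode(0, null);  // "0_-1": column maps to nothing
        String[] rootAndIndex = encoded.split(SEP);
        int root = Integer.parseInt(rootAndIndex[0]);  // 0: the root struct type
        int index = Integer.parseInt(rootAndIndex[1]); // 5: index into root's subtypes
        System.out.println(root + " -> subtype index " + index + "; unmapped: " + missing);
      }
    }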
[10/14] hive git commit: HIVE-11989: vector_groupby_reduce.q is failing on CLI and MiniTez drivers on master (Pengcheng Xiong, reviewed by Matt McCline)
Posted by se...@apache.org.
HIVE-11989: vector_groupby_reduce.q is failing on CLI and MiniTez drivers on master (Pengcheng Xiong, reviewed by Matt McCline)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e9b4d7e4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e9b4d7e4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e9b4d7e4
Branch: refs/heads/llap
Commit: e9b4d7e4e89cd68eeda58b2e2e6014ff24d0a690
Parents: 1f08789
Author: Pengcheng Xiong <px...@apache.org>
Authored: Wed Sep 30 10:06:36 2015 -0700
Committer: Pengcheng Xiong <px...@apache.org>
Committed: Wed Sep 30 10:06:36 2015 -0700
----------------------------------------------------------------------
.../tez/vector_groupby_reduce.q.out | 70 +++++++++++++-------
.../clientpositive/vector_groupby_reduce.q.out | 69 +++++++++++++------
2 files changed, 94 insertions(+), 45 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/e9b4d7e4/ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out b/ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out
index fe7e829..1635462 100644
--- a/ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out
@@ -399,7 +399,7 @@ STAGE PLANS:
sort order: +
Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
- Reducer 3
+ Reducer 3
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: int)
@@ -562,31 +562,32 @@ STAGE PLANS:
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 1
Map Operator Tree:
TableScan
alias: store_sales
Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: ss_ticket_number (type: int), ss_item_sk (type: int), ss_quantity (type: int)
- outputColumnNames: _col0, _col1, _col2
+ expressions: ss_item_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int)
+ outputColumnNames: ss_item_sk, ss_ticket_number, ss_quantity
Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: min(_col2)
- keys: _col0 (type: int), _col1 (type: int)
+ aggregations: min(ss_quantity)
+ keys: ss_item_sk (type: int), ss_ticket_number (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
- Map-reduce partition columns: _col0 (type: int)
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
value expressions: _col2 (type: int)
Execution mode: vectorized
- Reducer 2
+ Reducer 2
Reduce Operator Tree:
Group By Operator
aggregations: min(VALUE._col0)
@@ -595,18 +596,33 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: sum(_col1), sum(_col2)
- keys: _col0 (type: int)
- mode: complete
+ aggregations: sum(_col0), sum(_col2)
+ keys: _col1 (type: int)
+ mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
- Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint), _col2 (type: bigint)
Execution mode: vectorized
- Reducer 3
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
+ Execution mode: vectorized
+ Reducer 4
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint)
@@ -771,18 +787,18 @@ STAGE PLANS:
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 1
Map Operator Tree:
TableScan
alias: store_sales
Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: ss_ticket_number (type: int), ss_item_sk (type: int), ss_quantity (type: int)
- outputColumnNames: _col0, _col1, _col2
+ expressions: ss_item_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int)
+ outputColumnNames: ss_item_sk, ss_ticket_number, ss_quantity
Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: min(_col2)
- keys: _col0 (type: int), _col1 (type: int)
+ aggregations: min(ss_quantity)
+ keys: ss_item_sk (type: int), ss_ticket_number (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
@@ -793,7 +809,7 @@ STAGE PLANS:
Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
value expressions: _col2 (type: int)
Execution mode: vectorized
- Reducer 2
+ Reducer 2
Reduce Operator Tree:
Group By Operator
aggregations: min(VALUE._col0)
@@ -807,13 +823,17 @@ STAGE PLANS:
mode: complete
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: int)
- sort order: ++
+ Select Operator
+ expressions: _col1 (type: int), _col0 (type: int), _col2 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: bigint)
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: bigint)
Execution mode: vectorized
- Reducer 3
+ Reducer 3
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: bigint)
http://git-wip-us.apache.org/repos/asf/hive/blob/e9b4d7e4/ql/src/test/results/clientpositive/vector_groupby_reduce.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_groupby_reduce.q.out b/ql/src/test/results/clientpositive/vector_groupby_reduce.q.out
index fc1997c..3e7077e 100644
--- a/ql/src/test/results/clientpositive/vector_groupby_reduce.q.out
+++ b/ql/src/test/results/clientpositive/vector_groupby_reduce.q.out
@@ -550,7 +550,8 @@ POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-2 depends on stages: Stage-1
- Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-3
STAGE PLANS:
Stage: Stage-1
@@ -560,19 +561,19 @@ STAGE PLANS:
alias: store_sales
Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: ss_ticket_number (type: int), ss_item_sk (type: int), ss_quantity (type: int)
- outputColumnNames: _col0, _col1, _col2
+ expressions: ss_item_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int)
+ outputColumnNames: ss_item_sk, ss_ticket_number, ss_quantity
Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: min(_col2)
- keys: _col0 (type: int), _col1 (type: int)
+ aggregations: min(ss_quantity)
+ keys: ss_item_sk (type: int), ss_ticket_number (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
- Map-reduce partition columns: _col0 (type: int)
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
value expressions: _col2 (type: int)
Execution mode: vectorized
@@ -584,11 +585,11 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: sum(_col1), sum(_col2)
- keys: _col0 (type: int)
- mode: complete
+ aggregations: sum(_col0), sum(_col2)
+ keys: _col1 (type: int)
+ mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -603,6 +604,30 @@ STAGE PLANS:
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint), _col2 (type: bigint)
Reduce Operator Tree:
@@ -769,12 +794,12 @@ STAGE PLANS:
alias: store_sales
Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: ss_ticket_number (type: int), ss_item_sk (type: int), ss_quantity (type: int)
- outputColumnNames: _col0, _col1, _col2
+ expressions: ss_item_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int)
+ outputColumnNames: ss_item_sk, ss_ticket_number, ss_quantity
Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: min(_col2)
- keys: _col0 (type: int), _col1 (type: int)
+ aggregations: min(ss_quantity)
+ keys: ss_item_sk (type: int), ss_ticket_number (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
@@ -798,12 +823,16 @@ STAGE PLANS:
mode: complete
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Select Operator
+ expressions: _col1 (type: int), _col0 (type: int), _col2 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-2
Map Reduce
[06/14] hive git commit: HIVE-11819 : HiveServer2 catches OOMs on request threads (Sergey Shelukhin, reviewed by Vaibhav Gumashta)
Posted by se...@apache.org.
HIVE-11819 : HiveServer2 catches OOMs on request threads (Sergey Shelukhin, reviewed by Vaibhav Gumashta)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6a8d7e4c
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6a8d7e4c
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6a8d7e4c
Branch: refs/heads/llap
Commit: 6a8d7e4cd55e5317aeb5a71005e5c98e09b22cc2
Parents: e1ce9a2
Author: Sergey Shelukhin <se...@apache.org>
Authored: Tue Sep 29 15:42:23 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Tue Sep 29 15:42:23 2015 -0700
----------------------------------------------------------------------
.../service/cli/session/HiveSessionProxy.java | 6 +++
.../thrift/EmbeddedThriftBinaryCLIService.java | 2 +-
.../thrift/ThreadPoolExecutorWithOomHook.java | 55 ++++++++++++++++++++
.../cli/thrift/ThriftBinaryCLIService.java | 12 +++--
.../service/cli/thrift/ThriftCLIService.java | 3 ++
.../cli/thrift/ThriftHttpCLIService.java | 10 ++--
.../apache/hive/service/server/HiveServer2.java | 12 +++--
.../hive/service/auth/TestPlainSaslHelper.java | 2 +-
.../session/TestPluggableHiveSessionImpl.java | 2 +-
.../cli/session/TestSessionGlobalInitFile.java | 2 +-
10 files changed, 90 insertions(+), 16 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/6a8d7e4c/service/src/java/org/apache/hive/service/cli/session/HiveSessionProxy.java
----------------------------------------------------------------------
diff --git a/service/src/java/org/apache/hive/service/cli/session/HiveSessionProxy.java b/service/src/java/org/apache/hive/service/cli/session/HiveSessionProxy.java
index 5b10521..433f14e 100644
--- a/service/src/java/org/apache/hive/service/cli/session/HiveSessionProxy.java
+++ b/service/src/java/org/apache/hive/service/cli/session/HiveSessionProxy.java
@@ -79,6 +79,12 @@ public class HiveSessionProxy implements InvocationHandler {
} catch (InvocationTargetException e) {
if (e.getCause() instanceof HiveSQLException) {
throw (HiveSQLException)e.getCause();
+ } else if (e.getCause() instanceof OutOfMemoryError) {
+ throw (OutOfMemoryError)e.getCause();
+ } else if (e.getCause() instanceof Error) {
+ // TODO: maybe we should throw this as-is too. ThriftCLIService currently catches Exception,
+ // so the combination determines what would kill the HS2 executor thread. For now,
+ // let's only allow OOM to propagate.
}
throw new RuntimeException(e.getCause());
} catch (IllegalArgumentException e) {
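
Reflective dispatch in a dynamic proxy wraps anything the target throws in InvocationTargetException; before this change an OutOfMemoryError raised inside a session call therefore reached ThriftCLIService repackaged as a RuntimeException and was caught there like any other Exception. The hunk unwraps OOM so it propagates raw. A hedged, self-contained sketch of that unwrap pattern (the Runnable target is illustrative; the real proxy fronts HiveSession):

    import java.lang.reflect.InvocationTargetException;
    import java.lang.reflect.Proxy;

    public class UnwrapOomProxy {
      static Runnable wrap(Runnable target) {
        return (Runnable) Proxy.newProxyInstance(
            Runnable.class.getClassLoader(), new Class<?>[] {Runnable.class},
            (proxy, method, args) -> {
              try {
                return method.invoke(target, args);
              } catch (InvocationTargetException e) {
                // Reflection wraps whatever the target threw; rethrow OOM
                // as-is so it can reach an OOM hook instead of arriving
                // repackaged as a RuntimeException.
                if (e.getCause() instanceof OutOfMemoryError) {
                  throw (OutOfMemoryError) e.getCause();
                }
                throw new RuntimeException(e.getCause());
              }
            });
      }

      public static void main(String[] args) {
        Runnable oomTask = wrap(() -> { throw new OutOfMemoryError("simulated"); });
        try {
          oomTask.run();
        } catch (OutOfMemoryError e) {
          System.out.println("OOM propagated unwrapped: " + e.getMessage());
        }
      }
    }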
http://git-wip-us.apache.org/repos/asf/hive/blob/6a8d7e4c/service/src/java/org/apache/hive/service/cli/thrift/EmbeddedThriftBinaryCLIService.java
----------------------------------------------------------------------
diff --git a/service/src/java/org/apache/hive/service/cli/thrift/EmbeddedThriftBinaryCLIService.java b/service/src/java/org/apache/hive/service/cli/thrift/EmbeddedThriftBinaryCLIService.java
index a57fc8f..e9a5830 100644
--- a/service/src/java/org/apache/hive/service/cli/thrift/EmbeddedThriftBinaryCLIService.java
+++ b/service/src/java/org/apache/hive/service/cli/thrift/EmbeddedThriftBinaryCLIService.java
@@ -30,7 +30,7 @@ import org.apache.hive.service.cli.ICLIService;
public class EmbeddedThriftBinaryCLIService extends ThriftBinaryCLIService {
public EmbeddedThriftBinaryCLIService() {
- super(new CLIService(null));
+ super(new CLIService(null), null);
isEmbedded = true;
HiveConf.setLoadHiveServer2Config(true);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/6a8d7e4c/service/src/java/org/apache/hive/service/cli/thrift/ThreadPoolExecutorWithOomHook.java
----------------------------------------------------------------------
diff --git a/service/src/java/org/apache/hive/service/cli/thrift/ThreadPoolExecutorWithOomHook.java b/service/src/java/org/apache/hive/service/cli/thrift/ThreadPoolExecutorWithOomHook.java
new file mode 100644
index 0000000..51731ad
--- /dev/null
+++ b/service/src/java/org/apache/hive/service/cli/thrift/ThreadPoolExecutorWithOomHook.java
@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.service.cli.thrift;
+
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.Future;
+import java.util.concurrent.ThreadFactory;
+import java.util.concurrent.ThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
+
+final class ThreadPoolExecutorWithOomHook extends ThreadPoolExecutor {
+ private final Runnable oomHook;
+
+ public ThreadPoolExecutorWithOomHook(int corePoolSize, int maximumPoolSize, long keepAliveTime,
+ TimeUnit unit, BlockingQueue<Runnable> workQueue, ThreadFactory threadFactory,
+ Runnable oomHook) {
+ super(corePoolSize, maximumPoolSize, keepAliveTime, unit, workQueue, threadFactory);
+ this.oomHook = oomHook;
+ }
+
+ @Override
+ protected void afterExecute(Runnable r, Throwable t) {
+ super.afterExecute(r, t);
+ if (t == null && r instanceof Future<?>) {
+ try {
+ Future<?> future = (Future<?>) r;
+ if (future.isDone()) {
+ future.get();
+ }
+ } catch (InterruptedException ie) {
+ Thread.currentThread().interrupt();
+ } catch (Throwable t2) {
+ t = t2;
+ }
+ }
+ if (t instanceof OutOfMemoryError) {
+ oomHook.run();
+ }
+ }
+}
\ No newline at end of file
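
Because the class is package-private, a usage sketch would have to live in org.apache.hive.service.cli.thrift; the hook is whatever Runnable the caller wires in (HiveServer2 passes one that calls stop()). A hedged demo with a logging hook and a simulated OutOfMemoryError follows. Tasks handed to execute(), as the Thrift servers do, surface their failure directly as afterExecute's Throwable argument; submit()-wrapped tasks are instead re-extracted via future.get() in the code above.

    package org.apache.hive.service.cli.thrift;

    import java.util.concurrent.Executors;
    import java.util.concurrent.SynchronousQueue;
    import java.util.concurrent.TimeUnit;

    public class OomHookDemo {
      public static void main(String[] args) throws InterruptedException {
        Runnable oomHook = () -> System.err.println("OOM hook fired: stopping server");
        ThreadPoolExecutorWithOomHook pool = new ThreadPoolExecutorWithOomHook(
            1, 1, 60L, TimeUnit.SECONDS, new SynchronousQueue<Runnable>(),
            Executors.defaultThreadFactory(), oomHook);
        // The error still kills this worker thread, but afterExecute runs
        // first and hands the OutOfMemoryError to the hook.
        pool.execute(() -> { throw new OutOfMemoryError("simulated"); });
        pool.shutdown();
        pool.awaitTermination(5, TimeUnit.SECONDS);
      }
    }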
http://git-wip-us.apache.org/repos/asf/hive/blob/6a8d7e4c/service/src/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java
----------------------------------------------------------------------
diff --git a/service/src/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java b/service/src/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java
index 6c9efba..54f9914 100644
--- a/service/src/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java
+++ b/service/src/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java
@@ -22,7 +22,6 @@ import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.SynchronousQueue;
-import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import org.apache.hadoop.hive.conf.HiveConf;
@@ -39,9 +38,11 @@ import org.apache.thrift.transport.TTransportFactory;
public class ThriftBinaryCLIService extends ThriftCLIService {
+ private final Runnable oomHook;
- public ThriftBinaryCLIService(CLIService cliService) {
+ public ThriftBinaryCLIService(CLIService cliService, Runnable oomHook) {
super(cliService, ThriftBinaryCLIService.class.getSimpleName());
+ this.oomHook = oomHook;
}
@Override
@@ -49,9 +50,10 @@ public class ThriftBinaryCLIService extends ThriftCLIService {
try {
// Server thread pool
String threadPoolName = "HiveServer2-Handler-Pool";
- ExecutorService executorService = new ThreadPoolExecutor(minWorkerThreads, maxWorkerThreads,
- workerKeepAliveTime, TimeUnit.SECONDS, new SynchronousQueue<Runnable>(),
- new ThreadFactoryWithGarbageCleanup(threadPoolName));
+ ExecutorService executorService = new ThreadPoolExecutorWithOomHook(minWorkerThreads,
+ maxWorkerThreads, workerKeepAliveTime, TimeUnit.SECONDS,
+ new SynchronousQueue<Runnable>(), new ThreadFactoryWithGarbageCleanup(threadPoolName),
+ oomHook);
// Thrift configs
hiveAuthFactory = new HiveAuthFactory(hiveConf);
http://git-wip-us.apache.org/repos/asf/hive/blob/6a8d7e4c/service/src/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java
----------------------------------------------------------------------
diff --git a/service/src/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java b/service/src/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java
index 67bc778..1c3e899 100644
--- a/service/src/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java
+++ b/service/src/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java
@@ -512,6 +512,9 @@ public abstract class ThriftCLIService extends AbstractService implements TCLISe
resp.setOperationHandle(operationHandle.toTOperationHandle());
resp.setStatus(OK_STATUS);
} catch (Exception e) {
+ // Note: it's rather important that this (and other methods) catch Exception, not Throwable;
+ // in combination with HiveSessionProxy.invoke code, perhaps unintentionally, it used
+ // to also catch all errors; and now it allows OOMs only to propagate.
LOG.warn("Error executing statement: ", e);
resp.setStatus(HiveSQLException.toTStatus(e));
}
http://git-wip-us.apache.org/repos/asf/hive/blob/6a8d7e4c/service/src/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java
----------------------------------------------------------------------
diff --git a/service/src/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java b/service/src/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java
index 3b57efa..046958e 100644
--- a/service/src/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java
+++ b/service/src/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java
@@ -46,9 +46,11 @@ import org.eclipse.jetty.util.thread.ExecutorThreadPool;
public class ThriftHttpCLIService extends ThriftCLIService {
+ private final Runnable oomHook;
- public ThriftHttpCLIService(CLIService cliService) {
+ public ThriftHttpCLIService(CLIService cliService, Runnable oomHook) {
super(cliService, ThriftHttpCLIService.class.getSimpleName());
+ this.oomHook = oomHook;
}
/**
@@ -65,9 +67,9 @@ public class ThriftHttpCLIService extends ThriftCLIService {
// Server thread pool
// Start with minWorkerThreads, expand till maxWorkerThreads and reject subsequent requests
String threadPoolName = "HiveServer2-HttpHandler-Pool";
- ExecutorService executorService = new ThreadPoolExecutor(minWorkerThreads, maxWorkerThreads,
- workerKeepAliveTime, TimeUnit.SECONDS, new SynchronousQueue<Runnable>(),
- new ThreadFactoryWithGarbageCleanup(threadPoolName));
+ ExecutorService executorService = new ThreadPoolExecutorWithOomHook(minWorkerThreads,
+ maxWorkerThreads, workerKeepAliveTime, TimeUnit.SECONDS, new SynchronousQueue<Runnable>(),
+ new ThreadFactoryWithGarbageCleanup(threadPoolName), oomHook);
ExecutorThreadPool threadPool = new ExecutorThreadPool(executorService);
httpServer.setThreadPool(threadPool);
http://git-wip-us.apache.org/repos/asf/hive/blob/6a8d7e4c/service/src/java/org/apache/hive/service/server/HiveServer2.java
----------------------------------------------------------------------
diff --git a/service/src/java/org/apache/hive/service/server/HiveServer2.java b/service/src/java/org/apache/hive/service/server/HiveServer2.java
index d7ba964..601c5db 100644
--- a/service/src/java/org/apache/hive/service/server/HiveServer2.java
+++ b/service/src/java/org/apache/hive/service/server/HiveServer2.java
@@ -97,10 +97,17 @@ public class HiveServer2 extends CompositeService {
public synchronized void init(HiveConf hiveConf) {
cliService = new CLIService(this);
addService(cliService);
+ final HiveServer2 hiveServer2 = this;
+ Runnable oomHook = new Runnable() {
+ @Override
+ public void run() {
+ hiveServer2.stop();
+ }
+ };
if (isHTTPTransportMode(hiveConf)) {
- thriftCLIService = new ThriftHttpCLIService(cliService);
+ thriftCLIService = new ThriftHttpCLIService(cliService, oomHook);
} else {
- thriftCLIService = new ThriftBinaryCLIService(cliService);
+ thriftCLIService = new ThriftBinaryCLIService(cliService, oomHook);
}
addService(thriftCLIService);
super.init(hiveConf);
@@ -111,7 +118,6 @@ public class HiveServer2 extends CompositeService {
throw new Error("Unable to intitialize HiveServer2", t);
}
// Add a shutdown hook for catching SIGTERM & SIGINT
- final HiveServer2 hiveServer2 = this;
Runtime.getRuntime().addShutdownHook(new Thread() {
@Override
public void run() {
http://git-wip-us.apache.org/repos/asf/hive/blob/6a8d7e4c/service/src/test/org/apache/hive/service/auth/TestPlainSaslHelper.java
----------------------------------------------------------------------
diff --git a/service/src/test/org/apache/hive/service/auth/TestPlainSaslHelper.java b/service/src/test/org/apache/hive/service/auth/TestPlainSaslHelper.java
index 03f3964..8ae0eeb 100644
--- a/service/src/test/org/apache/hive/service/auth/TestPlainSaslHelper.java
+++ b/service/src/test/org/apache/hive/service/auth/TestPlainSaslHelper.java
@@ -41,7 +41,7 @@ public class TestPlainSaslHelper extends TestCase {
CLIService cliService = new CLIService(null);
cliService.init(hconf);
- ThriftCLIService tcliService = new ThriftBinaryCLIService(cliService);
+ ThriftCLIService tcliService = new ThriftBinaryCLIService(cliService, null);
tcliService.init(hconf);
TProcessorFactory procFactory = PlainSaslHelper.getPlainProcessorFactory(tcliService);
assertEquals("doAs enabled processor for unsecure mode",
http://git-wip-us.apache.org/repos/asf/hive/blob/6a8d7e4c/service/src/test/org/apache/hive/service/cli/session/TestPluggableHiveSessionImpl.java
----------------------------------------------------------------------
diff --git a/service/src/test/org/apache/hive/service/cli/session/TestPluggableHiveSessionImpl.java b/service/src/test/org/apache/hive/service/cli/session/TestPluggableHiveSessionImpl.java
index 8c7546c..f4bcbc3 100644
--- a/service/src/test/org/apache/hive/service/cli/session/TestPluggableHiveSessionImpl.java
+++ b/service/src/test/org/apache/hive/service/cli/session/TestPluggableHiveSessionImpl.java
@@ -26,7 +26,7 @@ public class TestPluggableHiveSessionImpl extends TestCase {
hiveConf = new HiveConf();
hiveConf.setVar(HiveConf.ConfVars.HIVE_SESSION_IMPL_CLASSNAME, TestHiveSessionImpl.class.getName());
cliService = new CLIService(null);
- service = new ThriftBinaryCLIService(cliService);
+ service = new ThriftBinaryCLIService(cliService, null);
service.init(hiveConf);
client = new ThriftCLIServiceClient(service);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/6a8d7e4c/service/src/test/org/apache/hive/service/cli/session/TestSessionGlobalInitFile.java
----------------------------------------------------------------------
diff --git a/service/src/test/org/apache/hive/service/cli/session/TestSessionGlobalInitFile.java b/service/src/test/org/apache/hive/service/cli/session/TestSessionGlobalInitFile.java
index 37b698b..840a551 100644
--- a/service/src/test/org/apache/hive/service/cli/session/TestSessionGlobalInitFile.java
+++ b/service/src/test/org/apache/hive/service/cli/session/TestSessionGlobalInitFile.java
@@ -52,7 +52,7 @@ public class TestSessionGlobalInitFile extends TestCase {
*/
private class FakeEmbeddedThriftBinaryCLIService extends ThriftBinaryCLIService {
public FakeEmbeddedThriftBinaryCLIService(HiveConf hiveConf) {
- super(new CLIService(null));
+ super(new CLIService(null), null);
isEmbedded = true;
cliService.init(hiveConf);
cliService.start();
[03/14] hive git commit: HIVE-11724 : WebHcat get jobs to order jobs on time order with latest at top (Kiran Kumar Kolli, reviewed by Hari Subramaniyan)
Posted by se...@apache.org.
HIVE-11724 : WebHcat get jobs to order jobs on time order with latest at top (Kiran Kumar Kolli, reviewed by Hari Subramaniyan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a5ffa719
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a5ffa719
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a5ffa719
Branch: refs/heads/llap
Commit: a5ffa71908f5f15c5acaef476ad557d98583f2fa
Parents: a4c43f0
Author: Hari Subramaniyan <ha...@apache.org>
Authored: Tue Sep 29 11:56:51 2015 -0700
Committer: Hari Subramaniyan <ha...@apache.org>
Committed: Tue Sep 29 11:56:51 2015 -0700
----------------------------------------------------------------------
.../hive/hcatalog/templeton/AppConfig.java | 21 ++++++++++++++++++++
.../apache/hive/hcatalog/templeton/Server.java | 12 +++++++++--
2 files changed, 31 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/a5ffa719/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java
----------------------------------------------------------------------
diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java
index 062d5a0..403ff14 100644
--- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java
+++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java
@@ -89,6 +89,11 @@ public class AppConfig extends Configuration {
"webhcat-site.xml"
};
+ public enum JobsListOrder {
+ lexicographicalasc,
+ lexicographicaldesc,
+ }
+
public static final String PORT = "templeton.port";
public static final String EXEC_ENCODING_NAME = "templeton.exec.encoding";
public static final String EXEC_ENVS_NAME = "templeton.exec.envs";
@@ -105,6 +110,7 @@ public class AppConfig extends Configuration {
public static final String HIVE_PATH_NAME = "templeton.hive.path";
public static final String MAPPER_MEMORY_MB = "templeton.mapper.memory.mb";
public static final String MR_AM_MEMORY_MB = "templeton.mr.am.memory.mb";
+ public static final String TEMPLETON_JOBSLIST_ORDER = "templeton.jobs.listorder";
/**
* see webhcat-default.xml
@@ -281,6 +287,21 @@ public class AppConfig extends Configuration {
}
}
+ public JobsListOrder getListJobsOrder() {
+ String requestedOrder = get(TEMPLETON_JOBSLIST_ORDER);
+ if (requestedOrder != null) {
+ try {
+ return JobsListOrder.valueOf(requestedOrder.toLowerCase());
+ }
+ catch(IllegalArgumentException ex) {
+ LOG.warn("Ignoring setting " + TEMPLETON_JOBSLIST_ORDER + " configured with in-correct value " + requestedOrder);
+ }
+ }
+
+ // Default to lexicographicalasc
+ return JobsListOrder.lexicographicalasc;
+ }
+
public void startCleanup() {
JobState.getStorageInstance(this).startCleanup(this);
}
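For illustration, here is a minimal self-contained sketch of the parse-with-fallback behavior that getListJobsOrder() implements above; the class and method names are hypothetical, not part of the Templeton source. An unrecognized value is warned about and ignored, and the order falls back to lexicographicalasc:

public class JobsListOrderDemo {
  enum JobsListOrder { lexicographicalasc, lexicographicaldesc }

  // Mirrors getListJobsOrder(): case-insensitive parse, warn and default on bad input.
  static JobsListOrder parseOrder(String requestedOrder) {
    if (requestedOrder != null) {
      try {
        return JobsListOrder.valueOf(requestedOrder.toLowerCase());
      } catch (IllegalArgumentException ex) {
        System.err.println("Ignoring incorrect value: " + requestedOrder);
      }
    }
    return JobsListOrder.lexicographicalasc; // documented default
  }

  public static void main(String[] args) {
    System.out.println(parseOrder("LEXICOGRAPHICALDESC")); // lexicographicaldesc
    System.out.println(parseOrder("newest-first"));        // lexicographicalasc, after a warning
    System.out.println(parseOrder(null));                  // lexicographicalasc
  }
}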
http://git-wip-us.apache.org/repos/asf/hive/blob/a5ffa719/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/Server.java
----------------------------------------------------------------------
diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/Server.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/Server.java
index 27b8e38..bba16c5 100644
--- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/Server.java
+++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/Server.java
@@ -1002,8 +1002,16 @@ public class Server {
throw new BadParam("Invalid numrecords format: numrecords should be an integer > 0");
}
- // Sort the list lexicographically
- Collections.sort(list);
+ // Sort the list as requested
+ switch (appConf.getListJobsOrder()) {
+ case lexicographicaldesc:
+ Collections.sort(list, Collections.reverseOrder());
+ break;
+ case lexicographicalasc:
+ default:
+ Collections.sort(list);
+ break;
+ }
for (String job : list) {
// If numRecords = -1, fetch all records.
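The switch above only changes the sort direction. A minimal standalone sketch of the two orderings, assuming (as this change does) that job IDs compare chronologically when compared lexicographically:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

public class JobsSortDemo {
  public static void main(String[] args) {
    List<String> list = new ArrayList<String>(Arrays.asList(
        "job_0002", "job_0010", "job_0001"));

    // lexicographicalasc (the previous behavior and the default): oldest first.
    Collections.sort(list);
    System.out.println(list); // [job_0001, job_0002, job_0010]

    // lexicographicaldesc: reverse natural order, so the latest job is at the top.
    Collections.sort(list, Collections.reverseOrder());
    System.out.println(list); // [job_0010, job_0002, job_0001]
  }
}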
[05/14] hive git commit: HIVE-11952 : disable q tests that are both slow and less relevant (Sergey Shelukhin, reviewed by Sergio Peña)
Posted by se...@apache.org.
HIVE-11952 : disable q tests that are both slow and less relevant (Sergey Shelukhin, reviewed by Sergio Peña)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e1ce9a23
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e1ce9a23
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e1ce9a23
Branch: refs/heads/llap
Commit: e1ce9a23a7045618da6850e3315f785ea8c62d4d
Parents: b801d12
Author: Sergey Shelukhin <se...@apache.org>
Authored: Tue Sep 29 15:29:35 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Tue Sep 29 15:29:35 2015 -0700
----------------------------------------------------------------------
itests/qtest/pom.xml | 2 +-
.../test/resources/testconfiguration.properties | 27 +++++++++++++++-----
2 files changed, 21 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/e1ce9a23/itests/qtest/pom.xml
----------------------------------------------------------------------
diff --git a/itests/qtest/pom.xml b/itests/qtest/pom.xml
index 739d06a..74ca88f 100644
--- a/itests/qtest/pom.xml
+++ b/itests/qtest/pom.xml
@@ -436,7 +436,7 @@
templatePath="${basedir}/${hive.path.to.root}/ql/src/test/templates/" template="TestCliDriver.vm"
queryDirectory="${basedir}/${hive.path.to.root}/ql/src/test/queries/clientpositive/"
queryFile="${qfile}"
- excludeQueryFile="${minimr.query.files},${minitez.query.files},${encrypted.query.files},${spark.only.query.files}"
+ excludeQueryFile="${minimr.query.files},${minitez.query.files},${encrypted.query.files},${spark.only.query.files},${disabled.query.files}"
queryFileRegex="${qfile_regex}"
clusterMode="${clustermode}"
runDisabled="${run_disabled}"
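Judging from the generator configuration shown here, a test listed in disabled.query.files simply drops out of the generated TestCliDriver suite; it can presumably still be run on demand by naming it explicitly (for example, mvn test -Dtest=TestCliDriver -Dqfile=smb_mapjoin_8.q), since the ${qfile} and ${run_disabled} properties both remain wired into the task above.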
http://git-wip-us.apache.org/repos/asf/hive/blob/e1ce9a23/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 4f7b25f..700ea63 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -34,9 +34,6 @@ minimr.query.files=auto_sortmerge_join_16.q,\
non_native_window_udf.q, \
optrstat_groupby.q,\
parallel_orderby.q,\
- ql_rewrite_gbtoidx.q,\
- ql_rewrite_gbtoidx_cbo_1.q,\
- ql_rewrite_gbtoidx_cbo_2.q,\
quotedid_smb.q,\
reduce_deduplicate.q,\
remote_script.q,\
@@ -45,7 +42,6 @@ minimr.query.files=auto_sortmerge_join_16.q,\
schemeAuthority2.q,\
scriptfile1.q,\
scriptfile1_win.q,\
- smb_mapjoin_8.q,\
stats_counter.q,\
stats_counter_partitioned.q,\
table_nonprintable.q,\
@@ -54,6 +50,20 @@ minimr.query.files=auto_sortmerge_join_16.q,\
uber_reduce.q,\
udf_using.q
+# These tests are disabled for minimr
+# ql_rewrite_gbtoidx.q,\
+# ql_rewrite_gbtoidx_cbo_1.q,\
+# ql_rewrite_gbtoidx_cbo_2.q,\
+# smb_mapjoin_8.q,\
+
+
+# Tests that are not enabled for CLI Driver
+disabled.query.files=ql_rewrite_gbtoidx.q,\
+ ql_rewrite_gbtoidx_cbo_1.q,\
+ ql_rewrite_gbtoidx_cbo_2.q,\
+ rcfile_merge1.q,\
+ smb_mapjoin_8.q
+
minitez.query.files.shared=alter_merge_2_orc.q,\
alter_merge_orc.q,\
alter_merge_stats_orc.q,\
@@ -1171,8 +1181,6 @@ miniSparkOnYarn.query.files=auto_sortmerge_join_16.q,\
load_hdfs_file_with_space_in_the_name.q,\
optrstat_groupby.q,\
parallel_orderby.q,\
- ql_rewrite_gbtoidx.q,\
- ql_rewrite_gbtoidx_cbo_1.q,\
quotedid_smb.q,\
reduce_deduplicate.q,\
remote_script.q,\
@@ -1181,13 +1189,18 @@ miniSparkOnYarn.query.files=auto_sortmerge_join_16.q,\
schemeAuthority2.q,\
scriptfile1.q,\
scriptfile1_win.q,\
- smb_mapjoin_8.q,\
stats_counter.q,\
stats_counter_partitioned.q,\
temp_table_external.q,\
truncate_column_buckets.q,\
uber_reduce.q
+# These tests are removed from miniSparkOnYarn.query.files
+# ql_rewrite_gbtoidx.q,\
+# ql_rewrite_gbtoidx_cbo_1.q,\
+# smb_mapjoin_8.q,\
+
+
spark.query.negative.files=groupby2_map_skew_multi_distinct.q,\
groupby2_multi_distinct.q,\
groupby3_map_skew_multi_distinct.q,\
[09/14] hive git commit: HIVE-11964: RelOptHiveTable.hiveColStatsMap might contain mismatched column stats (Chaoyu Tang, reviewed by Laljo John Pullokkaran)
Posted by se...@apache.org.
HIVE-11964: RelOptHiveTable.hiveColStatsMap might contain mismatched column stats (Chaoyu Tang, reviewed by Laljo John Pullokkaran)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1f087893
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1f087893
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1f087893
Branch: refs/heads/llap
Commit: 1f08789395db979c1ca298fbaf7a46abfb7d4e1f
Parents: 1cb3073
Author: ctang <ct...@gmail.com>
Authored: Wed Sep 30 12:15:16 2015 -0400
Committer: ctang <ct...@gmail.com>
Committed: Wed Sep 30 12:15:16 2015 -0400
----------------------------------------------------------------------
.../hive/ql/optimizer/calcite/RelOptHiveTable.java | 15 +++++++++++++++
1 file changed, 15 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/1f087893/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
index 6c0bd25..1bd241b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
@@ -312,6 +312,19 @@ public class RelOptHiveTable extends RelOptAbstractTable {
setOfFiledCols.removeAll(setOfObtainedColStats);
colNamesFailedStats.addAll(setOfFiledCols);
+ } else {
+ // Column stats in hiveColStats might not be in the same order as the columns in
+ // nonPartColNamesThatRqrStats. reorder hiveColStats so we can build hiveColStatsMap
+ // using nonPartColIndxsThatRqrStats as below
+ Map<String, ColStatistics> columnStatsMap =
+ new HashMap<String, ColStatistics>(hiveColStats.size());
+ for (ColStatistics cs : hiveColStats) {
+ columnStatsMap.put(cs.getColumnName(), cs);
+ }
+ hiveColStats.clear();
+ for (String colName : nonPartColNamesThatRqrStats) {
+ hiveColStats.add(columnStatsMap.get(colName));
+ }
}
} else {
// 2.2 Obtain col stats for partitioned table.
@@ -349,6 +362,8 @@ public class RelOptHiveTable extends RelOptAbstractTable {
if (hiveColStats != null && hiveColStats.size() == nonPartColNamesThatRqrStats.size()) {
for (int i = 0; i < hiveColStats.size(); i++) {
+ // the columns in nonPartColIndxsThatRqrStats/nonPartColNamesThatRqrStats/hiveColStats
+ // are in the same order
hiveColStatsMap.put(nonPartColIndxsThatRqrStats.get(i), hiveColStats.get(i));
}
}
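A minimal self-contained sketch of the reordering pattern this fix applies; ColStat here is a hypothetical stand-in for Hive's ColStatistics, and the variable names are simplified. Stats arriving in arbitrary order are staged in a name-keyed map, and the list is rebuilt to follow the reference column order, so the positional lookup via nonPartColIndxsThatRqrStats stays consistent:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class ReorderStatsDemo {
  // Hypothetical stand-in for Hive's ColStatistics.
  static class ColStat {
    final String columnName;
    ColStat(String columnName) { this.columnName = columnName; }
    @Override public String toString() { return columnName; }
  }

  public static void main(String[] args) {
    List<ColStat> stats = new ArrayList<ColStat>(Arrays.asList(
        new ColStat("c"), new ColStat("a"), new ColStat("b")));
    List<String> requiredOrder = Arrays.asList("a", "b", "c");

    // Stage the stats by column name, then rebuild the list in the required order.
    Map<String, ColStat> byName = new HashMap<String, ColStat>(stats.size());
    for (ColStat cs : stats) {
      byName.put(cs.columnName, cs);
    }
    stats.clear();
    for (String col : requiredOrder) {
      stats.add(byName.get(col));
    }
    System.out.println(stats); // [a, b, c] -- now aligned with requiredOrder
  }
}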