You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2018/06/07 21:07:03 UTC
[2/2] hive git commit: HIVE-19793 : disable LLAP IO batch-to-row wrapper for ACID deletes/updates (Sergey Shelukhin, reviewed by Eugene Koifman and Prasanth Jayachandran)
HIVE-19793 : disable LLAP IO batch-to-row wrapper for ACID deletes/updates (Sergey Shelukhin, reviewed by Eugene Koifman and Prasanth Jayachandran)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f5f485e3
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f5f485e3
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f5f485e3
Branch: refs/heads/branch-3
Commit: f5f485e3aef1a0cd85167b865d1ae54cc82dcf6d
Parents: 02f4931
Author: sergey <se...@apache.org>
Authored: Thu Jun 7 13:43:14 2018 -0700
Committer: sergey <se...@apache.org>
Committed: Thu Jun 7 13:43:51 2018 -0700
----------------------------------------------------------------------
common/src/java/org/apache/hadoop/hive/conf/HiveConf.java | 1 +
.../apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java | 7 +++++--
.../apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java | 4 ++--
.../hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java | 4 +++-
.../hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java | 3 +++
5 files changed, 14 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/f5f485e3/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index ee543ab..b24bef5 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -3768,6 +3768,7 @@ public class HiveConf extends Configuration {
false, "Use Tez cartesian product edge to speed up cross product"),
// The default is different on the client and server, so it's null here.
LLAP_IO_ENABLED("hive.llap.io.enabled", null, "Whether the LLAP IO layer is enabled."),
+ LLAP_IO_ROW_WRAPPER_ENABLED("hive.llap.io.row.wrapper.enabled", true, "Whether the LLAP IO row wrapper is enabled for non-vectorized queries."),
LLAP_IO_ACID_ENABLED("hive.llap.io.acid", true, "Whether the LLAP IO layer is enabled for ACID."),
LLAP_IO_TRACE_SIZE("hive.llap.io.trace.size", "2Mb",
new SizeValidator(0L, true, (long)Integer.MAX_VALUE, false),
http://git-wip-us.apache.org/repos/asf/hive/blob/f5f485e3/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
index 6d29163..40f7c83 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
@@ -20,7 +20,7 @@
package org.apache.hadoop.hive.llap.io.api.impl;
import org.apache.hadoop.hive.ql.io.BatchToRowInputFormat;
-
+import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import java.io.IOException;
@@ -114,7 +114,10 @@ public class LlapInputFormat implements InputFormat<NullWritable, VectorizedRowB
// For non-vectorized operator case, wrap the reader if possible.
RecordReader<NullWritable, VectorizedRowBatch> result = rr;
if (!Utilities.getIsVectorized(job)) {
- result = wrapLlapReader(tableIncludedCols, rr, split);
+ result = null;
+ if (HiveConf.getBoolVar(job, ConfVars.LLAP_IO_ROW_WRAPPER_ENABLED)) {
+ result = wrapLlapReader(tableIncludedCols, rr, split);
+ }
if (result == null) {
// Cannot wrap a reader for non-vectorized pipeline.
return sourceInputFormat.getRecordReader(split, job, reporter);
http://git-wip-us.apache.org/repos/asf/hive/blob/f5f485e3/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
index 6897336..cb57a11 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
@@ -178,8 +178,8 @@ class LlapRecordReader
this.isVectorized = HiveConf.getBoolVar(jobConf, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED);
if (isAcidScan) {
- this.acidReader = new VectorizedOrcAcidRowBatchReader((OrcSplit) split, jobConf, Reporter.NULL, null, rbCtx,
- true);
+ this.acidReader = new VectorizedOrcAcidRowBatchReader(
+ (OrcSplit) split, jobConf, Reporter.NULL, null, rbCtx, true);
}
// Create the consumer of encoded data; it will coordinate decoding to CVBs.
http://git-wip-us.apache.org/repos/asf/hive/blob/f5f485e3/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java
index d2e1a68..a4568de 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java
@@ -324,8 +324,9 @@ public class VectorizedOrcAcidRowBatchReader
private static boolean needSyntheticRowIds(boolean isOriginal, boolean hasDeletes, boolean rowIdProjected) {
return isOriginal && (hasDeletes || rowIdProjected);
}
+
private static boolean areRowIdsProjected(VectorizedRowBatchCtx rbCtx) {
- if(rbCtx.getVirtualColumnCount() == 0) {
+ if (rbCtx.getVirtualColumnCount() == 0) {
return false;
}
for(VirtualColumn vc : rbCtx.getNeededVirtualColumns()) {
@@ -474,6 +475,7 @@ public class VectorizedOrcAcidRowBatchReader
}
}
if (rowIdProjected) {
+ // TODO: could we just do: int ix = rbCtx.findVirtualColumnNum(VirtualColumn.ROWID); value.cols[ix] = recordIdColumnVector;
rbCtx.setRecordIdColumnVector(recordIdColumnVector);
}
progress = baseReader.getProgress();
http://git-wip-us.apache.org/repos/asf/hive/blob/f5f485e3/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
index 8e5118b..7925151 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
@@ -34,6 +34,7 @@ import org.antlr.runtime.TokenRewriteStream;
import org.antlr.runtime.tree.Tree;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
@@ -521,6 +522,8 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
// references.
// todo: this may be a perf issue as it prevents the optimizer.. or not
HiveConf.setVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict");
+ // Disable LLAP IO wrapper; doesn't propagate extra ACID columns correctly.
+ HiveConf.setBoolVar(conf, ConfVars.LLAP_IO_ROW_WRAPPER_ENABLED, false);
// Parse the rewritten query string
Context rewrittenCtx;
try {