You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2018/06/07 21:07:03 UTC

[2/2] hive git commit: HIVE-19793 : disable LLAP IO batch-to-row wrapper for ACID deletes/updates (Sergey Shelukhin, reviewed by Eugene Koifman and Prasanth Jayachandran)

HIVE-19793 : disable LLAP IO batch-to-row wrapper for ACID deletes/updates (Sergey Shelukhin, reviewed by Eugene Koifman and Prasanth Jayachandran)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f5f485e3
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f5f485e3
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f5f485e3

Branch: refs/heads/branch-3
Commit: f5f485e3aef1a0cd85167b865d1ae54cc82dcf6d
Parents: 02f4931
Author: sergey <se...@apache.org>
Authored: Thu Jun 7 13:43:14 2018 -0700
Committer: sergey <se...@apache.org>
Committed: Thu Jun 7 13:43:51 2018 -0700

----------------------------------------------------------------------
 common/src/java/org/apache/hadoop/hive/conf/HiveConf.java     | 1 +
 .../apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java  | 7 +++++--
 .../apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java | 4 ++--
 .../hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java       | 4 +++-
 .../hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java    | 3 +++
 5 files changed, 14 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/f5f485e3/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index ee543ab..b24bef5 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -3768,6 +3768,7 @@ public class HiveConf extends Configuration {
         false, "Use Tez cartesian product edge to speed up cross product"),
     // The default is different on the client and server, so it's null here.
     LLAP_IO_ENABLED("hive.llap.io.enabled", null, "Whether the LLAP IO layer is enabled."),
+    LLAP_IO_ROW_WRAPPER_ENABLED("hive.llap.io.row.wrapper.enabled", true, "Whether the LLAP IO row wrapper is enabled for non-vectorized queries."),
     LLAP_IO_ACID_ENABLED("hive.llap.io.acid", true, "Whether the LLAP IO layer is enabled for ACID."),
     LLAP_IO_TRACE_SIZE("hive.llap.io.trace.size", "2Mb",
         new SizeValidator(0L, true, (long)Integer.MAX_VALUE, false),

http://git-wip-us.apache.org/repos/asf/hive/blob/f5f485e3/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
index 6d29163..40f7c83 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
@@ -20,7 +20,7 @@
 package org.apache.hadoop.hive.llap.io.api.impl;
 
 import org.apache.hadoop.hive.ql.io.BatchToRowInputFormat;
-
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 
 import java.io.IOException;
@@ -114,7 +114,10 @@ public class LlapInputFormat implements InputFormat<NullWritable, VectorizedRowB
       // For non-vectorized operator case, wrap the reader if possible.
       RecordReader<NullWritable, VectorizedRowBatch> result = rr;
       if (!Utilities.getIsVectorized(job)) {
-        result = wrapLlapReader(tableIncludedCols, rr, split);
+        result = null;
+        if (HiveConf.getBoolVar(job, ConfVars.LLAP_IO_ROW_WRAPPER_ENABLED)) {
+          result = wrapLlapReader(tableIncludedCols, rr, split);
+        }
         if (result == null) {
           // Cannot wrap a reader for non-vectorized pipeline.
           return sourceInputFormat.getRecordReader(split, job, reporter);

http://git-wip-us.apache.org/repos/asf/hive/blob/f5f485e3/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
index 6897336..cb57a11 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
@@ -178,8 +178,8 @@ class LlapRecordReader
 
     this.isVectorized = HiveConf.getBoolVar(jobConf, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED);
     if (isAcidScan) {
-      this.acidReader = new VectorizedOrcAcidRowBatchReader((OrcSplit) split, jobConf, Reporter.NULL, null, rbCtx,
-        true);
+      this.acidReader = new VectorizedOrcAcidRowBatchReader(
+          (OrcSplit) split, jobConf, Reporter.NULL, null, rbCtx, true);
     }
 
     // Create the consumer of encoded data; it will coordinate decoding to CVBs.

http://git-wip-us.apache.org/repos/asf/hive/blob/f5f485e3/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java
index d2e1a68..a4568de 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java
@@ -324,8 +324,9 @@ public class VectorizedOrcAcidRowBatchReader
   private static boolean needSyntheticRowIds(boolean isOriginal, boolean hasDeletes, boolean rowIdProjected) {
     return isOriginal && (hasDeletes || rowIdProjected);
   }
+
   private static boolean areRowIdsProjected(VectorizedRowBatchCtx rbCtx) {
-    if(rbCtx.getVirtualColumnCount() == 0) {
+    if (rbCtx.getVirtualColumnCount() == 0) {
       return false;
     }
     for(VirtualColumn vc : rbCtx.getNeededVirtualColumns()) {
@@ -474,6 +475,7 @@ public class VectorizedOrcAcidRowBatchReader
       }
     }
     if (rowIdProjected) {
+      // TODO: could we just do: int ix = rbCtx.findVirtualColumnNum(VirtualColumn.ROWID); value.cols[ix] = recordIdColumnVector;
       rbCtx.setRecordIdColumnVector(recordIdColumnVector);
     }
     progress = baseReader.getProgress();

http://git-wip-us.apache.org/repos/asf/hive/blob/f5f485e3/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
index 8e5118b..7925151 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
@@ -34,6 +34,7 @@ import org.antlr.runtime.TokenRewriteStream;
 import org.antlr.runtime.tree.Tree;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.metastore.TableType;
 import org.apache.hadoop.hive.metastore.Warehouse;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
@@ -521,6 +522,8 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
     // references.
     // todo: this may be a perf issue as it prevents the optimizer.. or not
     HiveConf.setVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict");
+    // Disable the LLAP IO batch-to-row wrapper; it doesn't propagate the extra ACID columns correctly.
+    HiveConf.setBoolVar(conf, ConfVars.LLAP_IO_ROW_WRAPPER_ENABLED, false);
     // Parse the rewritten query string
     Context rewrittenCtx;
     try {