You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2020/07/22 04:43:49 UTC
[hive] branch master updated: HIVE-23734: Untangle LlapRecordReader Includes construction (Panos G via Ashutosh Chauhan)

This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 47da936  HIVE-23734: Untangle LlapRecordReader Includes construction (Panos G via Ashutosh Chauhan)
47da936 is described below

commit 47da936a06e50ba11e7cee9cbdc32715077709cb
Author: Panos Garefalakis <pg...@cloudera.com>
AuthorDate: Sat Jun 20 22:12:07 2020 +0100

    HIVE-23734: Untangle LlapRecordReader Includes construction (Panos G via Ashutosh Chauhan)
    
    Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
---
 .../hive/llap/io/api/impl/LlapRecordReader.java    | 50 +++++++++++-----------
 1 file changed, 24 insertions(+), 26 deletions(-)

diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
index c148dd4..a257a06 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
@@ -648,8 +648,10 @@ class LlapRecordReader implements RecordReader<NullWritable, VectorizedRowBatch>
           // Note: columnIds below makes additional changes for ACID. Don't use this var directly.
       this.readerSchema = readerSchema;
       this.jobConf = jobConf;
+      this.includeAcidColumns = includeAcidColumns;
+
+      // Assume including everything means the VRB will have everything.
       if (tableIncludedCols == null) {
-        // Assume including everything means the VRB will have everything.
         // TODO: this is rather brittle, esp. in view of schema evolution (in abstract, not as 
         //       currently implemented in Hive). The compile should supply the columns it expects
         //       to see, which is not "all, of any schema". Is VRB row CVs the right mechanism
@@ -659,43 +661,39 @@ class LlapRecordReader implements RecordReader<NullWritable, VectorizedRowBatch>
           tableIncludedCols.add(i);
         }
       }
-      LOG.debug("Logical table includes: {}", tableIncludedCols);
+
       this.readerLogicalColumnIds = tableIncludedCols;
+      LOG.debug("Logical table includes: {}", readerLogicalColumnIds);
+
       // Note: schema evolution currently does not support column index changes.
       //       So, the indices should line up... to be fixed in SE v2?
-      List<Integer> filePhysicalColumnIds = readerLogicalColumnIds;
       if (isAcidScan) {
         int rootCol = OrcInputFormat.getRootColumn(false);
-        filePhysicalColumnIds = new ArrayList<>(filePhysicalColumnIds.size() + rootCol);
+        this.filePhysicalColumnIds = new ArrayList<>(readerLogicalColumnIds.size() + rootCol);
         this.acidStructColumnId = rootCol - 1; // OrcRecordUpdater.ROW. This is somewhat fragile...
-        // Note: this guarantees that physical column IDs are in order.
-        for (int i = 0; i < rootCol; ++i) {
-          // We don't want to include the root struct in ACID case; it would cause the whole
-          // struct to get read without projection.
-          if (acidStructColumnId == i) continue;
-          if(!includeAcidColumns) {
-            /*
-              if not including acid columns, we still want to number the
-              physical columns as if acid columns are included becase
-              {@link #generateFileIncludes(TypeDescription)} takes the file
-              schema as input
-              (eg <op, owid, writerId, rowid, cwid, <f1, ... fn>>)
-             */
-            continue;
+        if (includeAcidColumns) {
+          // Up to acidStructColumnId: as we don't want to include the root struct in ACID case;
+          // it would cause the whole struct to get read without projection.
+          for (int i = 0; i < acidStructColumnId; ++i) {
+            // Note: this guarantees that physical column IDs are in order.
+            filePhysicalColumnIds.add(i);
           }
-          filePhysicalColumnIds.add(i);
         }
+        /**
+         * Even when NOT including acid columns, we still want to number the
+         * physical columns as if acid columns are included because
+         * {@link #generateFileIncludes(TypeDescription)} takes the file
+         * schema as input
+         * (eg <op, owid, writerId, rowid, cwid, <f1, ... fn>>)
+         */
         for (int tableColumnId : readerLogicalColumnIds) {
-          //but make sure to generate correct ids in type tree in-order
-          //walk order
+          // Make sure to generate correct ids in type tree in-order traversal
+          /* ok, so if filePhysicalColumnIds include acid column ids, we end up decoding the vectors*/
           filePhysicalColumnIds.add(rootCol + tableColumnId);
         }
-        /*ok, so if filePhysicalColumnIds include acid column ids, we end up
-         decoding the vectors*/
+      } else {
+        this.filePhysicalColumnIds = readerLogicalColumnIds;
       }
- 
-      this.filePhysicalColumnIds = filePhysicalColumnIds;
-      this.includeAcidColumns = includeAcidColumns;
     }
 
     @Override