Posted to commits@hive.apache.org by ma...@apache.org on 2022/07/18 12:22:38 UTC

[hive] branch master updated: HIVE-26394 : Query based compaction fails for table with more than 6 columns (Mahesh Kumar Behera, reviewed by Denys Kuzmenko)

This is an automated email from the ASF dual-hosted git repository.

mahesh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 69e6a5a4151 HIVE-26394 : Query based compaction fails for table with more than 6 columns (Mahesh Kumar Behera, reviewed by Denys Kuzmenko)
69e6a5a4151 is described below

commit 69e6a5a4151100849d2b03b6b14b1605c3abc3f1
Author: mahesh kumar behera <ma...@apache.org>
AuthorDate: Mon Jul 18 17:52:29 2022 +0530

    HIVE-26394 : Query based compaction fails for table with more than 6 columns (Mahesh Kumar Behera, reviewed by Denys Kuzmenko)
---
 .../ql/txn/compactor/TestCrudCompactorOnTez.java   |  4 +--
 .../hadoop/hive/ql/io/orc/OrcInputFormat.java      | 32 +++++++++++++++++++---
 .../hadoop/hive/ql/io/orc/OrcNewInputFormat.java   | 12 +++++---
 3 files changed, 38 insertions(+), 10 deletions(-)

diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java
index cffb58bc41c..eb9f4c4e2a8 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java
@@ -1484,8 +1484,8 @@ public class TestCrudCompactorOnTez extends CompactorOnTezTest {
     String tableName = "testMinorCompaction";
     executeStatementOnDriver("drop table if exists " + tableName, driver);
     executeStatementOnDriver(
-        "CREATE TABLE " + tableName + "(a INT, b STRING) " + " STORED AS ORC  TBLPROPERTIES ('transactional'='true')",
-        driver);
+            "CREATE TABLE " + tableName + "(a INT, b STRING, c int, d int, e int, f int, j int, i int) " +
+                    " STORED AS ORC  TBLPROPERTIES ('transactional'='true')", driver);
     CompactorTestUtil.runStreamingAPI(conf, dbName, tableName, Lists
         .newArrayList(new CompactorTestUtil.StreamingConnectionOption(false, false),
             new CompactorTestUtil.StreamingConnectionOption(true, false),
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 0f1333b9a68..4b70ff5c5b7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -251,12 +251,29 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
 
 
     OrcRecordReader(Reader file, Configuration conf,
-                    FileSplit split) throws IOException {
+                    InputSplit inputSplit) throws IOException {
       this.file = file;
       numColumns = file.getSchema().getChildren().size();
+      FileSplit split = (FileSplit)inputSplit;
       this.offset = split.getStart();
       this.length = split.getLength();
-      this.reader = createReaderFromFile(file, conf, offset, length);
+
+      // In case of query-based compaction, the ACID table location is used as the location of the external table.
+      // The assumption is that the table is treated as an external table, but the file itself is ACID, so the
+      // file schema cannot be used to judge whether the table is original or not; it has to be decided per file split.
+
+      // CREATE temporary external table delete_delta_default_tmp_compactor_testminorcompaction_1657797233724_result(
+      // `operation` int, `originalTransaction` bigint, `bucket` int, `rowId` bigint, `currentTransaction` bigint,
+      // `row` struct<`a` :int, `b` :string, `c` :int, `d` :int, `e` :int, `f` :int, `j` :int, `i` :int>)
+      // clustered by (`bucket`) sorted by (`originalTransaction`, `bucket`, `rowId`) into 1 buckets stored as
+      // orc LOCATION 'file:/warehouse/testminorcompaction/delete_delta_0000001_0000006_v0000009'
+      // TBLPROPERTIES ('compactiontable'='true', 'bucketing_version'='2', 'transactional'='false')
+      if (inputSplit instanceof OrcSplit) {
+        this.reader = createReaderFromFile(file, conf, offset, length, ((OrcSplit) inputSplit).isOriginal());
+      } else {
+        this.reader = createReaderFromFile(file, conf, offset, length);
+      }
+
       this.stats = new SerDeStats();
     }
 
@@ -327,6 +344,14 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
   public static RecordReader createReaderFromFile(Reader file,
                                                   Configuration conf,
                                                   long offset, long length
+  ) throws IOException {
+    return createReaderFromFile(file, conf, offset, length, isOriginal(file));
+  }
+
+  public static RecordReader createReaderFromFile(Reader file,
+                                                  Configuration conf,
+                                                  long offset, long length,
+                                                  boolean isOriginal
                                                   ) throws IOException {
     if (AcidUtils.isFullAcidScan(conf)) {
       raiseAcidTablesMustBeReadWithAcidReaderException(conf);
@@ -339,7 +364,6 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
 
     Reader.Options options = new Reader.Options(conf).range(offset, length);
     options.schema(schema);
-    boolean isOriginal = isOriginal(file);
     if (schema == null) {
       schema = file.getSchema();
     }
@@ -1979,7 +2003,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
         return new OrcRecordReader(OrcFile.createReader(
             ((FileSplit) inputSplit).getPath(),
             readerOptions),
-            conf, (FileSplit) inputSplit);
+            conf, inputSplit);
       }
     }
 
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewInputFormat.java
index 645f00602a9..acf600266d3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewInputFormat.java
@@ -54,7 +54,7 @@ public class OrcNewInputFormat extends InputFormat<NullWritable, OrcStruct>{
     return new OrcRecordReader(OrcFile.createReader(path,
                                                    OrcFile.readerOptions(conf)),
         ShimLoader.getHadoopShims().getConfiguration(context),
-        fileSplit.getStart(), fileSplit.getLength());
+        fileSplit.getStart(), fileSplit.getLength(), inputSplit);
   }
 
   private static class OrcRecordReader
@@ -65,11 +65,15 @@ public class OrcNewInputFormat extends InputFormat<NullWritable, OrcStruct>{
     private float progress = 0.0f;
 
     OrcRecordReader(Reader file, Configuration conf,
-                    long offset, long length) throws IOException {
+                    long offset, long length, InputSplit inputSplit) throws IOException {
       numColumns = file.getSchema().getChildren().size();
       value = new OrcStruct(numColumns);
-      this.reader = OrcInputFormat.createReaderFromFile(file, conf, offset,
-          length);
+      if (inputSplit instanceof OrcNewSplit) {
+        this.reader = OrcInputFormat.createReaderFromFile(file, conf, offset,
+                length, ((OrcNewSplit)inputSplit).isOriginal());
+      } else {
+        this.reader = OrcInputFormat.createReaderFromFile(file, conf, offset, length);
+      }
     }
 
     @Override
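Editor's note, outside the commit itself: the gist of the change is that OrcRecordReader (and its mapreduce counterpart in OrcNewInputFormat) no longer decide whether a file is "original" (i.e. not in the ACID wrapper layout) from the file schema alone. Query-based compaction creates a temporary external table directly on top of an ACID directory (the CREATE TABLE reproduced in the code comment above), so the files always carry the ACID layout and the old schema-based guess, boolean isOriginal = isOriginal(file), misfires; the new createReaderFromFile overload instead takes the flag from OrcSplit#isOriginal() or OrcNewSplit#isOriginal() when the split provides one. The sketch below is a minimal, self-contained model of that dispatch; SplitInfo, looksOriginalFromSchema and the example values are illustrative assumptions, not Hive APIs.

    // Editor's sketch (not part of the commit): models the "prefer the split, fall back to
    // the file schema" decision the patch introduces. Names here are illustrative only.
    import java.util.Arrays;
    import java.util.List;

    public class IsOriginalDispatchSketch {

      // Stand-in for the information an OrcSplit/OrcNewSplit carries about the table layout.
      static final class SplitInfo {
        final Boolean isOriginal; // null when the split type carries no such flag
        SplitInfo(Boolean isOriginal) { this.isOriginal = isOriginal; }
      }

      // Fallback guess from the file schema alone: a file whose top-level fields are the five
      // ACID metadata columns plus "row" is taken to be an ACID file, everything else "original".
      static boolean looksOriginalFromSchema(List<String> topLevelFields) {
        List<String> acidLayout = Arrays.asList(
            "operation", "originalTransaction", "bucket", "rowId", "currentTransaction", "row");
        return !topLevelFields.equals(acidLayout);
      }

      // The fix in words: trust the split when it knows, only guess from the file when it does not.
      static boolean isOriginal(SplitInfo split, List<String> topLevelFields) {
        if (split != null && split.isOriginal != null) {
          return split.isOriginal;
        }
        return looksOriginalFromSchema(topLevelFields);
      }

      public static void main(String[] args) {
        // The compactor's temporary external table sits directly on ACID delta files, so the
        // schema-only guess says "not original" even though the table definition maps those
        // fields as plain columns. When the split supplies a flag, that flag wins.
        List<String> acidFile = Arrays.asList(
            "operation", "originalTransaction", "bucket", "rowId", "currentTransaction", "row");
        System.out.println(isOriginal(new SplitInfo(Boolean.TRUE), acidFile)); // true: split decides
        System.out.println(isOriginal(new SplitInfo(null), acidFile));         // false: schema fallback
      }
    }

In the actual patch the old behaviour is kept as a fallback: createReaderFromFile(file, conf, offset, length) still exists for callers that pass a plain FileSplit, which is why both record readers check instanceof OrcSplit / OrcNewSplit before using the split's flag.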