You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ma...@apache.org on 2022/07/18 12:22:38 UTC
[hive] branch master updated: HIVE-26394 : Query based compaction fails for table with more than 6 columns (Mahesh Kumar Behera, reviewed by Denys Kuzmenko)
This is an automated email from the ASF dual-hosted git repository.
mahesh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 69e6a5a4151 HIVE-26394 : Query based compaction fails for table with more than 6 columns (Mahesh Kumar Behera, reviewed by Denys Kuzmenko)
69e6a5a4151 is described below
commit 69e6a5a4151100849d2b03b6b14b1605c3abc3f1
Author: mahesh kumar behera <ma...@apache.org>
AuthorDate: Mon Jul 18 17:52:29 2022 +0530
HIVE-26394 : Query based compaction fails for table with more than 6 columns (Mahesh Kumar Behera, reviewed by Denys Kuzmenko)
---
.../ql/txn/compactor/TestCrudCompactorOnTez.java | 4 +--
.../hadoop/hive/ql/io/orc/OrcInputFormat.java | 32 +++++++++++++++++++---
.../hadoop/hive/ql/io/orc/OrcNewInputFormat.java | 12 +++++---
3 files changed, 38 insertions(+), 10 deletions(-)
diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java
index cffb58bc41c..eb9f4c4e2a8 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java
@@ -1484,8 +1484,8 @@ public class TestCrudCompactorOnTez extends CompactorOnTezTest {
String tableName = "testMinorCompaction";
executeStatementOnDriver("drop table if exists " + tableName, driver);
executeStatementOnDriver(
- "CREATE TABLE " + tableName + "(a INT, b STRING) " + " STORED AS ORC TBLPROPERTIES ('transactional'='true')",
- driver);
+ "CREATE TABLE " + tableName + "(a INT, b STRING, c int, d int, e int, f int, j int, i int) " +
+ " STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
CompactorTestUtil.runStreamingAPI(conf, dbName, tableName, Lists
.newArrayList(new CompactorTestUtil.StreamingConnectionOption(false, false),
new CompactorTestUtil.StreamingConnectionOption(true, false),
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 0f1333b9a68..4b70ff5c5b7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -251,12 +251,29 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
OrcRecordReader(Reader file, Configuration conf,
- FileSplit split) throws IOException {
+ InputSplit inputSplit) throws IOException {
this.file = file;
numColumns = file.getSchema().getChildren().size();
+ FileSplit split = (FileSplit)inputSplit;
this.offset = split.getStart();
this.length = split.getLength();
- this.reader = createReaderFromFile(file, conf, offset, length);
+
+ // In case of query based compaction, the ACID table location is used as the location of the external table.
+ // The assumption is that the table is treated as an external table. But as per the file, the table is ACID and thus
+ // the file schema cannot be used to judge whether the table is original or not. It has to be determined from the file split.
+
+ // CREATE temporary external table delete_delta_default_tmp_compactor_testminorcompaction_1657797233724_result(
+ // `operation` int, `originalTransaction` bigint, `bucket` int, `rowId` bigint, `currentTransaction` bigint,
+ // `row` struct<`a` :int, `b` :string, `c` :int, `d` :int, `e` :int, `f` :int, `j` :int, `i` :int>)
+ // clustered by (`bucket`) sorted by (`originalTransaction`, `bucket`, `rowId`) into 1 buckets stored as
+ // orc LOCATION 'file:/warehouse/testminorcompaction/delete_delta_0000001_0000006_v0000009'
+ // TBLPROPERTIES ('compactiontable'='true', 'bucketing_version'='2', 'transactional'='false')
+ if (inputSplit instanceof OrcSplit) {
+ this.reader = createReaderFromFile(file, conf, offset, length, ((OrcSplit) inputSplit).isOriginal());
+ } else {
+ this.reader = createReaderFromFile(file, conf, offset, length);
+ }
+
this.stats = new SerDeStats();
}
@@ -327,6 +344,14 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
public static RecordReader createReaderFromFile(Reader file,
Configuration conf,
long offset, long length
+ ) throws IOException {
+ return createReaderFromFile(file, conf, offset, length, isOriginal(file));
+ }
+
+ public static RecordReader createReaderFromFile(Reader file,
+ Configuration conf,
+ long offset, long length,
+ boolean isOriginal
) throws IOException {
if (AcidUtils.isFullAcidScan(conf)) {
raiseAcidTablesMustBeReadWithAcidReaderException(conf);
@@ -339,7 +364,6 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
Reader.Options options = new Reader.Options(conf).range(offset, length);
options.schema(schema);
- boolean isOriginal = isOriginal(file);
if (schema == null) {
schema = file.getSchema();
}
@@ -1979,7 +2003,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
return new OrcRecordReader(OrcFile.createReader(
((FileSplit) inputSplit).getPath(),
readerOptions),
- conf, (FileSplit) inputSplit);
+ conf, inputSplit);
}
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewInputFormat.java
index 645f00602a9..acf600266d3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewInputFormat.java
@@ -54,7 +54,7 @@ public class OrcNewInputFormat extends InputFormat<NullWritable, OrcStruct>{
return new OrcRecordReader(OrcFile.createReader(path,
OrcFile.readerOptions(conf)),
ShimLoader.getHadoopShims().getConfiguration(context),
- fileSplit.getStart(), fileSplit.getLength());
+ fileSplit.getStart(), fileSplit.getLength(), inputSplit);
}
private static class OrcRecordReader
@@ -65,11 +65,15 @@ public class OrcNewInputFormat extends InputFormat<NullWritable, OrcStruct>{
private float progress = 0.0f;
OrcRecordReader(Reader file, Configuration conf,
- long offset, long length) throws IOException {
+ long offset, long length, InputSplit inputSplit) throws IOException {
numColumns = file.getSchema().getChildren().size();
value = new OrcStruct(numColumns);
- this.reader = OrcInputFormat.createReaderFromFile(file, conf, offset,
- length);
+ if (inputSplit instanceof OrcNewSplit) {
+ this.reader = OrcInputFormat.createReaderFromFile(file, conf, offset,
+ length, ((OrcNewSplit)inputSplit).isOriginal());
+ } else {
+ this.reader = OrcInputFormat.createReaderFromFile(file, conf, offset, length);
+ }
}
@Override