You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by lp...@apache.org on 2022/09/16 13:38:06 UTC
[hive] branch master updated: HIVE-26521: Iceberg: Raise exception when running delete/update statements on V1 tables (#3579) (Laszlo Pinter, reviewed by Adam Szita)
This is an automated email from the ASF dual-hosted git repository.
lpinter pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new f232be7879 HIVE-26521: Iceberg: Raise exception when running delete/update statements on V1 tables (#3579) (Laszlo Pinter, reviewed by Adam Szita)
f232be7879 is described below
commit f232be787943ef903218532e319b9df7d495f4c7
Author: László Pintér <47...@users.noreply.github.com>
AuthorDate: Fri Sep 16 15:37:59 2022 +0200
HIVE-26521: Iceberg: Raise exception when running delete/update statements on V1 tables (#3579) (Laszlo Pinter, reviewed by Adam Szita)
---
.../iceberg/mr/hive/HiveIcebergStorageHandler.java | 8 ++-
.../apache/iceberg/mr/hive/TestHiveIcebergV2.java | 35 ++++++++++++-
.../results/negative/merge_split_update_off.q.out | 2 +-
.../results/negative/update_split_update_off.q.out | 2 +-
.../llap/vectorized_iceberg_read_mixed.q.out | 58 ++++++++++------------
.../llap/vectorized_iceberg_read_orc.q.out | 58 ++++++++++------------
.../llap/vectorized_iceberg_read_parquet.q.out | 58 ++++++++++------------
.../positive/vectorized_iceberg_read_mixed.q.out | 30 +++++------
.../positive/vectorized_iceberg_read_orc.q.out | 30 +++++------
.../positive/vectorized_iceberg_read_parquet.q.out | 30 +++++------
.../org/apache/hadoop/hive/ql/io/AcidUtils.java | 4 +-
.../hive/ql/metadata/HiveStorageHandler.java | 11 ++--
.../HiveRelOptMaterializationValidator.java | 5 +-
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 2 +-
14 files changed, 168 insertions(+), 165 deletions(-)
diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index e7dcbf67ef..a050b0456a 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -574,8 +574,12 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
}
@Override
- public AcidSupportType supportsAcidOperations() {
- return AcidSupportType.WITHOUT_TRANSACTIONS;
+ public AcidSupportType supportsAcidOperations(org.apache.hadoop.hive.ql.metadata.Table table) {
+ if (table.getParameters() != null && "2".equals(table.getParameters().get(TableProperties.FORMAT_VERSION))) {
+ return AcidSupportType.WITHOUT_TRANSACTIONS;
+ }
+
+ return AcidSupportType.NONE;
}
@Override
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergV2.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergV2.java
index 016fea5a09..ed06496741 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergV2.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergV2.java
@@ -22,6 +22,7 @@ package org.apache.iceberg.mr.hive;
import java.io.IOException;
import java.util.List;
import java.util.stream.StreamSupport;
+import org.apache.iceberg.AssertHelpers;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.DeleteFile;
import org.apache.iceberg.FileFormat;
@@ -376,7 +377,8 @@ public class TestHiveIcebergV2 extends HiveIcebergStorageHandlerWithEngineBase {
Schema schema = new Schema(required(1, columnName, type));
List<Record> records = TestHelper.generateRandomRecords(schema, 1, 0L);
- Table table = testTables.createTable(shell, tableName, schema, fileFormat, records, 2);
+ Table table = testTables.createTable(shell, tableName, schema, PartitionSpec.unpartitioned(), fileFormat, records,
+ 2);
shell.executeStatement("DELETE FROM " + tableName);
HiveIcebergTestUtils.validateData(table, ImmutableList.of(), 0);
@@ -555,7 +557,8 @@ public class TestHiveIcebergV2 extends HiveIcebergStorageHandlerWithEngineBase {
Schema schema = new Schema(required(1, columnName, type));
List<Record> originalRecords = TestHelper.generateRandomRecords(schema, 1, 0L);
- Table table = testTables.createTable(shell, tableName, schema, fileFormat, originalRecords, 2);
+ Table table = testTables.createTable(shell, tableName, schema, PartitionSpec.unpartitioned(), fileFormat,
+ originalRecords, 2);
List<Record> newRecords = TestHelper.generateRandomRecords(schema, 1, 3L);
shell.executeStatement(testTables.getUpdateQuery(tableName, newRecords.get(0)));
@@ -563,6 +566,34 @@ public class TestHiveIcebergV2 extends HiveIcebergStorageHandlerWithEngineBase {
}
}
+ @Test
+ public void testDeleteStatementFormatV1() {
+ // create and insert an initial batch of records
+ testTables.createTable(shell, "customers", HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA,
+ PartitionSpec.unpartitioned(), fileFormat, HiveIcebergStorageHandlerTestUtils.OTHER_CUSTOMER_RECORDS_2);
+ // insert one more batch so that we have multiple data files within the same partition
+ shell.executeStatement(testTables.getInsertQuery(HiveIcebergStorageHandlerTestUtils.OTHER_CUSTOMER_RECORDS_1,
+ TableIdentifier.of("default", "customers"), false));
+ AssertHelpers.assertThrows("should throw exception", IllegalArgumentException.class,
+ "Attempt to do update or delete on table", () -> {
+ shell.executeStatement("DELETE FROM customers WHERE customer_id=3 or first_name='Joanna'");
+ });
+ }
+
+ @Test
+ public void testUpdateStatementFormatV1() {
+ // create and insert an initial batch of records
+ testTables.createTable(shell, "customers", HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA,
+ PartitionSpec.unpartitioned(), fileFormat, HiveIcebergStorageHandlerTestUtils.OTHER_CUSTOMER_RECORDS_2);
+ // insert one more batch so that we have multiple data files within the same partition
+ shell.executeStatement(testTables.getInsertQuery(HiveIcebergStorageHandlerTestUtils.OTHER_CUSTOMER_RECORDS_1,
+ TableIdentifier.of("default", "customers"), false));
+ AssertHelpers.assertThrows("should throw exception", IllegalArgumentException.class,
+ "Attempt to do update or delete on table", () -> {
+ shell.executeStatement("UPDATE customers SET last_name='Changed' WHERE customer_id=3 or first_name='Joanna'");
+ });
+ }
+
private static <T> PositionDelete<T> positionDelete(CharSequence path, long pos, T row) {
PositionDelete<T> positionDelete = PositionDelete.create();
return positionDelete.set(path, pos, row);
diff --git a/iceberg/iceberg-handler/src/test/results/negative/merge_split_update_off.q.out b/iceberg/iceberg-handler/src/test/results/negative/merge_split_update_off.q.out
index b89353d661..61be29b091 100644
--- a/iceberg/iceberg-handler/src/test/results/negative/merge_split_update_off.q.out
+++ b/iceberg/iceberg-handler/src/test/results/negative/merge_split_update_off.q.out
@@ -22,4 +22,4 @@ POSTHOOK: query: create external table test_merge_source (a int, b string, c int
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@test_merge_source
-FAILED: SemanticException [Error 10435]: Update and Merge into non-native ACID table is only supported when hive.split.update is true.
+FAILED: SemanticException [Error 10294]: Attempt to do update or delete using transaction manager that does not support these operations.
diff --git a/iceberg/iceberg-handler/src/test/results/negative/update_split_update_off.q.out b/iceberg/iceberg-handler/src/test/results/negative/update_split_update_off.q.out
index 270d6ee09b..1c1aa42be6 100644
--- a/iceberg/iceberg-handler/src/test/results/negative/update_split_update_off.q.out
+++ b/iceberg/iceberg-handler/src/test/results/negative/update_split_update_off.q.out
@@ -10,4 +10,4 @@ POSTHOOK: query: create external table test_update (id int, value string) stored
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@test_update
-FAILED: SemanticException [Error 10435]: Update and Merge into non-native ACID table is only supported when hive.split.update is true.
+FAILED: SemanticException [Error 10294]: Attempt to do update or delete using transaction manager that does not support these operations.
diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_mixed.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_mixed.q.out
index e173ea1c84..b3d9d7f5e8 100644
--- a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_mixed.q.out
+++ b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_mixed.q.out
@@ -69,24 +69,20 @@ STAGE PLANS:
TableScan
alias: tbl_ice_mixed
Statistics: Num rows: 19 Data size: 1748 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: a (type: int), b (type: string)
- outputColumnNames: a, b
- Statistics: Num rows: 19 Data size: 1748 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: max(a)
- keys: b (type: string)
- minReductionHashAggr: 0.4736842
- mode: hash
- outputColumnNames: _col0, _col1
+ Group By Operator
+ aggregations: max(a)
+ keys: b (type: string)
+ minReductionHashAggr: 0.4736842
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 10 Data size: 920 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 10 Data size: 920 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: z
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 10 Data size: 920 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: int)
+ value expressions: _col1 (type: int)
Execution mode: vectorized, llap
LLAP IO: all inputs (cache only)
Reducer 2
@@ -211,24 +207,20 @@ STAGE PLANS:
TableScan
alias: tbl_ice_mixed_all_types
Statistics: Num rows: 2 Data size: 746 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: t_float (type: float), t_double (type: double), t_boolean (type: boolean), t_int (type: int), t_bigint (type: bigint), t_binary (type: binary), t_string (type: string), t_timestamp (type: timestamp), t_date (type: date), t_decimal (type: decimal(4,2))
- outputColumnNames: t_float, t_double, t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal
+ Group By Operator
+ aggregations: max(t_float)
+ keys: t_double (type: double), t_boolean (type: boolean), t_int (type: int), t_bigint (type: bigint), t_binary (type: binary), t_string (type: string), t_timestamp (type: timestamp), t_date (type: date), t_decimal (type: decimal(4,2))
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
Statistics: Num rows: 2 Data size: 746 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: max(t_float)
- keys: t_double (type: double), t_boolean (type: boolean), t_int (type: int), t_bigint (type: bigint), t_binary (type: binary), t_string (type: string), t_timestamp (type: timestamp), t_date (type: date), t_decimal (type: decimal(4,2))
- minReductionHashAggr: 0.99
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Reduce Output Operator
+ key expressions: _col0 (type: double), _col1 (type: boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: string), _col6 (type: timestamp), _col7 (type: date), _col8 (type: decimal(4,2))
+ null sort order: zzzzzzzzz
+ sort order: +++++++++
+ Map-reduce partition columns: _col0 (type: double), _col1 (type: boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: string), _col6 (type: timestamp), _col7 (type: date), _col8 (type: decimal(4,2))
Statistics: Num rows: 2 Data size: 746 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: double), _col1 (type: boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: string), _col6 (type: timestamp), _col7 (type: date), _col8 (type: decimal(4,2))
- null sort order: zzzzzzzzz
- sort order: +++++++++
- Map-reduce partition columns: _col0 (type: double), _col1 (type: boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: string), _col6 (type: timestamp), _col7 (type: date), _col8 (type: decimal(4,2))
- Statistics: Num rows: 2 Data size: 746 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col9 (type: float)
+ value expressions: _col9 (type: float)
Execution mode: vectorized, llap
LLAP IO: all inputs (cache only)
Reducer 2
diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_orc.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_orc.q.out
index 6b2a8780d6..dbfa9b6c90 100644
--- a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_orc.q.out
+++ b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_orc.q.out
@@ -53,24 +53,20 @@ STAGE PLANS:
TableScan
alias: tbl_ice_orc
Statistics: Num rows: 10 Data size: 920 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: a (type: int), b (type: string)
- outputColumnNames: a, b
- Statistics: Num rows: 10 Data size: 920 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: max(a)
- keys: b (type: string)
- minReductionHashAggr: 0.5
- mode: hash
- outputColumnNames: _col0, _col1
+ Group By Operator
+ aggregations: max(a)
+ keys: b (type: string)
+ minReductionHashAggr: 0.5
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 5 Data size: 460 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 5 Data size: 460 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: z
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 5 Data size: 460 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: int)
+ value expressions: _col1 (type: int)
Execution mode: vectorized, llap
LLAP IO: all inputs (cache only)
Reducer 2
@@ -174,24 +170,20 @@ STAGE PLANS:
TableScan
alias: tbl_ice_orc_all_types
Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: t_float (type: float), t_double (type: double), t_boolean (type: boolean), t_int (type: int), t_bigint (type: bigint), t_binary (type: binary), t_string (type: string), t_timestamp (type: timestamp), t_date (type: date), t_decimal (type: decimal(4,2))
- outputColumnNames: t_float, t_double, t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal
+ Group By Operator
+ aggregations: max(t_float)
+ keys: t_double (type: double), t_boolean (type: boolean), t_int (type: int), t_bigint (type: bigint), t_binary (type: binary), t_string (type: string), t_timestamp (type: timestamp), t_date (type: date), t_decimal (type: decimal(4,2))
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: max(t_float)
- keys: t_double (type: double), t_boolean (type: boolean), t_int (type: int), t_bigint (type: bigint), t_binary (type: binary), t_string (type: string), t_timestamp (type: timestamp), t_date (type: date), t_decimal (type: decimal(4,2))
- minReductionHashAggr: 0.99
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Reduce Output Operator
+ key expressions: _col0 (type: double), _col1 (type: boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: string), _col6 (type: timestamp), _col7 (type: date), _col8 (type: decimal(4,2))
+ null sort order: zzzzzzzzz
+ sort order: +++++++++
+ Map-reduce partition columns: _col0 (type: double), _col1 (type: boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: string), _col6 (type: timestamp), _col7 (type: date), _col8 (type: decimal(4,2))
Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: double), _col1 (type: boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: string), _col6 (type: timestamp), _col7 (type: date), _col8 (type: decimal(4,2))
- null sort order: zzzzzzzzz
- sort order: +++++++++
- Map-reduce partition columns: _col0 (type: double), _col1 (type: boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: string), _col6 (type: timestamp), _col7 (type: date), _col8 (type: decimal(4,2))
- Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col9 (type: float)
+ value expressions: _col9 (type: float)
Execution mode: vectorized, llap
LLAP IO: all inputs (cache only)
Reducer 2
diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_parquet.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_parquet.q.out
index fce8eaa59f..ba31ed21cb 100644
--- a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_parquet.q.out
+++ b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_parquet.q.out
@@ -53,24 +53,20 @@ STAGE PLANS:
TableScan
alias: tbl_ice_parquet
Statistics: Num rows: 10 Data size: 920 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: a (type: int), b (type: string)
- outputColumnNames: a, b
- Statistics: Num rows: 10 Data size: 920 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: max(a)
- keys: b (type: string)
- minReductionHashAggr: 0.5
- mode: hash
- outputColumnNames: _col0, _col1
+ Group By Operator
+ aggregations: max(a)
+ keys: b (type: string)
+ minReductionHashAggr: 0.5
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 5 Data size: 460 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 5 Data size: 460 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: z
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 5 Data size: 460 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: int)
+ value expressions: _col1 (type: int)
Execution mode: vectorized, llap
LLAP IO: all inputs (cache only)
Reducer 2
@@ -174,24 +170,20 @@ STAGE PLANS:
TableScan
alias: tbl_ice_parquet_all_types
Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: t_float (type: float), t_double (type: double), t_boolean (type: boolean), t_int (type: int), t_bigint (type: bigint), t_binary (type: binary), t_string (type: string), t_timestamp (type: timestamp), t_date (type: date), t_decimal (type: decimal(4,2))
- outputColumnNames: t_float, t_double, t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal
+ Group By Operator
+ aggregations: max(t_float)
+ keys: t_double (type: double), t_boolean (type: boolean), t_int (type: int), t_bigint (type: bigint), t_binary (type: binary), t_string (type: string), t_timestamp (type: timestamp), t_date (type: date), t_decimal (type: decimal(4,2))
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: max(t_float)
- keys: t_double (type: double), t_boolean (type: boolean), t_int (type: int), t_bigint (type: bigint), t_binary (type: binary), t_string (type: string), t_timestamp (type: timestamp), t_date (type: date), t_decimal (type: decimal(4,2))
- minReductionHashAggr: 0.99
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Reduce Output Operator
+ key expressions: _col0 (type: double), _col1 (type: boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: string), _col6 (type: timestamp), _col7 (type: date), _col8 (type: decimal(4,2))
+ null sort order: zzzzzzzzz
+ sort order: +++++++++
+ Map-reduce partition columns: _col0 (type: double), _col1 (type: boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: string), _col6 (type: timestamp), _col7 (type: date), _col8 (type: decimal(4,2))
Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: double), _col1 (type: boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: string), _col6 (type: timestamp), _col7 (type: date), _col8 (type: decimal(4,2))
- null sort order: zzzzzzzzz
- sort order: +++++++++
- Map-reduce partition columns: _col0 (type: double), _col1 (type: boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: string), _col6 (type: timestamp), _col7 (type: date), _col8 (type: decimal(4,2))
- Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col9 (type: float)
+ value expressions: _col9 (type: float)
Execution mode: vectorized, llap
LLAP IO: all inputs (cache only)
Reducer 2
diff --git a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_mixed.q.out b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_mixed.q.out
index 2701494205..e6d990caa4 100644
--- a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_mixed.q.out
+++ b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_mixed.q.out
@@ -62,18 +62,16 @@ Stage-0
limit:-1
Stage-1
Reducer 2 vectorized
- File Output Operator [FS_11]
- Group By Operator [GBY_10] (rows=10 width=92)
+ File Output Operator [FS_10]
+ Group By Operator [GBY_9] (rows=10 width=92)
Output:["_col0","_col1"],aggregations:["max(VALUE._col0)"],keys:KEY._col0
<-Map 1 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_9]
+ SHUFFLE [RS_8]
PartitionCols:_col0
- Group By Operator [GBY_8] (rows=10 width=92)
+ Group By Operator [GBY_7] (rows=10 width=92)
Output:["_col0","_col1"],aggregations:["max(a)"],keys:b
- Select Operator [SEL_7] (rows=19 width=92)
- Output:["a","b"]
- TableScan [TS_0] (rows=19 width=92)
- default@tbl_ice_mixed,tbl_ice_mixed,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b"]
+ TableScan [TS_0] (rows=19 width=92)
+ default@tbl_ice_mixed,tbl_ice_mixed,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b"]
PREHOOK: query: select b, max(a) from tbl_ice_mixed group by b
PREHOOK: type: QUERY
@@ -167,20 +165,18 @@ Stage-0
limit:-1
Stage-1
Reducer 2 vectorized
- File Output Operator [FS_12]
- Select Operator [SEL_11] (rows=2 width=373)
+ File Output Operator [FS_11]
+ Select Operator [SEL_10] (rows=2 width=373)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"]
- Group By Operator [GBY_10] (rows=2 width=373)
+ Group By Operator [GBY_9] (rows=2 width=373)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["max(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8
<-Map 1 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_9]
+ SHUFFLE [RS_8]
PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Group By Operator [GBY_8] (rows=2 width=373)
+ Group By Operator [GBY_7] (rows=2 width=373)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["max(t_float)"],keys:t_double, t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal
- Select Operator [SEL_7] (rows=2 width=373)
- Output:["t_float","t_double","t_boolean","t_int","t_bigint","t_binary","t_string","t_timestamp","t_date","t_decimal"]
- TableScan [TS_0] (rows=2 width=373)
- default@tbl_ice_mixed_all_types,tbl_ice_mixed_all_types,Tbl:COMPLETE,Col:COMPLETE,Output:["t_float","t_double","t_boolean","t_int","t_bigint","t_binary","t_string","t_timestamp","t_date","t_decimal"]
+ TableScan [TS_0] (rows=2 width=373)
+ default@tbl_ice_mixed_all_types,tbl_ice_mixed_all_types,Tbl:COMPLETE,Col:COMPLETE,Output:["t_float","t_double","t_boolean","t_int","t_bigint","t_binary","t_string","t_timestamp","t_date","t_decimal"]
PREHOOK: query: select max(t_float), t_double, t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal from tbl_ice_mixed_all_types
group by t_double, t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal
diff --git a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_orc.q.out b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_orc.q.out
index e6b1ceb69b..91c7bb2c54 100644
--- a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_orc.q.out
+++ b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_orc.q.out
@@ -46,18 +46,16 @@ Stage-0
limit:-1
Stage-1
Reducer 2 vectorized
- File Output Operator [FS_11]
- Group By Operator [GBY_10] (rows=5 width=92)
+ File Output Operator [FS_10]
+ Group By Operator [GBY_9] (rows=5 width=92)
Output:["_col0","_col1"],aggregations:["max(VALUE._col0)"],keys:KEY._col0
<-Map 1 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_9]
+ SHUFFLE [RS_8]
PartitionCols:_col0
- Group By Operator [GBY_8] (rows=5 width=92)
+ Group By Operator [GBY_7] (rows=5 width=92)
Output:["_col0","_col1"],aggregations:["max(a)"],keys:b
- Select Operator [SEL_7] (rows=10 width=92)
- Output:["a","b"]
- TableScan [TS_0] (rows=10 width=92)
- default@tbl_ice_orc,tbl_ice_orc,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b"]
+ TableScan [TS_0] (rows=10 width=92)
+ default@tbl_ice_orc,tbl_ice_orc,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b"]
PREHOOK: query: select b, max(a) from tbl_ice_orc group by b
PREHOOK: type: QUERY
@@ -130,20 +128,18 @@ Stage-0
limit:-1
Stage-1
Reducer 2 vectorized
- File Output Operator [FS_12]
- Select Operator [SEL_11] (rows=1 width=372)
+ File Output Operator [FS_11]
+ Select Operator [SEL_10] (rows=1 width=372)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"]
- Group By Operator [GBY_10] (rows=1 width=372)
+ Group By Operator [GBY_9] (rows=1 width=372)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["max(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8
<-Map 1 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_9]
+ SHUFFLE [RS_8]
PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Group By Operator [GBY_8] (rows=1 width=372)
+ Group By Operator [GBY_7] (rows=1 width=372)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["max(t_float)"],keys:t_double, t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal
- Select Operator [SEL_7] (rows=1 width=372)
- Output:["t_float","t_double","t_boolean","t_int","t_bigint","t_binary","t_string","t_timestamp","t_date","t_decimal"]
- TableScan [TS_0] (rows=1 width=372)
- default@tbl_ice_orc_all_types,tbl_ice_orc_all_types,Tbl:COMPLETE,Col:COMPLETE,Output:["t_float","t_double","t_boolean","t_int","t_bigint","t_binary","t_string","t_timestamp","t_date","t_decimal"]
+ TableScan [TS_0] (rows=1 width=372)
+ default@tbl_ice_orc_all_types,tbl_ice_orc_all_types,Tbl:COMPLETE,Col:COMPLETE,Output:["t_float","t_double","t_boolean","t_int","t_bigint","t_binary","t_string","t_timestamp","t_date","t_decimal"]
PREHOOK: query: select max(t_float), t_double, t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal from tbl_ice_orc_all_types
group by t_double, t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal
diff --git a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_parquet.q.out b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_parquet.q.out
index 34faa886d7..43da19476b 100644
--- a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_parquet.q.out
+++ b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_parquet.q.out
@@ -46,18 +46,16 @@ Stage-0
limit:-1
Stage-1
Reducer 2 vectorized
- File Output Operator [FS_11]
- Group By Operator [GBY_10] (rows=5 width=92)
+ File Output Operator [FS_10]
+ Group By Operator [GBY_9] (rows=5 width=92)
Output:["_col0","_col1"],aggregations:["max(VALUE._col0)"],keys:KEY._col0
<-Map 1 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_9]
+ SHUFFLE [RS_8]
PartitionCols:_col0
- Group By Operator [GBY_8] (rows=5 width=92)
+ Group By Operator [GBY_7] (rows=5 width=92)
Output:["_col0","_col1"],aggregations:["max(a)"],keys:b
- Select Operator [SEL_7] (rows=10 width=92)
- Output:["a","b"]
- TableScan [TS_0] (rows=10 width=92)
- default@tbl_ice_parquet,tbl_ice_parquet,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b"]
+ TableScan [TS_0] (rows=10 width=92)
+ default@tbl_ice_parquet,tbl_ice_parquet,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b"]
PREHOOK: query: select b, max(a) from tbl_ice_parquet group by b
PREHOOK: type: QUERY
@@ -130,20 +128,18 @@ Stage-0
limit:-1
Stage-1
Reducer 2 vectorized
- File Output Operator [FS_12]
- Select Operator [SEL_11] (rows=1 width=372)
+ File Output Operator [FS_11]
+ Select Operator [SEL_10] (rows=1 width=372)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"]
- Group By Operator [GBY_10] (rows=1 width=372)
+ Group By Operator [GBY_9] (rows=1 width=372)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["max(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8
<-Map 1 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_9]
+ SHUFFLE [RS_8]
PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Group By Operator [GBY_8] (rows=1 width=372)
+ Group By Operator [GBY_7] (rows=1 width=372)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["max(t_float)"],keys:t_double, t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal
- Select Operator [SEL_7] (rows=1 width=372)
- Output:["t_float","t_double","t_boolean","t_int","t_bigint","t_binary","t_string","t_timestamp","t_date","t_decimal"]
- TableScan [TS_0] (rows=1 width=372)
- default@tbl_ice_parquet_all_types,tbl_ice_parquet_all_types,Tbl:COMPLETE,Col:COMPLETE,Output:["t_float","t_double","t_boolean","t_int","t_bigint","t_binary","t_string","t_timestamp","t_date","t_decimal"]
+ TableScan [TS_0] (rows=1 width=372)
+ default@tbl_ice_parquet_all_types,tbl_ice_parquet_all_types,Tbl:COMPLETE,Col:COMPLETE,Output:["t_float","t_double","t_boolean","t_int","t_bigint","t_binary","t_string","t_timestamp","t_date","t_decimal"]
PREHOOK: query: select max(t_float), t_double, t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal from tbl_ice_parquet_all_types
group by t_double, t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
index 4582878817..ca2516123c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
@@ -3364,7 +3364,7 @@ public class AcidUtils {
public static boolean isNonNativeAcidTable(Table table) {
return table != null && table.getStorageHandler() != null &&
- table.getStorageHandler().supportsAcidOperations() != HiveStorageHandler.AcidSupportType.NONE;
+ table.getStorageHandler().supportsAcidOperations(table) != HiveStorageHandler.AcidSupportType.NONE;
}
/**
@@ -3386,7 +3386,7 @@ public class AcidUtils {
public static boolean acidTableWithoutTransactions(Table table) {
return table != null && table.getStorageHandler() != null &&
- table.getStorageHandler().supportsAcidOperations() == HiveStorageHandler.AcidSupportType.WITHOUT_TRANSACTIONS;
+ table.getStorageHandler().supportsAcidOperations(table) == HiveStorageHandler.AcidSupportType.WITHOUT_TRANSACTIONS;
}
static class DirInfoValue {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
index 65e458cc65..bdfdf3fde3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
@@ -291,7 +291,7 @@ public interface HiveStorageHandler extends Configurable {
*
* @return the table's ACID support type
*/
- default AcidSupportType supportsAcidOperations() {
+ default AcidSupportType supportsAcidOperations(org.apache.hadoop.hive.ql.metadata.Table table) {
return AcidSupportType.NONE;
}
@@ -299,7 +299,8 @@ public interface HiveStorageHandler extends Configurable {
* Specifies which additional virtual columns should be added to the virtual column registry during compilation
* for tables that support ACID operations.
*
- * Should only return a non-empty list if {@link HiveStorageHandler#supportsAcidOperations()} ()} returns something
+ * Should only return a non-empty list if
+ * {@link HiveStorageHandler#supportsAcidOperations(org.apache.hadoop.hive.ql.metadata.Table)} returns something
* other NONE.
*
* @return the list of ACID virtual columns
@@ -318,7 +319,8 @@ public interface HiveStorageHandler extends Configurable {
*
* This method specifies which columns should be injected into the <selectCols> part of the rewritten query.
*
- * Should only return a non-empty list if {@link HiveStorageHandler#supportsAcidOperations()} returns something
+ * Should only return a non-empty list if
+ * {@link HiveStorageHandler#supportsAcidOperations(org.apache.hadoop.hive.ql.metadata.Table)} returns something
* other NONE.
*
* @param table the table which is being deleted/updated/merged into
@@ -336,7 +338,8 @@ public interface HiveStorageHandler extends Configurable {
*
* This method specifies which columns should be injected into the <sortCols> part of the rewritten query.
*
- * Should only return a non-empty list if {@link HiveStorageHandler#supportsAcidOperations()} returns something
+ * Should only return a non-empty list if
+ * {@link HiveStorageHandler#supportsAcidOperations(org.apache.hadoop.hive.ql.metadata.Table)} returns something
* other NONE.
*
* @param table the table which is being deleted/updated/merged into
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptMaterializationValidator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptMaterializationValidator.java
index 3897a2912a..d8f0a3041c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptMaterializationValidator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptMaterializationValidator.java
@@ -89,8 +89,9 @@ public class HiveRelOptMaterializationValidator extends HiveRelShuttleImpl {
if (tab.isTemporary()) {
fail(tab.getTableName() + " is a temporary table");
}
- if (tab.getTableType() == TableType.EXTERNAL_TABLE && !AcidUtils.isNonNativeAcidTable(tab)) {
- fail(tab.getFullyQualifiedName() + " is an external table");
+ if (tab.getTableType() == TableType.EXTERNAL_TABLE &&
+ !(tab.getStorageHandler() != null && tab.getStorageHandler().areSnapshotsSupported())) {
+ fail(tab.getFullyQualifiedName() + " is an external table and does not support snapshots");
}
return hiveScan;
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 3e12bf4efd..2458cbb096 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -14222,7 +14222,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
}
if (AcidUtils.isTransactionalTable(table)) {
++nativeAcidCount;
- } else if (AcidUtils.isNonNativeAcidTable(table) && table.getStorageHandler().areSnapshotsSupported()) {
+ } else if (table.isNonNative() && table.getStorageHandler().areSnapshotsSupported()) {
++supportsSnapshotCount;
} else {
throw new SemanticException("Automatic rewriting for materialized view cannot "