You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by wz...@apache.org on 2022/12/16 21:01:15 UTC
[impala] branch master updated: IMPALA-11708: Add support for mixed Iceberg tables with AVRO file format
This is an automated email from the ASF dual-hosted git repository.
wzhou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new 390a93206 IMPALA-11708: Add support for mixed Iceberg tables with AVRO file format
390a93206 is described below
commit 390a932064ba48c44e70e431f2c20e53beddd970
Author: noemi <np...@cloudera.com>
AuthorDate: Wed Sep 21 19:07:59 2022 +0200
IMPALA-11708: Add support for mixed Iceberg tables with AVRO file format
This patch extends the support for Iceberg tables containing multiple
file formats. AVRO data files can now also be read in a mixed table,
alongside Parquet and ORC files.
Impala uses its Avro scanner to read AVRO files, therefore all the
Avro-related limitations apply here as well: writes and metadata
changes are not supported.
Testing:
- E2E testing: extended 'iceberg-mixed-file-format.test' to include
AVRO files as well, in order to test reading all three currently
supported file formats: AVRO, ORC and Parquet.
Change-Id: I941adfb659218283eb5fec1b394bb3003f8072a6
Reviewed-on: http://gerrit.cloudera.org:8080/19353
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
be/src/exec/hdfs-scan-node-base.cc | 22 ++-
.../org/apache/impala/planner/IcebergScanNode.java | 11 +-
...19aad52ced-job_16629766502890_0016-1-00001.avro | Bin 723 -> 0 bytes
...24d958ebc7-job_16629766502890_0017-1-00001.avro | Bin 723 -> 0 bytes
...bec36d7a93c-job_16629766502890_0018-1-00001.orc | Bin 534 -> 0 bytes
...7912ad1-job_16629766502890_0019-1-00001.parquet | Bin 1109 -> 0 bytes
.../13c55017-b018-4ccb-a407-08e37e28eec8-m0.avro | Bin 3240 -> 0 bytes
.../7b422180-e3f8-4500-b240-1424ef012246-m0.avro | Bin 3240 -> 0 bytes
.../80a79f8a-5a47-44c9-b16d-4bef4a5ecec3-m0.avro | Bin 3332 -> 0 bytes
.../8e66c338-5cd3-4b85-b986-18ec29b67d94-m0.avro | Bin 3323 -> 0 bytes
...058-1-8e66c338-5cd3-4b85-b986-18ec29b67d94.avro | Bin 2158 -> 0 bytes
...787-1-13c55017-b018-4ccb-a407-08e37e28eec8.avro | Bin 2009 -> 0 bytes
...034-1-7b422180-e3f8-4500-b240-1424ef012246.avro | Bin 2009 -> 0 bytes
...133-1-80a79f8a-5a47-44c9-b16d-4bef4a5ecec3.avro | Bin 2308 -> 0 bytes
.../iceberg_avro_mixed/metadata/v1.metadata.json | 82 ----------
.../iceberg_avro_mixed/metadata/v2.metadata.json | 83 ----------
.../iceberg_avro_mixed/metadata/v3.metadata.json | 83 ----------
.../iceberg_avro_mixed/metadata/v4.metadata.json | 112 --------------
.../iceberg_avro_mixed/metadata/v5.metadata.json | 112 --------------
.../iceberg_avro_mixed/metadata/v6.metadata.json | 117 --------------
.../iceberg_avro_mixed/metadata/v7.metadata.json | 142 -----------------
.../iceberg_avro_mixed/metadata/v8.metadata.json | 145 ------------------
.../iceberg_avro_mixed/metadata/v9.metadata.json | 170 ---------------------
.../iceberg_avro_mixed/metadata/version-hint.txt | 1 -
...cd56b94b46-job_16629766502890_0015-1-00001.avro | Bin 604 -> 0 bytes
.../a9f8d35c-a852-49fe-996a-d94ae1896c32-m0.avro | Bin 3239 -> 0 bytes
...732-1-a9f8d35c-a852-49fe-996a-d94ae1896c32.avro | Bin 2008 -> 0 bytes
.../iceberg_avro_only/metadata/v1.metadata.json | 72 ---------
.../iceberg_avro_only/metadata/v2.metadata.json | 101 ------------
.../iceberg_avro_only/metadata/version-hint.txt | 1 -
.../ice/iceberg_avro_only/version-hint.txt | 1 -
.../functional/functional_schema_template.sql | 41 +++--
.../datasets/functional/schema_constraints.csv | 4 +-
.../queries/QueryTest/iceberg-avro.test | 10 +-
.../QueryTest/iceberg-mixed-file-format.test | 11 +-
.../queries/QueryTest/iceberg-negative.test | 5 -
tests/query_test/test_iceberg.py | 2 -
37 files changed, 53 insertions(+), 1275 deletions(-)
diff --git a/be/src/exec/hdfs-scan-node-base.cc b/be/src/exec/hdfs-scan-node-base.cc
index a6fe8405d..123bfc250 100644
--- a/be/src/exec/hdfs-scan-node-base.cc
+++ b/be/src/exec/hdfs-scan-node-base.cc
@@ -241,6 +241,7 @@ Status HdfsScanPlanNode::Init(const TPlanNode& tnode, FragmentState* state) {
Status HdfsScanPlanNode::ProcessScanRangesAndInitSharedState(FragmentState* state) {
// Initialize the template tuple pool.
+ using namespace org::apache::impala::fb;
shared_state_.template_pool_.reset(new MemPool(state->query_mem_tracker()));
auto& template_tuple_map_ = shared_state_.partition_template_tuple_map_;
ObjectPool* obj_pool = shared_state_.obj_pool();
@@ -301,8 +302,27 @@ Status HdfsScanPlanNode::ProcessScanRangesAndInitSharedState(FragmentState* stat
file_desc->mtime = split.mtime();
file_desc->file_compression = CompressionTypePBToThrift(split.file_compression());
file_desc->is_erasure_coded = split.is_erasure_coded();
- file_desc->file_format = partition_desc->file_format();
file_desc->file_metadata = file_metadata;
+ if (file_metadata) {
+ DCHECK(file_metadata->iceberg_metadata() != nullptr);
+ switch (file_metadata->iceberg_metadata()->file_format()) {
+ case FbIcebergDataFileFormat::FbIcebergDataFileFormat_PARQUET:
+ file_desc->file_format = THdfsFileFormat::PARQUET;
+ break;
+ case FbIcebergDataFileFormat::FbIcebergDataFileFormat_ORC:
+ file_desc->file_format = THdfsFileFormat::ORC;
+ break;
+ case FbIcebergDataFileFormat::FbIcebergDataFileFormat_AVRO:
+ file_desc->file_format = THdfsFileFormat::AVRO;
+ break;
+ default:
+ return Status(Substitute(
+ "Unknown Iceberg file format type: $0",
+ file_metadata->iceberg_metadata()->file_format()));
+ }
+ } else {
+ file_desc->file_format = partition_desc->file_format();
+ }
RETURN_IF_ERROR(HdfsFsCache::instance()->GetConnection(
native_file_path, &file_desc->fs, &fs_cache));
shared_state_.per_type_files_[partition_desc->file_format()].push_back(file_desc);
diff --git a/fe/src/main/java/org/apache/impala/planner/IcebergScanNode.java b/fe/src/main/java/org/apache/impala/planner/IcebergScanNode.java
index 9325b3b29..4551355d5 100644
--- a/fe/src/main/java/org/apache/impala/planner/IcebergScanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/IcebergScanNode.java
@@ -89,16 +89,7 @@ public class IcebergScanNode extends HdfsScanNode {
}
if (hasParquet) fileFormats_.add(HdfsFileFormat.PARQUET);
if (hasOrc) fileFormats_.add(HdfsFileFormat.ORC);
-
- //TODO IMPALA-11708: Currently mixed file format Iceberg tables containing AVRO files
- // cannot be read.
- if (hasAvro) {
- fileFormats_.add(HdfsFileFormat.AVRO);
- if (hasOrc || hasParquet) {
- throw new ImpalaRuntimeException("Iceberg tables containing multiple file "
- + "formats are only supported if they do not contain AVRO files.");
- }
- }
+ if (hasAvro) fileFormats_.add(HdfsFileFormat.AVRO);
}
/**
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/data/00000-0-data-noemi_20221026130844_b228ff88-5625-494b-b27a-7819aad52ced-job_16629766502890_0016-1-00001.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/data/00000-0-data-noemi_20221026130844_b228ff88-5625-494b-b27a-7819aad52ced-job_16629766502890_0016-1-00001.avro
deleted file mode 100644
index 5c71b0b7b..000000000
Binary files a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/data/00000-0-data-noemi_20221026130844_b228ff88-5625-494b-b27a-7819aad52ced-job_16629766502890_0016-1-00001.avro and /dev/null differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/data/00000-0-data-noemi_20221028111610_c7e89043-49e0-40fe-95a5-bf24d958ebc7-job_16629766502890_0017-1-00001.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/data/00000-0-data-noemi_20221028111610_c7e89043-49e0-40fe-95a5-bf24d958ebc7-job_16629766502890_0017-1-00001.avro
deleted file mode 100644
index 623b428f4..000000000
Binary files a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/data/00000-0-data-noemi_20221028111610_c7e89043-49e0-40fe-95a5-bf24d958ebc7-job_16629766502890_0017-1-00001.avro and /dev/null differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/data/00000-0-data-noemi_20221028113321_fbfa5f31-421d-406a-9d46-6bec36d7a93c-job_16629766502890_0018-1-00001.orc b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/data/00000-0-data-noemi_20221028113321_fbfa5f31-421d-406a-9d46-6bec36d7a93c-job_16629766502890_0018-1-00001.orc
deleted file mode 100644
index 09837e843..000000000
Binary files a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/data/00000-0-data-noemi_20221028113321_fbfa5f31-421d-406a-9d46-6bec36d7a93c-job_16629766502890_0018-1-00001.orc and /dev/null differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/data/00000-0-data-noemi_20221028114730_e2f7d99d-7ad8-478c-a814-19e2d7912ad1-job_16629766502890_0019-1-00001.parquet b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/data/00000-0-data-noemi_20221028114730_e2f7d99d-7ad8-478c-a814-19e2d7912ad1-job_16629766502890_0019-1-00001.parquet
deleted file mode 100644
index d927f7882..000000000
Binary files a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/data/00000-0-data-noemi_20221028114730_e2f7d99d-7ad8-478c-a814-19e2d7912ad1-job_16629766502890_0019-1-00001.parquet and /dev/null differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/13c55017-b018-4ccb-a407-08e37e28eec8-m0.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/13c55017-b018-4ccb-a407-08e37e28eec8-m0.avro
deleted file mode 100644
index b799ffed5..000000000
Binary files a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/13c55017-b018-4ccb-a407-08e37e28eec8-m0.avro and /dev/null differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/7b422180-e3f8-4500-b240-1424ef012246-m0.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/7b422180-e3f8-4500-b240-1424ef012246-m0.avro
deleted file mode 100644
index e09e5242f..000000000
Binary files a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/7b422180-e3f8-4500-b240-1424ef012246-m0.avro and /dev/null differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/80a79f8a-5a47-44c9-b16d-4bef4a5ecec3-m0.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/80a79f8a-5a47-44c9-b16d-4bef4a5ecec3-m0.avro
deleted file mode 100644
index bde2cf8b7..000000000
Binary files a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/80a79f8a-5a47-44c9-b16d-4bef4a5ecec3-m0.avro and /dev/null differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/8e66c338-5cd3-4b85-b986-18ec29b67d94-m0.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/8e66c338-5cd3-4b85-b986-18ec29b67d94-m0.avro
deleted file mode 100644
index c74e2096e..000000000
Binary files a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/8e66c338-5cd3-4b85-b986-18ec29b67d94-m0.avro and /dev/null differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/snap-1131576191504541058-1-8e66c338-5cd3-4b85-b986-18ec29b67d94.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/snap-1131576191504541058-1-8e66c338-5cd3-4b85-b986-18ec29b67d94.avro
deleted file mode 100644
index 8616a6aaf..000000000
Binary files a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/snap-1131576191504541058-1-8e66c338-5cd3-4b85-b986-18ec29b67d94.avro and /dev/null differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/snap-1744181916149214787-1-13c55017-b018-4ccb-a407-08e37e28eec8.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/snap-1744181916149214787-1-13c55017-b018-4ccb-a407-08e37e28eec8.avro
deleted file mode 100644
index f0f2d3b07..000000000
Binary files a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/snap-1744181916149214787-1-13c55017-b018-4ccb-a407-08e37e28eec8.avro and /dev/null differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/snap-3243718219085059034-1-7b422180-e3f8-4500-b240-1424ef012246.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/snap-3243718219085059034-1-7b422180-e3f8-4500-b240-1424ef012246.avro
deleted file mode 100644
index 84d945b5f..000000000
Binary files a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/snap-3243718219085059034-1-7b422180-e3f8-4500-b240-1424ef012246.avro and /dev/null differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/snap-5089000375160183133-1-80a79f8a-5a47-44c9-b16d-4bef4a5ecec3.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/snap-5089000375160183133-1-80a79f8a-5a47-44c9-b16d-4bef4a5ecec3.avro
deleted file mode 100644
index 81bad5b85..000000000
Binary files a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/snap-5089000375160183133-1-80a79f8a-5a47-44c9-b16d-4bef4a5ecec3.avro and /dev/null differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/v1.metadata.json b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/v1.metadata.json
deleted file mode 100644
index af0442032..000000000
--- a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/v1.metadata.json
+++ /dev/null
@@ -1,82 +0,0 @@
-{
- "format-version" : 1,
- "table-uuid" : "a7acd8de-5471-4015-88aa-d861d75f7967",
- "location" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed",
- "last-updated-ms" : 1666948270383,
- "last-column-id" : 4,
- "schema" : {
- "type" : "struct",
- "schema-id" : 0,
- "fields" : [ {
- "id" : 1,
- "name" : "int_col",
- "required" : false,
- "type" : "int"
- }, {
- "id" : 2,
- "name" : "string_col",
- "required" : false,
- "type" : "string"
- }, {
- "id" : 3,
- "name" : "double_col",
- "required" : false,
- "type" : "double"
- }, {
- "id" : 4,
- "name" : "bool_col",
- "required" : false,
- "type" : "boolean"
- } ]
- },
- "current-schema-id" : 0,
- "schemas" : [ {
- "type" : "struct",
- "schema-id" : 0,
- "fields" : [ {
- "id" : 1,
- "name" : "int_col",
- "required" : false,
- "type" : "int"
- }, {
- "id" : 2,
- "name" : "string_col",
- "required" : false,
- "type" : "string"
- }, {
- "id" : 3,
- "name" : "double_col",
- "required" : false,
- "type" : "double"
- }, {
- "id" : 4,
- "name" : "bool_col",
- "required" : false,
- "type" : "boolean"
- } ]
- } ],
- "partition-spec" : [ ],
- "default-spec-id" : 0,
- "partition-specs" : [ {
- "spec-id" : 0,
- "fields" : [ ]
- } ],
- "last-partition-id" : 999,
- "default-sort-order-id" : 0,
- "sort-orders" : [ {
- "order-id" : 0,
- "fields" : [ ]
- } ],
- "properties" : {
- "engine.hive.enabled" : "true",
- "bucketing_version" : "2",
- "EXTERNAL" : "TRUE",
- "serialization.format" : "1",
- "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler"
- },
- "current-snapshot-id" : -1,
- "refs" : { },
- "snapshots" : [ ],
- "snapshot-log" : [ ],
- "metadata-log" : [ ]
-}
\ No newline at end of file
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/v2.metadata.json b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/v2.metadata.json
deleted file mode 100644
index 611e1e9b5..000000000
--- a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/v2.metadata.json
+++ /dev/null
@@ -1,83 +0,0 @@
-{
- "format-version" : 1,
- "table-uuid" : "c84d8735-8499-4240-aeaf-63a1e58a7c9a",
- "location" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed",
- "last-updated-ms" : 1666782261541,
- "last-column-id" : 4,
- "schema" : {
- "type" : "struct",
- "schema-id" : 0,
- "fields" : [ {
- "id" : 1,
- "name" : "int_col",
- "required" : false,
- "type" : "int"
- }, {
- "id" : 2,
- "name" : "string_col",
- "required" : false,
- "type" : "string"
- }, {
- "id" : 3,
- "name" : "double_col",
- "required" : false,
- "type" : "double"
- }, {
- "id" : 4,
- "name" : "bool_col",
- "required" : false,
- "type" : "boolean"
- } ]
- },
- "current-schema-id" : 0,
- "schemas" : [ {
- "type" : "struct",
- "schema-id" : 0,
- "fields" : [ {
- "id" : 1,
- "name" : "int_col",
- "required" : false,
- "type" : "int"
- }, {
- "id" : 2,
- "name" : "string_col",
- "required" : false,
- "type" : "string"
- }, {
- "id" : 3,
- "name" : "double_col",
- "required" : false,
- "type" : "double"
- }, {
- "id" : 4,
- "name" : "bool_col",
- "required" : false,
- "type" : "boolean"
- } ]
- } ],
- "partition-spec" : [ ],
- "default-spec-id" : 0,
- "partition-specs" : [ {
- "spec-id" : 0,
- "fields" : [ ]
- } ],
- "last-partition-id" : 999,
- "default-sort-order-id" : 0,
- "sort-orders" : [ {
- "order-id" : 0,
- "fields" : [ ]
- } ],
- "properties" : {
- "engine.hive.enabled" : "true",
- "EXTERNAL" : "TRUE",
- "write.format.default" : "avro",
- "bucketing_version" : "2",
- "serialization.format" : "1",
- "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler"
- },
- "current-snapshot-id" : -1,
- "refs" : { },
- "snapshots" : [ ],
- "snapshot-log" : [ ],
- "metadata-log" : [ ]
-}
\ No newline at end of file
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/v3.metadata.json b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/v3.metadata.json
deleted file mode 100644
index 6fad5e1dd..000000000
--- a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/v3.metadata.json
+++ /dev/null
@@ -1,83 +0,0 @@
-{
- "format-version" : 1,
- "table-uuid" : "ab152578-db4a-493a-8796-5dc9e3e36004",
- "location" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed",
- "last-updated-ms" : 1666948531175,
- "last-column-id" : 4,
- "schema" : {
- "type" : "struct",
- "schema-id" : 0,
- "fields" : [ {
- "id" : 1,
- "name" : "int_col",
- "required" : false,
- "type" : "int"
- }, {
- "id" : 2,
- "name" : "string_col",
- "required" : false,
- "type" : "string"
- }, {
- "id" : 3,
- "name" : "double_col",
- "required" : false,
- "type" : "double"
- }, {
- "id" : 4,
- "name" : "bool_col",
- "required" : false,
- "type" : "boolean"
- } ]
- },
- "current-schema-id" : 0,
- "schemas" : [ {
- "type" : "struct",
- "schema-id" : 0,
- "fields" : [ {
- "id" : 1,
- "name" : "int_col",
- "required" : false,
- "type" : "int"
- }, {
- "id" : 2,
- "name" : "string_col",
- "required" : false,
- "type" : "string"
- }, {
- "id" : 3,
- "name" : "double_col",
- "required" : false,
- "type" : "double"
- }, {
- "id" : 4,
- "name" : "bool_col",
- "required" : false,
- "type" : "boolean"
- } ]
- } ],
- "partition-spec" : [ ],
- "default-spec-id" : 0,
- "partition-specs" : [ {
- "spec-id" : 0,
- "fields" : [ ]
- } ],
- "last-partition-id" : 999,
- "default-sort-order-id" : 0,
- "sort-orders" : [ {
- "order-id" : 0,
- "fields" : [ ]
- } ],
- "properties" : {
- "engine.hive.enabled" : "true",
- "EXTERNAL" : "TRUE",
- "write.format.default" : "avro",
- "bucketing_version" : "2",
- "serialization.format" : "1",
- "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler"
- },
- "current-snapshot-id" : -1,
- "refs" : { },
- "snapshots" : [ ],
- "snapshot-log" : [ ],
- "metadata-log" : [ ]
-}
\ No newline at end of file
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/v4.metadata.json b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/v4.metadata.json
deleted file mode 100644
index 782a388ac..000000000
--- a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/v4.metadata.json
+++ /dev/null
@@ -1,112 +0,0 @@
-{
- "format-version" : 1,
- "table-uuid" : "ab152578-db4a-493a-8796-5dc9e3e36004",
- "location" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed",
- "last-updated-ms" : 1666948579101,
- "last-column-id" : 4,
- "schema" : {
- "type" : "struct",
- "schema-id" : 0,
- "fields" : [ {
- "id" : 1,
- "name" : "int_col",
- "required" : false,
- "type" : "int"
- }, {
- "id" : 2,
- "name" : "string_col",
- "required" : false,
- "type" : "string"
- }, {
- "id" : 3,
- "name" : "double_col",
- "required" : false,
- "type" : "double"
- }, {
- "id" : 4,
- "name" : "bool_col",
- "required" : false,
- "type" : "boolean"
- } ]
- },
- "current-schema-id" : 0,
- "schemas" : [ {
- "type" : "struct",
- "schema-id" : 0,
- "fields" : [ {
- "id" : 1,
- "name" : "int_col",
- "required" : false,
- "type" : "int"
- }, {
- "id" : 2,
- "name" : "string_col",
- "required" : false,
- "type" : "string"
- }, {
- "id" : 3,
- "name" : "double_col",
- "required" : false,
- "type" : "double"
- }, {
- "id" : 4,
- "name" : "bool_col",
- "required" : false,
- "type" : "boolean"
- } ]
- } ],
- "partition-spec" : [ ],
- "default-spec-id" : 0,
- "partition-specs" : [ {
- "spec-id" : 0,
- "fields" : [ ]
- } ],
- "last-partition-id" : 999,
- "default-sort-order-id" : 0,
- "sort-orders" : [ {
- "order-id" : 0,
- "fields" : [ ]
- } ],
- "properties" : {
- "engine.hive.enabled" : "true",
- "EXTERNAL" : "TRUE",
- "write.format.default" : "avro",
- "bucketing_version" : "2",
- "serialization.format" : "1",
- "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler"
- },
- "current-snapshot-id" : 3243718219085059034,
- "refs" : {
- "main" : {
- "snapshot-id" : 3243718219085059034,
- "type" : "branch"
- }
- },
- "snapshots" : [ {
- "snapshot-id" : 3243718219085059034,
- "timestamp-ms" : 1666948579101,
- "summary" : {
- "operation" : "append",
- "added-data-files" : "1",
- "added-records" : "1",
- "added-files-size" : "723",
- "changed-partition-count" : "1",
- "total-records" : "1",
- "total-files-size" : "723",
- "total-data-files" : "1",
- "total-delete-files" : "0",
- "total-position-deletes" : "0",
- "total-equality-deletes" : "0"
- },
- "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/snap-3243718219085059034-1-7b422180-e3f8-4500-b240-1424ef012246.avro",
- "schema-id" : 0
- } ],
- "snapshot-log" : [ {
- "timestamp-ms" : 1666948579101,
- "snapshot-id" : 3243718219085059034
- } ],
- "metadata-log" : [ {
- "timestamp-ms" : 1666948531175,
- "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/00000-58a6b29d-3a84-4192-bb65-8382a5222c1a.metadata.json"
- } ]
-}
\ No newline at end of file
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/v5.metadata.json b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/v5.metadata.json
deleted file mode 100644
index 6d7eb6b6e..000000000
--- a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/v5.metadata.json
+++ /dev/null
@@ -1,112 +0,0 @@
-{
- "format-version" : 1,
- "table-uuid" : "c84d8735-8499-4240-aeaf-63a1e58a7c9a",
- "location" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed",
- "last-updated-ms" : 1666782533443,
- "last-column-id" : 4,
- "schema" : {
- "type" : "struct",
- "schema-id" : 0,
- "fields" : [ {
- "id" : 1,
- "name" : "int_col",
- "required" : false,
- "type" : "int"
- }, {
- "id" : 2,
- "name" : "string_col",
- "required" : false,
- "type" : "string"
- }, {
- "id" : 3,
- "name" : "double_col",
- "required" : false,
- "type" : "double"
- }, {
- "id" : 4,
- "name" : "bool_col",
- "required" : false,
- "type" : "boolean"
- } ]
- },
- "current-schema-id" : 0,
- "schemas" : [ {
- "type" : "struct",
- "schema-id" : 0,
- "fields" : [ {
- "id" : 1,
- "name" : "int_col",
- "required" : false,
- "type" : "int"
- }, {
- "id" : 2,
- "name" : "string_col",
- "required" : false,
- "type" : "string"
- }, {
- "id" : 3,
- "name" : "double_col",
- "required" : false,
- "type" : "double"
- }, {
- "id" : 4,
- "name" : "bool_col",
- "required" : false,
- "type" : "boolean"
- } ]
- } ],
- "partition-spec" : [ ],
- "default-spec-id" : 0,
- "partition-specs" : [ {
- "spec-id" : 0,
- "fields" : [ ]
- } ],
- "last-partition-id" : 999,
- "default-sort-order-id" : 0,
- "sort-orders" : [ {
- "order-id" : 0,
- "fields" : [ ]
- } ],
- "properties" : {
- "engine.hive.enabled" : "true",
- "EXTERNAL" : "TRUE",
- "write.format.default" : "avro",
- "bucketing_version" : "2",
- "serialization.format" : "1",
- "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler"
- },
- "current-snapshot-id" : 1744181916149214787,
- "refs" : {
- "main" : {
- "snapshot-id" : 1744181916149214787,
- "type" : "branch"
- }
- },
- "snapshots" : [ {
- "snapshot-id" : 1744181916149214787,
- "timestamp-ms" : 1666782533443,
- "summary" : {
- "operation" : "append",
- "added-data-files" : "1",
- "added-records" : "1",
- "added-files-size" : "723",
- "changed-partition-count" : "1",
- "total-records" : "1",
- "total-files-size" : "723",
- "total-data-files" : "1",
- "total-delete-files" : "0",
- "total-position-deletes" : "0",
- "total-equality-deletes" : "0"
- },
- "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/snap-1744181916149214787-1-13c55017-b018-4ccb-a407-08e37e28eec8.avro",
- "schema-id" : 0
- } ],
- "snapshot-log" : [ {
- "timestamp-ms" : 1666782533443,
- "snapshot-id" : 1744181916149214787
- } ],
- "metadata-log" : [ {
- "timestamp-ms" : 1666782261541,
- "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/00000-547c9de5-692a-489f-ab78-751cf8952229.metadata.json"
- } ]
-}
\ No newline at end of file
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/v6.metadata.json b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/v6.metadata.json
deleted file mode 100644
index 39320de7e..000000000
--- a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/v6.metadata.json
+++ /dev/null
@@ -1,117 +0,0 @@
-{
- "format-version" : 1,
- "table-uuid" : "ab152578-db4a-493a-8796-5dc9e3e36004",
- "location" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed",
- "last-updated-ms" : 1666949502040,
- "last-column-id" : 4,
- "schema" : {
- "type" : "struct",
- "schema-id" : 0,
- "fields" : [ {
- "id" : 1,
- "name" : "int_col",
- "required" : false,
- "type" : "int"
- }, {
- "id" : 2,
- "name" : "string_col",
- "required" : false,
- "type" : "string"
- }, {
- "id" : 3,
- "name" : "double_col",
- "required" : false,
- "type" : "double"
- }, {
- "id" : 4,
- "name" : "bool_col",
- "required" : false,
- "type" : "boolean"
- } ]
- },
- "current-schema-id" : 0,
- "schemas" : [ {
- "type" : "struct",
- "schema-id" : 0,
- "fields" : [ {
- "id" : 1,
- "name" : "int_col",
- "required" : false,
- "type" : "int"
- }, {
- "id" : 2,
- "name" : "string_col",
- "required" : false,
- "type" : "string"
- }, {
- "id" : 3,
- "name" : "double_col",
- "required" : false,
- "type" : "double"
- }, {
- "id" : 4,
- "name" : "bool_col",
- "required" : false,
- "type" : "boolean"
- } ]
- } ],
- "partition-spec" : [ ],
- "default-spec-id" : 0,
- "partition-specs" : [ {
- "spec-id" : 0,
- "fields" : [ ]
- } ],
- "last-partition-id" : 999,
- "default-sort-order-id" : 0,
- "sort-orders" : [ {
- "order-id" : 0,
- "fields" : [ ]
- } ],
- "properties" : {
- "engine.hive.enabled" : "true",
- "last_modified_time" : "1666949501",
- "EXTERNAL" : "TRUE",
- "write.format.default" : "orc",
- "bucketing_version" : "2",
- "last_modified_by" : "noemi",
- "serialization.format" : "1",
- "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler"
- },
- "current-snapshot-id" : 3243718219085059034,
- "refs" : {
- "main" : {
- "snapshot-id" : 3243718219085059034,
- "type" : "branch"
- }
- },
- "snapshots" : [ {
- "snapshot-id" : 3243718219085059034,
- "timestamp-ms" : 1666948579101,
- "summary" : {
- "operation" : "append",
- "added-data-files" : "1",
- "added-records" : "1",
- "added-files-size" : "723",
- "changed-partition-count" : "1",
- "total-records" : "1",
- "total-files-size" : "723",
- "total-data-files" : "1",
- "total-delete-files" : "0",
- "total-position-deletes" : "0",
- "total-equality-deletes" : "0"
- },
- "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/snap-3243718219085059034-1-7b422180-e3f8-4500-b240-1424ef012246.avro",
- "schema-id" : 0
- } ],
- "snapshot-log" : [ {
- "timestamp-ms" : 1666948579101,
- "snapshot-id" : 3243718219085059034
- } ],
- "metadata-log" : [ {
- "timestamp-ms" : 1666948531175,
- "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/00000-58a6b29d-3a84-4192-bb65-8382a5222c1a.metadata.json"
- }, {
- "timestamp-ms" : 1666948579101,
- "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/00001-9a74d7ab-e873-4d72-8bd3-25baeb6a1f5c.metadata.json"
- } ]
-}
\ No newline at end of file
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/v7.metadata.json b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/v7.metadata.json
deleted file mode 100644
index 5ea19a21c..000000000
--- a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/v7.metadata.json
+++ /dev/null
@@ -1,142 +0,0 @@
-{
- "format-version" : 1,
- "table-uuid" : "ab152578-db4a-493a-8796-5dc9e3e36004",
- "location" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed",
- "last-updated-ms" : 1666949609311,
- "last-column-id" : 4,
- "schema" : {
- "type" : "struct",
- "schema-id" : 0,
- "fields" : [ {
- "id" : 1,
- "name" : "int_col",
- "required" : false,
- "type" : "int"
- }, {
- "id" : 2,
- "name" : "string_col",
- "required" : false,
- "type" : "string"
- }, {
- "id" : 3,
- "name" : "double_col",
- "required" : false,
- "type" : "double"
- }, {
- "id" : 4,
- "name" : "bool_col",
- "required" : false,
- "type" : "boolean"
- } ]
- },
- "current-schema-id" : 0,
- "schemas" : [ {
- "type" : "struct",
- "schema-id" : 0,
- "fields" : [ {
- "id" : 1,
- "name" : "int_col",
- "required" : false,
- "type" : "int"
- }, {
- "id" : 2,
- "name" : "string_col",
- "required" : false,
- "type" : "string"
- }, {
- "id" : 3,
- "name" : "double_col",
- "required" : false,
- "type" : "double"
- }, {
- "id" : 4,
- "name" : "bool_col",
- "required" : false,
- "type" : "boolean"
- } ]
- } ],
- "partition-spec" : [ ],
- "default-spec-id" : 0,
- "partition-specs" : [ {
- "spec-id" : 0,
- "fields" : [ ]
- } ],
- "last-partition-id" : 999,
- "default-sort-order-id" : 0,
- "sort-orders" : [ {
- "order-id" : 0,
- "fields" : [ ]
- } ],
- "properties" : {
- "engine.hive.enabled" : "true",
- "last_modified_time" : "1666949501",
- "EXTERNAL" : "TRUE",
- "write.format.default" : "orc",
- "bucketing_version" : "2",
- "last_modified_by" : "noemi",
- "serialization.format" : "1",
- "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler"
- },
- "current-snapshot-id" : 1131576191504541058,
- "refs" : {
- "main" : {
- "snapshot-id" : 1131576191504541058,
- "type" : "branch"
- }
- },
- "snapshots" : [ {
- "snapshot-id" : 3243718219085059034,
- "timestamp-ms" : 1666948579101,
- "summary" : {
- "operation" : "append",
- "added-data-files" : "1",
- "added-records" : "1",
- "added-files-size" : "723",
- "changed-partition-count" : "1",
- "total-records" : "1",
- "total-files-size" : "723",
- "total-data-files" : "1",
- "total-delete-files" : "0",
- "total-position-deletes" : "0",
- "total-equality-deletes" : "0"
- },
- "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/snap-3243718219085059034-1-7b422180-e3f8-4500-b240-1424ef012246.avro",
- "schema-id" : 0
- }, {
- "snapshot-id" : 1131576191504541058,
- "parent-snapshot-id" : 3243718219085059034,
- "timestamp-ms" : 1666949609311,
- "summary" : {
- "operation" : "append",
- "added-data-files" : "1",
- "added-records" : "1",
- "added-files-size" : "534",
- "changed-partition-count" : "1",
- "total-records" : "2",
- "total-files-size" : "1257",
- "total-data-files" : "2",
- "total-delete-files" : "0",
- "total-position-deletes" : "0",
- "total-equality-deletes" : "0"
- },
- "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/snap-1131576191504541058-1-8e66c338-5cd3-4b85-b986-18ec29b67d94.avro",
- "schema-id" : 0
- } ],
- "snapshot-log" : [ {
- "timestamp-ms" : 1666948579101,
- "snapshot-id" : 3243718219085059034
- }, {
- "timestamp-ms" : 1666949609311,
- "snapshot-id" : 1131576191504541058
- } ],
- "metadata-log" : [ {
- "timestamp-ms" : 1666948531175,
- "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/00000-58a6b29d-3a84-4192-bb65-8382a5222c1a.metadata.json"
- }, {
- "timestamp-ms" : 1666948579101,
- "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/00001-9a74d7ab-e873-4d72-8bd3-25baeb6a1f5c.metadata.json"
- }, {
- "timestamp-ms" : 1666949502040,
- "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/00002-e1863a7b-6666-46df-9015-e78e145e7878.metadata.json"
- } ]
-}
\ No newline at end of file
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/v8.metadata.json b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/v8.metadata.json
deleted file mode 100644
index 44d99a06a..000000000
--- a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/v8.metadata.json
+++ /dev/null
@@ -1,145 +0,0 @@
-{
- "format-version" : 1,
- "table-uuid" : "ab152578-db4a-493a-8796-5dc9e3e36004",
- "location" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed",
- "last-updated-ms" : 1666949895636,
- "last-column-id" : 4,
- "schema" : {
- "type" : "struct",
- "schema-id" : 0,
- "fields" : [ {
- "id" : 1,
- "name" : "int_col",
- "required" : false,
- "type" : "int"
- }, {
- "id" : 2,
- "name" : "string_col",
- "required" : false,
- "type" : "string"
- }, {
- "id" : 3,
- "name" : "double_col",
- "required" : false,
- "type" : "double"
- }, {
- "id" : 4,
- "name" : "bool_col",
- "required" : false,
- "type" : "boolean"
- } ]
- },
- "current-schema-id" : 0,
- "schemas" : [ {
- "type" : "struct",
- "schema-id" : 0,
- "fields" : [ {
- "id" : 1,
- "name" : "int_col",
- "required" : false,
- "type" : "int"
- }, {
- "id" : 2,
- "name" : "string_col",
- "required" : false,
- "type" : "string"
- }, {
- "id" : 3,
- "name" : "double_col",
- "required" : false,
- "type" : "double"
- }, {
- "id" : 4,
- "name" : "bool_col",
- "required" : false,
- "type" : "boolean"
- } ]
- } ],
- "partition-spec" : [ ],
- "default-spec-id" : 0,
- "partition-specs" : [ {
- "spec-id" : 0,
- "fields" : [ ]
- } ],
- "last-partition-id" : 999,
- "default-sort-order-id" : 0,
- "sort-orders" : [ {
- "order-id" : 0,
- "fields" : [ ]
- } ],
- "properties" : {
- "engine.hive.enabled" : "true",
- "last_modified_time" : "1666949895",
- "EXTERNAL" : "TRUE",
- "write.format.default" : "parquet",
- "bucketing_version" : "2",
- "last_modified_by" : "noemi",
- "serialization.format" : "1",
- "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler"
- },
- "current-snapshot-id" : 1131576191504541058,
- "refs" : {
- "main" : {
- "snapshot-id" : 1131576191504541058,
- "type" : "branch"
- }
- },
- "snapshots" : [ {
- "snapshot-id" : 3243718219085059034,
- "timestamp-ms" : 1666948579101,
- "summary" : {
- "operation" : "append",
- "added-data-files" : "1",
- "added-records" : "1",
- "added-files-size" : "723",
- "changed-partition-count" : "1",
- "total-records" : "1",
- "total-files-size" : "723",
- "total-data-files" : "1",
- "total-delete-files" : "0",
- "total-position-deletes" : "0",
- "total-equality-deletes" : "0"
- },
- "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/snap-3243718219085059034-1-7b422180-e3f8-4500-b240-1424ef012246.avro",
- "schema-id" : 0
- }, {
- "snapshot-id" : 1131576191504541058,
- "parent-snapshot-id" : 3243718219085059034,
- "timestamp-ms" : 1666949609311,
- "summary" : {
- "operation" : "append",
- "added-data-files" : "1",
- "added-records" : "1",
- "added-files-size" : "534",
- "changed-partition-count" : "1",
- "total-records" : "2",
- "total-files-size" : "1257",
- "total-data-files" : "2",
- "total-delete-files" : "0",
- "total-position-deletes" : "0",
- "total-equality-deletes" : "0"
- },
- "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/snap-1131576191504541058-1-8e66c338-5cd3-4b85-b986-18ec29b67d94.avro",
- "schema-id" : 0
- } ],
- "snapshot-log" : [ {
- "timestamp-ms" : 1666948579101,
- "snapshot-id" : 3243718219085059034
- }, {
- "timestamp-ms" : 1666949609311,
- "snapshot-id" : 1131576191504541058
- } ],
- "metadata-log" : [ {
- "timestamp-ms" : 1666948531175,
- "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/00000-58a6b29d-3a84-4192-bb65-8382a5222c1a.metadata.json"
- }, {
- "timestamp-ms" : 1666948579101,
- "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/00001-9a74d7ab-e873-4d72-8bd3-25baeb6a1f5c.metadata.json"
- }, {
- "timestamp-ms" : 1666949502040,
- "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/00002-e1863a7b-6666-46df-9015-e78e145e7878.metadata.json"
- }, {
- "timestamp-ms" : 1666949609311,
- "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/00003-5ba0f9e4-4743-48af-918d-252319c4e055.metadata.json"
- } ]
-}
\ No newline at end of file
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/v9.metadata.json b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/v9.metadata.json
deleted file mode 100644
index 2b79f43aa..000000000
--- a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/v9.metadata.json
+++ /dev/null
@@ -1,170 +0,0 @@
-{
- "format-version" : 1,
- "table-uuid" : "ab152578-db4a-493a-8796-5dc9e3e36004",
- "location" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed",
- "last-updated-ms" : 1666950458526,
- "last-column-id" : 4,
- "schema" : {
- "type" : "struct",
- "schema-id" : 0,
- "fields" : [ {
- "id" : 1,
- "name" : "int_col",
- "required" : false,
- "type" : "int"
- }, {
- "id" : 2,
- "name" : "string_col",
- "required" : false,
- "type" : "string"
- }, {
- "id" : 3,
- "name" : "double_col",
- "required" : false,
- "type" : "double"
- }, {
- "id" : 4,
- "name" : "bool_col",
- "required" : false,
- "type" : "boolean"
- } ]
- },
- "current-schema-id" : 0,
- "schemas" : [ {
- "type" : "struct",
- "schema-id" : 0,
- "fields" : [ {
- "id" : 1,
- "name" : "int_col",
- "required" : false,
- "type" : "int"
- }, {
- "id" : 2,
- "name" : "string_col",
- "required" : false,
- "type" : "string"
- }, {
- "id" : 3,
- "name" : "double_col",
- "required" : false,
- "type" : "double"
- }, {
- "id" : 4,
- "name" : "bool_col",
- "required" : false,
- "type" : "boolean"
- } ]
- } ],
- "partition-spec" : [ ],
- "default-spec-id" : 0,
- "partition-specs" : [ {
- "spec-id" : 0,
- "fields" : [ ]
- } ],
- "last-partition-id" : 999,
- "default-sort-order-id" : 0,
- "sort-orders" : [ {
- "order-id" : 0,
- "fields" : [ ]
- } ],
- "properties" : {
- "engine.hive.enabled" : "true",
- "last_modified_time" : "1666949895",
- "EXTERNAL" : "TRUE",
- "write.format.default" : "parquet",
- "bucketing_version" : "2",
- "last_modified_by" : "noemi",
- "serialization.format" : "1",
- "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler"
- },
- "current-snapshot-id" : 5089000375160183133,
- "refs" : {
- "main" : {
- "snapshot-id" : 5089000375160183133,
- "type" : "branch"
- }
- },
- "snapshots" : [ {
- "snapshot-id" : 3243718219085059034,
- "timestamp-ms" : 1666948579101,
- "summary" : {
- "operation" : "append",
- "added-data-files" : "1",
- "added-records" : "1",
- "added-files-size" : "723",
- "changed-partition-count" : "1",
- "total-records" : "1",
- "total-files-size" : "723",
- "total-data-files" : "1",
- "total-delete-files" : "0",
- "total-position-deletes" : "0",
- "total-equality-deletes" : "0"
- },
- "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/snap-3243718219085059034-1-7b422180-e3f8-4500-b240-1424ef012246.avro",
- "schema-id" : 0
- }, {
- "snapshot-id" : 1131576191504541058,
- "parent-snapshot-id" : 3243718219085059034,
- "timestamp-ms" : 1666949609311,
- "summary" : {
- "operation" : "append",
- "added-data-files" : "1",
- "added-records" : "1",
- "added-files-size" : "534",
- "changed-partition-count" : "1",
- "total-records" : "2",
- "total-files-size" : "1257",
- "total-data-files" : "2",
- "total-delete-files" : "0",
- "total-position-deletes" : "0",
- "total-equality-deletes" : "0"
- },
- "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/snap-1131576191504541058-1-8e66c338-5cd3-4b85-b986-18ec29b67d94.avro",
- "schema-id" : 0
- }, {
- "snapshot-id" : 5089000375160183133,
- "parent-snapshot-id" : 1131576191504541058,
- "timestamp-ms" : 1666950458526,
- "summary" : {
- "operation" : "append",
- "added-data-files" : "1",
- "added-records" : "1",
- "added-files-size" : "1109",
- "changed-partition-count" : "1",
- "total-records" : "3",
- "total-files-size" : "2366",
- "total-data-files" : "3",
- "total-delete-files" : "0",
- "total-position-deletes" : "0",
- "total-equality-deletes" : "0"
- },
- "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/snap-5089000375160183133-1-80a79f8a-5a47-44c9-b16d-4bef4a5ecec3.avro",
- "schema-id" : 0
- } ],
- "snapshot-log" : [ {
- "timestamp-ms" : 1666948579101,
- "snapshot-id" : 3243718219085059034
- }, {
- "timestamp-ms" : 1666949609311,
- "snapshot-id" : 1131576191504541058
- }, {
- "timestamp-ms" : 1666950458526,
- "snapshot-id" : 5089000375160183133
- } ],
- "metadata-log" : [ {
- "timestamp-ms" : 1666948531175,
- "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/00000-58a6b29d-3a84-4192-bb65-8382a5222c1a.metadata.json"
- }, {
- "timestamp-ms" : 1666948579101,
- "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/00001-9a74d7ab-e873-4d72-8bd3-25baeb6a1f5c.metadata.json"
- }, {
- "timestamp-ms" : 1666949502040,
- "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/00002-e1863a7b-6666-46df-9015-e78e145e7878.metadata.json"
- }, {
- "timestamp-ms" : 1666949609311,
- "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/00003-5ba0f9e4-4743-48af-918d-252319c4e055.metadata.json"
- }, {
- "timestamp-ms" : 1666949895636,
- "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/00004-e680e6e3-1dbd-4f48-b3d0-e676350ade56.metadata.json"
- } ]
-}
\ No newline at end of file
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/version-hint.txt b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/version-hint.txt
deleted file mode 100644
index ec635144f..000000000
--- a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed/metadata/version-hint.txt
+++ /dev/null
@@ -1 +0,0 @@
-9
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_only/data/00000-0-data-noemi_20221021195331_77fbb37f-2393-4a66-9656-61cd56b94b46-job_16629766502890_0015-1-00001.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_only/data/00000-0-data-noemi_20221021195331_77fbb37f-2393-4a66-9656-61cd56b94b46-job_16629766502890_0015-1-00001.avro
deleted file mode 100644
index 2a454d5f3..000000000
Binary files a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_only/data/00000-0-data-noemi_20221021195331_77fbb37f-2393-4a66-9656-61cd56b94b46-job_16629766502890_0015-1-00001.avro and /dev/null differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_only/metadata/a9f8d35c-a852-49fe-996a-d94ae1896c32-m0.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_only/metadata/a9f8d35c-a852-49fe-996a-d94ae1896c32-m0.avro
deleted file mode 100644
index bd8d76fbd..000000000
Binary files a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_only/metadata/a9f8d35c-a852-49fe-996a-d94ae1896c32-m0.avro and /dev/null differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_only/metadata/snap-725782911885631732-1-a9f8d35c-a852-49fe-996a-d94ae1896c32.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_only/metadata/snap-725782911885631732-1-a9f8d35c-a852-49fe-996a-d94ae1896c32.avro
deleted file mode 100644
index 799d0f2b9..000000000
Binary files a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_only/metadata/snap-725782911885631732-1-a9f8d35c-a852-49fe-996a-d94ae1896c32.avro and /dev/null differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_only/metadata/v1.metadata.json b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_only/metadata/v1.metadata.json
deleted file mode 100644
index d1fa4e378..000000000
--- a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_only/metadata/v1.metadata.json
+++ /dev/null
@@ -1,72 +0,0 @@
-{
- "format-version" : 1,
- "table-uuid" : "a7d11596-b69a-4cb8-ba17-faef632ba9ec",
- "location" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_avro_only",
- "last-updated-ms" : 1666374749642,
- "last-column-id" : 3,
- "schema" : {
- "type" : "struct",
- "schema-id" : 0,
- "fields" : [ {
- "id" : 1,
- "name" : "int_col",
- "required" : false,
- "type" : "int"
- }, {
- "id" : 2,
- "name" : "string_col",
- "required" : false,
- "type" : "string"
- }, {
- "id" : 3,
- "name" : "double_col",
- "required" : false,
- "type" : "double"
- } ]
- },
- "current-schema-id" : 0,
- "schemas" : [ {
- "type" : "struct",
- "schema-id" : 0,
- "fields" : [ {
- "id" : 1,
- "name" : "int_col",
- "required" : false,
- "type" : "int"
- }, {
- "id" : 2,
- "name" : "string_col",
- "required" : false,
- "type" : "string"
- }, {
- "id" : 3,
- "name" : "double_col",
- "required" : false,
- "type" : "double"
- } ]
- } ],
- "partition-spec" : [ ],
- "default-spec-id" : 0,
- "partition-specs" : [ {
- "spec-id" : 0,
- "fields" : [ ]
- } ],
- "last-partition-id" : 999,
- "default-sort-order-id" : 0,
- "sort-orders" : [ {
- "order-id" : 0,
- "fields" : [ ]
- } ],
- "properties" : {
- "engine.hive.enabled" : "true",
- "bucketing_version" : "2",
- "serialization.format" : "1",
- "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler",
- "write.format.default" : "avro"
- },
- "current-snapshot-id" : -1,
- "refs" : { },
- "snapshots" : [ ],
- "snapshot-log" : [ ],
- "metadata-log" : [ ]
-}
\ No newline at end of file
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_only/metadata/v2.metadata.json b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_only/metadata/v2.metadata.json
deleted file mode 100644
index dbd8868a0..000000000
--- a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_only/metadata/v2.metadata.json
+++ /dev/null
@@ -1,101 +0,0 @@
-{
- "format-version" : 1,
- "table-uuid" : "a7d11596-b69a-4cb8-ba17-faef632ba9ec",
- "location" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_avro_only",
- "last-updated-ms" : 1666374820147,
- "last-column-id" : 3,
- "schema" : {
- "type" : "struct",
- "schema-id" : 0,
- "fields" : [ {
- "id" : 1,
- "name" : "int_col",
- "required" : false,
- "type" : "int"
- }, {
- "id" : 2,
- "name" : "string_col",
- "required" : false,
- "type" : "string"
- }, {
- "id" : 3,
- "name" : "double_col",
- "required" : false,
- "type" : "double"
- } ]
- },
- "current-schema-id" : 0,
- "schemas" : [ {
- "type" : "struct",
- "schema-id" : 0,
- "fields" : [ {
- "id" : 1,
- "name" : "int_col",
- "required" : false,
- "type" : "int"
- }, {
- "id" : 2,
- "name" : "string_col",
- "required" : false,
- "type" : "string"
- }, {
- "id" : 3,
- "name" : "double_col",
- "required" : false,
- "type" : "double"
- } ]
- } ],
- "partition-spec" : [ ],
- "default-spec-id" : 0,
- "partition-specs" : [ {
- "spec-id" : 0,
- "fields" : [ ]
- } ],
- "last-partition-id" : 999,
- "default-sort-order-id" : 0,
- "sort-orders" : [ {
- "order-id" : 0,
- "fields" : [ ]
- } ],
- "properties" : {
- "engine.hive.enabled" : "true",
- "bucketing_version" : "2",
- "serialization.format" : "1",
- "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler",
- "write.format.default" : "avro"
- },
- "current-snapshot-id" : 725782911885631732,
- "refs" : {
- "main" : {
- "snapshot-id" : 725782911885631732,
- "type" : "branch"
- }
- },
- "snapshots" : [ {
- "snapshot-id" : 725782911885631732,
- "timestamp-ms" : 1666374820147,
- "summary" : {
- "operation" : "append",
- "added-data-files" : "1",
- "added-records" : "3",
- "added-files-size" : "604",
- "changed-partition-count" : "1",
- "total-records" : "3",
- "total-files-size" : "604",
- "total-data-files" : "1",
- "total-delete-files" : "0",
- "total-position-deletes" : "0",
- "total-equality-deletes" : "0"
- },
- "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_avro_only/metadata/snap-725782911885631732-1-a9f8d35c-a852-49fe-996a-d94ae1896c32.avro",
- "schema-id" : 0
- } ],
- "snapshot-log" : [ {
- "timestamp-ms" : 1666374820147,
- "snapshot-id" : 725782911885631732
- } ],
- "metadata-log" : [ {
- "timestamp-ms" : 1666374749642,
- "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_avro_only/metadata/00000-6f456c5b-0115-4a6b-8e66-2577ab899c87.metadata.json"
- } ]
-}
\ No newline at end of file
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_only/metadata/version-hint.txt b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_only/metadata/version-hint.txt
deleted file mode 100644
index 0cfbf0888..000000000
--- a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_only/metadata/version-hint.txt
+++ /dev/null
@@ -1 +0,0 @@
-2
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_only/version-hint.txt b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_only/version-hint.txt
deleted file mode 100644
index d00491fd7..000000000
--- a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_only/version-hint.txt
+++ /dev/null
@@ -1 +0,0 @@
-1
diff --git a/testdata/datasets/functional/functional_schema_template.sql b/testdata/datasets/functional/functional_schema_template.sql
index 7ada201b5..9a3d1417c 100644
--- a/testdata/datasets/functional/functional_schema_template.sql
+++ b/testdata/datasets/functional/functional_schema_template.sql
@@ -3608,42 +3608,39 @@ hadoop fs -put -f ${IMPALA_HOME}/testdata/data/iceberg_test/hadoop_catalog/ice/i
---- DATASET
functional
---- BASE_TABLE_NAME
-iceberg_avro_only
----- CREATE
+iceberg_avro_format
+---- CREATE_HIVE
CREATE EXTERNAL TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} (
int_col int,
string_col string,
- double_col double
+ double_col double,
+ bool_col boolean
)
-STORED AS ICEBERG
-TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
- 'iceberg.catalog_location'='/test-warehouse/iceberg_test/hadoop_catalog',
- 'iceberg.table_identifier'='ice.iceberg_avro_only',
- 'write.format.default'='avro');
----- DEPENDENT_LOAD
-`hadoop fs -mkdir -p /test-warehouse/iceberg_test/hadoop_catalog/ice && \
-hadoop fs -put -f ${IMPALA_HOME}/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_only /test-warehouse/iceberg_test/hadoop_catalog/ice
-
+STORED BY ICEBERG STORED AS AVRO;
+INSERT INTO TABLE {db_name}{db_suffix}.{table_name} values(1, 'A', 0.5, true),(2, 'B', 1.5, true),(3, 'C', 2.5, false);
====
---- DATASET
functional
---- BASE_TABLE_NAME
-iceberg_avro_mixed
----- CREATE
+iceberg_mixed_file_format
+---- CREATE_HIVE
CREATE EXTERNAL TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} (
int_col int,
string_col string,
double_col double,
bool_col boolean
)
-STORED AS ICEBERG
-TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
- 'iceberg.catalog_location'='/test-warehouse/iceberg_test/hadoop_catalog',
- 'iceberg.table_identifier'='ice.iceberg_avro_mixed',
- 'write.format.default'='avro');
----- DEPENDENT_LOAD
-`hadoop fs -mkdir -p /test-warehouse/iceberg_test/hadoop_catalog/ice && \
-hadoop fs -put -f ${IMPALA_HOME}/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_avro_mixed /test-warehouse/iceberg_test/hadoop_catalog/ice
+STORED BY ICEBERG
+TBLPROPERTIES('write.format.default'='avro');
+---- DEPENDENT_LOAD_HIVE
+-- This INSERT must run in Hive, because Impala doesn't support inserting into tables
+-- with avro and orc file formats.
+INSERT INTO TABLE {db_name}{db_suffix}.{table_name} values(1, 'avro', 0.5, true);
+ALTER TABLE {db_name}{db_suffix}.{table_name} SET TBLPROPERTIES('write.format.default'='orc');
+INSERT INTO TABLE {db_name}{db_suffix}.{table_name} values(2, 'orc', 1.5, false);
+ALTER TABLE {db_name}{db_suffix}.{table_name} SET TBLPROPERTIES('write.format.default'='parquet');
+INSERT INTO TABLE {db_name}{db_suffix}.{table_name} values(3, 'parquet', 2.5, false);
+
====
---- DATASET
functional
diff --git a/testdata/datasets/functional/schema_constraints.csv b/testdata/datasets/functional/schema_constraints.csv
index 20213a95b..45d770e85 100644
--- a/testdata/datasets/functional/schema_constraints.csv
+++ b/testdata/datasets/functional/schema_constraints.csv
@@ -92,8 +92,8 @@ table_name:iceberg_v2_positional_not_all_data_files_have_delete_files_orc, const
table_name:iceberg_v2_partitioned_position_deletes, constraint:restrict_to, table_format:parquet/none/none
table_name:iceberg_v2_partitioned_position_deletes_orc, constraint:restrict_to, table_format:parquet/none/none
table_name:iceberg_multiple_storage_locations, constraint:restrict_to, table_format:parquet/none/none
-table_name:iceberg_avro_only, constraint:restrict_to, table_format:parquet/none/none
-table_name:iceberg_avro_mixed, constraint:restrict_to, table_format:parquet/none/none
+table_name:iceberg_avro_format, constraint:restrict_to, table_format:parquet/none/none
+table_name:iceberg_mixed_file_format, constraint:restrict_to, table_format:parquet/none/none
# TODO: Support Avro. Data loading currently fails for Avro because complex types
# cannot be converted to the corresponding Avro types yet.
diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-avro.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-avro.test
index fde274b7f..f6182aeb6 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-avro.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-avro.test
@@ -1,10 +1,10 @@
====
---- QUERY
-select * from functional_parquet.iceberg_avro_only;
+select * from functional_parquet.iceberg_avro_format;
---- RESULTS
-1,'A',0.5
-2,'B',1.5
-3,'C',2.5
+1,'A',0.5,true
+2,'B',1.5,true
+3,'C',2.5,false
---- TYPES
-INT, STRING, DOUBLE
+INT, STRING, DOUBLE, BOOLEAN
====
\ No newline at end of file
diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-mixed-file-format.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-mixed-file-format.test
index 03b19b031..352e064af 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-mixed-file-format.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-mixed-file-format.test
@@ -1,11 +1,10 @@
====
---- QUERY
-select * from iceberg_mixed_file_format_test;
+select * from functional_parquet.iceberg_mixed_file_format;
---- RESULTS
-2,'B',1.5
-3,'C',2.5
-4,'D',3.5
-1,'A',0.5
+1,'avro',0.5,true
+3,'parquet',2.5,false
+2,'orc',1.5,false
---- TYPES
-INT, STRING, DOUBLE
+INT, STRING, DOUBLE, BOOLEAN
====
\ No newline at end of file
diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-negative.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-negative.test
index d55a05d41..2231c8e15 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-negative.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-negative.test
@@ -662,8 +662,3 @@ select * from functional_parquet.iceberg_alltypes_part for system_time as of '20
---- CATCH
IllegalArgumentException: Cannot find a snapshot older than 2000-01-01 01:02:03
====
----- QUERY
-select * from functional_parquet.iceberg_avro_mixed;
----- CATCH
-ImpalaRuntimeException: Iceberg tables containing multiple file formats are only supported if they do not contain AVRO files.
-====
diff --git a/tests/query_test/test_iceberg.py b/tests/query_test/test_iceberg.py
index 38b246637..1a8b8188a 100644
--- a/tests/query_test/test_iceberg.py
+++ b/tests/query_test/test_iceberg.py
@@ -787,8 +787,6 @@ class TestIcebergTable(IcebergTestSuite):
vector, unique_database)
def test_mixed_file_format(self, vector, unique_database):
- create_iceberg_table_from_directory(self.client, unique_database,
- "iceberg_mixed_file_format_test", "parquet")
self.run_test_case('QueryTest/iceberg-mixed-file-format', vector,
unique_database)