You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by bo...@apache.org on 2021/11/08 10:11:26 UTC
[impala] 01/02: IMPALA-10974: Impala cannot resolve columns of
converted Iceberg table
This is an automated email from the ASF dual-hosted git repository.
boroknagyz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit b02c003138388cb2546938682c53dbda19118fb8
Author: Zoltan Borok-Nagy <bo...@cloudera.com>
AuthorDate: Tue Oct 19 14:41:03 2021 +0200
IMPALA-10974: Impala cannot resolve columns of converted Iceberg table
When a regular Parquet/ORC table is converted to Iceberg via Hive,
only the Iceberg metadata files need to be created. The data files
can stay in place.
This causes problems when the data files don't have field ids for
the schema elements. Currently Impala resolves columns in data
files based on Iceberg field ids, but since they are missing,
Impala raises an error or returns NULLs.
With this patch, Impala falls back to the default column resolution
strategy when the data files lack field ids.
Testing:
* added e2e tests for both Parquet and ORC
Change-Id: I85881b09891c7bd101e7a96e92561b70bbe5af41
Reviewed-on: http://gerrit.cloudera.org:8080/17953
Reviewed-by: Csaba Ringhofer <cs...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
be/src/exec/orc-metadata-utils.cc | 14 +--
be/src/exec/parquet/parquet-metadata-utils.h | 16 ++--
testdata/data/README | 8 ++
.../hadoop_catalog/ice/airports_orc/000000_0 | Bin 0 -> 101946 bytes
.../1ebf435e-7da7-41e7-bebf-eb3ebf1b1002-m0.avro | Bin 0 -> 3034 bytes
...321-1-1ebf435e-7da7-41e7-bebf-eb3ebf1b1002.avro | Bin 0 -> 1874 bytes
.../ice/airports_orc/metadata/v1.metadata.json | 76 ++++++++++++++++
.../ice/airports_orc/metadata/v2.metadata.json | 99 +++++++++++++++++++++
.../ice/airports_orc/metadata/version-hint.txt | 1 +
.../hadoop_catalog/ice/airports_parquet/000000_0 | Bin 0 -> 186923 bytes
.../2d65964e-90ea-4442-bab5-71a67b84dfd9-m0.avro | Bin 0 -> 3236 bytes
...609-1-2d65964e-90ea-4442-bab5-71a67b84dfd9.avro | Bin 0 -> 1877 bytes
.../ice/airports_parquet/metadata/v1.metadata.json | 76 ++++++++++++++++
.../ice/airports_parquet/metadata/v2.metadata.json | 99 +++++++++++++++++++++
.../ice/airports_parquet/metadata/version-hint.txt | 1 +
.../functional/functional_schema_template.sql | 28 ++++++
.../datasets/functional/schema_constraints.csv | 2 +
.../QueryTest/iceberg-missing-field-ids.test | 21 +++++
tests/query_test/test_iceberg.py | 3 +
19 files changed, 433 insertions(+), 11 deletions(-)
diff --git a/be/src/exec/orc-metadata-utils.cc b/be/src/exec/orc-metadata-utils.cc
index 190acc9..976c125 100644
--- a/be/src/exec/orc-metadata-utils.cc
+++ b/be/src/exec/orc-metadata-utils.cc
@@ -26,6 +26,8 @@ using boost::algorithm::iequals;
namespace impala {
+static const std::string& ICEBERG_FIELD_ID = "iceberg.id";
+
inline int GetFieldIdFromStr(const std::string& str) {
try {
return std::stoi(str);
@@ -39,10 +41,13 @@ OrcSchemaResolver::OrcSchemaResolver(const HdfsTableDescriptor& tbl_desc,
tbl_desc_(tbl_desc), root_(root), filename_(filename),
is_table_full_acid_(is_table_acid) {
DetermineFullAcidSchema();
- if (tbl_desc_.IsIcebergTable()) {
- schema_resolution_strategy_ = TSchemaResolutionStrategy::FIELD_ID;
- } else {
- schema_resolution_strategy_ = TSchemaResolutionStrategy::POSITION;
+ schema_resolution_strategy_ = TSchemaResolutionStrategy::POSITION;
+ if (tbl_desc_.IsIcebergTable() && root_->getSubtypeCount() > 0) {
+ // Use FIELD_ID-based column resolution for Iceberg tables if possible.
+ const orc::Type* first_child = root_->getSubtype(0);
+ if (first_child->hasAttributeKey(ICEBERG_FIELD_ID)) {
+ schema_resolution_strategy_ = TSchemaResolutionStrategy::FIELD_ID;
+ }
}
}
@@ -208,7 +213,6 @@ Status OrcSchemaResolver::ResolveColumnByIcebergFieldId(const SchemaPath& col_pa
const orc::Type* OrcSchemaResolver::FindChildWithFieldId(const orc::Type* node,
const int field_id) const {
- const std::string& ICEBERG_FIELD_ID = "iceberg.id";
for (int i = 0; i < node->getSubtypeCount(); ++i) {
const orc::Type* child = node->getSubtype(i);
DCHECK(child != nullptr);
diff --git a/be/src/exec/parquet/parquet-metadata-utils.h b/be/src/exec/parquet/parquet-metadata-utils.h
index 9c84c3e..efdb05d 100644
--- a/be/src/exec/parquet/parquet-metadata-utils.h
+++ b/be/src/exec/parquet/parquet-metadata-utils.h
@@ -145,12 +145,7 @@ class ParquetSchemaResolver {
: tbl_desc_(tbl_desc),
fallback_schema_resolution_(fallback_schema_resolution),
array_resolution_(array_resolution),
- filename_(NULL) {
- // We set FIELD_ID for Iceberg tables.
- if (tbl_desc_.IsIcebergTable()) {
- fallback_schema_resolution_ = TSchemaResolutionStrategy::type::FIELD_ID;
- }
- }
+ filename_(NULL) {}
/// Parses the schema of the given file metadata into an internal schema
/// representation used in path resolution. Remembers the filename for error
@@ -158,6 +153,15 @@ class ParquetSchemaResolver {
Status Init(const parquet::FileMetaData* file_metadata, const char* filename) {
DCHECK(filename != NULL);
filename_ = filename;
+ // Use FIELD_ID-based column resolution for Iceberg tables if possible.
+ const auto& schema = file_metadata->schema;
+ if (tbl_desc_.IsIcebergTable() && schema.size() > 1) {
+ // schema[0] is the 'root', schema[1] is the first column.
+ const parquet::SchemaElement& first_column = schema[1];
+ if (first_column.__isset.field_id) {
+ fallback_schema_resolution_ = TSchemaResolutionStrategy::type::FIELD_ID;
+ }
+ }
return CreateSchemaTree(file_metadata->schema, &schema_);
}
diff --git a/testdata/data/README b/testdata/data/README
index db8bdd7..14e0078 100644
--- a/testdata/data/README
+++ b/testdata/data/README
@@ -661,3 +661,11 @@ binary_decimal_precision_and_scale_widening.parquet
Parquet file written with schema (decimal(9,2), decimal(18,2), decimal(38,2)). The rows
inside the file are carefully chosen so that they don't cause an overflow when being read
by an Impala table with a higher precision/scale.
+
+iceberg_test/hadoop_catalog/ice/airports_parquet:
+Regular Parquet table converted to Iceberg, which means that the data file doesn't contain
+field ids.
+
+iceberg_test/hadoop_catalog/ice/airports_orc:
+Regular ORC table converted to Iceberg, which means that the data file doesn't contain
+field ids.
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/airports_orc/000000_0 b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_orc/000000_0
new file mode 100644
index 0000000..aec4ebe
Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_orc/000000_0 differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/airports_orc/metadata/1ebf435e-7da7-41e7-bebf-eb3ebf1b1002-m0.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_orc/metadata/1ebf435e-7da7-41e7-bebf-eb3ebf1b1002-m0.avro
new file mode 100644
index 0000000..9544de7
Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_orc/metadata/1ebf435e-7da7-41e7-bebf-eb3ebf1b1002-m0.avro differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/airports_orc/metadata/snap-4990977953383402321-1-1ebf435e-7da7-41e7-bebf-eb3ebf1b1002.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_orc/metadata/snap-4990977953383402321-1-1ebf435e-7da7-41e7-bebf-eb3ebf1b1002.avro
new file mode 100644
index 0000000..9b96897
Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_orc/metadata/snap-4990977953383402321-1-1ebf435e-7da7-41e7-bebf-eb3ebf1b1002.avro differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/airports_orc/metadata/v1.metadata.json b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_orc/metadata/v1.metadata.json
new file mode 100644
index 0000000..a7d9649
--- /dev/null
+++ b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_orc/metadata/v1.metadata.json
@@ -0,0 +1,76 @@
+{
+ "format-version" : 1,
+ "table-uuid" : "6f36d4ad-321a-4359-87a0-fd9e31a034a7",
+ "location" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/airports_orc",
+ "last-updated-ms" : 1634575394783,
+ "last-column-id" : 7,
+ "schema" : {
+ "type" : "struct",
+ "fields" : [ {
+ "id" : 1,
+ "name" : "iata",
+ "required" : false,
+ "type" : "string"
+ }, {
+ "id" : 2,
+ "name" : "airport",
+ "required" : false,
+ "type" : "string"
+ }, {
+ "id" : 3,
+ "name" : "city",
+ "required" : false,
+ "type" : "string"
+ }, {
+ "id" : 4,
+ "name" : "state",
+ "required" : false,
+ "type" : "double"
+ }, {
+ "id" : 5,
+ "name" : "country",
+ "required" : false,
+ "type" : "string"
+ }, {
+ "id" : 6,
+ "name" : "lat",
+ "required" : false,
+ "type" : "double"
+ }, {
+ "id" : 7,
+ "name" : "lon",
+ "required" : false,
+ "type" : "double"
+ } ]
+ },
+ "partition-spec" : [ ],
+ "default-spec-id" : 0,
+ "partition-specs" : [ {
+ "spec-id" : 0,
+ "fields" : [ ]
+ } ],
+ "default-sort-order-id" : 0,
+ "sort-orders" : [ {
+ "order-id" : 0,
+ "fields" : [ ]
+ } ],
+ "properties" : {
+ "last_modified_time" : "1634575394",
+ "numRows" : "0",
+ "rawDataSize" : "0",
+ "gc.enabled" : "TRUE",
+ "bucketing_version" : "2",
+ "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler",
+ "numFilesErasureCoded" : "0",
+ "engine.hive.enabled" : "true",
+ "totalSize" : "101946",
+ "EXTERNAL" : "TRUE",
+ "write.format.default" : "orc",
+ "numFiles" : "1",
+ "table_type" : "ICEBERG"
+ },
+ "current-snapshot-id" : -1,
+ "snapshots" : [ ],
+ "snapshot-log" : [ ],
+ "metadata-log" : [ ]
+}
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/airports_orc/metadata/v2.metadata.json b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_orc/metadata/v2.metadata.json
new file mode 100644
index 0000000..2299c67
--- /dev/null
+++ b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_orc/metadata/v2.metadata.json
@@ -0,0 +1,99 @@
+{
+ "format-version" : 1,
+ "table-uuid" : "6f36d4ad-321a-4359-87a0-fd9e31a034a7",
+ "location" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/airports_orc",
+ "last-updated-ms" : 1634575395550,
+ "last-column-id" : 7,
+ "schema" : {
+ "type" : "struct",
+ "fields" : [ {
+ "id" : 1,
+ "name" : "iata",
+ "required" : false,
+ "type" : "string"
+ }, {
+ "id" : 2,
+ "name" : "airport",
+ "required" : false,
+ "type" : "string"
+ }, {
+ "id" : 3,
+ "name" : "city",
+ "required" : false,
+ "type" : "string"
+ }, {
+ "id" : 4,
+ "name" : "state",
+ "required" : false,
+ "type" : "double"
+ }, {
+ "id" : 5,
+ "name" : "country",
+ "required" : false,
+ "type" : "string"
+ }, {
+ "id" : 6,
+ "name" : "lat",
+ "required" : false,
+ "type" : "double"
+ }, {
+ "id" : 7,
+ "name" : "lon",
+ "required" : false,
+ "type" : "double"
+ } ]
+ },
+ "partition-spec" : [ ],
+ "default-spec-id" : 0,
+ "partition-specs" : [ {
+ "spec-id" : 0,
+ "fields" : [ ]
+ } ],
+ "default-sort-order-id" : 0,
+ "sort-orders" : [ {
+ "order-id" : 0,
+ "fields" : [ ]
+ } ],
+ "properties" : {
+ "last_modified_time" : "1634575394",
+ "numRows" : "0",
+ "rawDataSize" : "0",
+ "gc.enabled" : "TRUE",
+ "bucketing_version" : "2",
+ "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler",
+ "numFilesErasureCoded" : "0",
+ "engine.hive.enabled" : "true",
+ "totalSize" : "101946",
+ "EXTERNAL" : "TRUE",
+ "write.format.default" : "orc",
+ "numFiles" : "1",
+ "table_type" : "ICEBERG"
+ },
+ "current-snapshot-id" : 4990977953383402321,
+ "snapshots" : [ {
+ "snapshot-id" : 4990977953383402321,
+ "timestamp-ms" : 1634575395550,
+ "summary" : {
+ "operation" : "append",
+ "added-data-files" : "1",
+ "added-records" : "3376",
+ "added-files-size" : "101946",
+ "changed-partition-count" : "1",
+ "total-records" : "3376",
+ "total-files-size" : "101946",
+ "total-data-files" : "1",
+ "total-delete-files" : "0",
+ "total-position-deletes" : "0",
+ "total-equality-deletes" : "0"
+ },
+ "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/airports_orc/metadata/snap-4990977953383402321-1-1ebf435e-7da7-41e7-bebf-eb3ebf1b1002.avro"
+ } ],
+ "snapshot-log" : [ {
+ "timestamp-ms" : 1634575395550,
+ "snapshot-id" : 4990977953383402321
+ } ],
+ "metadata-log" : [ {
+ "timestamp-ms" : 1634575394783,
+ "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/airports_orc/metadata/v1.metadata.json"
+ } ]
+}
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/airports_orc/metadata/version-hint.txt b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_orc/metadata/version-hint.txt
new file mode 100644
index 0000000..0cfbf08
--- /dev/null
+++ b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_orc/metadata/version-hint.txt
@@ -0,0 +1 @@
+2
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/airports_parquet/000000_0 b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_parquet/000000_0
new file mode 100644
index 0000000..a52ae2f
Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_parquet/000000_0 differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/airports_parquet/metadata/2d65964e-90ea-4442-bab5-71a67b84dfd9-m0.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_parquet/metadata/2d65964e-90ea-4442-bab5-71a67b84dfd9-m0.avro
new file mode 100644
index 0000000..89bd687
Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_parquet/metadata/2d65964e-90ea-4442-bab5-71a67b84dfd9-m0.avro differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/airports_parquet/metadata/snap-2304960110511088609-1-2d65964e-90ea-4442-bab5-71a67b84dfd9.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_parquet/metadata/snap-2304960110511088609-1-2d65964e-90ea-4442-bab5-71a67b84dfd9.avro
new file mode 100644
index 0000000..40dcc44
Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_parquet/metadata/snap-2304960110511088609-1-2d65964e-90ea-4442-bab5-71a67b84dfd9.avro differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/airports_parquet/metadata/v1.metadata.json b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_parquet/metadata/v1.metadata.json
new file mode 100644
index 0000000..9365ba9
--- /dev/null
+++ b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_parquet/metadata/v1.metadata.json
@@ -0,0 +1,76 @@
+{
+ "format-version" : 1,
+ "table-uuid" : "f39041e7-f5f4-40df-a62a-3de425149db6",
+ "location" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/airports_parquet",
+ "last-updated-ms" : 1634576002747,
+ "last-column-id" : 7,
+ "schema" : {
+ "type" : "struct",
+ "fields" : [ {
+ "id" : 1,
+ "name" : "iata",
+ "required" : false,
+ "type" : "string"
+ }, {
+ "id" : 2,
+ "name" : "airport",
+ "required" : false,
+ "type" : "string"
+ }, {
+ "id" : 3,
+ "name" : "city",
+ "required" : false,
+ "type" : "string"
+ }, {
+ "id" : 4,
+ "name" : "state",
+ "required" : false,
+ "type" : "double"
+ }, {
+ "id" : 5,
+ "name" : "country",
+ "required" : false,
+ "type" : "string"
+ }, {
+ "id" : 6,
+ "name" : "lat",
+ "required" : false,
+ "type" : "double"
+ }, {
+ "id" : 7,
+ "name" : "lon",
+ "required" : false,
+ "type" : "double"
+ } ]
+ },
+ "partition-spec" : [ ],
+ "default-spec-id" : 0,
+ "partition-specs" : [ {
+ "spec-id" : 0,
+ "fields" : [ ]
+ } ],
+ "default-sort-order-id" : 0,
+ "sort-orders" : [ {
+ "order-id" : 0,
+ "fields" : [ ]
+ } ],
+ "properties" : {
+ "last_modified_time" : "1634576002",
+ "numRows" : "0",
+ "rawDataSize" : "0",
+ "gc.enabled" : "TRUE",
+ "bucketing_version" : "2",
+ "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler",
+ "numFilesErasureCoded" : "0",
+ "engine.hive.enabled" : "true",
+ "totalSize" : "186923",
+ "EXTERNAL" : "TRUE",
+ "write.format.default" : "parquet",
+ "numFiles" : "1",
+ "table_type" : "ICEBERG"
+ },
+ "current-snapshot-id" : -1,
+ "snapshots" : [ ],
+ "snapshot-log" : [ ],
+ "metadata-log" : [ ]
+}
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/airports_parquet/metadata/v2.metadata.json b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_parquet/metadata/v2.metadata.json
new file mode 100644
index 0000000..0927e71
--- /dev/null
+++ b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_parquet/metadata/v2.metadata.json
@@ -0,0 +1,99 @@
+{
+ "format-version" : 1,
+ "table-uuid" : "f39041e7-f5f4-40df-a62a-3de425149db6",
+ "location" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/airports_parquet",
+ "last-updated-ms" : 1634576003865,
+ "last-column-id" : 7,
+ "schema" : {
+ "type" : "struct",
+ "fields" : [ {
+ "id" : 1,
+ "name" : "iata",
+ "required" : false,
+ "type" : "string"
+ }, {
+ "id" : 2,
+ "name" : "airport",
+ "required" : false,
+ "type" : "string"
+ }, {
+ "id" : 3,
+ "name" : "city",
+ "required" : false,
+ "type" : "string"
+ }, {
+ "id" : 4,
+ "name" : "state",
+ "required" : false,
+ "type" : "double"
+ }, {
+ "id" : 5,
+ "name" : "country",
+ "required" : false,
+ "type" : "string"
+ }, {
+ "id" : 6,
+ "name" : "lat",
+ "required" : false,
+ "type" : "double"
+ }, {
+ "id" : 7,
+ "name" : "lon",
+ "required" : false,
+ "type" : "double"
+ } ]
+ },
+ "partition-spec" : [ ],
+ "default-spec-id" : 0,
+ "partition-specs" : [ {
+ "spec-id" : 0,
+ "fields" : [ ]
+ } ],
+ "default-sort-order-id" : 0,
+ "sort-orders" : [ {
+ "order-id" : 0,
+ "fields" : [ ]
+ } ],
+ "properties" : {
+ "last_modified_time" : "1634576002",
+ "numRows" : "0",
+ "rawDataSize" : "0",
+ "gc.enabled" : "TRUE",
+ "bucketing_version" : "2",
+ "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler",
+ "numFilesErasureCoded" : "0",
+ "engine.hive.enabled" : "true",
+ "totalSize" : "186923",
+ "EXTERNAL" : "TRUE",
+ "write.format.default" : "parquet",
+ "numFiles" : "1",
+ "table_type" : "ICEBERG"
+ },
+ "current-snapshot-id" : 2304960110511088609,
+ "snapshots" : [ {
+ "snapshot-id" : 2304960110511088609,
+ "timestamp-ms" : 1634576003865,
+ "summary" : {
+ "operation" : "append",
+ "added-data-files" : "1",
+ "added-records" : "3376",
+ "added-files-size" : "186923",
+ "changed-partition-count" : "1",
+ "total-records" : "3376",
+ "total-files-size" : "186923",
+ "total-data-files" : "1",
+ "total-delete-files" : "0",
+ "total-position-deletes" : "0",
+ "total-equality-deletes" : "0"
+ },
+ "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/airports_parquet/metadata/snap-2304960110511088609-1-2d65964e-90ea-4442-bab5-71a67b84dfd9.avro"
+ } ],
+ "snapshot-log" : [ {
+ "timestamp-ms" : 1634576003865,
+ "snapshot-id" : 2304960110511088609
+ } ],
+ "metadata-log" : [ {
+ "timestamp-ms" : 1634576002747,
+ "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/airports_parquet/metadata/00000-56d53b2a-540d-4c67-8374-7c21be957845.metadata.json"
+ } ]
+}
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/airports_parquet/metadata/version-hint.txt b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_parquet/metadata/version-hint.txt
new file mode 100644
index 0000000..0cfbf08
--- /dev/null
+++ b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_parquet/metadata/version-hint.txt
@@ -0,0 +1 @@
+2
diff --git a/testdata/datasets/functional/functional_schema_template.sql b/testdata/datasets/functional/functional_schema_template.sql
index 52c16cc..21293ed 100644
--- a/testdata/datasets/functional/functional_schema_template.sql
+++ b/testdata/datasets/functional/functional_schema_template.sql
@@ -3148,6 +3148,34 @@ hadoop fs -put -f ${IMPALA_HOME}/testdata/data/iceberg_test/hadoop_catalog/ice/c
---- DATASET
functional
---- BASE_TABLE_NAME
+airports_orc
+---- CREATE
+CREATE EXTERNAL TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name}
+STORED AS ICEBERG
+TBLPROPERTIES('write.format.default'='orc', 'iceberg.catalog'='hadoop.catalog',
+ 'iceberg.catalog_location'='/test-warehouse/iceberg_test/hadoop_catalog',
+ 'iceberg.table_identifier'='ice.airports_orc');
+---- DEPENDENT_LOAD
+`hadoop fs -mkdir -p /test-warehouse/iceberg_test/hadoop_catalog/ice && \
+hadoop fs -put -f ${IMPALA_HOME}/testdata/data/iceberg_test/hadoop_catalog/ice/airports_orc /test-warehouse/iceberg_test/hadoop_catalog/ice
+====
+---- DATASET
+functional
+---- BASE_TABLE_NAME
+airports_parquet
+---- CREATE
+CREATE EXTERNAL TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name}
+STORED AS ICEBERG
+TBLPROPERTIES('write.format.default'='parquet', 'iceberg.catalog'='hadoop.catalog',
+ 'iceberg.catalog_location'='/test-warehouse/iceberg_test/hadoop_catalog',
+ 'iceberg.table_identifier'='ice.airports_parquet');
+---- DEPENDENT_LOAD
+`hadoop fs -mkdir -p /test-warehouse/iceberg_test/hadoop_catalog/ice && \
+hadoop fs -put -f ${IMPALA_HOME}/testdata/data/iceberg_test/hadoop_catalog/ice/airports_parquet /test-warehouse/iceberg_test/hadoop_catalog/ice
+====
+---- DATASET
+functional
+---- BASE_TABLE_NAME
iceberg_resolution_test_external
---- CREATE
CREATE EXTERNAL TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name}
diff --git a/testdata/datasets/functional/schema_constraints.csv b/testdata/datasets/functional/schema_constraints.csv
index 87e5e61..29362a4 100644
--- a/testdata/datasets/functional/schema_constraints.csv
+++ b/testdata/datasets/functional/schema_constraints.csv
@@ -63,6 +63,8 @@ table_name:hudi_partitioned, constraint:restrict_to, table_format:parquet/none/n
table_name:hudi_non_partitioned, constraint:restrict_to, table_format:parquet/none/none
table_name:hudi_as_parquet, constraint:restrict_to, table_format:parquet/none/none
# Iceberg tests are executed in the PARQUET file format dimension
+table_name:airports_orc, constraint:restrict_to, table_format:parquet/none/none
+table_name:airports_parquet, constraint:restrict_to, table_format:parquet/none/none
table_name:complextypestbl_iceberg_orc, constraint:restrict_to, table_format:parquet/none/none
table_name:hadoop_catalog_test_external, constraint:restrict_to, table_format:parquet/none/none
table_name:iceberg_int_partitioned, constraint:restrict_to, table_format:parquet/none/none
diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-missing-field-ids.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-missing-field-ids.test
new file mode 100644
index 0000000..5cb6a2f
--- /dev/null
+++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-missing-field-ids.test
@@ -0,0 +1,21 @@
+====
+---- QUERY
+select * from airports_parquet where country != 'USA';
+---- RESULTS
+'ROP','Prachinburi','NA',NULL,'Thailand',14.078333,101.378334
+'ROR','Babelthoup/Koror','NA',NULL,'Palau',7.367222,134.544167
+'SPN','Tinian International Airport','NA',NULL,'N Mariana Islands',14.996111,145.621384
+'YAP','Yap International','NA',NULL,'Federated States of Micronesia',9.5167,138.1
+---- TYPES
+STRING, STRING, STRING, DOUBLE, STRING, DOUBLE, DOUBLE
+====
+---- QUERY
+select * from airports_orc where country != 'USA';
+---- RESULTS
+'ROP','Prachinburi','NA',NULL,'Thailand',14.078333,101.378334
+'ROR','Babelthoup/Koror','NA',NULL,'Palau',7.367222,134.544167
+'SPN','Tinian International Airport','NA',NULL,'N Mariana Islands',14.996111,145.621384
+'YAP','Yap International','NA',NULL,'Federated States of Micronesia',9.5167,138.1
+---- TYPES
+STRING, STRING, STRING, DOUBLE, STRING, DOUBLE, DOUBLE
+====
diff --git a/tests/query_test/test_iceberg.py b/tests/query_test/test_iceberg.py
index 2b61191..45f72d9 100644
--- a/tests/query_test/test_iceberg.py
+++ b/tests/query_test/test_iceberg.py
@@ -96,6 +96,9 @@ class TestIcebergTable(ImpalaTestSuite):
def test_catalogs(self, vector, unique_database):
self.run_test_case('QueryTest/iceberg-catalogs', vector, use_db=unique_database)
+ def test_missing_field_ids(self, vector):
+ self.run_test_case('QueryTest/iceberg-missing-field-ids', vector)
+
def test_describe_history(self, vector, unique_database):
self.run_test_case('QueryTest/iceberg-table-history', vector, use_db=unique_database)