You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by bo...@apache.org on 2021/11/08 10:11:26 UTC

[impala] 01/02: IMPALA-10974: Impala cannot resolve columns of converted Iceberg table

This is an automated email from the ASF dual-hosted git repository.

boroknagyz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit b02c003138388cb2546938682c53dbda19118fb8
Author: Zoltan Borok-Nagy <bo...@cloudera.com>
AuthorDate: Tue Oct 19 14:41:03 2021 +0200

    IMPALA-10974: Impala cannot resolve columns of converted Iceberg table
    
    When a regular Parquet/ORC table is converted to Iceberg via Hive,
    only the Iceberg metadata files need to be created. The data files
    can stay in place.
    
    This causes problems when the data files don't have field ids for
    the schema elements. Currently Impala resolves columns in data
    files based on Iceberg field ids, but since they are missing,
    Impala raises an error or returns NULLs.
    
    With this patch Impala falls back to the default column resolution
    strategy when the data files lack field ids.
    
    Testing:
     * added e2e tests for both Parquet and ORC
    
    Change-Id: I85881b09891c7bd101e7a96e92561b70bbe5af41
    Reviewed-on: http://gerrit.cloudera.org:8080/17953
    Reviewed-by: Csaba Ringhofer <cs...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/exec/orc-metadata-utils.cc                  |  14 +--
 be/src/exec/parquet/parquet-metadata-utils.h       |  16 ++--
 testdata/data/README                               |   8 ++
 .../hadoop_catalog/ice/airports_orc/000000_0       | Bin 0 -> 101946 bytes
 .../1ebf435e-7da7-41e7-bebf-eb3ebf1b1002-m0.avro   | Bin 0 -> 3034 bytes
 ...321-1-1ebf435e-7da7-41e7-bebf-eb3ebf1b1002.avro | Bin 0 -> 1874 bytes
 .../ice/airports_orc/metadata/v1.metadata.json     |  76 ++++++++++++++++
 .../ice/airports_orc/metadata/v2.metadata.json     |  99 +++++++++++++++++++++
 .../ice/airports_orc/metadata/version-hint.txt     |   1 +
 .../hadoop_catalog/ice/airports_parquet/000000_0   | Bin 0 -> 186923 bytes
 .../2d65964e-90ea-4442-bab5-71a67b84dfd9-m0.avro   | Bin 0 -> 3236 bytes
 ...609-1-2d65964e-90ea-4442-bab5-71a67b84dfd9.avro | Bin 0 -> 1877 bytes
 .../ice/airports_parquet/metadata/v1.metadata.json |  76 ++++++++++++++++
 .../ice/airports_parquet/metadata/v2.metadata.json |  99 +++++++++++++++++++++
 .../ice/airports_parquet/metadata/version-hint.txt |   1 +
 .../functional/functional_schema_template.sql      |  28 ++++++
 .../datasets/functional/schema_constraints.csv     |   2 +
 .../QueryTest/iceberg-missing-field-ids.test       |  21 +++++
 tests/query_test/test_iceberg.py                   |   3 +
 19 files changed, 433 insertions(+), 11 deletions(-)

diff --git a/be/src/exec/orc-metadata-utils.cc b/be/src/exec/orc-metadata-utils.cc
index 190acc9..976c125 100644
--- a/be/src/exec/orc-metadata-utils.cc
+++ b/be/src/exec/orc-metadata-utils.cc
@@ -26,6 +26,8 @@ using boost::algorithm::iequals;
 
 namespace impala {
 
+static const std::string& ICEBERG_FIELD_ID = "iceberg.id";
+
 inline int GetFieldIdFromStr(const std::string& str) {
   try {
     return std::stoi(str);
@@ -39,10 +41,13 @@ OrcSchemaResolver::OrcSchemaResolver(const HdfsTableDescriptor& tbl_desc,
     tbl_desc_(tbl_desc), root_(root), filename_(filename),
     is_table_full_acid_(is_table_acid) {
   DetermineFullAcidSchema();
-  if (tbl_desc_.IsIcebergTable()) {
-    schema_resolution_strategy_ = TSchemaResolutionStrategy::FIELD_ID;
-  } else {
-    schema_resolution_strategy_ = TSchemaResolutionStrategy::POSITION;
+  schema_resolution_strategy_ = TSchemaResolutionStrategy::POSITION;
+  if (tbl_desc_.IsIcebergTable() && root_->getSubtypeCount() > 0) {
+    // Use FIELD_ID resolution for Iceberg tables if the first column has 'iceberg.id'.
+    const orc::Type* first_child =  root_->getSubtype(0);
+    if (first_child->hasAttributeKey(ICEBERG_FIELD_ID)) {
+      schema_resolution_strategy_ = TSchemaResolutionStrategy::FIELD_ID;
+    }
   }
 }
 
@@ -208,7 +213,6 @@ Status OrcSchemaResolver::ResolveColumnByIcebergFieldId(const SchemaPath& col_pa
 
 const orc::Type* OrcSchemaResolver::FindChildWithFieldId(const orc::Type* node,
     const int field_id) const {
-  const std::string& ICEBERG_FIELD_ID = "iceberg.id";
   for (int i = 0; i < node->getSubtypeCount(); ++i) {
     const orc::Type* child = node->getSubtype(i);
     DCHECK(child != nullptr);
diff --git a/be/src/exec/parquet/parquet-metadata-utils.h b/be/src/exec/parquet/parquet-metadata-utils.h
index 9c84c3e..efdb05d 100644
--- a/be/src/exec/parquet/parquet-metadata-utils.h
+++ b/be/src/exec/parquet/parquet-metadata-utils.h
@@ -145,12 +145,7 @@ class ParquetSchemaResolver {
     : tbl_desc_(tbl_desc),
       fallback_schema_resolution_(fallback_schema_resolution),
       array_resolution_(array_resolution),
-      filename_(NULL) {
-    // We set FIELD_ID for Iceberg tables.
-    if (tbl_desc_.IsIcebergTable()) {
-      fallback_schema_resolution_ = TSchemaResolutionStrategy::type::FIELD_ID;
-    }
-  }
+      filename_(NULL) {}
 
   /// Parses the schema of the given file metadata into an internal schema
   /// representation used in path resolution. Remembers the filename for error
@@ -158,6 +153,15 @@ class ParquetSchemaResolver {
   Status Init(const parquet::FileMetaData* file_metadata, const char* filename) {
     DCHECK(filename != NULL);
     filename_ = filename;
+    // Use FIELD_ID resolution for Iceberg tables if the first column has a field id.
+    const auto& schema = file_metadata->schema;
+    if (tbl_desc_.IsIcebergTable() && schema.size() > 1) {
+      // schema[0] is the 'root', schema[1] is the first column.
+      const parquet::SchemaElement& first_column = schema[1];
+      if (first_column.__isset.field_id) {
+        fallback_schema_resolution_ = TSchemaResolutionStrategy::type::FIELD_ID;
+      }
+    }
     return CreateSchemaTree(file_metadata->schema, &schema_);
   }
 
diff --git a/testdata/data/README b/testdata/data/README
index db8bdd7..14e0078 100644
--- a/testdata/data/README
+++ b/testdata/data/README
@@ -661,3 +661,11 @@ binary_decimal_precision_and_scale_widening.parquet
 Parquet file written with schema (decimal(9,2), decimal(18,2), decimal(38,2)). The rows
 inside the file are carefully chosen so that they don't cause an overflow when being read
 by an Impala table with a higher precision/scale.
+
+iceberg_test/hadoop_catalog/ice/airports_parquet:
+Regular Parquet table converted to Iceberg, which means that the data file doesn't contain
+field ids.
+
+iceberg_test/hadoop_catalog/ice/airports_orc:
+Regular ORC table converted to Iceberg, which means that the data file doesn't contain
+field ids.
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/airports_orc/000000_0 b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_orc/000000_0
new file mode 100644
index 0000000..aec4ebe
Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_orc/000000_0 differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/airports_orc/metadata/1ebf435e-7da7-41e7-bebf-eb3ebf1b1002-m0.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_orc/metadata/1ebf435e-7da7-41e7-bebf-eb3ebf1b1002-m0.avro
new file mode 100644
index 0000000..9544de7
Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_orc/metadata/1ebf435e-7da7-41e7-bebf-eb3ebf1b1002-m0.avro differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/airports_orc/metadata/snap-4990977953383402321-1-1ebf435e-7da7-41e7-bebf-eb3ebf1b1002.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_orc/metadata/snap-4990977953383402321-1-1ebf435e-7da7-41e7-bebf-eb3ebf1b1002.avro
new file mode 100644
index 0000000..9b96897
Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_orc/metadata/snap-4990977953383402321-1-1ebf435e-7da7-41e7-bebf-eb3ebf1b1002.avro differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/airports_orc/metadata/v1.metadata.json b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_orc/metadata/v1.metadata.json
new file mode 100644
index 0000000..a7d9649
--- /dev/null
+++ b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_orc/metadata/v1.metadata.json
@@ -0,0 +1,76 @@
+{
+  "format-version" : 1,
+  "table-uuid" : "6f36d4ad-321a-4359-87a0-fd9e31a034a7",
+  "location" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/airports_orc",
+  "last-updated-ms" : 1634575394783,
+  "last-column-id" : 7,
+  "schema" : {
+    "type" : "struct",
+    "fields" : [ {
+      "id" : 1,
+      "name" : "iata",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 2,
+      "name" : "airport",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 3,
+      "name" : "city",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 4,
+      "name" : "state",
+      "required" : false,
+      "type" : "double"
+    }, {
+      "id" : 5,
+      "name" : "country",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 6,
+      "name" : "lat",
+      "required" : false,
+      "type" : "double"
+    }, {
+      "id" : 7,
+      "name" : "lon",
+      "required" : false,
+      "type" : "double"
+    } ]
+  },
+  "partition-spec" : [ ],
+  "default-spec-id" : 0,
+  "partition-specs" : [ {
+    "spec-id" : 0,
+    "fields" : [ ]
+  } ],
+  "default-sort-order-id" : 0,
+  "sort-orders" : [ {
+    "order-id" : 0,
+    "fields" : [ ]
+  } ],
+  "properties" : {
+    "last_modified_time" : "1634575394",
+    "numRows" : "0",
+    "rawDataSize" : "0",
+    "gc.enabled" : "TRUE",
+    "bucketing_version" : "2",
+    "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler",
+    "numFilesErasureCoded" : "0",
+    "engine.hive.enabled" : "true",
+    "totalSize" : "101946",
+    "EXTERNAL" : "TRUE",
+    "write.format.default" : "orc",
+    "numFiles" : "1",
+    "table_type" : "ICEBERG"
+  },
+  "current-snapshot-id" : -1,
+  "snapshots" : [ ],
+  "snapshot-log" : [ ],
+  "metadata-log" : [ ]
+}
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/airports_orc/metadata/v2.metadata.json b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_orc/metadata/v2.metadata.json
new file mode 100644
index 0000000..2299c67
--- /dev/null
+++ b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_orc/metadata/v2.metadata.json
@@ -0,0 +1,99 @@
+{
+  "format-version" : 1,
+  "table-uuid" : "6f36d4ad-321a-4359-87a0-fd9e31a034a7",
+  "location" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/airports_orc",
+  "last-updated-ms" : 1634575395550,
+  "last-column-id" : 7,
+  "schema" : {
+    "type" : "struct",
+    "fields" : [ {
+      "id" : 1,
+      "name" : "iata",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 2,
+      "name" : "airport",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 3,
+      "name" : "city",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 4,
+      "name" : "state",
+      "required" : false,
+      "type" : "double"
+    }, {
+      "id" : 5,
+      "name" : "country",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 6,
+      "name" : "lat",
+      "required" : false,
+      "type" : "double"
+    }, {
+      "id" : 7,
+      "name" : "lon",
+      "required" : false,
+      "type" : "double"
+    } ]
+  },
+  "partition-spec" : [ ],
+  "default-spec-id" : 0,
+  "partition-specs" : [ {
+    "spec-id" : 0,
+    "fields" : [ ]
+  } ],
+  "default-sort-order-id" : 0,
+  "sort-orders" : [ {
+    "order-id" : 0,
+    "fields" : [ ]
+  } ],
+  "properties" : {
+    "last_modified_time" : "1634575394",
+    "numRows" : "0",
+    "rawDataSize" : "0",
+    "gc.enabled" : "TRUE",
+    "bucketing_version" : "2",
+    "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler",
+    "numFilesErasureCoded" : "0",
+    "engine.hive.enabled" : "true",
+    "totalSize" : "101946",
+    "EXTERNAL" : "TRUE",
+    "write.format.default" : "orc",
+    "numFiles" : "1",
+    "table_type" : "ICEBERG"
+  },
+  "current-snapshot-id" : 4990977953383402321,
+  "snapshots" : [ {
+    "snapshot-id" : 4990977953383402321,
+    "timestamp-ms" : 1634575395550,
+    "summary" : {
+      "operation" : "append",
+      "added-data-files" : "1",
+      "added-records" : "3376",
+      "added-files-size" : "101946",
+      "changed-partition-count" : "1",
+      "total-records" : "3376",
+      "total-files-size" : "101946",
+      "total-data-files" : "1",
+      "total-delete-files" : "0",
+      "total-position-deletes" : "0",
+      "total-equality-deletes" : "0"
+    },
+    "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/airports_orc/metadata/snap-4990977953383402321-1-1ebf435e-7da7-41e7-bebf-eb3ebf1b1002.avro"
+  } ],
+  "snapshot-log" : [ {
+    "timestamp-ms" : 1634575395550,
+    "snapshot-id" : 4990977953383402321
+  } ],
+  "metadata-log" : [ {
+    "timestamp-ms" : 1634575394783,
+    "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/airports_orc/metadata/v1.metadata.json"
+  } ]
+}
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/airports_orc/metadata/version-hint.txt b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_orc/metadata/version-hint.txt
new file mode 100644
index 0000000..0cfbf08
--- /dev/null
+++ b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_orc/metadata/version-hint.txt
@@ -0,0 +1 @@
+2
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/airports_parquet/000000_0 b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_parquet/000000_0
new file mode 100644
index 0000000..a52ae2f
Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_parquet/000000_0 differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/airports_parquet/metadata/2d65964e-90ea-4442-bab5-71a67b84dfd9-m0.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_parquet/metadata/2d65964e-90ea-4442-bab5-71a67b84dfd9-m0.avro
new file mode 100644
index 0000000..89bd687
Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_parquet/metadata/2d65964e-90ea-4442-bab5-71a67b84dfd9-m0.avro differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/airports_parquet/metadata/snap-2304960110511088609-1-2d65964e-90ea-4442-bab5-71a67b84dfd9.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_parquet/metadata/snap-2304960110511088609-1-2d65964e-90ea-4442-bab5-71a67b84dfd9.avro
new file mode 100644
index 0000000..40dcc44
Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_parquet/metadata/snap-2304960110511088609-1-2d65964e-90ea-4442-bab5-71a67b84dfd9.avro differ
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/airports_parquet/metadata/v1.metadata.json b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_parquet/metadata/v1.metadata.json
new file mode 100644
index 0000000..9365ba9
--- /dev/null
+++ b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_parquet/metadata/v1.metadata.json
@@ -0,0 +1,76 @@
+{
+  "format-version" : 1,
+  "table-uuid" : "f39041e7-f5f4-40df-a62a-3de425149db6",
+  "location" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/airports_parquet",
+  "last-updated-ms" : 1634576002747,
+  "last-column-id" : 7,
+  "schema" : {
+    "type" : "struct",
+    "fields" : [ {
+      "id" : 1,
+      "name" : "iata",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 2,
+      "name" : "airport",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 3,
+      "name" : "city",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 4,
+      "name" : "state",
+      "required" : false,
+      "type" : "double"
+    }, {
+      "id" : 5,
+      "name" : "country",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 6,
+      "name" : "lat",
+      "required" : false,
+      "type" : "double"
+    }, {
+      "id" : 7,
+      "name" : "lon",
+      "required" : false,
+      "type" : "double"
+    } ]
+  },
+  "partition-spec" : [ ],
+  "default-spec-id" : 0,
+  "partition-specs" : [ {
+    "spec-id" : 0,
+    "fields" : [ ]
+  } ],
+  "default-sort-order-id" : 0,
+  "sort-orders" : [ {
+    "order-id" : 0,
+    "fields" : [ ]
+  } ],
+  "properties" : {
+    "last_modified_time" : "1634576002",
+    "numRows" : "0",
+    "rawDataSize" : "0",
+    "gc.enabled" : "TRUE",
+    "bucketing_version" : "2",
+    "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler",
+    "numFilesErasureCoded" : "0",
+    "engine.hive.enabled" : "true",
+    "totalSize" : "186923",
+    "EXTERNAL" : "TRUE",
+    "write.format.default" : "parquet",
+    "numFiles" : "1",
+    "table_type" : "ICEBERG"
+  },
+  "current-snapshot-id" : -1,
+  "snapshots" : [ ],
+  "snapshot-log" : [ ],
+  "metadata-log" : [ ]
+}
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/airports_parquet/metadata/v2.metadata.json b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_parquet/metadata/v2.metadata.json
new file mode 100644
index 0000000..0927e71
--- /dev/null
+++ b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_parquet/metadata/v2.metadata.json
@@ -0,0 +1,99 @@
+{
+  "format-version" : 1,
+  "table-uuid" : "f39041e7-f5f4-40df-a62a-3de425149db6",
+  "location" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/airports_parquet",
+  "last-updated-ms" : 1634576003865,
+  "last-column-id" : 7,
+  "schema" : {
+    "type" : "struct",
+    "fields" : [ {
+      "id" : 1,
+      "name" : "iata",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 2,
+      "name" : "airport",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 3,
+      "name" : "city",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 4,
+      "name" : "state",
+      "required" : false,
+      "type" : "double"
+    }, {
+      "id" : 5,
+      "name" : "country",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 6,
+      "name" : "lat",
+      "required" : false,
+      "type" : "double"
+    }, {
+      "id" : 7,
+      "name" : "lon",
+      "required" : false,
+      "type" : "double"
+    } ]
+  },
+  "partition-spec" : [ ],
+  "default-spec-id" : 0,
+  "partition-specs" : [ {
+    "spec-id" : 0,
+    "fields" : [ ]
+  } ],
+  "default-sort-order-id" : 0,
+  "sort-orders" : [ {
+    "order-id" : 0,
+    "fields" : [ ]
+  } ],
+  "properties" : {
+    "last_modified_time" : "1634576002",
+    "numRows" : "0",
+    "rawDataSize" : "0",
+    "gc.enabled" : "TRUE",
+    "bucketing_version" : "2",
+    "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler",
+    "numFilesErasureCoded" : "0",
+    "engine.hive.enabled" : "true",
+    "totalSize" : "186923",
+    "EXTERNAL" : "TRUE",
+    "write.format.default" : "parquet",
+    "numFiles" : "1",
+    "table_type" : "ICEBERG"
+  },
+  "current-snapshot-id" : 2304960110511088609,
+  "snapshots" : [ {
+    "snapshot-id" : 2304960110511088609,
+    "timestamp-ms" : 1634576003865,
+    "summary" : {
+      "operation" : "append",
+      "added-data-files" : "1",
+      "added-records" : "3376",
+      "added-files-size" : "186923",
+      "changed-partition-count" : "1",
+      "total-records" : "3376",
+      "total-files-size" : "186923",
+      "total-data-files" : "1",
+      "total-delete-files" : "0",
+      "total-position-deletes" : "0",
+      "total-equality-deletes" : "0"
+    },
+    "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/airports_parquet/metadata/snap-2304960110511088609-1-2d65964e-90ea-4442-bab5-71a67b84dfd9.avro"
+  } ],
+  "snapshot-log" : [ {
+    "timestamp-ms" : 1634576003865,
+    "snapshot-id" : 2304960110511088609
+  } ],
+  "metadata-log" : [ {
+    "timestamp-ms" : 1634576002747,
+    "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/airports_parquet/metadata/00000-56d53b2a-540d-4c67-8374-7c21be957845.metadata.json"
+  } ]
+}
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/airports_parquet/metadata/version-hint.txt b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_parquet/metadata/version-hint.txt
new file mode 100644
index 0000000..0cfbf08
--- /dev/null
+++ b/testdata/data/iceberg_test/hadoop_catalog/ice/airports_parquet/metadata/version-hint.txt
@@ -0,0 +1 @@
+2
diff --git a/testdata/datasets/functional/functional_schema_template.sql b/testdata/datasets/functional/functional_schema_template.sql
index 52c16cc..21293ed 100644
--- a/testdata/datasets/functional/functional_schema_template.sql
+++ b/testdata/datasets/functional/functional_schema_template.sql
@@ -3148,6 +3148,34 @@ hadoop fs -put -f ${IMPALA_HOME}/testdata/data/iceberg_test/hadoop_catalog/ice/c
 ---- DATASET
 functional
 ---- BASE_TABLE_NAME
+airports_orc
+---- CREATE
+CREATE EXTERNAL TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name}
+STORED AS ICEBERG
+TBLPROPERTIES('write.format.default'='orc', 'iceberg.catalog'='hadoop.catalog',
+              'iceberg.catalog_location'='/test-warehouse/iceberg_test/hadoop_catalog',
+              'iceberg.table_identifier'='ice.airports_orc');
+---- DEPENDENT_LOAD
+`hadoop fs -mkdir -p /test-warehouse/iceberg_test/hadoop_catalog/ice && \
+hadoop fs -put -f ${IMPALA_HOME}/testdata/data/iceberg_test/hadoop_catalog/ice/airports_orc /test-warehouse/iceberg_test/hadoop_catalog/ice
+====
+---- DATASET
+functional
+---- BASE_TABLE_NAME
+airports_parquet
+---- CREATE
+CREATE EXTERNAL TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name}
+STORED AS ICEBERG
+TBLPROPERTIES('write.format.default'='parquet', 'iceberg.catalog'='hadoop.catalog',
+              'iceberg.catalog_location'='/test-warehouse/iceberg_test/hadoop_catalog',
+              'iceberg.table_identifier'='ice.airports_parquet');
+---- DEPENDENT_LOAD
+`hadoop fs -mkdir -p /test-warehouse/iceberg_test/hadoop_catalog/ice && \
+hadoop fs -put -f ${IMPALA_HOME}/testdata/data/iceberg_test/hadoop_catalog/ice/airports_parquet /test-warehouse/iceberg_test/hadoop_catalog/ice
+====
+---- DATASET
+functional
+---- BASE_TABLE_NAME
 iceberg_resolution_test_external
 ---- CREATE
 CREATE EXTERNAL TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name}
diff --git a/testdata/datasets/functional/schema_constraints.csv b/testdata/datasets/functional/schema_constraints.csv
index 87e5e61..29362a4 100644
--- a/testdata/datasets/functional/schema_constraints.csv
+++ b/testdata/datasets/functional/schema_constraints.csv
@@ -63,6 +63,8 @@ table_name:hudi_partitioned, constraint:restrict_to, table_format:parquet/none/n
 table_name:hudi_non_partitioned, constraint:restrict_to, table_format:parquet/none/none
 table_name:hudi_as_parquet, constraint:restrict_to, table_format:parquet/none/none
 # Iceberg tests are executed in the PARQUET file format dimension
+table_name:airports_orc, constraint:restrict_to, table_format:parquet/none/none
+table_name:airports_parquet, constraint:restrict_to, table_format:parquet/none/none
 table_name:complextypestbl_iceberg_orc, constraint:restrict_to, table_format:parquet/none/none
 table_name:hadoop_catalog_test_external, constraint:restrict_to, table_format:parquet/none/none
 table_name:iceberg_int_partitioned, constraint:restrict_to, table_format:parquet/none/none
diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-missing-field-ids.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-missing-field-ids.test
new file mode 100644
index 0000000..5cb6a2f
--- /dev/null
+++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-missing-field-ids.test
@@ -0,0 +1,21 @@
+====
+---- QUERY
+select * from airports_parquet where country != 'USA';
+---- RESULTS
+'ROP','Prachinburi','NA',NULL,'Thailand',14.078333,101.378334
+'ROR','Babelthoup/Koror','NA',NULL,'Palau',7.367222,134.544167
+'SPN','Tinian International Airport','NA',NULL,'N Mariana Islands',14.996111,145.621384
+'YAP','Yap International','NA',NULL,'Federated States of Micronesia',9.5167,138.1
+---- TYPES
+STRING, STRING, STRING, DOUBLE, STRING, DOUBLE, DOUBLE
+====
+---- QUERY
+select * from airports_orc where country != 'USA';
+---- RESULTS
+'ROP','Prachinburi','NA',NULL,'Thailand',14.078333,101.378334
+'ROR','Babelthoup/Koror','NA',NULL,'Palau',7.367222,134.544167
+'SPN','Tinian International Airport','NA',NULL,'N Mariana Islands',14.996111,145.621384
+'YAP','Yap International','NA',NULL,'Federated States of Micronesia',9.5167,138.1
+---- TYPES
+STRING, STRING, STRING, DOUBLE, STRING, DOUBLE, DOUBLE
+====
diff --git a/tests/query_test/test_iceberg.py b/tests/query_test/test_iceberg.py
index 2b61191..45f72d9 100644
--- a/tests/query_test/test_iceberg.py
+++ b/tests/query_test/test_iceberg.py
@@ -96,6 +96,9 @@ class TestIcebergTable(ImpalaTestSuite):
   def test_catalogs(self, vector, unique_database):
     self.run_test_case('QueryTest/iceberg-catalogs', vector, use_db=unique_database)
 
+  def test_missing_field_ids(self, vector):
+    self.run_test_case('QueryTest/iceberg-missing-field-ids', vector)
+
   def test_describe_history(self, vector, unique_database):
     self.run_test_case('QueryTest/iceberg-table-history', vector, use_db=unique_database)