You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by la...@apache.org on 2024/01/14 12:40:53 UTC
(impala) 01/02: IMPALA-12706: Fix nested struct querying for Iceberg metadata tables
This is an automated email from the ASF dual-hosted git repository.
laszlog pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 74617537b5c805327349ef0ac5c79b84dc1e786d
Author: Tamas Mate <tm...@apache.org>
AuthorDate: Thu Jan 11 14:26:54 2024 +0100
IMPALA-12706: Fix nested struct querying for Iceberg metadata tables
This commit fixes a DCHECK failure when querying a struct inside a
struct. The previous field accessor creation logic was trying to find
the ColumnDescriptor for a struct inside a struct and hit a DCHECK
because there are no ColumnDescriptors for struct fields. The logic
has been reworked to only use ColumnDescriptors for top level columns.
Testing:
- Added E2E test to cover this case
Change-Id: Iadd029a4edc500bd8d8fca3f958903c2dbe09e8e
Reviewed-on: http://gerrit.cloudera.org:8080/20883
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
.../iceberg-metadata/iceberg-metadata-scan-node.cc | 33 ++++++++++------------
.../iceberg-metadata/iceberg-metadata-scan-node.h | 8 +++---
.../queries/QueryTest/iceberg-metadata-tables.test | 10 +++++++
3 files changed, 29 insertions(+), 22 deletions(-)
diff --git a/be/src/exec/iceberg-metadata/iceberg-metadata-scan-node.cc b/be/src/exec/iceberg-metadata/iceberg-metadata-scan-node.cc
index d779992fb..5210b7cd1 100644
--- a/be/src/exec/iceberg-metadata/iceberg-metadata-scan-node.cc
+++ b/be/src/exec/iceberg-metadata/iceberg-metadata-scan-node.cc
@@ -91,30 +91,27 @@ Status IcebergMetadataScanNode::CreateFieldAccessors() {
JNIEnv* env = JniUtil::GetJNIEnv();
if (env == nullptr) return Status("Failed to get/create JVM");
for (SlotDescriptor* slot_desc: tuple_desc_->slots()) {
- if (slot_desc->type().IsStructType()) {
- // Get the top level struct's field id from the ColumnDescriptor then recursively
- // get the field ids for struct fields
- int field_id = tuple_desc_->table_desc()->GetColumnDesc(slot_desc).field_id();
- RETURN_IF_ERROR(AddAccessorForFieldId(env, field_id, slot_desc->id()));
- RETURN_IF_ERROR(CreateFieldAccessors(env, slot_desc));
- } else if (slot_desc->col_path().size() > 1) {
- DCHECK(!slot_desc->type().IsComplexType());
- // Slot that is child of a struct without tuple, can occur when a struct member is
- // in the select list. ColumnType has a tree structure, and this loop finds the
- // STRUCT node that stores the primitive type. Because, that struct node has the
- // field id list of its childs.
+ int field_id = -1;
+ if (slot_desc->col_path().size() == 1) {
+ // Top level slots have ColumnDescriptors that store the field ids.
+ field_id = tuple_desc_->table_desc()->GetColumnDesc(slot_desc).field_id();
+ } else {
+ // Non top level slots are fields of a nested type. This code path is to handle
+ // slots that does not have their nested type's tuple available.
+ // This loop finds the struct ColumnType node that stores the slot as it has the
+ // field id list of its children.
int root_type_index = slot_desc->col_path()[0];
ColumnType* current_type = &const_cast<ColumnType&>(
tuple_desc_->table_desc()->col_descs()[root_type_index].type());
for (int i = 1; i < slot_desc->col_path().size() - 1; ++i) {
current_type = ¤t_type->children[slot_desc->col_path()[i]];
}
- int field_id = current_type->field_ids[slot_desc->col_path().back()];
- RETURN_IF_ERROR(AddAccessorForFieldId(env, field_id, slot_desc->id()));
- } else {
- // For primitives in the top level tuple, use the ColumnDescriptor
- int field_id = tuple_desc_->table_desc()->GetColumnDesc(slot_desc).field_id();
- RETURN_IF_ERROR(AddAccessorForFieldId(env, field_id, slot_desc->id()));
+ field_id = current_type->field_ids[slot_desc->col_path().back()];
+ }
+ DCHECK_NE(field_id, -1);
+ RETURN_IF_ERROR(AddAccessorForFieldId(env, field_id, slot_desc->id()));
+ if (slot_desc->type().IsStructType()) {
+ RETURN_IF_ERROR(CreateFieldAccessors(env, slot_desc));
}
}
return Status::OK();
diff --git a/be/src/exec/iceberg-metadata/iceberg-metadata-scan-node.h b/be/src/exec/iceberg-metadata/iceberg-metadata-scan-node.h
index 64f1d3aa5..ae15efbd4 100644
--- a/be/src/exec/iceberg-metadata/iceberg-metadata-scan-node.h
+++ b/be/src/exec/iceberg-metadata/iceberg-metadata-scan-node.h
@@ -121,10 +121,10 @@ class IcebergMetadataScanNode : public ScanNode {
Status GetCatalogTable(jobject* jtable);
/// Populates the jaccessors_ map by creating the accessors for the columns in the JVM.
- /// To create a field accessor for a column the Iceberg field id is needed. For
- /// primitive type columns that are not a field of a struct, this can be found in the
- /// ColumnDescriptor. However, ColumnDescriptors are not available for struct fields,
- /// in this case the SlotDescriptor can be used.
+ /// To create a field accessor for a column the Iceberg field id is needed. For columns
+ /// that are not a field of a struct, this can be found in the ColumnDescriptor.
+ /// However, ColumnDescriptors are not available for struct fields, in this case the
+ /// ColumnType of the SlotDescriptor can be used.
Status CreateFieldAccessors();
/// Recursive part of the Accessor collection, when there is a struct in the tuple.
diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-metadata-tables.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-metadata-tables.test
index 0f58dae99..a559e13b6 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-metadata-tables.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-metadata-tables.test
@@ -470,6 +470,16 @@ select readable_metrics from functional_parquet.iceberg_query_metadata.entries;
STRING
====
---- QUERY
+select readable_metrics.i from functional_parquet.iceberg_query_metadata.entries;
+---- RESULTS
+'{"column_size":47,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":3,"upper_bound":3}'
+'{"column_size":47,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":2,"upper_bound":2}'
+'{"column_size":47,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":1,"upper_bound":1}'
+'{"column_size":null,"value_count":null,"null_value_count":null,"nan_value_count":null,"lower_bound":null,"upper_bound":null}'
+---- TYPES
+STRING
+====
+---- QUERY
select snapshot_id, readable_metrics from functional_parquet.iceberg_query_metadata.entries;
---- RESULTS
row_regex:[1-9]\d*|0,'{"i":{"column_size":47,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":3,"upper_bound":3}}'