You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mi...@apache.org on 2023/08/21 22:29:19 UTC

[impala] branch master updated: IMPALA-12342: Erasure coding build fails on loading iceberg_lineitem_multiblock

This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new a34f7ce63 IMPALA-12342: Erasure coding build fails on loading iceberg_lineitem_multiblock
a34f7ce63 is described below

commit a34f7ce63299c72ef45a99b01bb4e80210befbff
Author: Zoltan Borok-Nagy <bo...@cloudera.com>
AuthorDate: Mon Aug 14 16:09:48 2023 +0200

    IMPALA-12342: Erasure coding build fails on loading iceberg_lineitem_multiblock
    
    Prior to this patch we tried to load table
    iceberg_lineitem_multiblock with an HDFS block size of 524288. This
    failed in builds that use HDFS erasure coding, which requires a block
    size of at least 1048576.
    
    This patch increases the block size to 1048576. This also triggers
    the bug that was fixed by IMPALA-12327. To have more tests with
    multiblock tables, this patch also adds table
    iceberg_lineitem_sixblocks and a few tests with different MT_DOP
    settings.
    
    Testing:
     * tested in a build with HDFS erasure coding (EC)
    
    Change-Id: Iad15a335407c12578eb822bb1cb4450647502e50
    Reviewed-on: http://gerrit.cloudera.org:8080/20359
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../functional/functional_schema_template.sql      | 18 +++++++++++-
 .../datasets/functional/schema_constraints.csv     |  1 +
 .../iceberg-v2-read-position-deletes.test          | 32 ++++++++++++++++++++++
 3 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/testdata/datasets/functional/functional_schema_template.sql b/testdata/datasets/functional/functional_schema_template.sql
index 7a1e255a2..7befa6eb2 100644
--- a/testdata/datasets/functional/functional_schema_template.sql
+++ b/testdata/datasets/functional/functional_schema_template.sql
@@ -3708,7 +3708,23 @@ TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
               'format-version'='2');
 ---- DEPENDENT_LOAD
 `hadoop fs -mkdir -p /test-warehouse/iceberg_test/hadoop_catalog/ice && \
-hadoop fs -Ddfs.block.size=524288 -put -f ${IMPALA_HOME}/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_lineitem_multiblock /test-warehouse/iceberg_test/hadoop_catalog/ice
+hadoop fs -Ddfs.block.size=1048576 -put -f ${IMPALA_HOME}/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_lineitem_multiblock /test-warehouse/iceberg_test/hadoop_catalog/ice
+====
+---- DATASET
+functional
+---- BASE_TABLE_NAME
+iceberg_lineitem_sixblocks
+---- CREATE
+CREATE TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name}
+LIKE PARQUET '/test-warehouse/lineitem_sixblocks_iceberg/lineitem_sixblocks.parquet'
+STORED AS PARQUET
+LOCATION '/test-warehouse/lineitem_sixblocks_iceberg/';
+ALTER TABLE {db_name}{db_suffix}.{table_name} CONVERT TO ICEBERG;
+ALTER TABLE {db_name}{db_suffix}.{table_name} SET TBLPROPERTIES ('format-version'='2');
+DELETE FROM {db_name}{db_suffix}.{table_name} WHERE l_returnflag='N';
+---- LOAD
+`hadoop fs -mkdir -p /test-warehouse/lineitem_sixblocks_iceberg && \
+hadoop fs -Ddfs.block.size=1048576 -put -f ${IMPALA_HOME}/testdata/LineItemMultiBlock/lineitem_sixblocks.parquet /test-warehouse/lineitem_sixblocks_iceberg
 ====
 ---- DATASET
 functional
diff --git a/testdata/datasets/functional/schema_constraints.csv b/testdata/datasets/functional/schema_constraints.csv
index 8e8ae9fa7..3967867bf 100644
--- a/testdata/datasets/functional/schema_constraints.csv
+++ b/testdata/datasets/functional/schema_constraints.csv
@@ -97,6 +97,7 @@ table_name:iceberg_multiple_storage_locations, constraint:restrict_to, table_for
 table_name:iceberg_avro_format, constraint:restrict_to, table_format:parquet/none/none
 table_name:iceberg_mixed_file_format, constraint:restrict_to, table_format:parquet/none/none
 table_name:iceberg_lineitem_multiblock, constraint:restrict_to, table_format:parquet/none/none
+table_name:iceberg_lineitem_sixblocks, constraint:restrict_to, table_format:parquet/none/none
 
 # TODO: Support Avro. Data loading currently fails for Avro because complex types
 # cannot be converted to the corresponding Avro types yet.
diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-read-position-deletes.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-read-position-deletes.test
index 4742defee..6af973fdb 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-read-position-deletes.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-read-position-deletes.test
@@ -687,6 +687,38 @@ select count(*) from iceberg_lineitem_multiblock where l_linenumber%5=0;
 bigint
 ====
 ---- QUERY
+SET MT_DOP=0;
+select count(*) from iceberg_lineitem_sixblocks;
+---- RESULTS
+19836
+---- TYPES
+bigint
+====
+---- QUERY
+SET MT_DOP=2;
+select count(*) from iceberg_lineitem_sixblocks;
+---- RESULTS
+19836
+---- TYPES
+bigint
+====
+---- QUERY
+SET MT_DOP=0;
+select count(*) from iceberg_lineitem_sixblocks where l_returnflag='N';
+---- RESULTS
+0
+---- TYPES
+bigint
+====
+---- QUERY
+SET MT_DOP=2;
+select count(*) from iceberg_lineitem_sixblocks where l_returnflag='N';
+---- RESULTS
+0
+---- TYPES
+bigint
+====
+---- QUERY
 SELECT * from iceberg_v2_partitioned_position_deletes;
 ---- RESULTS
 6,'Alex','view',2020-01-01 09:00:00