Posted to commits@impala.apache.org by ar...@apache.org on 2021/10/20 04:43:56 UTC

[impala] branch master updated (dc313b3 -> 7bf3996)

This is an automated email from the ASF dual-hosted git repository.

arawat pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git.


    from dc313b3  IMPALA-10921: Add script to compare TPCDS runs.
     new 3e75a17  IMPALA-10957: test_iceberg_query is flaky
     new dc8f375  IMPALA-10959: Reload MV as ACID tables
     new cae3614  IMPALA-10958: Decouple getConstraintsInformation from hive.ql.metadata.Table
     new 7bf3996  IMPALA-10973: Do not schedule empty scan nodes to coordinator

The 4 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 be/src/scheduling/scheduler.cc                     |  28 +++---
 be/src/scheduling/scheduler.h                      |   3 +-
 .../impala/compat/HiveMetadataFormatUtils.java     |  12 +--
 .../org/apache/impala/compat/MetastoreShim.java    |  13 +--
 .../impala/catalog/events/MetastoreEvents.java     |  36 +++++--
 .../impala/service/DescribeResultFactory.java      |  12 +--
 .../queries/QueryTest/iceberg-old-fileformat.test  |  57 +++++++++++
 .../queries/QueryTest/iceberg-query.test           | 111 +++++----------------
 tests/query_test/test_scanners.py                  |   3 +
 9 files changed, 144 insertions(+), 131 deletions(-)
 create mode 100644 testdata/workloads/functional-query/queries/QueryTest/iceberg-old-fileformat.test

[impala] 01/04: IMPALA-10957: test_iceberg_query is flaky

Posted by ar...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

arawat pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 3e75a177302924c15e496b0611cd05ba61d74fb9
Author: Zoltan Borok-Nagy <bo...@cloudera.com>
AuthorDate: Fri Oct 15 16:45:09 2021 +0200

    IMPALA-10957: test_iceberg_query is flaky
    
    In iceberg-query.test we create an external Iceberg table and
    set the table property 'iceberg.file_format' to check backward
    compatibility with earlier versions. At the end we delete the
    table. The table deletion makes the test fail sporadically
    during GVO.

    The bug seems to be caused by parallel execution of this test.
    The test didn't use a unique database, so dropping the table
    could affect other executions of the same test. This patch moves
    the relevant queries into their own .test file that runs against
    a unique database.
    
    Change-Id: I16e558ae5add48d8a39bd89277a0256f534ba65f
    Reviewed-on: http://gerrit.cloudera.org:8080/17929
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../queries/QueryTest/iceberg-old-fileformat.test  |  57 +++++++++++
 .../queries/QueryTest/iceberg-query.test           | 111 +++++----------------
 tests/query_test/test_scanners.py                  |   3 +
 3 files changed, 86 insertions(+), 85 deletions(-)

diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-old-fileformat.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-old-fileformat.test
new file mode 100644
index 0000000..72219bf
--- /dev/null
+++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-old-fileformat.test
@@ -0,0 +1,57 @@
+====
+---- QUERY
+CREATE EXTERNAL TABLE IF NOT EXISTS iceberg_partitioned_orc_external_old_fileformat
+STORED AS ICEBERG
+TBLPROPERTIES(
+  'iceberg.file_format'='orc',
+  'iceberg.catalog'='hadoop.catalog',
+  'iceberg.catalog_location'='/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc',
+  'iceberg.table_identifier'='functional_parquet.iceberg_partitioned_orc'
+);
+ALTER TABLE iceberg_partitioned_orc_external_old_fileformat
+UNSET TBLPROPERTIES IF EXISTS ('write.format.default');
+DESCRIBE FORMATTED iceberg_partitioned_orc_external_old_fileformat;
+---- RESULTS: VERIFY_IS_SUBSET
+'Location:           ','$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc','NULL'
+'','iceberg.catalog_location','/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc'
+'','iceberg.table_identifier','functional_parquet.iceberg_partitioned_orc'
+'','iceberg.file_format','orc                 '
+'','iceberg.catalog     ','hadoop.catalog      '
+---- RESULTS: VERIFY_IS_NOT_IN
+'','write.format.default','orc                 '
+---- TYPES
+string, string, string
+====
+---- QUERY
+SELECT * FROM iceberg_partitioned_orc_external_old_fileformat;
+---- RESULTS
+7,'Lisa','download'
+16,'Lisa','download'
+13,'Alan','click'
+10,'Alan','click'
+19,'Alex','view'
+1,'Alex','view'
+4,'Alex','view'
+20,'Alex','view'
+14,'Lisa','download'
+5,'Lisa','download'
+15,'Alex','view'
+18,'Alan','click'
+9,'Alan','click'
+17,'Alex','view'
+12,'Alan','click'
+2,'Lisa','download'
+8,'Lisa','download'
+11,'Alex','view'
+6,'Alex','view'
+3,'Alan','click'
+---- TYPES
+INT, STRING, STRING
+====
+---- QUERY
+SELECT count(*) FROM iceberg_partitioned_orc_external_old_fileformat;
+---- RESULTS
+20
+---- TYPES
+BIGINT
+====
diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-query.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-query.test
index 85645d4..4d2b590 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-query.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-query.test
@@ -335,32 +335,32 @@ bigint,bigint,bigint,string,string
 SHOW FILES in iceberg_partitioned_orc_external
 ---- LABELS
 Path,Size,Partition
----- RESULTS
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00004-4-0982a5d3-48c0-4dd0-ab87-d24190894251-00000.orc',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00014-14-dc56d2c8-e285-428d-b81e-f3d07ec53c12-00000.orc',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00015-15-d4a071f4-5d57-493e-8d21-8f82655087e4-00000.orc',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00019-19-e9fa9638-8693-43dc-b1ba-2d8dc390b585-00000.orc',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00020-20-4526ff61-8991-4645-b448-bf3535492111-00000.orc',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00028-28-439376ff-3d17-4494-94ba-586fa00caadd-00000.orc',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00003-3-fb7663c5-aade-4690-bdfc-2334d605a891-00000.orc',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00007-7-fc53a761-1ee2-4ce4-9507-c9917917d2c9-00000.orc',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00011-11-8887c762-b6a8-4e2c-b1e0-6069d6141917-00000.orc',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00012-12-a85e02aa-ff28-4311-b996-a83f67a5e39b-00000.orc',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00022-22-dea5a8ec-ea06-4688-8293-486a62563ef3-00000.orc',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00025-25-d182fe7e-de19-4df8-ac19-27bbfe17b5bd-00000.orc',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00001-1-d5df20b1-6ce6-486a-b084-ea55b7954431-00000.orc',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00006-6-b30cc130-03a1-45e5-b927-d80545593f9a-00000.orc',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00009-9-6476cc9e-9b5a-48d3-8cce-ea7ebb2d904f-00000.orc',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00017-17-c2ab6dc5-3586-4b45-8a3c-860fdc71980d-00000.orc',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00023-23-fd03c61f-8c73-4639-8974-1f6431a30bc5-00000.orc',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00027-27-9baec6d2-8e31-4d19-8259-1a9e546aaea5-00000.orc',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00030-30-433b1afa-9fee-4581-89fb-105483ebf1a2-00000.orc',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00031-31-e0156a2d-4240-4c8e-9724-6ad3231cf178-00000.orc',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/metadata/02fb8198-e791-4d89-8afa-c75fb5443346-m0.avro',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/metadata/snap-3506237933060603670-1-02fb8198-e791-4d89-8afa-c75fb5443346.avro',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/metadata/v1.metadata.json',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/metadata/v2.metadata.json',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/metadata/version-hint.text',regex:.*,''
+---- RESULTS: VERIFY_IS_SUBSET
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00004-4-0982a5d3-48c0-4dd0-ab87-d24190894251-00000.orc'.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00014-14-dc56d2c8-e285-428d-b81e-f3d07ec53c12-00000.orc'.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00015-15-d4a071f4-5d57-493e-8d21-8f82655087e4-00000.orc'.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00019-19-e9fa9638-8693-43dc-b1ba-2d8dc390b585-00000.orc'.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00020-20-4526ff61-8991-4645-b448-bf3535492111-00000.orc'.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00028-28-439376ff-3d17-4494-94ba-586fa00caadd-00000.orc'.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00003-3-fb7663c5-aade-4690-bdfc-2334d605a891-00000.orc'.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00007-7-fc53a761-1ee2-4ce4-9507-c9917917d2c9-00000.orc'.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00011-11-8887c762-b6a8-4e2c-b1e0-6069d6141917-00000.orc'.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00012-12-a85e02aa-ff28-4311-b996-a83f67a5e39b-00000.orc'.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00022-22-dea5a8ec-ea06-4688-8293-486a62563ef3-00000.orc'.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00025-25-d182fe7e-de19-4df8-ac19-27bbfe17b5bd-00000.orc'.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00001-1-d5df20b1-6ce6-486a-b084-ea55b7954431-00000.orc'.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00006-6-b30cc130-03a1-45e5-b927-d80545593f9a-00000.orc'.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00009-9-6476cc9e-9b5a-48d3-8cce-ea7ebb2d904f-00000.orc'.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00017-17-c2ab6dc5-3586-4b45-8a3c-860fdc71980d-00000.orc'.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00023-23-fd03c61f-8c73-4639-8974-1f6431a30bc5-00000.orc'.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00027-27-9baec6d2-8e31-4d19-8259-1a9e546aaea5-00000.orc'.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00030-30-433b1afa-9fee-4581-89fb-105483ebf1a2-00000.orc'.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00031-31-e0156a2d-4240-4c8e-9724-6ad3231cf178-00000.orc'.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/metadata/02fb8198-e791-4d89-8afa-c75fb5443346-m0.avro'.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/metadata/snap-3506237933060603670-1-02fb8198-e791-4d89-8afa-c75fb5443346.avro'.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/metadata/v1.metadata.json',.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/metadata/v2.metadata.json',.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/metadata/version-hint.text',.*,''
 ---- TYPES
 STRING,STRING,STRING
 ====
@@ -405,65 +405,6 @@ describe formatted iceberg_partitioned_orc_external;
 string, string, string
 ====
 ---- QUERY
-CREATE EXTERNAL TABLE IF NOT EXISTS iceberg_partitioned_orc_external_old_fileformat
-STORED AS ICEBERG
-TBLPROPERTIES(
-  'iceberg.file_format'='orc',
-  'iceberg.catalog'='hadoop.catalog',
-  'iceberg.catalog_location'='/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc',
-  'iceberg.table_identifier'='functional_parquet.iceberg_partitioned_orc'
-);
-ALTER TABLE iceberg_partitioned_orc_external_old_fileformat
-UNSET TBLPROPERTIES IF EXISTS ('write.format.default');
-describe formatted iceberg_partitioned_orc_external_old_fileformat;
----- RESULTS: VERIFY_IS_SUBSET
-'Location:           ','$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc','NULL'
-'','iceberg.catalog_location','/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc'
-'','iceberg.table_identifier','functional_parquet.iceberg_partitioned_orc'
-'','iceberg.file_format','orc                 '
-'','iceberg.catalog     ','hadoop.catalog      '
----- RESULTS: VERIFY_IS_NOT_IN
-'','write.format.default','orc                 '
----- TYPES
-string, string, string
-====
----- QUERY
-SELECT * FROM iceberg_partitioned_orc_external_old_fileformat;
----- RESULTS
-7,'Lisa','download'
-16,'Lisa','download'
-13,'Alan','click'
-10,'Alan','click'
-19,'Alex','view'
-1,'Alex','view'
-4,'Alex','view'
-20,'Alex','view'
-14,'Lisa','download'
-5,'Lisa','download'
-15,'Alex','view'
-18,'Alan','click'
-9,'Alan','click'
-17,'Alex','view'
-12,'Alan','click'
-2,'Lisa','download'
-8,'Lisa','download'
-11,'Alex','view'
-6,'Alex','view'
-3,'Alan','click'
----- TYPES
-INT, STRING, STRING
-====
----- QUERY
-SELECT count(*) FROM iceberg_partitioned_orc_external_old_fileformat;
----- RESULTS
-20
----- TYPES
-BIGINT
-====
----- QUERY
-DROP TABLE iceberg_partitioned_orc_external_old_fileformat;
-====
----- QUERY
 SELECT count(*) from iceberg_resolution_test_external;
 ---- TYPES
 bigint
diff --git a/tests/query_test/test_scanners.py b/tests/query_test/test_scanners.py
index 530e2d9..314654f 100644
--- a/tests/query_test/test_scanners.py
+++ b/tests/query_test/test_scanners.py
@@ -356,6 +356,9 @@ class TestIceberg(ImpalaTestSuite):
   def test_iceberg_query(self, vector):
     self.run_test_case('QueryTest/iceberg-query', vector)
 
+  def test_iceberg_old_fileformat(self, vector, unique_database):
+    self.run_test_case('QueryTest/iceberg-old-fileformat', vector, use_db=unique_database)
+
   def test_iceberg_profile(self, vector, unique_database):
     self.run_test_case('QueryTest/iceberg-profile', vector, use_db=unique_database)
 

[impala] 03/04: IMPALA-10958: Decouple getConstraintsInformation from hive.ql.metadata.Table

Posted by ar...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

arawat pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit cae36149aace90c256131fc4ba30f1cc5dae561e
Author: Yu-Wen Lai <yu...@cloudera.com>
AuthorDate: Tue Sep 28 12:37:18 2021 -0700

    IMPALA-10958: Decouple getConstraintsInformation from
    hive.ql.metadata.Table
    
    After HIVE-22782, the ql.metadata.Table object no longer has
    methods to set PrimaryKeyInfo and ForeignKeyInfo individually.
    However, DescribeResultFactory called those two setters to attach
    the constraints and then passed the table into
    HiveMetadataFormatUtils. Instead of calling the methods on the
    table, we can pass PrimaryKeyInfo and ForeignKeyInfo directly to
    HiveMetadataFormatUtils, so Impala is not affected even if the
    Table class's interface changes.
    
    Additionally, we can get rid of ql.metadata.Table for
    getTableInformation altogether since it just needs
    metastore.api.Table internally.
    
    Tests:
    Ran core tests.
    
    Change-Id: I2dfc54ae2f995dc4ab735d17dbbad9a48f6633da
    Reviewed-on: http://gerrit.cloudera.org:8080/17910
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Vihang Karajgaonkar <vi...@cloudera.com>
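
A condensed Java sketch of the new call shape, pieced together from the diff
below (formatExtendedInfo is a made-up helper name for illustration; the real
logic lives in DescribeResultFactory.java and MetastoreShim.java):

    import org.apache.hadoop.hive.metastore.api.Table;
    import org.apache.hadoop.hive.ql.metadata.ForeignKeyInfo;
    import org.apache.hadoop.hive.ql.metadata.PrimaryKeyInfo;
    import org.apache.impala.compat.MetastoreShim;

    // Format the extended DESCRIBE output without ever building a
    // ql.metadata.Table: the shim takes the metastore Thrift table plus the
    // two constraint objects directly.
    static String formatExtendedInfo(Table msTable, PrimaryKeyInfo pki,
        ForeignKeyInfo fki) {
      StringBuilder sb = new StringBuilder();
      sb.append(MetastoreShim.getTableInformation(msTable));
      sb.append(MetastoreShim.getConstraintsInformation(pki, fki));
      return sb.toString();
    }
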
---
 .../org/apache/impala/compat/HiveMetadataFormatUtils.java   | 12 ++++++------
 .../java/org/apache/impala/compat/MetastoreShim.java        | 13 +++++++------
 .../org/apache/impala/service/DescribeResultFactory.java    | 12 ++----------
 3 files changed, 15 insertions(+), 22 deletions(-)

diff --git a/fe/src/compat-hive-3/java/org/apache/impala/compat/HiveMetadataFormatUtils.java b/fe/src/compat-hive-3/java/org/apache/impala/compat/HiveMetadataFormatUtils.java
index a2b1a5e..394366d 100644
--- a/fe/src/compat-hive-3/java/org/apache/impala/compat/HiveMetadataFormatUtils.java
+++ b/fe/src/compat-hive-3/java/org/apache/impala/compat/HiveMetadataFormatUtils.java
@@ -396,19 +396,19 @@ public class HiveMetadataFormatUtils {
     return null;
   }
 
-  public static String getConstraintsInformation(
-      org.apache.hadoop.hive.ql.metadata.Table table) {
+  public static String getConstraintsInformation(PrimaryKeyInfo pkInfo,
+      ForeignKeyInfo fkInfo) {
     StringBuilder constraintsInfo = new StringBuilder(DEFAULT_STRINGBUILDER_SIZE);
 
     constraintsInfo.append(LINE_DELIM).append("# Constraints").append(LINE_DELIM);
 
-    if (PrimaryKeyInfo.isPrimaryKeyInfoNotEmpty(table.getPrimaryKeyInfo())) {
+    if (PrimaryKeyInfo.isPrimaryKeyInfoNotEmpty(pkInfo)) {
       constraintsInfo.append(LINE_DELIM).append("# Primary Key").append(LINE_DELIM);
-      getPrimaryKeyInformation(constraintsInfo, table.getPrimaryKeyInfo());
+      getPrimaryKeyInformation(constraintsInfo, pkInfo);
     }
-    if (ForeignKeyInfo.isForeignKeyInfoNotEmpty(table.getForeignKeyInfo())) {
+    if (ForeignKeyInfo.isForeignKeyInfoNotEmpty(fkInfo)) {
       constraintsInfo.append(LINE_DELIM).append("# Foreign Keys").append(LINE_DELIM);
-      getForeignKeysInformation(constraintsInfo, table.getForeignKeyInfo());
+      getForeignKeysInformation(constraintsInfo, fkInfo);
     }
 
     return constraintsInfo.toString();
diff --git a/fe/src/compat-hive-3/java/org/apache/impala/compat/MetastoreShim.java b/fe/src/compat-hive-3/java/org/apache/impala/compat/MetastoreShim.java
index 86a0c00..5255d7e 100644
--- a/fe/src/compat-hive-3/java/org/apache/impala/compat/MetastoreShim.java
+++ b/fe/src/compat-hive-3/java/org/apache/impala/compat/MetastoreShim.java
@@ -88,6 +88,8 @@ import org.apache.hadoop.hive.metastore.messaging.MessageFactory;
 import org.apache.hadoop.hive.metastore.messaging.MessageSerializer;
 import org.apache.hadoop.hive.metastore.utils.FileUtils;
 import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
+import org.apache.hadoop.hive.ql.metadata.PrimaryKeyInfo;
+import org.apache.hadoop.hive.ql.metadata.ForeignKeyInfo;
 import org.apache.hive.service.rpc.thrift.TGetColumnsReq;
 import org.apache.hive.service.rpc.thrift.TGetFunctionsReq;
 import org.apache.hive.service.rpc.thrift.TGetSchemasReq;
@@ -481,18 +483,17 @@ public class MetastoreShim {
    * changed significantly in Hive-3
    * @return
    */
-  public static String getTableInformation(
-      org.apache.hadoop.hive.ql.metadata.Table table) {
-    return HiveMetadataFormatUtils.getTableInformation(table.getTTable(), false);
+  public static String getTableInformation(Table table) {
+    return HiveMetadataFormatUtils.getTableInformation(table, false);
   }
 
   /**
    * Wrapper method around Hive-3's MetadataFormatUtils.getConstraintsInformation
    * @return
    */
-  public static String getConstraintsInformation(
-      org.apache.hadoop.hive.ql.metadata.Table table) {
-    return HiveMetadataFormatUtils.getConstraintsInformation(table);
+  public static String getConstraintsInformation(PrimaryKeyInfo pkInfo,
+      ForeignKeyInfo fkInfo) {
+    return HiveMetadataFormatUtils.getConstraintsInformation(pkInfo, fkInfo);
   }
 
   /**
diff --git a/fe/src/main/java/org/apache/impala/service/DescribeResultFactory.java b/fe/src/main/java/org/apache/impala/service/DescribeResultFactory.java
index d809ab4..4cb8629 100644
--- a/fe/src/main/java/org/apache/impala/service/DescribeResultFactory.java
+++ b/fe/src/main/java/org/apache/impala/service/DescribeResultFactory.java
@@ -220,29 +220,21 @@ public class DescribeResultFactory {
     msTable.getSd().setCols(Column.toFieldSchemas(nonClustered));
     msTable.setPartitionKeys(Column.toFieldSchemas(clustered));
 
-    // To avoid initializing any of the SerDe classes in the metastore table Thrift
-    // struct, create the ql.metadata.Table object by calling the empty c'tor and
-    // then calling setTTable().
-    org.apache.hadoop.hive.ql.metadata.Table hiveTable =
-        new org.apache.hadoop.hive.ql.metadata.Table();
-    hiveTable.setTTable(msTable);
     org.apache.hadoop.hive.ql.metadata.PrimaryKeyInfo pki =
         new org.apache.hadoop.hive.ql.metadata.PrimaryKeyInfo(
             table.getSqlConstraints().getPrimaryKeys(), table.getName(),
             table.getDb().getName());
-    hiveTable.setPrimaryKeyInfo(pki);
     org.apache.hadoop.hive.ql.metadata.ForeignKeyInfo fki =
         new org.apache.hadoop.hive.ql.metadata.ForeignKeyInfo(
             table.getSqlConstraints().getForeignKeys(), table.getName(),
             table.getDb().getName());
-    hiveTable.setForeignKeyInfo(fki);
     StringBuilder sb = new StringBuilder();
     // First add all the columns (includes partition columns).
     sb.append(MetastoreShim.getAllColumnsInformation(msTable.getSd().getCols(),
         msTable.getPartitionKeys(), true, false, true));
     // Add the extended table metadata information.
-    sb.append(MetastoreShim.getTableInformation(hiveTable));
-    sb.append(MetastoreShim.getConstraintsInformation(hiveTable));
+    sb.append(MetastoreShim.getTableInformation(msTable));
+    sb.append(MetastoreShim.getConstraintsInformation(pki, fki));
 
     for (String line: sb.toString().split("\n")) {
       // To match Hive's HiveServer2 output, split each line into multiple column

[impala] 02/04: IMPALA-10959: Reload MV as ACID tables

Posted by ar...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

arawat pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit dc8f375986321f2d7c1ed750e5c02c2e6f8cb4e8
Author: Yu-Wen Lai <yu...@cloudera.com>
AuthorDate: Tue Oct 5 15:42:47 2021 -0700

    IMPALA-10959: Reload MV as ACID tables
    
    We observed that the event processor breaks after receiving a
    partition event for a materialized view (MV). Impala treats MVs as
    regular views, but Hive generates partition events for MVs, which
    breaks the current event processor.

    In this patch, we let partition events for MVs follow the ACID-table
    code path and reload the whole view. In the long term, we will need
    IMPALA-10723 to treat materialized views as tables.
    
    Tests:
    - manual testing
    
    Change-Id: Ibeab8cc53ad47d24df8baba81e1ec6ea4c80a084
    Reviewed-on: http://gerrit.cloudera.org:8080/17911
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Vihang Karajgaonkar <vi...@cloudera.com>
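
The fix repeats one guard in each of the partition-event handlers touched
below: materialized views take the same full-reload path as transactional
tables, because the per-partition reload logic expects an HdfsTable. A minimal
Java sketch of that guard (needsFullReload is a hypothetical helper name;
AcidUtils is Impala's own helper class, imported elsewhere in
MetastoreEvents.java):

    import org.apache.hadoop.hive.metastore.api.Table;
    import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;

    // True when a partition event should trigger a reload of the whole table
    // instead of a fine-grained partition refresh.
    static boolean needsFullReload(Table msTbl) {
      return AcidUtils.isTransactionalTable(msTbl.getParameters())
          || MetaStoreUtils.isMaterializedViewTable(msTbl);
    }
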
---
 .../impala/catalog/events/MetastoreEvents.java     | 36 ++++++++++++++++------
 1 file changed, 26 insertions(+), 10 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java b/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java
index 93d3af0..59c381a 100644
--- a/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java
+++ b/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java
@@ -45,6 +45,7 @@ import org.apache.hadoop.hive.metastore.messaging.json.JSONAlterTableMessage;
 import org.apache.hadoop.hive.metastore.messaging.json.JSONCreateDatabaseMessage;
 import org.apache.hadoop.hive.metastore.messaging.json.JSONDropDatabaseMessage;
 import org.apache.hadoop.hive.metastore.messaging.json.JSONDropTableMessage;
+import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
 import org.apache.impala.analysis.TableName;
 import org.apache.impala.catalog.CatalogException;
 import org.apache.impala.catalog.CatalogServiceCatalog;
@@ -960,8 +961,11 @@ public class MetastoreEvents {
         infoLog("Not processing the event as it is a self-event");
         return;
       }
-      // Reload the whole table if it's a transactional table.
-      if (AcidUtils.isTransactionalTable(msTbl_.getParameters())) {
+      // Reload the whole table if it's a transactional table or materialized view.
+      // Materialized views are treated as a special case because it causes problems
+      // on the reloading partition logic which expects it to be a HdfsTable.
+      if (AcidUtils.isTransactionalTable(msTbl_.getParameters())
+          || MetaStoreUtils.isMaterializedViewTable(msTbl_)) {
         insertPartition_ = null;
       }
 
@@ -1534,8 +1538,13 @@ public class MetastoreEvents {
         return;
       }
       try {
-        // Reload the whole table if it's a transactional table.
-        if (AcidUtils.isTransactionalTable(msTbl_.getParameters()) && !isSelfEvent()) {
+        // Reload the whole table if it's a transactional table or materialized view.
+        // Materialized views are treated as a special case because it's possible to
+        // receive partition event on MVs, but they are regular views in Impala. That
+        // cause problems on the reloading partition logic which expects it to be a
+        // HdfsTable.
+        if ((AcidUtils.isTransactionalTable(msTbl_.getParameters()) && !isSelfEvent())
+            || MetaStoreUtils.isMaterializedViewTable(msTbl_)) {
           reloadTableFromCatalog("ADD_PARTITION", true);
         } else {
           // HMS adds partitions in a transactional way. This means there may be multiple
@@ -1669,8 +1678,12 @@ public class MetastoreEvents {
         return;
       }
 
-      // Reload the whole table if it's a transactional table.
-      if (AcidUtils.isTransactionalTable(msTbl_.getParameters())) {
+      // Reload the whole table if it's a transactional table or materialized view.
+      // Materialized views are treated as a special case because it's possible to receive
+      // partition event on MVs, but they are regular views in Impala. That cause problems
+      // on the reloading partition logic which expects it to be a HdfsTable.
+      if (AcidUtils.isTransactionalTable(msTbl_.getParameters())
+          || MetaStoreUtils.isMaterializedViewTable(msTbl_)) {
         reloadTableFromCatalog("ALTER_PARTITION", true);
       } else {
         // Refresh the partition that was altered.
@@ -1886,10 +1899,13 @@ public class MetastoreEvents {
         infoLog("Partition list is empty. Ignoring this event.");
       }
       try {
-        // Reload the whole table if it's a transactional table. In case of transactional
-        // tables we rely on the self-event evaluation since there is no fine-grained
-        // partition level refresh.
-        if (AcidUtils.isTransactionalTable(msTbl_.getParameters())) {
+        // Reload the whole table if it's a transactional table or materialized view.
+        // Materialized views are treated as a special case because it's possible to
+        // receive partition event on MVs, but they are regular views in Impala. That
+        // cause problems on the reloading partition logic which expects it to be a
+        // HdfsTable.
+        if (AcidUtils.isTransactionalTable(msTbl_.getParameters())
+            || MetaStoreUtils.isMaterializedViewTable(msTbl_)) {
           reloadTableFromCatalog("DROP_PARTITION", true);
         } else {
           int numPartsRemoved = catalogOpExecutor_

[impala] 04/04: IMPALA-10973: Do not schedule empty scan nodes to coordinator

Posted by ar...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

arawat pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 7bf39968bb95ac3aa66ff50b03495d4bdb97293b
Author: Csaba Ringhofer <cs...@cloudera.com>
AuthorDate: Tue Oct 19 15:00:13 2021 +0200

    IMPALA-10973: Do not schedule empty scan nodes to coordinator
    
    Until now, fragments with scan nodes that have no scan ranges were
    scheduled on the coordinator, even if it is an exclusive coordinator.

    This could cause a lot of work to be scheduled on the coordinator.
    This patch changes the logic to choose a random executor instead.
    
    Change-Id: Ie31df3861aad2e3e91cab621ff122a4f721905ef
    Reviewed-on: http://gerrit.cloudera.org:8080/17954
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Abhishek Rawat <ar...@cloudera.com>
    Reviewed-by: Bikramjeet Vig <bi...@cloudera.com>
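
The change itself is in the C++ scheduler (diff below); the policy is simply:
if none of the fragment's scan nodes produced scan hosts, add one executor
chosen uniformly at random from the executor group instead of the coordinator.
A small Java sketch of that policy, for illustration only (names are invented;
the real code works on BackendDescriptorPB objects and the schedule's RNG):

    import java.util.List;
    import java.util.Random;

    // Fallback host selection: previously the coordinator was used when no
    // scan node had scan ranges; now a random executor is picked.
    static void addFallbackHostIfEmpty(List<String> scanHosts,
        List<String> executors, Random rng) {
      if (scanHosts.isEmpty()) {
        scanHosts.add(executors.get(rng.nextInt(executors.size())));
      }
    }
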
---
 be/src/scheduling/scheduler.cc | 28 ++++++++++++++++------------
 be/src/scheduling/scheduler.h  |  3 +--
 2 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/be/src/scheduling/scheduler.cc b/be/src/scheduling/scheduler.cc
index 1034fce..4ed5007 100644
--- a/be/src/scheduling/scheduler.cc
+++ b/be/src/scheduling/scheduler.cc
@@ -409,7 +409,20 @@ void Scheduler::CreateCollocatedAndScanInstances(const ExecutorConfig& executor_
   DCHECK(has_union || scan_node_ids.size() == 1) << "This method may need revisiting "
       << "for plans with no union and multiple scans per fragment";
   vector<NetworkAddressPB> scan_hosts;
-  GetScanHosts(executor_config.coord_desc, scan_node_ids, *fragment_state, &scan_hosts);
+  GetScanHosts(scan_node_ids, *fragment_state, &scan_hosts);
+  if (scan_hosts.empty()) {
+    // None of the scan nodes have any scan ranges; run it on a random executor.
+    // TODO: the TODO below seems partially stale
+    // TODO: we'll need to revisit this strategy once we can partition joins
+    // (in which case this fragment might be executing a right outer join
+    // with a large build table)
+    vector<BackendDescriptorPB> all_executors =
+        executor_config.group.GetAllExecutorDescriptors();
+    int idx = std::uniform_int_distribution<int>(0, all_executors.size() - 1)(
+        *state->rng());
+    const BackendDescriptorPB& be_desc = all_executors[idx];
+    scan_hosts.push_back(be_desc.address());
+  }
   for (const NetworkAddressPB& host_addr : scan_hosts) {
     // Ensure that the num instances is at least as many as input fragments. We don't
     // want to increment if there were already some instances from the input fragment,
@@ -784,9 +797,8 @@ std::vector<TPlanNodeId> Scheduler::FindScanNodes(const TPlan& plan) {
   return FindNodes(plan, SCAN_NODE_TYPES);
 }
 
-void Scheduler::GetScanHosts(const BackendDescriptorPB& coord_desc,
-    const vector<TPlanNodeId>& scan_ids, const FragmentScheduleState& fragment_state,
-    vector<NetworkAddressPB>* scan_hosts) {
+void Scheduler::GetScanHosts(const vector<TPlanNodeId>& scan_ids,
+    const FragmentScheduleState& fragment_state, vector<NetworkAddressPB>* scan_hosts) {
   for (const TPlanNodeId& scan_id : scan_ids) {
     // Get the list of impalad host from scan_range_assignment_
     for (const FragmentScanRangeAssignment::value_type& scan_range_assignment :
@@ -796,14 +808,6 @@ void Scheduler::GetScanHosts(const BackendDescriptorPB& coord_desc,
         scan_hosts->push_back(scan_range_assignment.first);
       }
     }
-
-    if (scan_hosts->empty()) {
-      // this scan node doesn't have any scan ranges; run it on the coordinator
-      // TODO: we'll need to revisit this strategy once we can partition joins
-      // (in which case this fragment might be executing a right outer join
-      // with a large build table)
-      scan_hosts->push_back(coord_desc.address());
-    }
   }
 }
 
diff --git a/be/src/scheduling/scheduler.h b/be/src/scheduling/scheduler.h
index a9093cb..f75bace 100644
--- a/be/src/scheduling/scheduler.h
+++ b/be/src/scheduling/scheduler.h
@@ -415,8 +415,7 @@ class Scheduler {
 
   /// Add all hosts that the scans identified by 'scan_ids' are executed on to
   /// 'scan_hosts'.
-  void GetScanHosts(const BackendDescriptorPB& coord_desc,
-      const std::vector<TPlanNodeId>& scan_ids,
+  void GetScanHosts(const std::vector<TPlanNodeId>& scan_ids,
       const FragmentScheduleState& fragment_state,
       std::vector<NetworkAddressPB>* scan_hosts);