You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ar...@apache.org on 2021/10/20 04:43:56 UTC
[impala] branch master updated (dc313b3 -> 7bf3996)
This is an automated email from the ASF dual-hosted git repository.
arawat pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git.
from dc313b3 IMPALA-10921 Add script to compare TPCDS runs.
new 3e75a17 IMPALA-10957: test_iceberg_query is flaky
new dc8f375 IMPALA-10959: Reload MV as ACID tables
new cae3614 IMPALA-10958: Decouple getConstraintsInformation from hive.ql.metadata.Table
new 7bf3996 IMPALA-10973: Do not schedule empty scan nodes to coordinator
The 4 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
be/src/scheduling/scheduler.cc | 28 +++---
be/src/scheduling/scheduler.h | 3 +-
.../impala/compat/HiveMetadataFormatUtils.java | 12 +--
.../org/apache/impala/compat/MetastoreShim.java | 13 +--
.../impala/catalog/events/MetastoreEvents.java | 36 +++++--
.../impala/service/DescribeResultFactory.java | 12 +--
.../queries/QueryTest/iceberg-old-fileformat.test | 57 +++++++++++
.../queries/QueryTest/iceberg-query.test | 111 +++++----------------
tests/query_test/test_scanners.py | 3 +
9 files changed, 144 insertions(+), 131 deletions(-)
create mode 100644 testdata/workloads/functional-query/queries/QueryTest/iceberg-old-fileformat.test
[impala] 01/04: IMPALA-10957: test_iceberg_query is flaky
Posted by ar...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
arawat pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 3e75a177302924c15e496b0611cd05ba61d74fb9
Author: Zoltan Borok-Nagy <bo...@cloudera.com>
AuthorDate: Fri Oct 15 16:45:09 2021 +0200
IMPALA-10957: test_iceberg_query is flaky
In iceberg-query.test we create an external Iceberg table and
set the table property 'iceberg.file_format' to check
backward-compatibility with earlier versions. At the end we
delete the table. The table deletion makes the test fail
sporadically during GVO.
Seems like the bug is caused by the parallel execution of this test.
The test didn't use a unique database, therefore dropping the table
could affect other executions of the same test. This patch puts
the relevant queries into their own .test file using a unique
database.
Change-Id: I16e558ae5add48d8a39bd89277a0256f534ba65f
Reviewed-on: http://gerrit.cloudera.org:8080/17929
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
.../queries/QueryTest/iceberg-old-fileformat.test | 57 +++++++++++
.../queries/QueryTest/iceberg-query.test | 111 +++++----------------
tests/query_test/test_scanners.py | 3 +
3 files changed, 86 insertions(+), 85 deletions(-)
diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-old-fileformat.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-old-fileformat.test
new file mode 100644
index 0000000..72219bf
--- /dev/null
+++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-old-fileformat.test
@@ -0,0 +1,57 @@
+====
+---- QUERY
+CREATE EXTERNAL TABLE IF NOT EXISTS iceberg_partitioned_orc_external_old_fileformat
+STORED AS ICEBERG
+TBLPROPERTIES(
+ 'iceberg.file_format'='orc',
+ 'iceberg.catalog'='hadoop.catalog',
+ 'iceberg.catalog_location'='/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc',
+ 'iceberg.table_identifier'='functional_parquet.iceberg_partitioned_orc'
+);
+ALTER TABLE iceberg_partitioned_orc_external_old_fileformat
+UNSET TBLPROPERTIES IF EXISTS ('write.format.default');
+DESCRIBE FORMATTED iceberg_partitioned_orc_external_old_fileformat;
+---- RESULTS: VERIFY_IS_SUBSET
+'Location: ','$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc','NULL'
+'','iceberg.catalog_location','/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc'
+'','iceberg.table_identifier','functional_parquet.iceberg_partitioned_orc'
+'','iceberg.file_format','orc '
+'','iceberg.catalog ','hadoop.catalog '
+---- RESULTS: VERIFY_IS_NOT_IN
+'','write.format.default','orc '
+---- TYPES
+string, string, string
+====
+---- QUERY
+SELECT * FROM iceberg_partitioned_orc_external_old_fileformat;
+---- RESULTS
+7,'Lisa','download'
+16,'Lisa','download'
+13,'Alan','click'
+10,'Alan','click'
+19,'Alex','view'
+1,'Alex','view'
+4,'Alex','view'
+20,'Alex','view'
+14,'Lisa','download'
+5,'Lisa','download'
+15,'Alex','view'
+18,'Alan','click'
+9,'Alan','click'
+17,'Alex','view'
+12,'Alan','click'
+2,'Lisa','download'
+8,'Lisa','download'
+11,'Alex','view'
+6,'Alex','view'
+3,'Alan','click'
+---- TYPES
+INT, STRING, STRING
+====
+---- QUERY
+SELECT count(*) FROM iceberg_partitioned_orc_external_old_fileformat;
+---- RESULTS
+20
+---- TYPES
+BIGINT
+====
diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-query.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-query.test
index 85645d4..4d2b590 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-query.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-query.test
@@ -335,32 +335,32 @@ bigint,bigint,bigint,string,string
SHOW FILES in iceberg_partitioned_orc_external
---- LABELS
Path,Size,Partition
----- RESULTS
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00004-4-0982a5d3-48c0-4dd0-ab87-d24190894251-00000.orc',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00014-14-dc56d2c8-e285-428d-b81e-f3d07ec53c12-00000.orc',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00015-15-d4a071f4-5d57-493e-8d21-8f82655087e4-00000.orc',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00019-19-e9fa9638-8693-43dc-b1ba-2d8dc390b585-00000.orc',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00020-20-4526ff61-8991-4645-b448-bf3535492111-00000.orc',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00028-28-439376ff-3d17-4494-94ba-586fa00caadd-00000.orc',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00003-3-fb7663c5-aade-4690-bdfc-2334d605a891-00000.orc',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00007-7-fc53a761-1ee2-4ce4-9507-c9917917d2c9-00000.orc',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00011-11-8887c762-b6a8-4e2c-b1e0-6069d6141917-00000.orc',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00012-12-a85e02aa-ff28-4311-b996-a83f67a5e39b-00000.orc',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00022-22-dea5a8ec-ea06-4688-8293-486a62563ef3-00000.orc',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00025-25-d182fe7e-de19-4df8-ac19-27bbfe17b5bd-00000.orc',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00001-1-d5df20b1-6ce6-486a-b084-ea55b7954431-00000.orc',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00006-6-b30cc130-03a1-45e5-b927-d80545593f9a-00000.orc',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00009-9-6476cc9e-9b5a-48d3-8cce-ea7ebb2d904f-00000.orc',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00017-17-c2ab6dc5-3586-4b45-8a3c-860fdc71980d-00000.orc',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00023-23-fd03c61f-8c73-4639-8974-1f6431a30bc5-00000.orc',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00027-27-9baec6d2-8e31-4d19-8259-1a9e546aaea5-00000.orc',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00030-30-433b1afa-9fee-4581-89fb-105483ebf1a2-00000.orc',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00031-31-e0156a2d-4240-4c8e-9724-6ad3231cf178-00000.orc',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/metadata/02fb8198-e791-4d89-8afa-c75fb5443346-m0.avro',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/metadata/snap-3506237933060603670-1-02fb8198-e791-4d89-8afa-c75fb5443346.avro',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/metadata/v1.metadata.json',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/metadata/v2.metadata.json',regex:.*,''
-'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/metadata/version-hint.text',regex:.*,''
+---- RESULTS: VERIFY_IS_SUBSET
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00004-4-0982a5d3-48c0-4dd0-ab87-d24190894251-00000.orc'.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00014-14-dc56d2c8-e285-428d-b81e-f3d07ec53c12-00000.orc'.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00015-15-d4a071f4-5d57-493e-8d21-8f82655087e4-00000.orc'.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00019-19-e9fa9638-8693-43dc-b1ba-2d8dc390b585-00000.orc'.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00020-20-4526ff61-8991-4645-b448-bf3535492111-00000.orc'.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=click/00028-28-439376ff-3d17-4494-94ba-586fa00caadd-00000.orc'.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00003-3-fb7663c5-aade-4690-bdfc-2334d605a891-00000.orc'.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00007-7-fc53a761-1ee2-4ce4-9507-c9917917d2c9-00000.orc'.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00011-11-8887c762-b6a8-4e2c-b1e0-6069d6141917-00000.orc'.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00012-12-a85e02aa-ff28-4311-b996-a83f67a5e39b-00000.orc'.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00022-22-dea5a8ec-ea06-4688-8293-486a62563ef3-00000.orc'.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=download/00025-25-d182fe7e-de19-4df8-ac19-27bbfe17b5bd-00000.orc'.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00001-1-d5df20b1-6ce6-486a-b084-ea55b7954431-00000.orc'.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00006-6-b30cc130-03a1-45e5-b927-d80545593f9a-00000.orc'.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00009-9-6476cc9e-9b5a-48d3-8cce-ea7ebb2d904f-00000.orc'.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00017-17-c2ab6dc5-3586-4b45-8a3c-860fdc71980d-00000.orc'.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00023-23-fd03c61f-8c73-4639-8974-1f6431a30bc5-00000.orc'.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00027-27-9baec6d2-8e31-4d19-8259-1a9e546aaea5-00000.orc'.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00030-30-433b1afa-9fee-4581-89fb-105483ebf1a2-00000.orc'.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/data/action=view/00031-31-e0156a2d-4240-4c8e-9724-6ad3231cf178-00000.orc'.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/metadata/02fb8198-e791-4d89-8afa-c75fb5443346-m0.avro'.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/metadata/snap-3506237933060603670-1-02fb8198-e791-4d89-8afa-c75fb5443346.avro'.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/metadata/v1.metadata.json',.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/metadata/v2.metadata.json',.*,''
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc/metadata/version-hint.text',.*,''
---- TYPES
STRING,STRING,STRING
====
@@ -405,65 +405,6 @@ describe formatted iceberg_partitioned_orc_external;
string, string, string
====
---- QUERY
-CREATE EXTERNAL TABLE IF NOT EXISTS iceberg_partitioned_orc_external_old_fileformat
-STORED AS ICEBERG
-TBLPROPERTIES(
- 'iceberg.file_format'='orc',
- 'iceberg.catalog'='hadoop.catalog',
- 'iceberg.catalog_location'='/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc',
- 'iceberg.table_identifier'='functional_parquet.iceberg_partitioned_orc'
-);
-ALTER TABLE iceberg_partitioned_orc_external_old_fileformat
-UNSET TBLPROPERTIES IF EXISTS ('write.format.default');
-describe formatted iceberg_partitioned_orc_external_old_fileformat;
----- RESULTS: VERIFY_IS_SUBSET
-'Location: ','$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc','NULL'
-'','iceberg.catalog_location','/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc'
-'','iceberg.table_identifier','functional_parquet.iceberg_partitioned_orc'
-'','iceberg.file_format','orc '
-'','iceberg.catalog ','hadoop.catalog '
----- RESULTS: VERIFY_IS_NOT_IN
-'','write.format.default','orc '
----- TYPES
-string, string, string
-====
----- QUERY
-SELECT * FROM iceberg_partitioned_orc_external_old_fileformat;
----- RESULTS
-7,'Lisa','download'
-16,'Lisa','download'
-13,'Alan','click'
-10,'Alan','click'
-19,'Alex','view'
-1,'Alex','view'
-4,'Alex','view'
-20,'Alex','view'
-14,'Lisa','download'
-5,'Lisa','download'
-15,'Alex','view'
-18,'Alan','click'
-9,'Alan','click'
-17,'Alex','view'
-12,'Alan','click'
-2,'Lisa','download'
-8,'Lisa','download'
-11,'Alex','view'
-6,'Alex','view'
-3,'Alan','click'
----- TYPES
-INT, STRING, STRING
-====
----- QUERY
-SELECT count(*) FROM iceberg_partitioned_orc_external_old_fileformat;
----- RESULTS
-20
----- TYPES
-BIGINT
-====
----- QUERY
-DROP TABLE iceberg_partitioned_orc_external_old_fileformat;
-====
----- QUERY
SELECT count(*) from iceberg_resolution_test_external;
---- TYPES
bigint
diff --git a/tests/query_test/test_scanners.py b/tests/query_test/test_scanners.py
index 530e2d9..314654f 100644
--- a/tests/query_test/test_scanners.py
+++ b/tests/query_test/test_scanners.py
@@ -356,6 +356,9 @@ class TestIceberg(ImpalaTestSuite):
def test_iceberg_query(self, vector):
self.run_test_case('QueryTest/iceberg-query', vector)
+ def test_iceberg_old_fileformat(self, vector, unique_database):
+ self.run_test_case('QueryTest/iceberg-old-fileformat', vector, use_db=unique_database)
+
def test_iceberg_profile(self, vector, unique_database):
self.run_test_case('QueryTest/iceberg-profile', vector, use_db=unique_database)
[impala] 03/04: IMPALA-10958: Decouple getConstraintsInformation
from hive.ql.metadata.Table
Posted by ar...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
arawat pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit cae36149aace90c256131fc4ba30f1cc5dae561e
Author: Yu-Wen Lai <yu...@cloudera.com>
AuthorDate: Tue Sep 28 12:37:18 2021 -0700
IMPALA-10958: Decouple getConstraintsInformation from
hive.ql.metadata.Table
After HIVE-22782, ql.metadata.Table object has no methods to set
PrimaryKeyInfo and ForeignKeyInfo alone. However, we call these two
functions In DescribeResultFactory to set constraints and pass the
table into HiveMetadataFormatUtils. Instead of calling the methods
from table, we can directly pass PrimaryKeyInfo and ForeignKeyInfo
to HiveMetadataFormatUtils so that Impala won't be influenced even
though the table class changes interface.
Additionally, we can get rid of ql.metadata.Table for
getTableInformation altogether since it just needs
metastore.api.Table internally.
Tests:
Ran core tests.
Change-Id: I2dfc54ae2f995dc4ab735d17dbbad9a48f6633da
Reviewed-on: http://gerrit.cloudera.org:8080/17910
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Reviewed-by: Vihang Karajgaonkar <vi...@cloudera.com>
---
.../org/apache/impala/compat/HiveMetadataFormatUtils.java | 12 ++++++------
.../java/org/apache/impala/compat/MetastoreShim.java | 13 +++++++------
.../org/apache/impala/service/DescribeResultFactory.java | 12 ++----------
3 files changed, 15 insertions(+), 22 deletions(-)
diff --git a/fe/src/compat-hive-3/java/org/apache/impala/compat/HiveMetadataFormatUtils.java b/fe/src/compat-hive-3/java/org/apache/impala/compat/HiveMetadataFormatUtils.java
index a2b1a5e..394366d 100644
--- a/fe/src/compat-hive-3/java/org/apache/impala/compat/HiveMetadataFormatUtils.java
+++ b/fe/src/compat-hive-3/java/org/apache/impala/compat/HiveMetadataFormatUtils.java
@@ -396,19 +396,19 @@ public class HiveMetadataFormatUtils {
return null;
}
- public static String getConstraintsInformation(
- org.apache.hadoop.hive.ql.metadata.Table table) {
+ public static String getConstraintsInformation(PrimaryKeyInfo pkInfo,
+ ForeignKeyInfo fkInfo) {
StringBuilder constraintsInfo = new StringBuilder(DEFAULT_STRINGBUILDER_SIZE);
constraintsInfo.append(LINE_DELIM).append("# Constraints").append(LINE_DELIM);
- if (PrimaryKeyInfo.isPrimaryKeyInfoNotEmpty(table.getPrimaryKeyInfo())) {
+ if (PrimaryKeyInfo.isPrimaryKeyInfoNotEmpty(pkInfo)) {
constraintsInfo.append(LINE_DELIM).append("# Primary Key").append(LINE_DELIM);
- getPrimaryKeyInformation(constraintsInfo, table.getPrimaryKeyInfo());
+ getPrimaryKeyInformation(constraintsInfo, pkInfo);
}
- if (ForeignKeyInfo.isForeignKeyInfoNotEmpty(table.getForeignKeyInfo())) {
+ if (ForeignKeyInfo.isForeignKeyInfoNotEmpty(fkInfo)) {
constraintsInfo.append(LINE_DELIM).append("# Foreign Keys").append(LINE_DELIM);
- getForeignKeysInformation(constraintsInfo, table.getForeignKeyInfo());
+ getForeignKeysInformation(constraintsInfo, fkInfo);
}
return constraintsInfo.toString();
diff --git a/fe/src/compat-hive-3/java/org/apache/impala/compat/MetastoreShim.java b/fe/src/compat-hive-3/java/org/apache/impala/compat/MetastoreShim.java
index 86a0c00..5255d7e 100644
--- a/fe/src/compat-hive-3/java/org/apache/impala/compat/MetastoreShim.java
+++ b/fe/src/compat-hive-3/java/org/apache/impala/compat/MetastoreShim.java
@@ -88,6 +88,8 @@ import org.apache.hadoop.hive.metastore.messaging.MessageFactory;
import org.apache.hadoop.hive.metastore.messaging.MessageSerializer;
import org.apache.hadoop.hive.metastore.utils.FileUtils;
import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
+import org.apache.hadoop.hive.ql.metadata.PrimaryKeyInfo;
+import org.apache.hadoop.hive.ql.metadata.ForeignKeyInfo;
import org.apache.hive.service.rpc.thrift.TGetColumnsReq;
import org.apache.hive.service.rpc.thrift.TGetFunctionsReq;
import org.apache.hive.service.rpc.thrift.TGetSchemasReq;
@@ -481,18 +483,17 @@ public class MetastoreShim {
* changed significantly in Hive-3
* @return
*/
- public static String getTableInformation(
- org.apache.hadoop.hive.ql.metadata.Table table) {
- return HiveMetadataFormatUtils.getTableInformation(table.getTTable(), false);
+ public static String getTableInformation(Table table) {
+ return HiveMetadataFormatUtils.getTableInformation(table, false);
}
/**
* Wrapper method around Hive-3's MetadataFormatUtils.getConstraintsInformation
* @return
*/
- public static String getConstraintsInformation(
- org.apache.hadoop.hive.ql.metadata.Table table) {
- return HiveMetadataFormatUtils.getConstraintsInformation(table);
+ public static String getConstraintsInformation(PrimaryKeyInfo pkInfo,
+ ForeignKeyInfo fkInfo) {
+ return HiveMetadataFormatUtils.getConstraintsInformation(pkInfo, fkInfo);
}
/**
diff --git a/fe/src/main/java/org/apache/impala/service/DescribeResultFactory.java b/fe/src/main/java/org/apache/impala/service/DescribeResultFactory.java
index d809ab4..4cb8629 100644
--- a/fe/src/main/java/org/apache/impala/service/DescribeResultFactory.java
+++ b/fe/src/main/java/org/apache/impala/service/DescribeResultFactory.java
@@ -220,29 +220,21 @@ public class DescribeResultFactory {
msTable.getSd().setCols(Column.toFieldSchemas(nonClustered));
msTable.setPartitionKeys(Column.toFieldSchemas(clustered));
- // To avoid initializing any of the SerDe classes in the metastore table Thrift
- // struct, create the ql.metadata.Table object by calling the empty c'tor and
- // then calling setTTable().
- org.apache.hadoop.hive.ql.metadata.Table hiveTable =
- new org.apache.hadoop.hive.ql.metadata.Table();
- hiveTable.setTTable(msTable);
org.apache.hadoop.hive.ql.metadata.PrimaryKeyInfo pki =
new org.apache.hadoop.hive.ql.metadata.PrimaryKeyInfo(
table.getSqlConstraints().getPrimaryKeys(), table.getName(),
table.getDb().getName());
- hiveTable.setPrimaryKeyInfo(pki);
org.apache.hadoop.hive.ql.metadata.ForeignKeyInfo fki =
new org.apache.hadoop.hive.ql.metadata.ForeignKeyInfo(
table.getSqlConstraints().getForeignKeys(), table.getName(),
table.getDb().getName());
- hiveTable.setForeignKeyInfo(fki);
StringBuilder sb = new StringBuilder();
// First add all the columns (includes partition columns).
sb.append(MetastoreShim.getAllColumnsInformation(msTable.getSd().getCols(),
msTable.getPartitionKeys(), true, false, true));
// Add the extended table metadata information.
- sb.append(MetastoreShim.getTableInformation(hiveTable));
- sb.append(MetastoreShim.getConstraintsInformation(hiveTable));
+ sb.append(MetastoreShim.getTableInformation(msTable));
+ sb.append(MetastoreShim.getConstraintsInformation(pki, fki));
for (String line: sb.toString().split("\n")) {
// To match Hive's HiveServer2 output, split each line into multiple column
[impala] 02/04: IMPALA-10959: Reload MV as ACID tables
Posted by ar...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
arawat pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit dc8f375986321f2d7c1ed750e5c02c2e6f8cb4e8
Author: Yu-Wen Lai <yu...@cloudera.com>
AuthorDate: Tue Oct 5 15:42:47 2021 -0700
IMPALA-10959: Reload MV as ACID tables
We observed that the event processor is broken after receiving a
partition event for materialized views (MV). This is because we are
treating MV as a view in Impala, but Hive generates partition events for MV,
which breaks the current event processor.
In this patch, we let partition events of MV follow the code path of ACID
tables to reload the view. In the long term, we will need IMPALA-10723 to
treat materialized view as a table.
Tests:
- manually testing
Change-Id: Ibeab8cc53ad47d24df8baba81e1ec6ea4c80a084
Reviewed-on: http://gerrit.cloudera.org:8080/17911
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Reviewed-by: Vihang Karajgaonkar <vi...@cloudera.com>
---
.../impala/catalog/events/MetastoreEvents.java | 36 ++++++++++++++++------
1 file changed, 26 insertions(+), 10 deletions(-)
diff --git a/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java b/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java
index 93d3af0..59c381a 100644
--- a/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java
+++ b/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java
@@ -45,6 +45,7 @@ import org.apache.hadoop.hive.metastore.messaging.json.JSONAlterTableMessage;
import org.apache.hadoop.hive.metastore.messaging.json.JSONCreateDatabaseMessage;
import org.apache.hadoop.hive.metastore.messaging.json.JSONDropDatabaseMessage;
import org.apache.hadoop.hive.metastore.messaging.json.JSONDropTableMessage;
+import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
import org.apache.impala.analysis.TableName;
import org.apache.impala.catalog.CatalogException;
import org.apache.impala.catalog.CatalogServiceCatalog;
@@ -960,8 +961,11 @@ public class MetastoreEvents {
infoLog("Not processing the event as it is a self-event");
return;
}
- // Reload the whole table if it's a transactional table.
- if (AcidUtils.isTransactionalTable(msTbl_.getParameters())) {
+ // Reload the whole table if it's a transactional table or materialized view.
+ // Materialized views are treated as a special case because it causes problems
+ // on the reloading partition logic which expects it to be a HdfsTable.
+ if (AcidUtils.isTransactionalTable(msTbl_.getParameters())
+ || MetaStoreUtils.isMaterializedViewTable(msTbl_)) {
insertPartition_ = null;
}
@@ -1534,8 +1538,13 @@ public class MetastoreEvents {
return;
}
try {
- // Reload the whole table if it's a transactional table.
- if (AcidUtils.isTransactionalTable(msTbl_.getParameters()) && !isSelfEvent()) {
+ // Reload the whole table if it's a transactional table or materialized view.
+ // Materialized views are treated as a special case because it's possible to
+ // receive partition event on MVs, but they are regular views in Impala. That
+ // cause problems on the reloading partition logic which expects it to be a
+ // HdfsTable.
+ if ((AcidUtils.isTransactionalTable(msTbl_.getParameters()) && !isSelfEvent())
+ || MetaStoreUtils.isMaterializedViewTable(msTbl_)) {
reloadTableFromCatalog("ADD_PARTITION", true);
} else {
// HMS adds partitions in a transactional way. This means there may be multiple
@@ -1669,8 +1678,12 @@ public class MetastoreEvents {
return;
}
- // Reload the whole table if it's a transactional table.
- if (AcidUtils.isTransactionalTable(msTbl_.getParameters())) {
+ // Reload the whole table if it's a transactional table or materialized view.
+ // Materialized views are treated as a special case because it's possible to receive
+ // partition event on MVs, but they are regular views in Impala. That cause problems
+ // on the reloading partition logic which expects it to be a HdfsTable.
+ if (AcidUtils.isTransactionalTable(msTbl_.getParameters())
+ || MetaStoreUtils.isMaterializedViewTable(msTbl_)) {
reloadTableFromCatalog("ALTER_PARTITION", true);
} else {
// Refresh the partition that was altered.
@@ -1886,10 +1899,13 @@ public class MetastoreEvents {
infoLog("Partition list is empty. Ignoring this event.");
}
try {
- // Reload the whole table if it's a transactional table. In case of transactional
- // tables we rely on the self-event evaluation since there is no fine-grained
- // partition level refresh.
- if (AcidUtils.isTransactionalTable(msTbl_.getParameters())) {
+ // Reload the whole table if it's a transactional table or materialized view.
+ // Materialized views are treated as a special case because it's possible to
+ // receive partition event on MVs, but they are regular views in Impala. That
+ // cause problems on the reloading partition logic which expects it to be a
+ // HdfsTable.
+ if (AcidUtils.isTransactionalTable(msTbl_.getParameters())
+ || MetaStoreUtils.isMaterializedViewTable(msTbl_)) {
reloadTableFromCatalog("DROP_PARTITION", true);
} else {
int numPartsRemoved = catalogOpExecutor_
[impala] 04/04: IMPALA-10973: Do not schedule empty scan nodes to
coordinator
Posted by ar...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
arawat pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 7bf39968bb95ac3aa66ff50b03495d4bdb97293b
Author: Csaba Ringhofer <cs...@cloudera.com>
AuthorDate: Tue Oct 19 15:00:13 2021 +0200
IMPALA-10973: Do not schedule empty scan nodes to coordinator
Until now fragments with scan nodes that have no scan ranges were
scheduled to the coordinator, even if it is an exclusive coordinator.
This could possibly lead to a lot of work being scheduled on the
coordinator. This patch changes the logic to choose a random executor
instead.
Change-Id: Ie31df3861aad2e3e91cab621ff122a4f721905ef
Reviewed-on: http://gerrit.cloudera.org:8080/17954
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Reviewed-by: Abhishek Rawat <ar...@cloudera.com>
Reviewed-by: Bikramjeet Vig <bi...@cloudera.com>
---
be/src/scheduling/scheduler.cc | 28 ++++++++++++++++------------
be/src/scheduling/scheduler.h | 3 +--
2 files changed, 17 insertions(+), 14 deletions(-)
diff --git a/be/src/scheduling/scheduler.cc b/be/src/scheduling/scheduler.cc
index 1034fce..4ed5007 100644
--- a/be/src/scheduling/scheduler.cc
+++ b/be/src/scheduling/scheduler.cc
@@ -409,7 +409,20 @@ void Scheduler::CreateCollocatedAndScanInstances(const ExecutorConfig& executor_
DCHECK(has_union || scan_node_ids.size() == 1) << "This method may need revisiting "
<< "for plans with no union and multiple scans per fragment";
vector<NetworkAddressPB> scan_hosts;
- GetScanHosts(executor_config.coord_desc, scan_node_ids, *fragment_state, &scan_hosts);
+ GetScanHosts(scan_node_ids, *fragment_state, &scan_hosts);
+ if (scan_hosts.empty()) {
+ // None of the scan nodes have any scan ranges; run it on a random executor.
+ // TODO: the TODO below seems partially stale
+ // TODO: we'll need to revisit this strategy once we can partition joins
+ // (in which case this fragment might be executing a right outer join
+ // with a large build table)
+ vector<BackendDescriptorPB> all_executors =
+ executor_config.group.GetAllExecutorDescriptors();
+ int idx = std::uniform_int_distribution<int>(0, all_executors.size() - 1)(
+ *state->rng());
+ const BackendDescriptorPB& be_desc = all_executors[idx];
+ scan_hosts.push_back(be_desc.address());
+ }
for (const NetworkAddressPB& host_addr : scan_hosts) {
// Ensure that the num instances is at least as many as input fragments. We don't
// want to increment if there were already some instances from the input fragment,
@@ -784,9 +797,8 @@ std::vector<TPlanNodeId> Scheduler::FindScanNodes(const TPlan& plan) {
return FindNodes(plan, SCAN_NODE_TYPES);
}
-void Scheduler::GetScanHosts(const BackendDescriptorPB& coord_desc,
- const vector<TPlanNodeId>& scan_ids, const FragmentScheduleState& fragment_state,
- vector<NetworkAddressPB>* scan_hosts) {
+void Scheduler::GetScanHosts(const vector<TPlanNodeId>& scan_ids,
+ const FragmentScheduleState& fragment_state, vector<NetworkAddressPB>* scan_hosts) {
for (const TPlanNodeId& scan_id : scan_ids) {
// Get the list of impalad host from scan_range_assignment_
for (const FragmentScanRangeAssignment::value_type& scan_range_assignment :
@@ -796,14 +808,6 @@ void Scheduler::GetScanHosts(const BackendDescriptorPB& coord_desc,
scan_hosts->push_back(scan_range_assignment.first);
}
}
-
- if (scan_hosts->empty()) {
- // this scan node doesn't have any scan ranges; run it on the coordinator
- // TODO: we'll need to revisit this strategy once we can partition joins
- // (in which case this fragment might be executing a right outer join
- // with a large build table)
- scan_hosts->push_back(coord_desc.address());
- }
}
}
diff --git a/be/src/scheduling/scheduler.h b/be/src/scheduling/scheduler.h
index a9093cb..f75bace 100644
--- a/be/src/scheduling/scheduler.h
+++ b/be/src/scheduling/scheduler.h
@@ -415,8 +415,7 @@ class Scheduler {
/// Add all hosts that the scans identified by 'scan_ids' are executed on to
/// 'scan_hosts'.
- void GetScanHosts(const BackendDescriptorPB& coord_desc,
- const std::vector<TPlanNodeId>& scan_ids,
+ void GetScanHosts(const std::vector<TPlanNodeId>& scan_ids,
const FragmentScheduleState& fragment_state,
std::vector<NetworkAddressPB>* scan_hosts);