You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2023/04/14 13:19:24 UTC

[doris] branch branch-1.2-lts updated (09ac48b4aa -> ca78c132d4)

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a change to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git


    from 09ac48b4aa [cherry-pick](bitmapfilter) fix bitmap filter timeout unit error (#18672)
     new c911e688de [Fix](multi-catalog) invalidates the file cache when table is non-partitioned. (#17932)
     new 2bb84fc4f4 [Fix](multi-catalog) add handler for hms INSERT EVENT. (#17933)
     new 71df9d983c [fix](trino catalog) To specify both catalog and database, run the show table command (#18645)
     new ca78c132d4 [fix](info_db) avoid infodb query timeout when external catalog info is too large or is not reachable (#18662)

The 4 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 docs/en/docs/admin-manual/config/fe-config.md      | 14 +++++++
 docs/zh-CN/docs/admin-manual/config/fe-config.md   | 15 ++++++++
 .../main/java/org/apache/doris/common/Config.java  |  9 +++++
 .../doris/datasource/hive/HiveMetaStoreCache.java  | 12 ++++++
 .../{CreateTableEvent.java => InsertEvent.java}    | 44 +++++++++++-----------
 .../hive/event/MetastoreEventFactory.java          |  2 +
 .../org/apache/doris/external/jdbc/JdbcClient.java | 15 ++++++--
 .../org/apache/doris/planner/SchemaScanNode.java   |  3 ++
 8 files changed, 88 insertions(+), 26 deletions(-)
 copy fe/fe-core/src/main/java/org/apache/doris/datasource/hive/event/{CreateTableEvent.java => InsertEvent.java} (54%)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org


[doris] 03/04: [fix](trino catalog) To specify both catalog and database, run the show table command (#18645)

Posted by mo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 71df9d983c07b3bdabd27f693a620c937c9118d3
Author: yongkang.zhong <zh...@qq.com>
AuthorDate: Fri Apr 14 17:51:50 2023 +0800

    [fix](trino catalog) To specify both catalog and database, run the show table command (#18645)
    
    * [fix](trino catalog) To specify both catalog and database, run the show table command
    
    * fix
---
 .../java/org/apache/doris/external/jdbc/JdbcClient.java   | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/external/jdbc/JdbcClient.java b/fe/fe-core/src/main/java/org/apache/doris/external/jdbc/JdbcClient.java
index 38a6f07b04..3b8e3d73f7 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/external/jdbc/JdbcClient.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/external/jdbc/JdbcClient.java
@@ -250,6 +250,7 @@ public class JdbcClient {
         String[] types = {"TABLE", "VIEW"};
         try {
             DatabaseMetaData databaseMetaData = conn.getMetaData();
+            String catalogName = conn.getCatalog();
             switch (dbType) {
                 case JdbcResource.MYSQL:
                     rs = databaseMetaData.getTables(dbName, null, null, types);
@@ -259,9 +260,11 @@ public class JdbcClient {
                 case JdbcResource.CLICKHOUSE:
                 case JdbcResource.SQLSERVER:
                 case JdbcResource.SAP_HANA:
-                case JdbcResource.TRINO:
                     rs = databaseMetaData.getTables(null, dbName, null, types);
                     break;
+                case JdbcResource.TRINO:
+                    rs = databaseMetaData.getTables(catalogName, dbName, null, types);
+                    break;
                 default:
                     throw new JdbcClientException("Unknown database type");
             }
@@ -287,6 +290,7 @@ public class JdbcClient {
         String[] types = {"TABLE", "VIEW"};
         try {
             DatabaseMetaData databaseMetaData = conn.getMetaData();
+            String catalogName = conn.getCatalog();
             switch (dbType) {
                 case JdbcResource.MYSQL:
                     rs = databaseMetaData.getTables(dbName, null, tableName, types);
@@ -296,9 +300,11 @@ public class JdbcClient {
                 case JdbcResource.CLICKHOUSE:
                 case JdbcResource.SQLSERVER:
                 case JdbcResource.SAP_HANA:
-                case JdbcResource.TRINO:
                     rs = databaseMetaData.getTables(null, dbName, null, types);
                     break;
+                case JdbcResource.TRINO:
+                    rs = databaseMetaData.getTables(catalogName, dbName, null, types);
+                    break;
                 default:
                     throw new JdbcClientException("Unknown database type: " + dbType);
             }
@@ -350,6 +356,7 @@ public class JdbcClient {
         }
         try {
             DatabaseMetaData databaseMetaData = conn.getMetaData();
+            String catalogName = conn.getCatalog();
             // getColumns(String catalog, String schemaPattern, String tableNamePattern, String columnNamePattern)
             // catalog - the catalog of this table, `null` means all catalogs
             // schema - The schema of the table; corresponding to tablespace in Oracle
@@ -368,9 +375,11 @@ public class JdbcClient {
                 case JdbcResource.CLICKHOUSE:
                 case JdbcResource.SQLSERVER:
                 case JdbcResource.SAP_HANA:
-                case JdbcResource.TRINO:
                     rs = databaseMetaData.getColumns(null, dbName, tableName, null);
                     break;
+                case JdbcResource.TRINO:
+                    rs = databaseMetaData.getColumns(catalogName, dbName, tableName, null);
+                    break;
                 default:
                     throw new JdbcClientException("Unknown database type");
             }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org


[doris] 02/04: [Fix](multi-catalog) add handler for hms INSERT EVENT. (#17933)

Posted by mo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 2bb84fc4f4b0e47e0bc7a01bebb68fc2bf50c230
Author: Xiangyu Wang <du...@gmail.com>
AuthorDate: Fri Mar 24 10:17:47 2023 +0800

    [Fix](multi-catalog) add handler for hms INSERT EVENT. (#17933)
    
    When we use a hive client to submit an `INSERT INTO TBL SELECT * FROM ...` or `INSERT INTO TBL VALUES ...`
    sql and the table is a non-partitioned table, the hms will generate an insert event. The insert stmt may change the
    hdfs file distribution of this table, but currently we do not handle this, so the file cache of this table may be inaccurate.
---
 .../doris/datasource/hive/event/InsertEvent.java   | 75 ++++++++++++++++++++++
 .../hive/event/MetastoreEventFactory.java          |  2 +
 2 files changed, 77 insertions(+)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/event/InsertEvent.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/event/InsertEvent.java
new file mode 100644
index 0000000000..cf4ba1d5b0
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/event/InsertEvent.java
@@ -0,0 +1,75 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+package org.apache.doris.datasource.hive.event;
+
+import org.apache.doris.catalog.Env;
+import org.apache.doris.common.DdlException;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
+import org.apache.hadoop.hive.metastore.api.NotificationEvent;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.metastore.messaging.InsertMessage;
+
+import java.util.List;
+
+/**
+ * MetastoreEvent for INSERT event type
+ */
+public class InsertEvent extends MetastoreTableEvent {
+    private final Table hmsTbl;
+
+    private InsertEvent(NotificationEvent event, String catalogName) {
+        super(event, catalogName);
+        Preconditions.checkArgument(getEventType().equals(MetastoreEventType.INSERT));
+        Preconditions
+                .checkNotNull(event.getMessage(), debugString("Event message is null"));
+        try {
+            InsertMessage insertMessage =
+                    MetastoreEventsProcessor.getMessageDeserializer(event.getMessageFormat())
+                            .getInsertMessage(event.getMessage());
+            hmsTbl = Preconditions.checkNotNull(insertMessage.getTableObj());
+        } catch (Exception ex) {
+            throw new MetastoreNotificationException(ex);
+        }
+    }
+
+    protected static List<MetastoreEvent> getEvents(NotificationEvent event, String catalogName) {
+        return Lists.newArrayList(new InsertEvent(event, catalogName));
+    }
+
+    @Override
+    protected void process() throws MetastoreNotificationException {
+        try {
+            infoLog("catalogName:[{}],dbName:[{}],tableName:[{}]", catalogName, dbName, tblName);
+            /***
+             *  Only when we use hive client to execute an `INSERT INTO TBL SELECT * ...` or `INSERT INTO TBL ...` sql
+             *  on a non-partitioned table will the hms generate an insert event, and no
+             *  partition event occurs, but the file cache may have been changed, so we need to handle this.
+             *  Currently {@link org.apache.doris.datasource.CatalogMgr#refreshExternalTable} do not invalidate
+             *  the file cache of this table,
+             *  but <a href="https://github.com/apache/doris/pull/17932">this PR</a> has fixed it.
+             */
+            Env.getCurrentEnv().getCatalogMgr().refreshExternalTable(dbName, tblName, catalogName);
+        } catch (DdlException e) {
+            throw new MetastoreNotificationException(
+                    debugString("Failed to process event"));
+        }
+    }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/event/MetastoreEventFactory.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/event/MetastoreEventFactory.java
index ce96ce62e1..3ab2a7e030 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/event/MetastoreEventFactory.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/event/MetastoreEventFactory.java
@@ -58,6 +58,8 @@ public class MetastoreEventFactory implements EventFactory {
                 return DropPartitionEvent.getEvents(event, catalogName);
             case ALTER_PARTITION:
                 return AlterPartitionEvent.getEvents(event, catalogName);
+            case INSERT:
+                return InsertEvent.getEvents(event, catalogName);
             default:
                 // ignore all the unknown events by creating a IgnoredEvent
                 return IgnoredEvent.getEvents(event, catalogName);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org


[doris] 01/04: [Fix](multi-catalog) invalidates the file cache when table is non-partitioned. (#17932)

Posted by mo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git

commit c911e688de57a3c3b992bc082ba1bf5df9543f90
Author: Xiangyu Wang <du...@gmail.com>
AuthorDate: Wed Mar 22 23:34:18 2023 +0800

    [Fix](multi-catalog) invalidates the file cache when table is non-partitioned. (#17932)
    
    According to `org.apache.doris.planner.external.HiveSplitter`, the file cache of `HiveMetaStoreCache`
    may be created even when the table is a non-partitioned table,
    so the `RefreshTableStmt` should consider this case and handle it.
---
 .../org/apache/doris/datasource/hive/HiveMetaStoreCache.java | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
index 31b8d2f3b4..a2fb14de39 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
@@ -52,6 +52,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hive.metastore.api.Partition;
 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.metastore.api.Table;
 import org.apache.hadoop.mapred.FileInputFormat;
 import org.apache.hadoop.mapred.InputFormat;
 import org.apache.hadoop.mapred.InputSplit;
@@ -374,6 +375,17 @@ public class HiveMetaStoreCache {
             LOG.debug("invalid table cache for {}.{} in catalog {}, cache num: {}, cost: {} ms",
                     dbName, tblName, catalog.getName(), partitionValues.partitionValuesMap.size(),
                     (System.currentTimeMillis() - start));
+        } else {
+            /**
+             * A file cache entry can be created reference to
+             * {@link org.apache.doris.planner.external.HiveSplitter#getSplits},
+             * so we need to invalidate it if this is a non-partitioned table.
+             *
+             * */
+            Table table = catalog.getClient().getTable(dbName, tblName);
+            // we just need to assign the `location` field because the `equals` method of `FileCacheKey`
+            // just compares the value of `location`
+            fileCache.invalidate(new FileCacheKey(table.getSd().getLocation(), null));
         }
     }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org


[doris] 04/04: [fix](info_db) avoid infodb query timeout when external catalog info is too large or is not reachable (#18662)

Posted by mo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git

commit ca78c132d48bde99b940f2268d4e6fd94b1e31b7
Author: Mingyu Chen <mo...@163.com>
AuthorDate: Fri Apr 14 14:40:31 2023 +0800

    [fix](info_db) avoid infodb query timeout when external catalog info is too large or is not reachable (#18662)
    
    When querying tables in the information_schema database, the query may time out because:
    
    1. There are external catalogs with too many tables.
    2. The external catalog is unreachable.
    So I add a new FE config infodb_support_ext_catalog.
    The default is false, which means that when selecting from tables in the information_schema database,
    the result will not contain the information of the tables in external catalogs.
    
    Describe your changes.
---
 docs/en/docs/admin-manual/config/fe-config.md             | 14 ++++++++++++++
 docs/zh-CN/docs/admin-manual/config/fe-config.md          | 15 +++++++++++++++
 .../src/main/java/org/apache/doris/common/Config.java     |  9 +++++++++
 .../java/org/apache/doris/planner/SchemaScanNode.java     |  3 +++
 4 files changed, 41 insertions(+)

diff --git a/docs/en/docs/admin-manual/config/fe-config.md b/docs/en/docs/admin-manual/config/fe-config.md
index d661c32edc..3ca3c32c66 100644
--- a/docs/en/docs/admin-manual/config/fe-config.md
+++ b/docs/en/docs/admin-manual/config/fe-config.md
@@ -2386,3 +2386,17 @@ MasterOnly:true
 
 Maximum number of error tablet showed in broker load.
 
+#### `infodb_support_ext_catalog`
+
+<version since="1.2.4"></version>
+
+Default: false
+
+IsMutable: true
+
+MasterOnly: false
+
+If false, when selecting from tables in the information_schema database,
+the result will not contain the information of the tables in external catalogs.
+This is to avoid query timeouts when an external catalog is not reachable.
+
diff --git a/docs/zh-CN/docs/admin-manual/config/fe-config.md b/docs/zh-CN/docs/admin-manual/config/fe-config.md
index 2888279e0a..01d6abbdc7 100644
--- a/docs/zh-CN/docs/admin-manual/config/fe-config.md
+++ b/docs/zh-CN/docs/admin-manual/config/fe-config.md
@@ -2449,3 +2449,18 @@ hive partition 的最大缓存数量。
 是否为 Master FE 节点独有的配置项:true
 
 broker load job 保存的失败tablet 信息的最大数量
+
+#### `infodb_support_ext_catalog`
+
+<version since="1.2.4"></version>
+
+默认值:false
+
+是否可以动态配置:true
+
+是否为 Master FE 节点独有的配置项:false
+
+当设置为 false 时,查询 `information_schema` 中的表时,将不再返回 external catalog 中的表的信息。
+
+这个参数主要用于避免因 external catalog 无法访问、信息过多等原因导致的查询 `information_schema` 超时的问题。
+
diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
index 1c0b0acc6a..145b9fb894 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
@@ -1993,5 +1993,14 @@ public class Config extends ConfigBase {
      */
     @ConfField(mutable = false, masterOnly = false)
     public static String mysql_ssl_default_certificate_password = "doris";
+
+    /**
+     * If false, when selecting from tables in the information_schema database,
+     * the result will not contain the information of the tables in external catalogs.
+     * This is to avoid query timeouts when an external catalog is not reachable.
+     * TODO: this is a temp solution, we should support external catalog in the future.
+     */
+    @ConfField(mutable = true)
+    public static boolean infodb_support_ext_catalog = false;
 }
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SchemaScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/SchemaScanNode.java
index f00c8683f3..2107cb6c8e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/SchemaScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SchemaScanNode.java
@@ -23,6 +23,7 @@ import org.apache.doris.catalog.SchemaTable;
 import org.apache.doris.common.Config;
 import org.apache.doris.common.UserException;
 import org.apache.doris.common.util.Util;
+import org.apache.doris.datasource.InternalCatalog;
 import org.apache.doris.qe.ConnectContext;
 import org.apache.doris.service.FrontendOptions;
 import org.apache.doris.statistics.StatisticalType;
@@ -96,6 +97,8 @@ public class SchemaScanNode extends ScanNode {
         }
         if (schemaCatalog != null) {
             msg.schema_scan_node.setCatalog(schemaCatalog);
+        } else if (!Config.infodb_support_ext_catalog) {
+            msg.schema_scan_node.setCatalog(InternalCatalog.INTERNAL_CATALOG_NAME);
         }
         msg.schema_scan_node.show_hidden_cloumns = Util.showHiddenColumns();
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org