You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2023/03/22 16:19:49 UTC

[impala] branch master updated (8f204c82b -> fd4db07e7)

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


    from 8f204c82b IMPALA-12013: Remove future from pkg_resources.py
     new 65ae5febe IMPALA-12014 warning on failed KeepAlive for Kudu scanner
     new 19b5d781f IMPALA-11987: [DOCS] Document the mem_limit_executors query option
     new fd4db07e7 Reapply IMPALA-11865: Set thread names for Java thread pools

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 be/src/exec/kudu/kudu-scanner.cc                    | 15 ++++++++++-----
 be/src/exec/kudu/kudu-scanner.h                     |  2 +-
 docs/topics/impala_mem_limit.xml                    | 21 ++++++++++++++++++++-
 .../impala/catalog/CatalogServiceCatalog.java       |  4 +++-
 .../org/apache/impala/catalog/TableLoadingMgr.java  | 11 ++++++++---
 .../catalog/metastore/CatalogMetastoreServer.java   |  4 +++-
 .../apache/impala/hooks/QueryEventHookManager.java  |  5 ++++-
 .../java/org/apache/impala/service/Frontend.java    | 11 ++++++-----
 8 files changed, 55 insertions(+), 18 deletions(-)


[impala] 03/03: Reapply IMPALA-11865: Set thread names for Java thread pools

Posted by jo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit fd4db07e7592446fdc23b601da0f5cfdc0d9f9ec
Author: Joe McDonnell <jo...@cloudera.com>
AuthorDate: Mon Jan 30 09:43:34 2023 -0800

    Reapply IMPALA-11865: Set thread names for Java thread pools
    
    Currently, some Java thread pools use the default
    thread names, which take the form "pool-N-thread-M".
    This sets the thread names to more human-friendly names
    by adding a ThreadFactory to Executor thread pools
    in our Java code. This should make the jstack output
    easier to understand. For example, "pool-8-thread-1"
    becomes "TableLoadingSubmitterThread-0".
    
    Testing:
     - Ran jstack locally and verified
    
    Change-Id: Iaa3028666ff9e51bc0722d3702060bc404bb3da3
    Reviewed-on: http://gerrit.cloudera.org:8080/19642
    Reviewed-by: Michael Smith <mi...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../java/org/apache/impala/catalog/CatalogServiceCatalog.java |  4 +++-
 .../main/java/org/apache/impala/catalog/TableLoadingMgr.java  | 11 ++++++++---
 .../impala/catalog/metastore/CatalogMetastoreServer.java      |  4 +++-
 .../java/org/apache/impala/hooks/QueryEventHookManager.java   |  5 ++++-
 fe/src/main/java/org/apache/impala/service/Frontend.java      | 11 ++++++-----
 5 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java b/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java
index 53f423f4c..96465f69a 100644
--- a/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java
+++ b/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java
@@ -138,6 +138,7 @@ import com.google.common.collect.Lists;
 import com.google.common.collect.Sets;
 import com.google.common.base.Stopwatch;
 import com.google.common.collect.Maps;
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
 
 /**
  * Specialized Catalog that implements the CatalogService specific Catalog
@@ -265,7 +266,8 @@ public class CatalogServiceCatalog extends Catalog {
 
   // Periodically polls HDFS to get the latest set of known cache pools.
   private final ScheduledExecutorService cachePoolReader_ =
-      Executors.newScheduledThreadPool(1);
+    Executors.newScheduledThreadPool(1,
+        new ThreadFactoryBuilder().setNameFormat("HDFSCachePoolReader").build());
 
   // Log of deleted catalog objects.
   private final CatalogDeltaLog deleteLog_;
diff --git a/fe/src/main/java/org/apache/impala/catalog/TableLoadingMgr.java b/fe/src/main/java/org/apache/impala/catalog/TableLoadingMgr.java
index 37685c6ff..48720d0bb 100644
--- a/fe/src/main/java/org/apache/impala/catalog/TableLoadingMgr.java
+++ b/fe/src/main/java/org/apache/impala/catalog/TableLoadingMgr.java
@@ -36,6 +36,7 @@ import org.apache.impala.util.HdfsCachingUtil;
 import org.apache.log4j.Logger;
 
 import com.google.common.base.Preconditions;
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
 
 /**
 * Class that manages scheduling the loading of table metadata from the Hive Metastore and
@@ -148,7 +149,8 @@ public class TableLoadingMgr {
   // (no work will be rejected, but memory consumption is unbounded). If this thread
   // dies it will be automatically restarted.
   // The tables to process are read from the resfreshThreadWork_ queue.
-  ExecutorService asyncRefreshThread_ = Executors.newSingleThreadExecutor();
+  ExecutorService asyncRefreshThread_ = Executors.newSingleThreadExecutor(
+      new ThreadFactoryBuilder().setNameFormat("TableAsyncRefreshThread").build());
 
   // Tables for the async refresh thread to process. Synchronization must be handled
   // externally.
@@ -162,7 +164,8 @@ public class TableLoadingMgr {
     catalog_ = catalog;
     tblLoader_ = new TableLoader(catalog_);
     numLoadingThreads_ = numLoadingThreads;
-    tblLoadingPool_ = Executors.newFixedThreadPool(numLoadingThreads_);
+    tblLoadingPool_ = Executors.newFixedThreadPool(numLoadingThreads_,
+        new ThreadFactoryBuilder().setNameFormat("TableLoadingThread-%d").build());
 
     // Start the background table loading submitter threads.
     startTableLoadingSubmitterThreads();
@@ -265,7 +268,9 @@ public class TableLoadingMgr {
    */
   private void startTableLoadingSubmitterThreads() {
     ExecutorService submitterLoadingPool =
-        Executors.newFixedThreadPool(numLoadingThreads_);
+      Executors.newFixedThreadPool(numLoadingThreads_,
+          new ThreadFactoryBuilder()
+              .setNameFormat("TableLoadingSubmitterThread-%d").build());
     try {
       for (int i = 0; i < numLoadingThreads_; ++i) {
         submitterLoadingPool.execute(new Runnable() {
diff --git a/fe/src/main/java/org/apache/impala/catalog/metastore/CatalogMetastoreServer.java b/fe/src/main/java/org/apache/impala/catalog/metastore/CatalogMetastoreServer.java
index 14a0f6076..ea7ab2a2f 100644
--- a/fe/src/main/java/org/apache/impala/catalog/metastore/CatalogMetastoreServer.java
+++ b/fe/src/main/java/org/apache/impala/catalog/metastore/CatalogMetastoreServer.java
@@ -28,6 +28,7 @@ import java.util.Set;
 import com.codahale.metrics.Timer;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
 
 import java.lang.reflect.InvocationHandler;
 import java.lang.reflect.InvocationTargetException;
@@ -109,7 +110,8 @@ public class CatalogMetastoreServer extends ThriftHiveMetastore implements
 
   // Logs Catalogd HMS cache metrics at a fixed frequency.
   private final ScheduledExecutorService metricsLoggerService_ =
-      Executors.newScheduledThreadPool(1);
+    Executors.newScheduledThreadPool(1,
+        new ThreadFactoryBuilder().setNameFormat("MetricsLoggerService").build());
 
   // the server is started in a daemon thread so that instantiating this is not
   // a blocking call.
diff --git a/fe/src/main/java/org/apache/impala/hooks/QueryEventHookManager.java b/fe/src/main/java/org/apache/impala/hooks/QueryEventHookManager.java
index eef24673b..b1e4d702d 100644
--- a/fe/src/main/java/org/apache/impala/hooks/QueryEventHookManager.java
+++ b/fe/src/main/java/org/apache/impala/hooks/QueryEventHookManager.java
@@ -31,6 +31,8 @@ import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
 import java.util.stream.Collectors;
 
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
+
 /**
  * {@link QueryEventHookManager} manages the registration and execution of
  * {@link QueryEventHook}s. Each manager instance may manage its own hooks,
@@ -117,7 +119,8 @@ public class QueryEventHookManager {
   private QueryEventHookManager(int nHookExecutorThreads, String[] hookClasses)
       throws InternalException {
 
-    this.hookExecutor_ = Executors.newFixedThreadPool(nHookExecutorThreads);
+    this.hookExecutor_ = Executors.newFixedThreadPool(nHookExecutorThreads,
+        new ThreadFactoryBuilder().setNameFormat("QueryEventHookExecutor-%d").build());
     Runtime.getRuntime().addShutdownHook(new Thread(() -> this.cleanUp()));
 
     final List<QueryEventHook> hooks = new ArrayList<>(hookClasses.length);
diff --git a/fe/src/main/java/org/apache/impala/service/Frontend.java b/fe/src/main/java/org/apache/impala/service/Frontend.java
index c9da2bdea..cde002df3 100644
--- a/fe/src/main/java/org/apache/impala/service/Frontend.java
+++ b/fe/src/main/java/org/apache/impala/service/Frontend.java
@@ -28,6 +28,7 @@ import com.google.common.collect.Sets;
 import com.google.common.math.IntMath;
 import com.google.common.util.concurrent.MoreExecutors;
 import com.google.common.util.concurrent.Uninterruptibles;
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -400,13 +401,11 @@ public class Frontend {
   // Privileges in which the user should have any of them to see a database or table,
   private final EnumSet<Privilege> minPrivilegeSetForShowStmts_;
   /**
-   * Authorization checker. Initialized and periodically loaded by a task
-   * running on the {@link #policyReader_} thread.
+   * Authorization checker. Initialized on creation, then it is kept up to date
+   * via calls to updateCatalogCache().
    */
   private final AtomicReference<AuthorizationChecker> authzChecker_ =
       new AtomicReference<>();
-  private final ScheduledExecutorService policyReader_ =
-      Executors.newScheduledThreadPool(1);
 
   private final ImpaladTableUsageTracker impaladTableUsageTracker_;
 
@@ -455,7 +454,9 @@ public class Frontend {
         checkAuthorizationPool_ = MoreExecutors.newDirectExecutorService();
       } else {
         LOG.info("Using a thread pool of size {} for authorization", numThreads);
-        checkAuthorizationPool_ = Executors.newFixedThreadPool(numThreads);
+        checkAuthorizationPool_ = Executors.newFixedThreadPool(numThreads,
+            new ThreadFactoryBuilder()
+                .setNameFormat("AuthorizationCheckerThread-%d").build());
       }
     } else {
       authzChecker_.set(authzFactory.newAuthorizationChecker());


[impala] 02/03: IMPALA-11987: [DOCS] Document the mem_limit_executors query option

Posted by jo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 19b5d781f46798d0ad4fdb4bc8deb352a6d15d07
Author: Shajini Thayasingh <st...@cloudera.com>
AuthorDate: Wed Mar 8 11:20:23 2023 -0800

    IMPALA-11987: [DOCS] Document the mem_limit_executors query option
    
    Document the difference between mem_limit and mem_limit_executors
    query options.
    
    Change-Id: Ibce8347d564afde0f9b9e83df560454f4b8b5c9d
    Reviewed-on: http://gerrit.cloudera.org:8080/19604
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Daniel Becker <da...@cloudera.com>
    Reviewed-by: Wenzhe Zhou <wz...@cloudera.com>
---
 docs/topics/impala_mem_limit.xml | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/docs/topics/impala_mem_limit.xml b/docs/topics/impala_mem_limit.xml
index aec76e65a..02a1d7750 100644
--- a/docs/topics/impala_mem_limit.xml
+++ b/docs/topics/impala_mem_limit.xml
@@ -209,5 +209,24 @@ for buffer reservations. Memory reservation needed given the current plan: 38.00
 or the pool config (max-query-mem-limit, min-query-mem-limit) for the query to allow the query memory limit to be
 at least 70.00 MB. Note that changing the mem_limit may also change the plan. See the query profile for more
 information about the per-node memory requirements.</codeblock>
- </conbody>
+</conbody>
+<concept id="mem_limit_executors">
+  <title>MEM_LIMIT_EXECUTORS Query Option</title>
+    <conbody>
+      <note>This is a developer-only query option. Setting this query option is not recommended
+        unless specifically advised.</note>
+      <p>The existing <codeph>MEM_LIMIT</codeph> query option applies to all impala coordinators and
+        executors. This means that the same amount of memory gets reserved but coordinators
+        typically just do the job of coordinating the query and thus do not necessarily need all the
+        estimated memory. Blocking the estimated memory on coordinators blocks the memory to be used
+        for other queries.</p>
+      <p>The new <codeph>MEM_LIMIT_EXECUTORS</codeph> query option functions similarly to the
+          <codeph>MEM_LIMIT</codeph> option but sets the query memory limit only on executors. This
+        new option addresses the issue related to <codeph>MEM_LIMIT</codeph> and is recommended in
+        scenarios where the query needs much higher memory on executors compared with
+        coordinators.</p>
+      <p>Note that the <codeph>MEM_LIMIT_EXECUTORS</codeph> option does not work with
+          <codeph>MEM_LIMIT</codeph>. If you set both, only <codeph>MEM_LIMIT</codeph> applies.</p>
+    </conbody>
+</concept>
 </concept>


[impala] 01/03: IMPALA-12014 warning on failed KeepAlive for Kudu scanner

Posted by jo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 65ae5febee868addafb56b01f9276a03601dd039
Author: Alexey Serbin <as...@cloudera.com>
AuthorDate: Tue Mar 21 12:40:51 2023 -0700

    IMPALA-12014 warning on failed KeepAlive for Kudu scanner
    
    With this patch, there is now a warning message output when
    ScannerKeepAlive RPC for a Kudu scanner fails.  To avoid flooding the
    log with those messages, KLOG_EVERY_N_SECS(WARNING, 60) is used.
    
    I also updated the signature of the KuduScanner::BuildErrorString()
    method to become a constant one.
    
    This is a follow-up to IMPALA-3292.
    
    Change-Id: If39f968685797506491d3d5ebc481efb6e43a568
    Reviewed-on: http://gerrit.cloudera.org:8080/19643
    Reviewed-by: Wenzhe Zhou <wz...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Michael Smith <mi...@cloudera.com>
---
 be/src/exec/kudu/kudu-scanner.cc | 15 ++++++++++-----
 be/src/exec/kudu/kudu-scanner.h  |  2 +-
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/be/src/exec/kudu/kudu-scanner.cc b/be/src/exec/kudu/kudu-scanner.cc
index 62353eea4..c05367012 100644
--- a/be/src/exec/kudu/kudu-scanner.cc
+++ b/be/src/exec/kudu/kudu-scanner.cc
@@ -20,6 +20,7 @@
 #include <string>
 #include <vector>
 
+#include <glog/logging.h>
 #include <kudu/client/resource_metrics.h>
 #include <kudu/client/row_result.h>
 #include <kudu/client/value.h>
@@ -34,6 +35,7 @@
 #include "gutil/gscoped_ptr.h"
 #include "gutil/strings/substitute.h"
 #include "kudu/util/block_bloom_filter.h"
+#include "kudu/util/logging.h"
 #include "kudu/util/slice.h"
 #include "runtime/mem-pool.h"
 #include "runtime/mem-tracker.h"
@@ -93,7 +95,9 @@ Status KuduScanner::Open() {
 }
 
 void KuduScanner::KeepKuduScannerAlive() {
-  if (scanner_ == nullptr) return;
+  if (scanner_ == nullptr) {
+    return;
+  }
   int64_t now = MonotonicMicros();
   int64_t keepalive_us = FLAGS_kudu_scanner_keep_alive_period_sec * 1e6;
   if (now < last_alive_time_micros_ + keepalive_us) {
@@ -106,7 +110,8 @@ void KuduScanner::KeepKuduScannerAlive() {
   // if the scan is unrecoverable.
   kudu::Status s = scanner_->KeepAlive();
   if (!s.ok()) {
-    VLOG(1) << "Unable to keep the Kudu scanner alive: " << s.ToString();
+    KLOG_EVERY_N_SECS(WARNING, 60) << BuildErrorString(
+        Substitute("$0: unable to keep scanner alive", s.ToString()).c_str());
     return;
   }
   last_alive_time_micros_ = now;
@@ -453,9 +458,9 @@ Status KuduScanner::GetNextScannerBatch() {
   return Status::OK();
 }
 
-string KuduScanner::BuildErrorString(const char* msg) {
-  return Substitute("$0 for node with id '$1' for Kudu table '$2'", msg, scan_node_->id(),
-      scan_node_->table_desc()->table_name());
+string KuduScanner::BuildErrorString(const char* msg) const {
+  return Substitute("$0 for node with id '$1' for Kudu table '$2'",
+      msg, scan_node_->id(), scan_node_->table_desc()->table_name());
 }
 
 }  // namespace impala
diff --git a/be/src/exec/kudu/kudu-scanner.h b/be/src/exec/kudu/kudu-scanner.h
index 0fcce2e83..1250e19ab 100644
--- a/be/src/exec/kudu/kudu-scanner.h
+++ b/be/src/exec/kudu/kudu-scanner.h
@@ -94,7 +94,7 @@ class KuduScanner {
   }
 
   /// Builds the error string by adding the PlanNode id and KuduTable to the message.
-  std::string BuildErrorString(const char* msg);
+  std::string BuildErrorString(const char* msg) const;
 
   KuduScanNodeBase* scan_node_;
   RuntimeState* state_;