You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by wz...@apache.org on 2021/11/17 16:51:49 UTC

[impala] branch master updated (b692a92 -> a690b74)

This is an automated email from the ASF dual-hosted git repository.

wzhou pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git.


    from b692a92  IMPALA-5256: Force log rotation when max_log_size exceeded
     new 0cd5bb0  IMPALA-11006: Impalad crashes during query cancel tests
     new a690b74  IMPALA-11020: Reattach STDOUT/STDERR independently.

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 be/src/common/logging.cc               | 44 ++++++++++++++++++++++------------
 be/src/service/client-request-state.cc | 14 +++++++++--
 2 files changed, 41 insertions(+), 17 deletions(-)

[impala] 01/02: IMPALA-11006: Impalad crashes during query cancel tests

Posted by wz...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

wzhou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 0cd5bb012856c364c2e7ec8962c01e488b718a8b
Author: Qifan Chen <qc...@cloudera.com>
AuthorDate: Thu Nov 4 19:44:13 2021 -0400

    IMPALA-11006: Impalad crashes during query cancel tests
    
    This patch addresses impalad crash during query cancel tests for
    CTAS. The root cause is that the wait thread in impalad assumes
    that the coordinator or computation results are always available
    once the worker thread async_exec_thread_ is joined. In reality,
    this never happens as the query is cancelled.
    
    The patch specifically checks the cancel status when it is found
    that the coordinator is not available for CTAS and bails out
    immediately. This prevents crash since the subsequent code that
    assumes the coordinator or computation results are available is
    never reached.
    
    Testing:
      1. Ran core tests successfully.
    
    Change-Id: Ia49411f8525734b8d463d9ffbfbce705b90a8d73
    Reviewed-on: http://gerrit.cloudera.org:8080/17999
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/service/client-request-state.cc | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/be/src/service/client-request-state.cc b/be/src/service/client-request-state.cc
index 7939c5f..ca0f6c0 100644
--- a/be/src/service/client-request-state.cc
+++ b/be/src/service/client-request-state.cc
@@ -1060,6 +1060,9 @@ Status ClientRequestState::WaitInternal() {
   }
   if (!child_queries.empty()) query_events_->MarkEvent("Child queries finished");
 
+  bool isCTAS = catalog_op_type() == TCatalogOpType::DDL
+      && ddl_type() == TDdlType::CREATE_TABLE_AS_SELECT;
+
   if (GetCoordinator() != NULL) {
     Status status = GetCoordinator()->Wait();
     if (UNLIKELY(!status.ok())) {
@@ -1067,6 +1070,14 @@ Status ClientRequestState::WaitInternal() {
       return status;
     }
     RETURN_IF_ERROR(UpdateCatalog());
+  } else {
+    // When the coordinator is not available for CTAS that requires a coordinator, check
+    // further if the query has been cancelled. If so, return immediately as there will
+    // be no query result available (IMPALA-11006).
+    if (isCTAS) {
+      lock_guard<mutex> l(lock_);
+      if (is_cancelled_) return Status::CANCELLED;
+    }
   }
 
   if (catalog_op_type() == TCatalogOpType::DDL &&
@@ -1078,8 +1089,7 @@ Status ClientRequestState::WaitInternal() {
     // Queries that do not return a result are finished at this point. This includes
     // DML operations.
     eos_.Store(true);
-  } else if (catalog_op_type() == TCatalogOpType::DDL &&
-      ddl_type() == TDdlType::CREATE_TABLE_AS_SELECT) {
+  } else if (isCTAS) {
     SetCreateTableAsSelectResultSet();
   }
   // Rows are available now (for SELECT statement), so start the 'wait' timer that tracks

[impala] 02/02: IMPALA-11020: Reattach STDOUT/STDERR independently.

Posted by wz...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

wzhou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit a690b746271c55408ce8a3f0dd0251a8f3ff8cfc
Author: Riza Suminto <ri...@cloudera.com>
AuthorDate: Mon Nov 15 14:01:18 2021 -0800

    IMPALA-11020: Reattach STDOUT/STDERR independently.
    
    IMPALA-5256 made an incorrect assumption that glog will rotate both INFO
    and ERROR logs at the same time. A race condition can happen between
    glog's log rotation and symlink resolutions in AttachStdoutStderrLocked
    because impala does not hold glog's log_mutex.
    
    This patch fixes the issue by replacing the DCHECK with a log, warning
    if only one symlink has changed. We then reattach the stream only for
    the one pointing to a new file. The next AttachStdoutStderr call after
    logbufsecs will handle redirection for the slower log rotation. This
    patch also adds explicit log flush after logging statements that intent
    to initialize a new log file.
    
    Testing:
    - Pass the exhaustive test.
    
    Change-Id: I35bbed631773bf7a4d55b2b77df1a093f34dedc5
    Reviewed-on: http://gerrit.cloudera.org:8080/18031
    Reviewed-by: Joe McDonnell <jo...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/common/logging.cc | 44 +++++++++++++++++++++++++++++---------------
 1 file changed, 29 insertions(+), 15 deletions(-)

diff --git a/be/src/common/logging.cc b/be/src/common/logging.cc
index debdd2f..7291858 100644
--- a/be/src/common/logging.cc
+++ b/be/src/common/logging.cc
@@ -97,8 +97,8 @@ impala::Status ResolveLogSymlink(const string& symlink_path, string& canonical_p
 // The main implementation of AttachStdoutStderr().
 // Caller must hold lock over logging_mutex.
 impala::Status AttachStdoutStderrLocked() {
+  // Redirect stdout to INFO log and stderr to ERROR log.
   // Needs to be done after InitGoogleLogging, to get the INFO/ERROR file paths.
-  // Redirect stdout to INFO log and stderr to ERROR log
   string info_log_symlink_path, error_log_symlink_path;
   impala::GetFullLogFilename(google::INFO, &info_log_symlink_path);
   impala::GetFullLogFilename(google::ERROR, &error_log_symlink_path);
@@ -107,29 +107,37 @@ impala::Status AttachStdoutStderrLocked() {
   RETURN_IF_ERROR(ResolveLogSymlink(info_log_symlink_path, info_log_path));
   RETURN_IF_ERROR(ResolveLogSymlink(error_log_symlink_path, error_log_path));
 
-  if (last_info_log_path != info_log_path || last_error_log_path != error_log_path) {
-    // Both INFO and ERROR log should be rotated together at the same time.
-    DCHECK_NE(last_info_log_path, info_log_path);
-    DCHECK_NE(last_error_log_path, error_log_path);
-
-    // The log files are created on first use, log something to each before redirecting.
-    LOG(INFO) << "stdout will be logged to this file.";
-    LOG(ERROR) << "stderr will be logged to this file.";
+  bool info_log_rotated = last_info_log_path != info_log_path;
+  bool error_log_rotated = last_error_log_path != error_log_path;
+
+  if (info_log_rotated != error_log_rotated) {
+    // Since we're not holding glog's log_mutex when resolving the symlinks, it is
+    // possible that glog rotates the INFO and ERROR log independently in between our
+    // symlink resolution. If this happens, log a warning and continue.
+    // Next AttachStdoutStderrLocked call should redirect the stream to the newer file.
+    LOG(WARNING) << "INFO and ERROR log is not rotated at the same time "
+                 << "(info_log_rotated=" << info_log_rotated << ", "
+                 << "error_log_rotated=" << error_log_rotated << "). "
+                 << "STDOUT/STDERR might still write to an older log file.";
+  }
 
-    // Print to stderr/stdout before redirecting so people looking for these logs in
-    // the standard place know where to look.
+  if (info_log_rotated) {
+    // Print to stdout before redirecting so people looking for these logs in the standard
+    // place know where to look.
     cout << "Redirecting stdout to " << info_log_symlink_path << endl;
-    cerr << "Redirecting stderr to " << error_log_symlink_path << endl;
-
     // TODO: how to handle these errors? Maybe abort the process?
     if (freopen(info_log_path.c_str(), "a", stdout) == NULL) {
       cout << "Could not redirect stdout: " << impala::GetStrErrMsg();
     }
+    last_info_log_path = info_log_path;
+  }
+
+  if (error_log_rotated) {
+    // Similar to stdout, do the same thing for stderr.
+    cerr << "Redirecting stderr to " << error_log_symlink_path << endl;
     if (freopen(error_log_path.c_str(), "a", stderr) == NULL) {
       cerr << "Could not redirect stderr: " << impala::GetStrErrMsg();
     }
-
-    last_info_log_path = info_log_path;
     last_error_log_path = error_log_path;
   }
   return impala::Status::OK();
@@ -189,6 +197,11 @@ void impala::InitGoogleLoggingSafe(const char* arg) {
   }
 
   if (RedirectStdoutStderr()) {
+    // The log files are created on first use, log something to each and flush before
+    // redirecting.
+    LOG(INFO) << "stdout will be logged to this file.";
+    LOG(ERROR) << "stderr will be logged to this file.";
+    google::FlushLogFiles(google::GLOG_INFO);
     Status status = AttachStdoutStderrLocked();
     if (!status.ok()) {
       LOG(ERROR) << "Failed to attach STDOUT/STDERR: " << status.GetDetail();
@@ -233,6 +246,7 @@ void impala::ForceRotateLog() {
   google::SetLogFilenameExtension("");
   LOG(INFO) << "INFO log rotated by Impala due to max_log_size exceeded.";
   LOG(ERROR) << "ERROR log rotated by Impala due to max_log_size exceeded.";
+  google::FlushLogFiles(google::GLOG_INFO);
 }
 
 bool impala::RedirectStdoutStderr() {