You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by bo...@apache.org on 2019/02/12 17:31:36 UTC

[impala] branch master updated: IMPALA-8064: Improve observability of wait times for runtime filters

This is an automated email from the ASF dual-hosted git repository.

boroknagyz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new e0aabdd  IMPALA-8064: Improve observability of wait times for runtime filters
e0aabdd is described below

commit e0aabddd573c204a780d3f5ff0af442cdb26b7c6
Author: poojanilangekar <po...@cloudera.com>
AuthorDate: Thu Feb 7 17:00:34 2019 -0800

    IMPALA-8064: Improve observability of wait times for runtime filters
    
    This change is a diagnostic fix to improve the wait times logged
    for runtime filters. The filter wait time counts against the
    elapsed time since the filter's registration in ScanNode::Init()
    while the duration logged in ScanNode::WaitForRuntimeFilters() is
    the time spent in the function waiting for all the filters to
    arrive. This could be misleading as it doesn't account for the
    elapsed time spent between ScanNode::Init() and
    ScanNode::WaitForRuntimeFilters(). This change logs the maximum
    arrival delay for any runtime filter to arrive.
    
    From my analysis of the logs of the failed tests, I believe the
    filters are actually waiting for the specified time but logging
    the duration incorrectly. The solution would be to increase the
    wait time further. This change would help validate this
    hypothesis.
    
    Change-Id: I28fd45e75c773bc01d424f5a179ae186ee9b7469
    Reviewed-on: http://gerrit.cloudera.org:8080/12401
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/exec/scan-node.cc              | 13 +++++++++----
 be/src/runtime/runtime-filter-bank.cc |  7 +++----
 be/src/runtime/runtime-filter.h       |  9 +++++----
 3 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/be/src/exec/scan-node.cc b/be/src/exec/scan-node.cc
index 906af66..039836c 100644
--- a/be/src/exec/scan-node.cc
+++ b/be/src/exec/scan-node.cc
@@ -168,6 +168,7 @@ bool ScanNode::WaitForRuntimeFilters() {
   }
   vector<string> arrived_filter_ids;
   vector<string> missing_filter_ids;
+  int32_t max_arrival_delay = 0;
   int64_t start = MonotonicMillis();
   for (auto& ctx: filter_ctxs_) {
     string filter_id = Substitute("$0", ctx.filter->id());
@@ -176,20 +177,24 @@ bool ScanNode::WaitForRuntimeFilters() {
     } else {
       missing_filter_ids.push_back(filter_id);
     }
+    max_arrival_delay = max(max_arrival_delay, ctx.filter->arrival_delay_ms());
   }
   int64_t end = MonotonicMillis();
   const string& wait_time = PrettyPrinter::Print(end - start, TUnit::TIME_MS);
+  const string& arrival_delay = PrettyPrinter::Print(max_arrival_delay, TUnit::TIME_MS);
 
   if (arrived_filter_ids.size() == filter_ctxs_.size()) {
     runtime_profile()->AddInfoString("Runtime filters",
-        Substitute("All filters arrived. Waited $0", wait_time));
+        Substitute("All filters arrived. Waited $0. Maximum arrival delay: $1.",
+                                         wait_time, arrival_delay));
     VLOG(2) << "Filters arrived. Waited " << wait_time;
     return true;
   }
 
-  const string& filter_str = Substitute(
-      "Not all filters arrived (arrived: [$0], missing [$1]), waited for $2",
-      join(arrived_filter_ids, ", "), join(missing_filter_ids, ", "), wait_time);
+  const string& filter_str = Substitute("Not all filters arrived (arrived: [$0], missing "
+                                        "[$1]), waited for $2. Arrival delay: $3.",
+      join(arrived_filter_ids, ", "), join(missing_filter_ids, ", "), wait_time,
+      arrival_delay);
   runtime_profile()->AddInfoString("Runtime filters", filter_str);
   VLOG(2) << filter_str;
   return false;
diff --git a/be/src/runtime/runtime-filter-bank.cc b/be/src/runtime/runtime-filter-bank.cc
index 85c9625..f8667bc 100644
--- a/be/src/runtime/runtime-filter-bank.cc
+++ b/be/src/runtime/runtime-filter-bank.cc
@@ -146,9 +146,8 @@ void RuntimeFilterBank::UpdateFilterFromLocal(
       filter = it->second;
     }
     filter->SetFilter(bloom_filter, min_max_filter);
-    state_->runtime_profile()->AddInfoString(
-        Substitute("Filter $0 arrival", filter_id),
-        PrettyPrinter::Print(filter->arrival_delay(), TUnit::TIME_MS));
+    state_->runtime_profile()->AddInfoString(Substitute("Filter $0 arrival", filter_id),
+        PrettyPrinter::Print(filter->arrival_delay_ms(), TUnit::TIME_MS));
   }
 
   if (has_remote_target
@@ -211,7 +210,7 @@ void RuntimeFilterBank::PublishGlobalFilter(const TPublishFilterParams& params)
   it->second->SetFilter(bloom_filter, min_max_filter);
   state_->runtime_profile()->AddInfoString(
       Substitute("Filter $0 arrival", params.filter_id),
-      PrettyPrinter::Print(it->second->arrival_delay(), TUnit::TIME_MS));
+      PrettyPrinter::Print(it->second->arrival_delay_ms(), TUnit::TIME_MS));
 }
 
 BloomFilter* RuntimeFilterBank::AllocateScratchBloomFilter(int32_t filter_id) {
diff --git a/be/src/runtime/runtime-filter.h b/be/src/runtime/runtime-filter.h
index 7ab73d7..97a2842 100644
--- a/be/src/runtime/runtime-filter.h
+++ b/be/src/runtime/runtime-filter.h
@@ -76,10 +76,11 @@ class RuntimeFilter {
   /// Inlined in IR so that the constant 'col_type' can be propagated.
   bool IR_ALWAYS_INLINE Eval(void* val, const ColumnType& col_type) const noexcept;
 
-  /// Returns the amount of time waited since registration for the filter to
-  /// arrive. Returns 0 if filter has not yet arrived.
-  int32_t arrival_delay() const {
-    if (arrival_time_.Load() == 0L) return 0L;
+  /// Returns the amount of time in milliseconds elapsed between the registration of the
+  /// filter and its arrival. If the filter has not yet arrived, it returns the time
+  /// elapsed since registration.
+  int32_t arrival_delay_ms() const {
+    if (arrival_time_.Load() == 0L) return MonotonicMillis() - registration_time_;
     return arrival_time_.Load() - registration_time_;
   }