You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by la...@apache.org on 2021/04/12 10:35:17 UTC

[impala] branch master updated (1dcd596 -> e01b631)

This is an automated email from the ASF dual-hosted git repository.

laszlog pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git.


    from 1dcd596  IMPALA-10643: Allow the inclusion of jetty-client
     new bf4c2df  IMPALA-10646: Add non-server RHEL 8 signature to toolchain bootstrap
     new aa2302e  IMPALA-10596: De-flake TestAdmissionControllerStress
     new e01b631  IMPALA-10583: Fix bug on unbounded result spooling memory config.

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 be/src/exec/buffered-plan-root-sink.cc             |  8 +++++---
 be/src/runtime/spillable-row-batch-queue.cc        |  5 ++++-
 bin/bootstrap_toolchain.py                         |  7 ++++---
 .../org/apache/impala/planner/PlanRootSink.java    | 12 +++++++++--
 tests/custom_cluster/test_admission_controller.py  |  4 ++++
 tests/query_test/test_result_spooling.py           | 24 ++++++++++++++++++++++
 6 files changed, 51 insertions(+), 9 deletions(-)

[impala] 01/03: IMPALA-10646: Add non-server RHEL 8 signature to toolchain bootstrap

Posted by la...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

laszlog pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit bf4c2dfb3893d77bc288bd683e7a3b304377fe8c
Author: Laszlo Gaal <la...@cloudera.com>
AuthorDate: Thu Apr 8 23:54:29 2021 +0200

    IMPALA-10646: Add non-server RHEL 8 signature to toolchain bootstrap
    
    Recent Red Hat Enterprise Linux 8.x version return a shorter release
    name string from `lsb_release -si` than earlier versions. This shorter
    string was not recognized in the OS mapper logic in
    bin/bootstrap_toolchain.py, makig it -- and the build process -- break
    on Red Hat 8.2
    
    The patch adds the shorter signature as a point fix.
    
    Rename a local variable to fix an unrelated name conflict (shadowing)
    found by flake8.
    
    Tests: run bin/bootstrap_toolchain.py manually on Red Hat 8.2, then run
      a complete build on the same OS.
    
    Regression-tested (build and dataload only) on the following versions:
    - Centos 8.2 (as opposed to Red Hat 8.2)
    - Centos 7.9
    - Ubuntu 18.04
    
    Change-Id: Icb1a6c215b1b5a65691042bb7d94fb034392d135
    Reviewed-on: http://gerrit.cloudera.org:8080/17292
    Reviewed-by: Joe McDonnell <jo...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 bin/bootstrap_toolchain.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/bin/bootstrap_toolchain.py b/bin/bootstrap_toolchain.py
index baef199..9ca71cb 100755
--- a/bin/bootstrap_toolchain.py
+++ b/bin/bootstrap_toolchain.py
@@ -82,6 +82,7 @@ OS_MAPPING = [
   OsMapping("redhatenterpriseserver5", "ec2-package-centos-5", None),
   OsMapping("redhatenterpriseserver6", "ec2-package-centos-6", "redhat6"),
   OsMapping("redhatenterpriseserver7", "ec2-package-centos-7", "redhat7"),
+  OsMapping("redhatenterprise8", "ec2-package-centos-8", "redhat8"),
   OsMapping("redhatenterpriseserver8", "ec2-package-centos-8", "redhat8"),
   OsMapping("debian6", "ec2-package-debian-6", None),
   OsMapping("debian7", "ec2-package-debian-7", None),
@@ -364,9 +365,9 @@ def get_platform_release_label(release=None):
     else:
       lsb_release = check_output(["lsb_release", "-irs"])
       release = "".join(map(lambda x: x.lower(), lsb_release.split()))
-      # Only need to check against the major release if RHEL or CentOS
-      for platform in ['centos', 'redhatenterpriseserver', 'suse']:
-        if platform in release:
+      # Only need to check against the major release if RHEL, CentOS or Suse
+      for distro in ['centos', 'redhatenterprise', 'redhatenterpriseserver', 'suse']:
+        if distro in release:
           release = release.split('.')[0]
           break
       lsb_release_cache = release

[impala] 02/03: IMPALA-10596: De-flake TestAdmissionControllerStress

Posted by la...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

laszlog pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit aa2302e53b0ba9ab493025f3c8aedb5401fb2360
Author: Bikramjeet Vig <bi...@gmail.com>
AuthorDate: Mon Apr 5 20:24:19 2021 -0700

    IMPALA-10596: De-flake TestAdmissionControllerStress
    
    Currently some tests in TestAdmissionControllerStress fail because
    they rely on queries holding onto resources on backends till they
    are explicitly closed. This was done by keeping the query alive by
    fetching very limited rows in intervals. After results spooling was
    turned on by default, the queries started to finish early and
    released their resources on other backends which let queued queries
    to get admitted. This patch turns off result spooling for the queries
    to avoid this situation.
    
    Testing:
    Ran test in a loop on my local dev machine.
    
    Change-Id: I1594a82f507db8dc094d4441dd8739d037f974ff
    Reviewed-on: http://gerrit.cloudera.org:8080/17272
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 tests/custom_cluster/test_admission_controller.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/custom_cluster/test_admission_controller.py b/tests/custom_cluster/test_admission_controller.py
index 2b9ca02..4445942 100644
--- a/tests/custom_cluster/test_admission_controller.py
+++ b/tests/custom_cluster/test_admission_controller.py
@@ -1763,6 +1763,10 @@ class TestAdmissionControllerStress(TestAdmissionControllerBase):
 
           exec_options = self.vector.get_value('exec_option')
           exec_options.update(self.additional_query_options)
+          # Turning off result spooling allows us to better control query execution by
+          # controlling the number or rows fetched. This allows us to maintain resource
+          # usage among backends.
+          exec_options['spool_query_results'] = 0
           query = QUERY.format(self.query_num)
           self.query_state = 'SUBMITTING'
           client = self.impalad.service.create_beeswax_client()

[impala] 03/03: IMPALA-10583: Fix bug on unbounded result spooling memory config.

Posted by la...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

laszlog pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit e01b6312593197749a17dd6895f976c35eac5769
Author: Riza Suminto <ri...@cloudera.com>
AuthorDate: Fri Mar 12 09:37:41 2021 -0800

    IMPALA-10583: Fix bug on unbounded result spooling memory config.
    
    Two bugs happening on result spooling when we set two of its query
    options to 0 (unbounded).
    
    The first bug happens if max_spilled_result_spooling_mem =
    0 (unbounded). max_unpinned_bytes_ in SpillableRowBatchQueue will be set
    to 0, SpillableRowBatchQueue::IsFull() then will always return true, and
    the query hang. This patch fix it by setting max_unpinned_bytes_ to
    INT64_MAX if max_spilled_result_spooling_mem = 0.
    
    The second bug happens if we set max_result_spooling_mem =
    0 (unbounded). PlanRootSink.java will peg maxMemReservationBytes to
    always equal to minMemReservationBytes. This patch fixes this by
    reverting to the default max_result_spooling_mem (100MB).
    
    Testing:
    - Add test_unbounded_result_spooling_mem.
    - Pass core tests.
    
    Change-Id: If8f5e3668281bba8813f8082f45b4faa7721530e
    Reviewed-on: http://gerrit.cloudera.org:8080/17187
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/exec/buffered-plan-root-sink.cc             |  8 +++++---
 be/src/runtime/spillable-row-batch-queue.cc        |  5 ++++-
 .../org/apache/impala/planner/PlanRootSink.java    | 12 +++++++++--
 tests/query_test/test_result_spooling.py           | 24 ++++++++++++++++++++++
 4 files changed, 43 insertions(+), 6 deletions(-)

diff --git a/be/src/exec/buffered-plan-root-sink.cc b/be/src/exec/buffered-plan-root-sink.cc
index daaa97e..1cb148b 100644
--- a/be/src/exec/buffered-plan-root-sink.cc
+++ b/be/src/exec/buffered-plan-root-sink.cc
@@ -50,9 +50,11 @@ Status BufferedPlanRootSink::Open(RuntimeState* state) {
   RETURN_IF_ERROR(DebugAction(state->query_options(), "BPRS_BEFORE_OPEN"));
   RETURN_IF_ERROR(DataSink::Open(state));
   current_batch_ = make_unique<RowBatch>(row_desc_, state->batch_size(), mem_tracker());
-  batch_queue_.reset(new SpillableRowBatchQueue(name_,
-      state->query_options().max_spilled_result_spooling_mem, state, mem_tracker(),
-      profile(), row_desc_, resource_profile_, debug_options_));
+  int64_t max_spilled_mem = state->query_options().max_spilled_result_spooling_mem;
+  // max_spilled_result_spooling_mem = 0 means unbounded.
+  if (max_spilled_mem <= 0) max_spilled_mem = INT64_MAX;
+  batch_queue_.reset(new SpillableRowBatchQueue(name_, max_spilled_mem, state,
+      mem_tracker(), profile(), row_desc_, resource_profile_, debug_options_));
   RETURN_IF_ERROR(batch_queue_->Open());
   return Status::OK();
 }
diff --git a/be/src/runtime/spillable-row-batch-queue.cc b/be/src/runtime/spillable-row-batch-queue.cc
index eb02900..ce5a4fd 100644
--- a/be/src/runtime/spillable-row-batch-queue.cc
+++ b/be/src/runtime/spillable-row-batch-queue.cc
@@ -37,7 +37,10 @@ SpillableRowBatchQueue::SpillableRowBatchQueue(const string& name,
     row_desc_(row_desc),
     resource_profile_(resource_profile),
     debug_options_(debug_options),
-    max_unpinned_bytes_(max_unpinned_bytes) {}
+    max_unpinned_bytes_(max_unpinned_bytes) {
+  DCHECK_GT(max_unpinned_bytes_, 0);
+  DCHECK_GT(resource_profile_.spillable_buffer_size, 0);
+}
 
 SpillableRowBatchQueue::~SpillableRowBatchQueue() {
   DCHECK(closed_);
diff --git a/fe/src/main/java/org/apache/impala/planner/PlanRootSink.java b/fe/src/main/java/org/apache/impala/planner/PlanRootSink.java
index b4a41b0..53887b7 100644
--- a/fe/src/main/java/org/apache/impala/planner/PlanRootSink.java
+++ b/fe/src/main/java/org/apache/impala/planner/PlanRootSink.java
@@ -107,12 +107,20 @@ public class PlanRootSink extends DataSink {
         return;
       }
 
+      long maxSpoolingMem = queryOptions.getMax_result_spooling_mem();
+      if (maxSpoolingMem <= 0) {
+        // max_result_spooling_mem = 0 means unbounded. But instead of setting unlimited
+        // memory reservation, we fallback to the default max_result_spooling_mem.
+        TQueryOptions defaults = new TQueryOptions();
+        maxSpoolingMem = defaults.getMax_result_spooling_mem();
+        queryOptions.setMax_result_spooling_mem(maxSpoolingMem);
+      }
+
       long bufferSize = queryOptions.getDefault_spillable_buffer_size();
       long maxRowBufferSize = PlanNode.computeMaxSpillableBufferSize(
           bufferSize, queryOptions.getMax_row_size());
       long minMemReservationBytes = 2 * maxRowBufferSize;
-      long maxMemReservationBytes = Math.max(
-          queryOptions.getMax_result_spooling_mem(), minMemReservationBytes);
+      long maxMemReservationBytes = Math.max(maxSpoolingMem, minMemReservationBytes);
 
       // User might set query option scratch_limit that is lower than either of
       // minMemReservationBytes, maxMemReservationBytes, or
diff --git a/tests/query_test/test_result_spooling.py b/tests/query_test/test_result_spooling.py
index 04aa9f0..a2db1bc 100644
--- a/tests/query_test/test_result_spooling.py
+++ b/tests/query_test/test_result_spooling.py
@@ -488,6 +488,30 @@ class TestResultSpoolingMaxReservation(ImpalaTestSuite):
     exec_options['default_spillable_buffer_size'] = 8 * 1024
     self.__run_small_spilling_query(exec_options, "70.00 KB")
 
+  def test_unbounded_result_spooling_mem(self, vector):
+    """Test result spooling against unbounded MAX_RESULT_SPOOLING_MEM and
+    MAX_SPILLED_RESULT_SPOOLING_MEM. In this situation, planner should override
+    MAX_RESULT_SPOOLING_MEM to its default (100MB) and BufferedPlanRootSink should
+    assume MAX_SPILLED_RESULT_SPOOLING_MEM = INT64_MAX."""
+    exec_options = vector.get_value('exec_option')
+    exec_options['debug_action'] = vector.get_value('debug_action')
+    exec_options['spool_query_results'] = 'true'
+    exec_options['max_row_size'] = 8 * 1024
+    exec_options['max_result_spooling_mem'] = 0
+    exec_options['max_spilled_result_spooling_mem'] = 0
+    exec_options['default_spillable_buffer_size'] = 8 * 1024
+
+    query = "select * from functional.alltypes order by id limit 1500"
+    result = self.execute_query(query, exec_options)
+    assert result.success, "Failed to run {0} when result spooling is enabled" \
+        .format(query)
+
+    # Check that PLAN_ROOT_SINK's reservation limit match the default
+    # MAX_RESULT_SPOOLING_MEM.
+    plan_root_sink_reservation_limit = "PLAN_ROOT_SINK[\s\S]*?ReservationLimit: {0}" \
+        .format('100.00 MB')
+    assert re.search(plan_root_sink_reservation_limit, result.runtime_profile)
+
   def __run_small_spilling_query(self, exec_options, expected_limit):
     """Given an exec_options, test that simple query below spills and PLAN_ROOT_SINK's
     ReservationLimit match with the expected_limit"""