You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by la...@apache.org on 2021/04/12 10:35:17 UTC
[impala] branch master updated (1dcd596 -> e01b631)
This is an automated email from the ASF dual-hosted git repository.
laszlog pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git.
from 1dcd596 IMPALA-10643: Allow the inclusion of jetty-client
new bf4c2df IMPALA-10646: Add non-server RHEL 8 signature to toolchain bootstrap
new aa2302e IMPALA-10596: De-flake TestAdmissionControllerStress
new e01b631 IMPALA-10583: Fix bug on unbounded result spooling memory config.
The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
be/src/exec/buffered-plan-root-sink.cc | 8 +++++---
be/src/runtime/spillable-row-batch-queue.cc | 5 ++++-
bin/bootstrap_toolchain.py | 7 ++++---
.../org/apache/impala/planner/PlanRootSink.java | 12 +++++++++--
tests/custom_cluster/test_admission_controller.py | 4 ++++
tests/query_test/test_result_spooling.py | 24 ++++++++++++++++++++++
6 files changed, 51 insertions(+), 9 deletions(-)
[impala] 01/03: IMPALA-10646: Add non-server RHEL 8 signature to
toolchain bootstrap
Posted by la...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
laszlog pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit bf4c2dfb3893d77bc288bd683e7a3b304377fe8c
Author: Laszlo Gaal <la...@cloudera.com>
AuthorDate: Thu Apr 8 23:54:29 2021 +0200
IMPALA-10646: Add non-server RHEL 8 signature to toolchain bootstrap
Recent Red Hat Enterprise Linux 8.x version return a shorter release
name string from `lsb_release -si` than earlier versions. This shorter
string was not recognized in the OS mapper logic in
bin/bootstrap_toolchain.py, makig it -- and the build process -- break
on Red Hat 8.2
The patch adds the shorter signature as a point fix.
Rename a local variable to fix an unrelated name conflict (shadowing)
found by flake8.
Tests: run bin/bootstrap_toolchain.py manually on Red Hat 8.2, then run
a complete build on the same OS.
Regression-tested (build and dataload only) on the following versions:
- Centos 8.2 (as opposed to Red Hat 8.2)
- Centos 7.9
- Ubuntu 18.04
Change-Id: Icb1a6c215b1b5a65691042bb7d94fb034392d135
Reviewed-on: http://gerrit.cloudera.org:8080/17292
Reviewed-by: Joe McDonnell <jo...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
bin/bootstrap_toolchain.py | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/bin/bootstrap_toolchain.py b/bin/bootstrap_toolchain.py
index baef199..9ca71cb 100755
--- a/bin/bootstrap_toolchain.py
+++ b/bin/bootstrap_toolchain.py
@@ -82,6 +82,7 @@ OS_MAPPING = [
OsMapping("redhatenterpriseserver5", "ec2-package-centos-5", None),
OsMapping("redhatenterpriseserver6", "ec2-package-centos-6", "redhat6"),
OsMapping("redhatenterpriseserver7", "ec2-package-centos-7", "redhat7"),
+ OsMapping("redhatenterprise8", "ec2-package-centos-8", "redhat8"),
OsMapping("redhatenterpriseserver8", "ec2-package-centos-8", "redhat8"),
OsMapping("debian6", "ec2-package-debian-6", None),
OsMapping("debian7", "ec2-package-debian-7", None),
@@ -364,9 +365,9 @@ def get_platform_release_label(release=None):
else:
lsb_release = check_output(["lsb_release", "-irs"])
release = "".join(map(lambda x: x.lower(), lsb_release.split()))
- # Only need to check against the major release if RHEL or CentOS
- for platform in ['centos', 'redhatenterpriseserver', 'suse']:
- if platform in release:
+ # Only need to check against the major release if RHEL, CentOS or Suse
+ for distro in ['centos', 'redhatenterprise', 'redhatenterpriseserver', 'suse']:
+ if distro in release:
release = release.split('.')[0]
break
lsb_release_cache = release
[impala] 02/03: IMPALA-10596: De-flake TestAdmissionControllerStress
Posted by la...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
laszlog pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit aa2302e53b0ba9ab493025f3c8aedb5401fb2360
Author: Bikramjeet Vig <bi...@gmail.com>
AuthorDate: Mon Apr 5 20:24:19 2021 -0700
IMPALA-10596: De-flake TestAdmissionControllerStress
Currently some tests in TestAdmissionControllerStress fail because
they rely on queries holding onto resources on backends till they
are explicitly closed. This was done by keeping the query alive by
fetching very limited rows in intervals. After results spooling was
turned on by default, the queries started to finish early and
released their resources on other backends which let queued queries
to get admitted. This patch turns off result spooling for the queries
to avoid this situation.
Testing:
Ran test in a loop on my local dev machine.
Change-Id: I1594a82f507db8dc094d4441dd8739d037f974ff
Reviewed-on: http://gerrit.cloudera.org:8080/17272
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
tests/custom_cluster/test_admission_controller.py | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/tests/custom_cluster/test_admission_controller.py b/tests/custom_cluster/test_admission_controller.py
index 2b9ca02..4445942 100644
--- a/tests/custom_cluster/test_admission_controller.py
+++ b/tests/custom_cluster/test_admission_controller.py
@@ -1763,6 +1763,10 @@ class TestAdmissionControllerStress(TestAdmissionControllerBase):
exec_options = self.vector.get_value('exec_option')
exec_options.update(self.additional_query_options)
+ # Turning off result spooling allows us to better control query execution by
+ # controlling the number or rows fetched. This allows us to maintain resource
+ # usage among backends.
+ exec_options['spool_query_results'] = 0
query = QUERY.format(self.query_num)
self.query_state = 'SUBMITTING'
client = self.impalad.service.create_beeswax_client()
[impala] 03/03: IMPALA-10583: Fix bug on unbounded result spooling
memory config.
Posted by la...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
laszlog pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit e01b6312593197749a17dd6895f976c35eac5769
Author: Riza Suminto <ri...@cloudera.com>
AuthorDate: Fri Mar 12 09:37:41 2021 -0800
IMPALA-10583: Fix bug on unbounded result spooling memory config.
Two bugs happening on result spooling when we set two of its query
options to 0 (unbounded).
The first bug happens if max_spilled_result_spooling_mem =
0 (unbounded). max_unpinned_bytes_ in SpillableRowBatchQueue will be set
to 0, SpillableRowBatchQueue::IsFull() then will always return true, and
the query hang. This patch fix it by setting max_unpinned_bytes_ to
INT64_MAX if max_spilled_result_spooling_mem = 0.
The second bug happens if we set max_result_spooling_mem =
0 (unbounded). PlanRootSink.java will peg maxMemReservationBytes to
always equal to minMemReservationBytes. This patch fixes this by
reverting to the default max_result_spooling_mem (100MB).
Testing:
- Add test_unbounded_result_spooling_mem.
- Pass core tests.
Change-Id: If8f5e3668281bba8813f8082f45b4faa7721530e
Reviewed-on: http://gerrit.cloudera.org:8080/17187
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
be/src/exec/buffered-plan-root-sink.cc | 8 +++++---
be/src/runtime/spillable-row-batch-queue.cc | 5 ++++-
.../org/apache/impala/planner/PlanRootSink.java | 12 +++++++++--
tests/query_test/test_result_spooling.py | 24 ++++++++++++++++++++++
4 files changed, 43 insertions(+), 6 deletions(-)
diff --git a/be/src/exec/buffered-plan-root-sink.cc b/be/src/exec/buffered-plan-root-sink.cc
index daaa97e..1cb148b 100644
--- a/be/src/exec/buffered-plan-root-sink.cc
+++ b/be/src/exec/buffered-plan-root-sink.cc
@@ -50,9 +50,11 @@ Status BufferedPlanRootSink::Open(RuntimeState* state) {
RETURN_IF_ERROR(DebugAction(state->query_options(), "BPRS_BEFORE_OPEN"));
RETURN_IF_ERROR(DataSink::Open(state));
current_batch_ = make_unique<RowBatch>(row_desc_, state->batch_size(), mem_tracker());
- batch_queue_.reset(new SpillableRowBatchQueue(name_,
- state->query_options().max_spilled_result_spooling_mem, state, mem_tracker(),
- profile(), row_desc_, resource_profile_, debug_options_));
+ int64_t max_spilled_mem = state->query_options().max_spilled_result_spooling_mem;
+ // max_spilled_result_spooling_mem = 0 means unbounded.
+ if (max_spilled_mem <= 0) max_spilled_mem = INT64_MAX;
+ batch_queue_.reset(new SpillableRowBatchQueue(name_, max_spilled_mem, state,
+ mem_tracker(), profile(), row_desc_, resource_profile_, debug_options_));
RETURN_IF_ERROR(batch_queue_->Open());
return Status::OK();
}
diff --git a/be/src/runtime/spillable-row-batch-queue.cc b/be/src/runtime/spillable-row-batch-queue.cc
index eb02900..ce5a4fd 100644
--- a/be/src/runtime/spillable-row-batch-queue.cc
+++ b/be/src/runtime/spillable-row-batch-queue.cc
@@ -37,7 +37,10 @@ SpillableRowBatchQueue::SpillableRowBatchQueue(const string& name,
row_desc_(row_desc),
resource_profile_(resource_profile),
debug_options_(debug_options),
- max_unpinned_bytes_(max_unpinned_bytes) {}
+ max_unpinned_bytes_(max_unpinned_bytes) {
+ DCHECK_GT(max_unpinned_bytes_, 0);
+ DCHECK_GT(resource_profile_.spillable_buffer_size, 0);
+}
SpillableRowBatchQueue::~SpillableRowBatchQueue() {
DCHECK(closed_);
diff --git a/fe/src/main/java/org/apache/impala/planner/PlanRootSink.java b/fe/src/main/java/org/apache/impala/planner/PlanRootSink.java
index b4a41b0..53887b7 100644
--- a/fe/src/main/java/org/apache/impala/planner/PlanRootSink.java
+++ b/fe/src/main/java/org/apache/impala/planner/PlanRootSink.java
@@ -107,12 +107,20 @@ public class PlanRootSink extends DataSink {
return;
}
+ long maxSpoolingMem = queryOptions.getMax_result_spooling_mem();
+ if (maxSpoolingMem <= 0) {
+ // max_result_spooling_mem = 0 means unbounded. But instead of setting unlimited
+ // memory reservation, we fallback to the default max_result_spooling_mem.
+ TQueryOptions defaults = new TQueryOptions();
+ maxSpoolingMem = defaults.getMax_result_spooling_mem();
+ queryOptions.setMax_result_spooling_mem(maxSpoolingMem);
+ }
+
long bufferSize = queryOptions.getDefault_spillable_buffer_size();
long maxRowBufferSize = PlanNode.computeMaxSpillableBufferSize(
bufferSize, queryOptions.getMax_row_size());
long minMemReservationBytes = 2 * maxRowBufferSize;
- long maxMemReservationBytes = Math.max(
- queryOptions.getMax_result_spooling_mem(), minMemReservationBytes);
+ long maxMemReservationBytes = Math.max(maxSpoolingMem, minMemReservationBytes);
// User might set query option scratch_limit that is lower than either of
// minMemReservationBytes, maxMemReservationBytes, or
diff --git a/tests/query_test/test_result_spooling.py b/tests/query_test/test_result_spooling.py
index 04aa9f0..a2db1bc 100644
--- a/tests/query_test/test_result_spooling.py
+++ b/tests/query_test/test_result_spooling.py
@@ -488,6 +488,30 @@ class TestResultSpoolingMaxReservation(ImpalaTestSuite):
exec_options['default_spillable_buffer_size'] = 8 * 1024
self.__run_small_spilling_query(exec_options, "70.00 KB")
+ def test_unbounded_result_spooling_mem(self, vector):
+ """Test result spooling against unbounded MAX_RESULT_SPOOLING_MEM and
+ MAX_SPILLED_RESULT_SPOOLING_MEM. In this situation, planner should override
+ MAX_RESULT_SPOOLING_MEM to its default (100MB) and BufferedPlanRootSink should
+ assume MAX_SPILLED_RESULT_SPOOLING_MEM = INT64_MAX."""
+ exec_options = vector.get_value('exec_option')
+ exec_options['debug_action'] = vector.get_value('debug_action')
+ exec_options['spool_query_results'] = 'true'
+ exec_options['max_row_size'] = 8 * 1024
+ exec_options['max_result_spooling_mem'] = 0
+ exec_options['max_spilled_result_spooling_mem'] = 0
+ exec_options['default_spillable_buffer_size'] = 8 * 1024
+
+ query = "select * from functional.alltypes order by id limit 1500"
+ result = self.execute_query(query, exec_options)
+ assert result.success, "Failed to run {0} when result spooling is enabled" \
+ .format(query)
+
+ # Check that PLAN_ROOT_SINK's reservation limit match the default
+ # MAX_RESULT_SPOOLING_MEM.
+ plan_root_sink_reservation_limit = "PLAN_ROOT_SINK[\s\S]*?ReservationLimit: {0}" \
+ .format('100.00 MB')
+ assert re.search(plan_root_sink_reservation_limit, result.runtime_profile)
+
def __run_small_spilling_query(self, exec_options, expected_limit):
"""Given an exec_options, test that simple query below spills and PLAN_ROOT_SINK's
ReservationLimit match with the expected_limit"""