You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by kw...@apache.org on 2018/01/27 00:03:40 UTC

[1/3] impala git commit: IMPALA-6441: update explain string for stress test

Repository: impala
Updated Branches:
  refs/heads/master 0fdd81682 -> 47959067e


IMPALA-6441: update explain string for stress test

- Update the explain string pattern match that the stress test uses as a
  binary search start point.
- Make matching code importable and testable.
- Add system test (no vectors needed) to ensure the stress test will
  always be able to find the correct string. It was failing before I
  updated the pattern.

Change-Id: I39af1af8809a6e6b99293798afb0a96b0c02c224
Reviewed-on: http://gerrit.cloudera.org:8080/9141
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/2ee2c4fd
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/2ee2c4fd
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/2ee2c4fd

Branch: refs/heads/master
Commit: 2ee2c4fdb9b75d4acadd73627d09193c0198ade8
Parents: 0fdd816
Author: Michael Brown <mi...@cloudera.com>
Authored: Thu Jan 25 10:09:51 2018 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Fri Jan 26 01:27:41 2018 +0000

----------------------------------------------------------------------
 tests/metadata/test_explain.py    | 23 +++++++++++++++++++-
 tests/stress/__init__.py          |  0
 tests/stress/concurrent_select.py | 38 ++++++++++++++++++++++++++++------
 3 files changed, 54 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/2ee2c4fd/tests/metadata/test_explain.py
----------------------------------------------------------------------
diff --git a/tests/metadata/test_explain.py b/tests/metadata/test_explain.py
index 9a602db..22fc177 100644
--- a/tests/metadata/test_explain.py
+++ b/tests/metadata/test_explain.py
@@ -17,11 +17,13 @@
 
 # Functional tests running EXPLAIN statements.
 #
-import pytest
 import re
 
+from decimal import Decimal
+
 from tests.common.impala_test_suite import ImpalaTestSuite
 from tests.common.skip import SkipIfLocal, SkipIfNotHdfsMinicluster
+from tests.stress.concurrent_select import match_memory_estimate
 from tests.util.filesystem_utils import WAREHOUSE
 
 # Tests the different explain levels [0-3] on a few queries.
@@ -176,3 +178,22 @@ class TestExplainEmptyPartition(ImpalaTestSuite):
     assert "missing relevant table and/or column statistics" in explain_result
     # Also test IMPALA-1530 - adding the number of partitions missing stats
     assert "partitions: 1/2 " in explain_result
+
+
+class TestInfraIntegration(ImpalaTestSuite):
+  """
+  This is a test suite to ensure separate test tooling in Python is compatible with the
+  product.
+  """
+  def test_stress_binary_search_start_point(self):
+    """
+    Test that the stress test can use EXPLAIN to find the start point for its binary
+    search.
+    """
+    result = self.client.execute("explain select 1")
+    mem_limit, units = match_memory_estimate(result.data)
+    assert isinstance(units, str) and units.upper() in ('T', 'G', 'M', 'K', ''), (
+        'unexpected units {u} from explain memory estimation\n{output}:'.format(
+            u=units, output='\n'.join(result.data)))
+    assert Decimal(mem_limit) >= 0, (
+        'unexpected value from explain\n:' + '\n'.join(result.data))

http://git-wip-us.apache.org/repos/asf/impala/blob/2ee2c4fd/tests/stress/__init__.py
----------------------------------------------------------------------
diff --git a/tests/stress/__init__.py b/tests/stress/__init__.py
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/impala/blob/2ee2c4fd/tests/stress/concurrent_select.py
----------------------------------------------------------------------
diff --git a/tests/stress/concurrent_select.py b/tests/stress/concurrent_select.py
index ac86deb..69a4434 100755
--- a/tests/stress/concurrent_select.py
+++ b/tests/stress/concurrent_select.py
@@ -90,7 +90,8 @@ MEM_LIMIT_EQ_THRESHOLD_PC = 0.975
 MEM_LIMIT_EQ_THRESHOLD_MB = 50
 
 # Regex to extract the estimated memory from an explain plan.
-MEM_ESTIMATE_PATTERN = re.compile(r"Estimated.*Memory=(\d+.?\d*)(T|G|M|K)?B")
+MEM_ESTIMATE_PATTERN = re.compile(
+    r"Per-Host Resource Estimates: Memory=(\d+.?\d*)(T|G|M|K)?B")
 
 PROFILES_DIR = "profiles"
 RESULT_HASHES_DIR = "result_hashes"
@@ -1342,6 +1343,34 @@ def populate_runtime_info(
   LOG.debug("Query after populating runtime info: %s", query)
 
 
+def match_memory_estimate(explain_lines):
+  """
+  Given a list of strings from EXPLAIN output, find the estimated memory needed. This is
+  used as a binary search start point.
+
+  Params:
+    explain_lines: list of str
+
+  Returns:
+    2-tuple str of memory limit in decimal string and units (one of 'T', 'G', 'M', 'K',
+    '' bytes)
+
+  Raises:
+    Exception if no match found
+  """
+  # IMPALA-6441: This method is a public, first class method so it can be importable and
+  # tested with actual EXPLAIN output to make sure we always find the start point.
+  mem_limit, units = None, None
+  for line in explain_lines:
+    regex_result = MEM_ESTIMATE_PATTERN.search(line)
+    if regex_result:
+      mem_limit, units = regex_result.groups()
+      break
+  if None in (mem_limit, units):
+    raise Exception('could not parse explain string:\n' + '\n'.join(explain_lines))
+  return mem_limit, units
+
+
 def estimate_query_mem_mb_usage(query, query_runner):
   """Runs an explain plan then extracts and returns the estimated memory needed to run
   the query.
@@ -1355,11 +1384,8 @@ def estimate_query_mem_mb_usage(query, query_runner):
       return
     LOG.debug("Explaining query\n%s", query.sql)
     cursor.execute('EXPLAIN ' + query.sql)
-    first_val = cursor.fetchone()[0]
-    regex_result = MEM_ESTIMATE_PATTERN.search(first_val)
-    if not regex_result:
-      return
-    mem_limit, units = regex_result.groups()
+    explain_lines = cursor.fetchall()
+    mem_limit, units = match_memory_estimate(explain_lines)
     return parse_mem_to_mb(mem_limit, units)
 
 


[3/3] impala git commit: IMPALA-6356: Reduce amount of logging from RpczStore::LogTrace()

Posted by kw...@apache.org.
IMPALA-6356: Reduce amount of logging from RpczStore::LogTrace()

This change bumps the threshold of RPC duration above which a RPC
is logged. It's increased from 1 second to 2 minutes which is
a conservative value in order to reduce the amount of logging from
RpczStore::LogTrace() when an Impala demon is busy.

Change-Id: I347b0dea641368e10ba84bc40ec250c26a4f43b2
Reviewed-on: http://gerrit.cloudera.org:8080/9125
Reviewed-by: Michael Ho <kw...@cloudera.com>
Reviewed-by: Mostafa Mokhtar <mm...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/47959067
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/47959067
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/47959067

Branch: refs/heads/master
Commit: 47959067ea79b3406ffc411a95362708514c31b2
Parents: 5ca603a
Author: Michael Ho <kw...@cloudera.com>
Authored: Wed Jan 24 13:58:02 2018 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Fri Jan 26 23:07:47 2018 +0000

----------------------------------------------------------------------
 be/src/rpc/rpc-mgr.cc | 6 ++++++
 1 file changed, 6 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/47959067/be/src/rpc/rpc-mgr.cc
----------------------------------------------------------------------
diff --git a/be/src/rpc/rpc-mgr.cc b/be/src/rpc/rpc-mgr.cc
index 91978bb..d4e8fe1 100644
--- a/be/src/rpc/rpc-mgr.cc
+++ b/be/src/rpc/rpc-mgr.cc
@@ -44,6 +44,9 @@ DECLARE_string(hostname);
 DECLARE_string(principal);
 DECLARE_string(be_principal);
 
+// Defined in kudu/rpc/rpcz_store.cc
+DECLARE_int32(rpc_duration_too_long_ms);
+
 DEFINE_int32(num_acceptor_threads, 2,
     "Number of threads dedicated to accepting connection requests for RPC services");
 DEFINE_int32(num_reactor_threads, 0,
@@ -55,6 +58,9 @@ DEFINE_int32(rpc_retry_interval_ms, 5,
 namespace impala {
 
 Status RpcMgr::Init() {
+  // Log any RPCs which take longer than 2 minutes.
+  FLAGS_rpc_duration_too_long_ms = 2 * 60 * 1000;
+
   MessengerBuilder bld("impala-server");
   const scoped_refptr<MetricEntity> entity(
       METRIC_ENTITY_server.Instantiate(&registry_, "krpc-metrics"));


[2/3] impala git commit: IMPALA-6410: compare_branches: use looser expression

Posted by kw...@apache.org.
IMPALA-6410: compare_branches: use looser expression

We've already got one use of "Cherry-pick:" instead of "Cherry-picks:"
in master, so I'm loosening the regular expression a bit. (And
converting the string search into a case-insensitive regexp search.)

I tested this by running it manually and inspecting results.

Change-Id: Ie3f75d9e01d2760571547b1a1a5f42bbc8455a05
Reviewed-on: http://gerrit.cloudera.org:8080/9135
Reviewed-by: Taras Bobrovytsky <tb...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/5ca603a3
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/5ca603a3
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/5ca603a3

Branch: refs/heads/master
Commit: 5ca603a3769d23c2085bc37d6261517cf0bbcb6e
Parents: 2ee2c4f
Author: Philip Zeyliger <ph...@cloudera.com>
Authored: Thu Jan 25 09:05:12 2018 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Fri Jan 26 22:45:57 2018 +0000

----------------------------------------------------------------------
 bin/compare_branches.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/5ca603a3/bin/compare_branches.py
----------------------------------------------------------------------
diff --git a/bin/compare_branches.py b/bin/compare_branches.py
index 7050924..fddb17e 100755
--- a/bin/compare_branches.py
+++ b/bin/compare_branches.py
@@ -103,8 +103,9 @@ def create_parser():
       os.path.dirname(os.path.abspath(__file__)), 'ignored_commits.json')
   parser.add_argument('--ignored_commits_file', default=default_ignored_commits_path,
       help='JSON File that contains ignored commits as specified in the help')
-  parser.add_argument('--skip_commits_matching', default="Cherry-picks: not for {branch}",
-      help='String in commit messages that causes the commit to be ignored. ' +
+  parser.add_argument('--skip_commits_matching',
+      default="Cherry-pick.?:.?not (for|to) {branch}",
+      help='Regex searched for in commit messages that causes the commit to be ignored.' +
            ' {branch} is replaced with target branch; the search is case-insensitive')
   parser.add_argument('--verbose', '-v', action='store_true', default=False,
       help='Turn on DEBUG and INFO logging')
@@ -232,14 +233,14 @@ def main():
   print '-' * 80
   jira_keys = []
   jira_key_pat = re.compile(r'(IMPALA-\d+)')
-  skip_commits_matching = options.skip_commits_matching.replace(
-      "{branch}", options.target_branch)
+  skip_commits_matching = options.skip_commits_matching.format(
+      branch=options.target_branch)
   for change_id, (commit_hash, msg, author, date, body) in source_commits.iteritems():
     change_in_target = change_id in target_commits
     ignore_by_config = commit_hash in ignored_commits[
         (options.source_branch, options.target_branch)]
-    ignore_by_commit_message = skip_commits_matching.lower() in msg.lower() \
-        or skip_commits_matching.lower() in body.lower()
+    ignore_by_commit_message = re.search(skip_commits_matching, "\n".join([msg, body]),
+        re.IGNORECASE)
     # This conditional block just for debug logging of ignored commits
     if ignore_by_config or ignore_by_commit_message:
       if change_in_target: