You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2018/04/26 03:18:39 UTC

[1/2] impala git commit: IMPALA-6913: Simple parser for Impala profile logs.

Repository: impala
Updated Branches:
  refs/heads/master d879fa993 -> c557a5bfb


IMPALA-6913: Simple parser for Impala profile logs.

It's hard to find an example of how to parse the profiles in Impala test
logs, so I've added one to the bin/ directory. The parser's not
full-featured, but it may be useful to others. It handles three
common cases of Thrift-encoded profiles that we see.

Change-Id: Ib1174c65b002f9b71eccb0b56f875449f00eae39
Reviewed-on: http://gerrit.cloudera.org:8080/9265
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Reviewed-by: Bharath Vissapragada <bh...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/61655340
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/61655340
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/61655340

Branch: refs/heads/master
Commit: 61655340160aa3bcb42126ed325d0caa6d38b4a8
Parents: d879fa9
Author: Philip Zeyliger <ph...@cloudera.com>
Authored: Thu Feb 8 15:11:29 2018 -0800
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Thu Apr 26 02:27:55 2018 +0000

----------------------------------------------------------------------
 bin/parse-thrift-profile.py | 79 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 79 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/61655340/bin/parse-thrift-profile.py
----------------------------------------------------------------------
diff --git a/bin/parse-thrift-profile.py b/bin/parse-thrift-profile.py
new file mode 100755
index 0000000..5f8485f
--- /dev/null
+++ b/bin/parse-thrift-profile.py
@@ -0,0 +1,79 @@
+#!/usr/bin/env impala-python
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Parses base64-encoded profiles provided via stdin or a file argument.
+# It accepts three common formats:
+#
+# 1. Impala profile logs of the format
+#    "<ts> <queryid> <base64encoded, compressed thrift profile>"
+# 2. Just the base64-encoded compressed thrift profile
+# 3. Base-64 encoded uncompressed thrift profile.
+#
+# In all cases, the script expects one profile per line.
+#
+# For example:
+#
+# $ cat logs/cluster_test/custom_cluster_tests/profiles/impala_profile_log \
+#      | head -n 1 | awk '{ print $3 }' | bin/parse-thrift-profile.py
+# TRuntimeProfileTree(nodes=[TRuntimeProfileNode(info_strings_display_order=....
+#
+# or
+#
+# $ bin/parse-thrift-profile.py logs/custom_cluster_tests/profiles/impala_profile_log_1.1-1523657191158
+# 2018-04-13T15:06:34.144000 e44af7f93edb8cd6:1b1f801600000000 TRuntimeProfileTree(nodes=[TRuntimeProf...
+
+
+from thrift.protocol import TCompactProtocol
+from thrift.TSerialization import deserialize
+from RuntimeProfile.ttypes import TRuntimeProfileTree
+
+import base64
+import datetime
+import sys
+import zlib
+
+if len(sys.argv) == 1 or sys.argv[1] == "-":
+  input_data = sys.stdin
+elif len(sys.argv) == 2:
+  input_data = file(sys.argv[1])
+else:
+  print >> sys.stderr, "Usage: %s [file]" % (sys.argv[0],)
+  sys.exit(1)
+
+for line in input_data:
+  space_separated = line.split(" ")
+  if len(space_separated) == 3:
+    ts = int(space_separated[0])
+    print datetime.datetime.fromtimestamp(ts/1000.0).isoformat(), space_separated[1],
+    base64_encoded = space_separated[2]
+  elif len(space_separated) == 1:
+    base64_encoded = space_separated[0]
+  else:
+    raise Exception("Unexpected line: " + line)
+  possibly_compressed = base64.b64decode(base64_encoded)
+  # Handle both compressed and uncompressed Thrift profiles
+  try:
+    thrift = zlib.decompress(possibly_compressed)
+  except zlib.error:
+    thrift = possibly_compressed
+
+  tree = TRuntimeProfileTree()
+  deserialize(tree, thrift, protocol_factory=TCompactProtocol.TCompactProtocolFactory())
+  tree.validate()
+  print tree


[2/2] impala git commit: IMPALA-6906: disable test that depends on memory estimates on S3

Posted by ta...@apache.org.
IMPALA-6906: disable test that depends on memory estimates on S3

S3 divides up scan ranges into synthetic blocks smaller than the
equivalent HDFS blocks, which in turn affects the memory estimate
calculation, so the test that was tuned for HDFS does not work
in the same way on S3.

The test is exercising an admission control code path that is
independent of the filesystem, so we don't gain important coverage by
running this on S3.

ADLS can have similar block size issues, so skip that too.

Change-Id: Ida763a402203286c02ad3cbcbed5336c70abef7c
Reviewed-on: http://gerrit.cloudera.org:8080/10207
Reviewed-by: Dan Hecht <dh...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/c557a5bf
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/c557a5bf
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/c557a5bf

Branch: refs/heads/master
Commit: c557a5bfb7695c9e6fb506b57d1c2d952cf5c3a4
Parents: 6165534
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Wed Apr 25 13:49:45 2018 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Thu Apr 26 02:59:27 2018 +0000

----------------------------------------------------------------------
 tests/custom_cluster/test_admission_controller.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/c557a5bf/tests/custom_cluster/test_admission_controller.py
----------------------------------------------------------------------
diff --git a/tests/custom_cluster/test_admission_controller.py b/tests/custom_cluster/test_admission_controller.py
index ab09069..5f3ee9e 100644
--- a/tests/custom_cluster/test_admission_controller.py
+++ b/tests/custom_cluster/test_admission_controller.py
@@ -30,6 +30,9 @@ from tests.beeswax.impala_beeswax import ImpalaBeeswaxException
 from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
 from tests.common.environ import specific_build_type_timeout, IMPALAD_BUILD
 from tests.common.impala_test_suite import ImpalaTestSuite
+from tests.common.skip import (
+    SkipIfS3,
+    SkipIfADLS)
 from tests.common.test_dimensions import (
     create_single_exec_option_dimension,
     create_uncompressed_text_dimension)
@@ -379,6 +382,8 @@ class TestAdmissionController(TestAdmissionControllerBase, HS2TestSuite):
       assert re.search("Rejected query from pool default-pool: request memory needed "
           ".* is greater than pool max mem resources 10.00 MB", str(ex))
 
+  @SkipIfS3.hdfs_block_size
+  @SkipIfADLS.hdfs_block_size
   @pytest.mark.execute_serially
   @CustomClusterTestSuite.with_args(
       impalad_args=impalad_admission_ctrl_flags(max_requests=1, max_queued=1,
@@ -386,7 +391,8 @@ class TestAdmissionController(TestAdmissionControllerBase, HS2TestSuite):
       statestored_args=_STATESTORED_ARGS)
   def test_memory_rejection(self, vector):
     """Test that rejection of queries based on reservation and estimates works as
-    expected."""
+    expected. The test depends on scanner memory estimates, which differ on remote
+    filesystems with different (synthetic) block sizes."""
     # Test that the query will be rejected by admission control if:
     # a) the largest per-backend min buffer reservation is larger than the query mem limit
     # b) the largest per-backend min buffer reservation is larger than the