You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2020/08/04 23:16:22 UTC

[impala] branch master updated (0a13029 -> 15afacd)

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git.


    from 0a13029  IMPALA-8125: Add query option to limit number of hdfs writer instances
     new e187c40  IMPALA-9909: Print body of http error code in Impala Shell.
     new 15afacd  IMPALA-9984: Implement codegen for TupleIsNullPredicate

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 be/src/codegen/gen_ir_descriptions.py              |   1 +
 be/src/codegen/impala-ir.cc                        |   1 +
 be/src/exprs/CMakeLists.txt                        |   1 +
 .../tuple-is-null-predicate-ir.cc}                 |   8 +-
 be/src/exprs/tuple-is-null-predicate.cc            | 121 ++++++++++++++++++++-
 be/src/exprs/tuple-is-null-predicate.h             |   6 +-
 shell/ImpalaHttpClient.py                          |  10 +-
 tests/shell/test_shell_interactive.py              | 105 ++++++++++++++----
 8 files changed, 223 insertions(+), 30 deletions(-)
 copy be/src/{runtime/tuple-row.cc => exprs/tuple-is-null-predicate-ir.cc} (87%)


[impala] 02/02: IMPALA-9984: Implement codegen for TupleIsNullPredicate

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 15afacd124b2f889d42e1fc2db9b03e9ba9a6121
Author: Daniel Becker <da...@cloudera.com>
AuthorDate: Tue Jul 21 11:12:19 2020 +0200

    IMPALA-9984: Implement codegen for TupleIsNullPredicate
    
    This commit implements proper codegen for TupleIsNullPredicate.
    
    Change-Id: I410aa7ec762ca16f455bd7da1dce763c1a7b156e
    Reviewed-on: http://gerrit.cloudera.org:8080/16227
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/codegen/gen_ir_descriptions.py      |   1 +
 be/src/codegen/impala-ir.cc                |   1 +
 be/src/exprs/CMakeLists.txt                |   1 +
 be/src/exprs/tuple-is-null-predicate-ir.cc |  24 ++++++
 be/src/exprs/tuple-is-null-predicate.cc    | 121 ++++++++++++++++++++++++++++-
 be/src/exprs/tuple-is-null-predicate.h     |   6 +-
 6 files changed, 152 insertions(+), 2 deletions(-)

diff --git a/be/src/codegen/gen_ir_descriptions.py b/be/src/codegen/gen_ir_descriptions.py
index fbbca0d..f085a9c 100755
--- a/be/src/codegen/gen_ir_descriptions.py
+++ b/be/src/codegen/gen_ir_descriptions.py
@@ -230,6 +230,7 @@ ir_functions = [
   ["DOUBLE_MIN_MAX_FILTER_INSERT", "_ZN6impala18DoubleMinMaxFilter6InsertEPKv"],
   ["STRING_MIN_MAX_FILTER_INSERT", "_ZN6impala18StringMinMaxFilter6InsertEPKv"],
   ["TIMESTAMP_MIN_MAX_FILTER_INSERT", "_ZN6impala21TimestampMinMaxFilter6InsertEPKv"],
+  ["TUPLE_ROW_GET_TUPLE_IS_NULL", "_ZN6impala19TupleRowTupleIsNullEPKNS_8TupleRowEi"],
   ["DATE_MIN_MAX_FILTER_INSERT", "_ZN6impala16DateMinMaxFilter6InsertEPKv"],
   ["DECIMAL_MIN_MAX_FILTER_INSERT4", "_ZN6impala19DecimalMinMaxFilter7Insert4EPKv"],
   ["DECIMAL_MIN_MAX_FILTER_INSERT8", "_ZN6impala19DecimalMinMaxFilter7Insert8EPKv"],
diff --git a/be/src/codegen/impala-ir.cc b/be/src/codegen/impala-ir.cc
index 2fdc82f..c5bc441 100644
--- a/be/src/codegen/impala-ir.cc
+++ b/be/src/codegen/impala-ir.cc
@@ -56,6 +56,7 @@
 #include "exprs/scalar-expr-ir.cc"
 #include "exprs/string-functions-ir.cc"
 #include "exprs/timestamp-functions-ir.cc"
+#include "exprs/tuple-is-null-predicate-ir.cc"
 #include "exprs/udf-builtins-ir.cc"
 #include "exprs/utility-functions-ir.cc"
 #include "runtime/krpc-data-stream-sender-ir.cc"
diff --git a/be/src/exprs/CMakeLists.txt b/be/src/exprs/CMakeLists.txt
index 649be16..38de9ef 100644
--- a/be/src/exprs/CMakeLists.txt
+++ b/be/src/exprs/CMakeLists.txt
@@ -64,6 +64,7 @@ add_library(Exprs
   timestamp-functions-ir.cc
   timezone_db.cc
   tuple-is-null-predicate.cc
+  tuple-is-null-predicate-ir.cc
   scalar-fn-call.cc
   udf-builtins.cc
   udf-builtins-ir.cc
diff --git a/be/src/exprs/tuple-is-null-predicate-ir.cc b/be/src/exprs/tuple-is-null-predicate-ir.cc
new file mode 100644
index 0000000..9497724
--- /dev/null
+++ b/be/src/exprs/tuple-is-null-predicate-ir.cc
@@ -0,0 +1,24 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "runtime/tuple-row.h"
+
+namespace impala {
+bool TupleRowTupleIsNull(const TupleRow* row, int index) {
+  return row->GetTuple(index) == nullptr;
+}
+}
diff --git a/be/src/exprs/tuple-is-null-predicate.cc b/be/src/exprs/tuple-is-null-predicate.cc
index 4225863..6289f84 100644
--- a/be/src/exprs/tuple-is-null-predicate.cc
+++ b/be/src/exprs/tuple-is-null-predicate.cc
@@ -22,6 +22,8 @@
 #include "gen-cpp/Exprs_types.h"
 
 #include "common/names.h"
+#include "codegen/llvm-codegen.h"
+#include "codegen/codegen-anyval.h"
 #include "runtime/descriptors.h"
 #include "runtime/tuple-row.h"
 
@@ -61,9 +63,126 @@ Status TupleIsNullPredicate::Init(
   return Status::OK();
 }
 
+/// For sample IR, see 'FillCodegendComputeFnConstantFalse' and
+/// 'FillCodegendComputeFnNonConstant'.
+/// If there is at least one non-nullable tuple, we codegen a function that returns a
+/// constant false BooleanValue.
 Status TupleIsNullPredicate::GetCodegendComputeFnImpl(LlvmCodeGen* codegen,
     llvm::Function** fn) {
-  return GetCodegendComputeFnWrapper(codegen, fn);
+  DCHECK_EQ(0, GetNumChildren());
+
+  llvm::Value* args[2];
+  llvm::Function* function = CreateIrFunctionPrototype(
+      "TupleIsNullPredicate", codegen, &args);
+
+  if (tuple_idxs_.size() < tuple_ids_.size()) {
+    FillCodegendComputeFnConstantFalse(codegen, function);
+  } else {
+    FillCodegendComputeFnNonConstant(codegen, function, args);
+  }
+
+  *fn = codegen->FinalizeFunction(function);
+  if (UNLIKELY(*fn == nullptr)) {
+    return Status(TErrorCode::IR_VERIFY_FAILED, "TupleIsNullPredicate");
+  }
+  return Status::OK();
+}
+
+/// Sample IR:
+/// define i16 @TupleIsNullPredicate(%"class.impala::ScalarExprEvaluator"* %eval,
+///                                  %"class.impala::TupleRow"* %row) #50 {
+/// return_false:
+///   ret i16 0
+/// }
+void TupleIsNullPredicate::FillCodegendComputeFnConstantFalse(
+    LlvmCodeGen* codegen, llvm::Function* function) const {
+  llvm::LLVMContext& context = codegen->context();
+  LlvmBuilder builder(context);
+
+  llvm::BasicBlock* return_false_block =
+      llvm::BasicBlock::Create(context, "return_false", function);
+  builder.SetInsertPoint(return_false_block);
+
+  CodegenAnyVal ret_val = CodegenAnyVal::GetNonNullVal(
+      codegen, &builder, ColumnType(TYPE_BOOLEAN), "ret_val");
+  ret_val.SetVal(false);
+  builder.CreateRet(ret_val.GetLoweredValue());
+}
+
+/// Sample IR:
+/// define i16 @TupleIsNullPredicate(%"class.impala::ScalarExprEvaluator"* %eval,
+///                                  %"class.impala::TupleRow"* %row) #51 {
+/// entry:
+///   br label %check_null
+///
+/// check_null:                                       ; preds = %entry
+///   %tuple_is_null_fn = call i1 @_ZN6impala19TupleRowTupleIsNullEPKNS_8TupleRowEi(
+///       %"class.impala::TupleRow"* %row, i32 0)
+///   %0 = select i1 %tuple_is_null_fn, i32 1, i32 0
+///   %count = add i32 0, %0
+///   br label %check_null1
+///
+/// check_null1:                                      ; preds = %check_null
+///   %tuple_is_null_fn2 = call i1 @_ZN6impala19TupleRowTupleIsNullEPKNS_8TupleRowEi(
+///       %"class.impala::TupleRow"* %row, i32 1)
+///   %1 = select i1 %tuple_is_null_fn2, i32 1, i32 0
+///   %count3 = add i32 %count, %1
+///   br label %compare_count
+///
+/// compare_count:                                    ; preds = %check_null1
+///   %count_eq_col_num = icmp eq i32 %count3, 2
+///   %2 = zext i1 %count_eq_col_num to i16
+///   %3 = shl i16 %2, 8
+///   %ret_val = or i16 0, %3
+///   ret i16 %ret_val
+/// }
+void TupleIsNullPredicate::FillCodegendComputeFnNonConstant(
+    LlvmCodeGen* codegen, llvm::Function* function, llvm::Value* args[2]) const {
+  llvm::LLVMContext& context = codegen->context();
+  LlvmBuilder builder(context);
+
+  llvm::BasicBlock* entry_block =
+      llvm::BasicBlock::Create(context, "entry", function);
+  builder.SetInsertPoint(entry_block);
+
+  // Signature:
+  // bool TupleRowTupleIsNull(const TupleRow* row, int index);
+  // Returns whether row->GetTuple(index) is nullptr.
+  llvm::Function* tuple_is_null_fn =
+      codegen->GetFunction(IRFunction::TUPLE_ROW_GET_TUPLE_IS_NULL, false);
+
+  llvm::Value* current_count_val = codegen->GetI32Constant(0);
+  for (int i = 0; i < tuple_idxs_.size(); i++) {
+    // We create the next block that checks whether the next tuple is null and increases
+    // 'count' if it is. While the builder's insertion point is still in the previous
+    // block we insert a branch to the new block.
+    llvm::BasicBlock* check_null_block
+        = llvm::BasicBlock::Create(context, "check_null", function);
+    builder.CreateBr(check_null_block);
+
+    builder.SetInsertPoint(check_null_block);
+    llvm::Value* tuple_is_null = builder.CreateCall(tuple_is_null_fn,
+        {args[1], codegen->GetI32Constant(tuple_idxs_[i])}, "tuple_is_null_fn");
+    llvm::Value* tuple_is_null_int = builder.CreateSelect(tuple_is_null,
+        codegen->GetI32Constant(1), codegen->GetI32Constant(0));
+    current_count_val = builder.CreateAdd(current_count_val, tuple_is_null_int, "count");
+  }
+
+  // Create the last block that will compare 'count' with the original number of tuples.
+  // If they are equal, return a true value, otherwise a false value. While the builder's
+  // insertion point is still in the previous block we insert a branch to the new block.
+  llvm::BasicBlock* compare_count_block
+      = llvm::BasicBlock::Create(context, "compare_count", function);
+  builder.CreateBr(compare_count_block);
+
+  builder.SetInsertPoint(compare_count_block);
+  llvm::Constant* tuple_ids_size = codegen->GetI32Constant(tuple_ids_.size());
+  llvm::Value* cmp =
+      builder.CreateICmpEQ(current_count_val, tuple_ids_size, "count_eq_col_num");
+  CodegenAnyVal ret_val = CodegenAnyVal::GetNonNullVal(
+      codegen, &builder, ColumnType(TYPE_BOOLEAN), "ret_val");
+  ret_val.SetVal(cmp);
+  builder.CreateRet(ret_val.GetLoweredValue());
 }
 
 string TupleIsNullPredicate::DebugString() const {
diff --git a/be/src/exprs/tuple-is-null-predicate.h b/be/src/exprs/tuple-is-null-predicate.h
index e028ca3..4d1c0b3 100644
--- a/be/src/exprs/tuple-is-null-predicate.h
+++ b/be/src/exprs/tuple-is-null-predicate.h
@@ -32,7 +32,6 @@ class TExprNode;
 /// on non-nullable tuples (see IMPALA-904/IMPALA-5504). This happens for exprs that
 /// are evaluated before the outer join that makes the tuples given to this predicate
 /// nullable, e.g., in the ON-clause of that join.
-/// TODO: Implement codegen to eliminate overhead on non-nullable tuples.
 class TupleIsNullPredicate: public Predicate {
  protected:
   friend class ScalarExpr;
@@ -49,6 +48,11 @@ class TupleIsNullPredicate: public Predicate {
       ScalarExprEvaluator*, const TupleRow*) const override;
 
  private:
+  void FillCodegendComputeFnConstantFalse(
+      LlvmCodeGen* codegen, llvm::Function* function) const;
+  void FillCodegendComputeFnNonConstant(
+      LlvmCodeGen* codegen, llvm::Function* function, llvm::Value* args[2]) const;
+
   /// Tuple ids to check for NULL. May contain ids of nullable and non-nullable tuples.
   std::vector<TupleId> tuple_ids_;
 


[impala] 01/02: IMPALA-9909: Print body of http error code in Impala Shell.

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit e187c4054340ed8f00c35711185a2fd6719f48b3
Author: Andrew Sherman <as...@cloudera.com>
AuthorDate: Wed May 20 11:57:22 2020 -0700

    IMPALA-9909: Print body of http error code in Impala Shell.
    
    Make Impala Shell closer to Impyla by printing the body of any http
    error code message received when using hs2-over-http. The common case is
    that there is nothing in the body, in which case the behavior is
    unchanged.
    
    TESTING
     Added a test for the new functionality.
     Ran all end-to-end tests.
    
    Change-Id: Iabc45eda0b87ca694b8359148cda6a7c1d5a8fff
    Reviewed-on: http://gerrit.cloudera.org:8080/16269
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 shell/ImpalaHttpClient.py             |  10 +++-
 tests/shell/test_shell_interactive.py | 105 ++++++++++++++++++++++++++--------
 2 files changed, 90 insertions(+), 25 deletions(-)

diff --git a/shell/ImpalaHttpClient.py b/shell/ImpalaHttpClient.py
index cd79971..1820a1b 100644
--- a/shell/ImpalaHttpClient.py
+++ b/shell/ImpalaHttpClient.py
@@ -36,6 +36,7 @@ import six
 # The current changes that have been applied:
 # - Added logic for the 'Expect: 100-continue' header on large requests
 # - If an error code is received back in flush(), an exception is thrown.
+# Note there is a copy of this code in Impyla.
 class ImpalaHttpClient(TTransportBase):
   """Http implementation of TTransport base."""
 
@@ -151,6 +152,9 @@ class ImpalaHttpClient(TTransportBase):
   def read(self, sz):
     return self.__http_response.read(sz)
 
+  def readBody(self):
+    return self.__http_response.read()
+
   def write(self, buf):
     self.__wbuf.write(buf)
 
@@ -208,4 +212,8 @@ class ImpalaHttpClient(TTransportBase):
     if self.code >= 300:
       # Report any http response code that is not 1XX (informational response) or
       # 2XX (successful).
-      raise RPCException("HTTP code {}: {}".format(self.code, self.message))
+      body = self.readBody()
+      if not body:
+        raise RPCException("HTTP code {}: {}".format(self.code, self.message))
+      else:
+        raise RPCException("HTTP code {}: {} [{}]".format(self.code, self.message, body))
diff --git a/tests/shell/test_shell_interactive.py b/tests/shell/test_shell_interactive.py
index 245fb9b..f9668d6 100755
--- a/tests/shell/test_shell_interactive.py
+++ b/tests/shell/test_shell_interactive.py
@@ -76,43 +76,85 @@ def tmp_history_file(request):
   return tmp.name
 
 
-@pytest.yield_fixture
-def http_503_server():
-  class RequestHandler503(SimpleHTTPServer.SimpleHTTPRequestHandler):
-    """A custom http handler that checks for duplicate 'Host' headers from the most
-    recent http request, and always returns a 503 http code"""
+class RequestHandler503(SimpleHTTPServer.SimpleHTTPRequestHandler):
+  """A custom http handler that checks for duplicate 'Host' headers from the most
+  recent http request, and always returns a 503 http code."""
 
-    def do_POST(self):
-      # The unfortunately named self.headers here is an instance of mimetools.Message that
-      # contains the request headers.
-      request_headers = self.headers.headers
+  def __init__(self, request, client_address, server):
+    SimpleHTTPServer.SimpleHTTPRequestHandler.__init__(self, request, client_address,
+                                                       server)
 
-      # Ensure that only one 'Host' header is contained in the request before responding.
-      host_hdr_count = sum([header.startswith('Host:') for header in request_headers])
-      assert host_hdr_count == 1, "duplicate 'Host:' headers in %s" % request_headers
+  def should_send_body_text(self):
+    # in RequestHandler503 we do not send any body text
+    return False
 
-      # Respond with 503.
-      self.send_response(code=httplib.SERVICE_UNAVAILABLE, message="Service Unavailable")
+  def do_POST(self):
+    # The unfortunately named self.headers here is an instance of mimetools.Message that
+    # contains the request headers.
+    request_headers = self.headers.headers
 
-  class TestHTTPServer503(object):
-    def __init__(self):
-      self.HOST = "localhost"
-      self.PORT = get_unused_port()
-      self.httpd = SocketServer.TCPServer((self.HOST, self.PORT), RequestHandler503)
+    # Ensure that only one 'Host' header is contained in the request before responding.
+    host_hdr_count = sum([header.startswith('Host:') for header in request_headers])
+    assert host_hdr_count == 1, "duplicate 'Host:' headers in %s" % request_headers
 
-      self.http_server_thread = threading.Thread(target=self.httpd.serve_forever)
-      self.http_server_thread.start()
+    # Respond with 503.
+    self.send_response(code=httplib.SERVICE_UNAVAILABLE, message="Service Unavailable")
+    if self.should_send_body_text():
+      # Optionally send ody text with 503 message.
+      self.end_headers()
+      self.wfile.write("EXTRA")
 
-  server = TestHTTPServer503()
-  yield server
 
-  # Cleanup after test.
+class RequestHandler503Extra(RequestHandler503):
+  """"Override RequestHandler503 so as to send body text with the 503 message."""
+
+  def __init__(self, request, client_address, server):
+    RequestHandler503.__init__(self, request, client_address, server)
+
+  def should_send_body_text(self):
+    # in RequestHandler503Extra we will send body text
+    return True
+
+
+class TestHTTPServer503(object):
+  def __init__(self, clazz):
+    self.HOST = "localhost"
+    self.PORT = get_unused_port()
+    self.httpd = SocketServer.TCPServer((self.HOST, self.PORT), clazz)
+
+    self.http_server_thread = threading.Thread(target=self.httpd.serve_forever)
+    self.http_server_thread.start()
+
+
+def shutdown_server(server):
+  """Helper method to shutdown a http server."""
   if server.httpd is not None:
     server.httpd.shutdown()
   if server.http_server_thread is not None:
     server.http_server_thread.join()
 
 
+@pytest.yield_fixture
+def http_503_server():
+  """A fixture that creates an http server that returns a 503 http code."""
+  server = TestHTTPServer503(RequestHandler503)
+  yield server
+
+  # Cleanup after test.
+  shutdown_server(server)
+
+
+@pytest.yield_fixture
+def http_503_server_extra():
+  """A fixture that creates an http server that returns a 503 http code with extra
+  body text."""
+  server = TestHTTPServer503(RequestHandler503Extra)
+  yield server
+
+  # Cleanup after test.
+  shutdown_server(server)
+
+
 class TestImpalaShellInteractive(ImpalaTestSuite):
   """Test the impala shell interactively"""
 
@@ -1026,6 +1068,21 @@ class TestImpalaShellInteractive(ImpalaTestSuite):
     shell_proc = spawn_shell([IMPALA_SHELL_EXECUTABLE] + shell_args)
     shell_proc.expect("HTTP code 503", timeout=10)
 
+  def test_http_interactions_extra(self, vector, http_503_server_extra):
+    """Test interactions with the http server when using hs2-http protocol.
+    Check that the shell prints a good message when the server returns a 503 error,
+    including the body text from the message."""
+    protocol = vector.get_value("protocol")
+    if protocol != 'hs2-http':
+      pytest.skip()
+
+    # Check that we get a message about the 503 error when we try to connect.
+    shell_args = ["--protocol={0}".format(protocol),
+                  "-i{0}:{1}".format(http_503_server_extra.HOST,
+                                     http_503_server_extra.PORT)]
+    shell_proc = spawn_shell([IMPALA_SHELL_EXECUTABLE] + shell_args)
+    shell_proc.expect("HTTP code 503: Service Unavailable \[EXTRA\]", timeout=10)
+
 
 def run_impala_shell_interactive(vector, input_lines, shell_args=None,
                                  wait_until_connected=True):