You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2019/07/16 15:26:38 UTC

[impala] branch master updated (a0cc0b7 -> 2dbd7ee)

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git.


    from a0cc0b7  IMPALA-8681: Fix null pointer exception in ValidWriteIdLists generation
     new 9f0cd97  Support SPNEGO for Impala webserver
     new 1cd85d1  IMPALA-8486: fix stale libCache entries in LocalCatalog mode coordinators
     new b6b6b22  IMPALA-8686: docker entrypoint script execs daemon
     new 21586fb  IMPALA-8425: part 2: avoid chown when building containers
     new a852b9f  IMPALA-5031: Out-of-range enums are undefined behavior
     new 2dbd7ee  IMPALA-8758: Improve error message when no executors are online

The 6 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 be/src/exec/kudu-util.h                            |  34 +----
 be/src/exec/parquet/parquet-column-readers.cc      |   4 +-
 be/src/exec/parquet/parquet-level-decoder.cc       |  12 +-
 be/src/exec/parquet/parquet-level-decoder.h        |   2 +-
 be/src/gutil/strings/escaping.cc                   |   8 +-
 be/src/scheduling/cluster-membership-mgr.cc        |   4 +
 be/src/scheduling/scheduler.cc                     |   5 +
 be/src/util/CMakeLists.txt                         |   1 +
 be/src/util/kudu-status-util.h                     |  59 ++++++++
 be/src/util/webserver-test.cc                      |  33 ++++-
 be/src/util/webserver.cc                           | 165 ++++++++++++++++++++-
 be/src/util/webserver.h                            |   7 +
 bin/start-impala-cluster.py                        |   5 -
 common/thrift/parquet.thrift                       |   9 ++
 docker/daemon_entrypoint.sh                        |   8 +-
 docker/impala_base/Dockerfile                      |  20 +--
 .../impala/catalog/CatalogServiceCatalog.java      |   6 +-
 .../impala/catalog/local/CatalogdMetaProvider.java |   9 +-
 tests/common/skip.py                               |   7 -
 tests/custom_cluster/test_coordinators.py          |   6 +
 tests/query_test/test_udfs.py                      |   4 +-
 21 files changed, 326 insertions(+), 82 deletions(-)
 create mode 100644 be/src/util/kudu-status-util.h


[impala] 03/06: IMPALA-8686: docker entrypoint script execs daemon

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit b6b6b22c86702fd457fcfbb6401a9b27207c5f58
Author: Tim Armstrong <ta...@cloudera.com>
AuthorDate: Wed Jun 19 16:42:41 2019 -0700

    IMPALA-8686: docker entrypoint script execs daemon
    
    The script now execs the daemon instead of running it as a child
    process, which is required for signals, etc. to be handled correctly.
    
    Change-Id: Ifefbe0a926cf9cfb8acbd37c3f691dc28847dd8b
    Reviewed-on: http://gerrit.cloudera.org:8080/13682
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 docker/daemon_entrypoint.sh | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/docker/daemon_entrypoint.sh b/docker/daemon_entrypoint.sh
index 16fa7f0..d413162 100755
--- a/docker/daemon_entrypoint.sh
+++ b/docker/daemon_entrypoint.sh
@@ -56,10 +56,4 @@ if ! whoami ; then
   cat /etc/passwd
 fi
 
-"$@"
-EXIT_CODE=$?
-
-# Print out any INFO logs to help with debugging container startup failures.
-# TODO: remove once we have proper logging
-cat /tmp/*.INFO
-exit $EXIT_CODE
+exec "$@"


[impala] 05/06: IMPALA-5031: Out-of-range enums are undefined behavior

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit a852b9fb749860221d8919ac94efc39115d8a65b
Author: Jim Apple <jb...@apache.org>
AuthorDate: Thu Jul 4 11:32:22 2019 -0700

    IMPALA-5031: Out-of-range enums are undefined behavior
    
    This eliminates an out-of-range enum value in the end-to-end
    tests. The interesting part of the backtrace is:
    
    exec/parquet/parquet-column-readers.cc:1530:112: runtime error: load
      of value 38, which is not a valid value for type 'Encoding::type'
        #0 BaseScalarColumnReader::ReadDataPage()
           exec/parquet/parquet-column-readers.cc:1530:112
        #1 BaseScalarColumnReader::NextPage()
           exec/parquet/parquet-column-readers.cc:1769:28
        #2 bool ScalarColumnReader<long, (parquet::Type::type)2, true>
           ::ReadValueBatch<false>(int, int, unsigned char*, int*)
           exec/parquet/parquet-column-readers.cc:459:12
        #3 ScalarColumnReader<long, (parquet::Type::type)2, true>
           ::ReadNonRepeatedValueBatch(MemPool*, int, int, unsigned char*,
           int*) exec/parquet/parquet-column-readers.cc:106:12
        #4 HdfsParquetScanner::AssembleRows(vector<ParquetColumnReader*>
           const&, RowBatch*, bool*)
           exec/parquet/hdfs-parquet-scanner.cc:1113:42
        #5 HdfsParquetScanner::GetNextInternal(RowBatch*)
           exec/parquet/hdfs-parquet-scanner.cc:456:19
        #6 HdfsParquetScanner::ProcessSplit()
           exec/parquet/hdfs-parquet-scanner.cc:353:21
        #7 HdfsScanNode::ProcessSplit(vector<FilterContext> const&,
           MemPool*, io::ScanRange*, long*) exec/hdfs-scan-node.cc:514:21
        #8 HdfsScanNode::ScannerThread(bool, long)
           exec/hdfs-scan-node.cc:415:7
        #9 HdfsScanNode::ThreadTokenAvailableCb(ThreadResourcePool*)::$_0
           ::operator()() const exec/hdfs-scan-node.cc:337:13
    
    Change-Id: Ia86de44daaf56a941fb95b15d5dfd7b5a2752129
    Reviewed-on: http://gerrit.cloudera.org:8080/13804
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/exec/parquet/parquet-column-readers.cc |  4 ++--
 be/src/exec/parquet/parquet-level-decoder.cc  | 12 +++++++++---
 be/src/exec/parquet/parquet-level-decoder.h   |  2 +-
 common/thrift/parquet.thrift                  |  9 +++++++++
 4 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/be/src/exec/parquet/parquet-column-readers.cc b/be/src/exec/parquet/parquet-column-readers.cc
index 0ed8cd8..f115002 100644
--- a/be/src/exec/parquet/parquet-column-readers.cc
+++ b/be/src/exec/parquet/parquet-column-readers.cc
@@ -1522,13 +1522,13 @@ Status BaseScalarColumnReader::ReadDataPage() {
 
     // Initialize the repetition level data
     RETURN_IF_ERROR(rep_levels_.Init(filename(),
-        current_page_header_.data_page_header.repetition_level_encoding,
+        &current_page_header_.data_page_header.repetition_level_encoding,
         parent_->perm_pool_.get(), parent_->state_->batch_size(), max_rep_level(), &data_,
         &data_size));
 
     // Initialize the definition level data
     RETURN_IF_ERROR(def_levels_.Init(filename(),
-        current_page_header_.data_page_header.definition_level_encoding,
+        &current_page_header_.data_page_header.definition_level_encoding,
         parent_->perm_pool_.get(), parent_->state_->batch_size(), max_def_level(), &data_,
         &data_size));
 
diff --git a/be/src/exec/parquet/parquet-level-decoder.cc b/be/src/exec/parquet/parquet-level-decoder.cc
index 166230c..e4c2abc 100644
--- a/be/src/exec/parquet/parquet-level-decoder.cc
+++ b/be/src/exec/parquet/parquet-level-decoder.cc
@@ -21,6 +21,7 @@
 #include "runtime/mem-pool.h"
 #include "runtime/mem-tracker.h"
 #include "util/bit-util.h"
+#include "util/ubsan.h"
 
 #include "common/names.h"
 
@@ -32,7 +33,7 @@ const int16_t ParquetLevel::ROW_GROUP_END;
 const int16_t ParquetLevel::INVALID_LEVEL;
 const int16_t ParquetLevel::INVALID_POS;
 
-Status ParquetLevelDecoder::Init(const string& filename, Encoding::type encoding,
+Status ParquetLevelDecoder::Init(const string& filename, const Encoding::type* encoding,
     MemPool* cache_pool, int cache_size, int max_level, uint8_t** data, int* data_size) {
   DCHECK(*data != nullptr);
   DCHECK_GE(*data_size, 0);
@@ -46,7 +47,12 @@ Status ParquetLevelDecoder::Init(const string& filename, Encoding::type encoding
   if (max_level == 0) return Status::OK();
 
   int32_t num_bytes = 0;
-  switch (encoding) {
+  if (Ubsan::EnumToInt(encoding) > Encoding::MAX_ENUM_VALUE) {
+    stringstream ss;
+    ss << "Unsupported encoding: " << Ubsan::EnumToInt(encoding);
+    return Status(ss.str());
+  }
+  switch (*encoding) {
     case Encoding::RLE: {
       Status status;
       if (!ReadWriteUtil::Read(data, data_size, &num_bytes, &status)) {
@@ -63,7 +69,7 @@ Status ParquetLevelDecoder::Init(const string& filename, Encoding::type encoding
       return Status(TErrorCode::PARQUET_BIT_PACKED_LEVELS, filename);
     default: {
       stringstream ss;
-      ss << "Unsupported encoding: " << encoding;
+      ss << "Unsupported encoding: " << *encoding;
       return Status(ss.str());
     }
   }
diff --git a/be/src/exec/parquet/parquet-level-decoder.h b/be/src/exec/parquet/parquet-level-decoder.h
index 8626b4d..58bda02 100644
--- a/be/src/exec/parquet/parquet-level-decoder.h
+++ b/be/src/exec/parquet/parquet-level-decoder.h
@@ -52,7 +52,7 @@ class ParquetLevelDecoder {
   /// Initialize the LevelDecoder. Reads and advances the provided data buffer if the
   /// encoding requires reading metadata from the page header. 'cache_size' will be
   /// rounded up to a multiple of 32 internally.
-  Status Init(const string& filename, parquet::Encoding::type encoding,
+  Status Init(const string& filename, const parquet::Encoding::type* encoding,
       MemPool* cache_pool, int cache_size, int max_level, uint8_t** data, int* data_size);
 
   /// Returns the next level or INVALID_LEVEL if there was an error. Not as efficient
diff --git a/common/thrift/parquet.thrift b/common/thrift/parquet.thrift
index 6c9011b..1197c2e 100644
--- a/common/thrift/parquet.thrift
+++ b/common/thrift/parquet.thrift
@@ -450,6 +450,15 @@ enum Encoding {
   /** Dictionary encoding: the ids are encoded using the RLE encoding
    */
   RLE_DICTIONARY = 8;
+
+  /**
+   * Useful for checking an integer's value before casting it to an enum of this type.
+   * That check has value in avoiding undefined behavior in the [expr] section of the
+   * C++14 standard: "If during the evaluation of an expression, the result is not
+   * mathematically defined or not in the range of representable values for its type,
+   * the behavior is undefined."
+   */
+  MAX_ENUM_VALUE = 8;
 }
 
 /**


[impala] 04/06: IMPALA-8425: part 2: avoid chown when building containers

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 21586fbfbc885b1a330154a1199189d9f9a38609
Author: Tim Armstrong <ta...@cloudera.com>
AuthorDate: Thu Jun 20 19:37:34 2019 -0700

    IMPALA-8425: part 2: avoid chown when building containers
    
    This reduces the size of an image from 1.36GB to 705MB with
    a release build on my system.
    
    Thanks to Joe McDonnell for the suggestion.
    
    Testing:
    Precommit docker tests are sufficient to validate that
    the containers are functional.
    
    Change-Id: I5476a97a7a030499a60a6cef67f8c3cdffa7e756
    Reviewed-on: http://gerrit.cloudera.org:8080/13699
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 docker/impala_base/Dockerfile | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/docker/impala_base/Dockerfile b/docker/impala_base/Dockerfile
index 225adda..203f42b 100644
--- a/docker/impala_base/Dockerfile
+++ b/docker/impala_base/Dockerfile
@@ -27,11 +27,18 @@ RUN apt-get update && \
   apt-get clean && \
   rm -rf /var/lib/apt/lists/*
 
+# Use a non-privileged impala user to run the daemons in the container.
+# That user should own everything in the /opt/impala subdirectory.
+RUN groupadd -r impala && useradd --no-log-init -r -g impala impala && \
+    mkdir -p /opt/impala && chown impala /opt/impala && \
+    chmod ugo+w /etc/passwd
+USER impala
+
 # Copy build artifacts required for the daemon processes.
 # Need to have multiple copy commands to preserve directory structure.
-COPY lib /opt/impala/lib
-COPY www /opt/impala/www
-COPY bin /opt/impala/bin
+COPY --chown=impala lib /opt/impala/lib
+COPY --chown=impala www /opt/impala/www
+COPY --chown=impala bin /opt/impala/bin
 # Symlink here instead of in setup_build_context to avoid duplicate binaries.
 RUN cd /opt/impala/bin && ln -s impalad statestored && ln -s impalad catalogd && \
 # Create conf directory for later config injection.
@@ -39,11 +46,4 @@ RUN cd /opt/impala/bin && ln -s impalad statestored && ln -s impalad catalogd &&
 # Create logs directory to collect container logs.
     mkdir /opt/impala/logs
 
-# Use a non-privileged impala user to run the daemons in the container.
-# That user should own everything in the /opt/impala subdirectory.
-RUN groupadd -r impala && useradd --no-log-init -r -g impala impala && \
-    mkdir -p /opt/impala && chown impala -R /opt/impala && \
-    chmod ugo+w /etc/passwd
-USER impala
-
 WORKDIR /opt/impala/


[impala] 06/06: IMPALA-8758: Improve error message when no executors are online

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 2dbd7eec81485293f79bcddd5b40546c7a0471c2
Author: Lars Volker <lv...@cloudera.com>
AuthorDate: Mon Jul 15 13:31:23 2019 -0700

    IMPALA-8758: Improve error message when no executors are online
    
    Prior to this change a dedicated coordinator would not create the
    default executor group when registering its own backend descriptor in
    the cluster membership. This caused a misleading error message during
    scheduling when the default executor group could not be found.
    
    To improve this, we now always create the default executor group and
    return an improved error message if it is empty.
    
    This change adds a test that validates that a query against a cluster
    without executors returns the expected error.
    
    Change-Id: Ia4428ef833363f52b14dfff253569212427a8e2f
    Reviewed-on: http://gerrit.cloudera.org:8080/13866
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/scheduling/cluster-membership-mgr.cc | 4 ++++
 be/src/scheduling/scheduler.cc              | 5 +++++
 bin/start-impala-cluster.py                 | 5 -----
 tests/custom_cluster/test_coordinators.py   | 6 ++++++
 4 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/be/src/scheduling/cluster-membership-mgr.cc b/be/src/scheduling/cluster-membership-mgr.cc
index ca8f20d..09a3678 100644
--- a/be/src/scheduling/cluster-membership-mgr.cc
+++ b/be/src/scheduling/cluster-membership-mgr.cc
@@ -255,6 +255,10 @@ void ClusterMembershipMgr::UpdateMembership(
       } else if (local_be_desc->is_executor) {
         VLOG(1) << "Adding local backend to group " << group;
         (*new_executor_groups)[group].AddExecutor(*local_be_desc);
+      } else {
+        //TODO(IMPALA-8484): Remove this when it's no longer needed
+        VLOG(1) << "Creating empty default executor group";
+        new_executor_groups->emplace(group, ExecutorGroup());
       }
     }
     AddLocalBackendToStatestore(*local_be_desc, subscriber_topic_updates);
diff --git a/be/src/scheduling/scheduler.cc b/be/src/scheduling/scheduler.cc
index bb64f0c..12871ef 100644
--- a/be/src/scheduling/scheduler.cc
+++ b/be/src/scheduling/scheduler.cc
@@ -666,7 +666,12 @@ Status Scheduler::Schedule(QuerySchedule* schedule) {
   if (it == membership_snapshot->executor_groups.end()) {
     return Status(Substitute("Unknown executor group: $0", group_name));
   }
+
   const ExecutorGroup& executor_group = it->second;
+  if (executor_group.NumExecutors() == 0) {
+    return Status(Substitute("No executors registered in group: $0", group_name));
+  }
+
   ExecutorConfig executor_config =
       {executor_group, *membership_snapshot->local_be_desc};
   RETURN_IF_ERROR(ComputeScanRangeAssignment(executor_config, schedule));
diff --git a/bin/start-impala-cluster.py b/bin/start-impala-cluster.py
index 8b95af3..df32bf1 100755
--- a/bin/start-impala-cluster.py
+++ b/bin/start-impala-cluster.py
@@ -622,11 +622,6 @@ def validate_options():
     LOG.error("Please specify a valid number of coordinators > 0")
     sys.exit(1)
 
-  if (options.use_exclusive_coordinators and
-      options.num_coordinators >= options.cluster_size):
-    LOG.error("Cannot start an Impala cluster with no executors")
-    sys.exit(1)
-
   if not os.path.isdir(options.log_dir):
     LOG.error("Log dir does not exist or is not a directory: {log_dir}".format(
         log_dir=options.log_dir))
diff --git a/tests/custom_cluster/test_coordinators.py b/tests/custom_cluster/test_coordinators.py
index 3729749..fdf3004 100644
--- a/tests/custom_cluster/test_coordinators.py
+++ b/tests/custom_cluster/test_coordinators.py
@@ -287,3 +287,9 @@ class TestCoordinators(CustomClusterTestSuite):
     finally:
       assert client is not None
       self._stop_impala_cluster()
+
+  @pytest.mark.execute_serially
+  @CustomClusterTestSuite.with_args(cluster_size=1, num_exclusive_coordinators=1)
+  def test_dedicated_coordinator_without_executors(self):
+    result = self.execute_query_expect_failure(self.client, "select 2")
+    assert "No executors registered in group: default" in str(result)


[impala] 01/06: Support SPNEGO for Impala webserver

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 9f0cd9743a9c364d1eb42f29f67494298ed574ae
Author: Todd Lipcon <to...@apache.org>
AuthorDate: Mon Jul 1 16:59:24 2019 -0700

    Support SPNEGO for Impala webserver
    
    This ports over changes from kudu commit
    1f291b77ef0868ac888a850678adc2d7cce65529 which implemented SPNEGO for
    the Kudu webserver.
    
    Unfortunately, thorough testing of this is difficult given that curl
    isn't currently in the toolchain. I was able to manually test this by
    adding a 'sleep(1000)' call into the newly added test case, then setting
    up $KRB5_CONFIG in my shell to point to the temporary KDC's environment,
    and using 'curl -u : --negotiate http://...' to authenticate.
    
    Strangely, using the version of curl on el7 didn't seem to work properly
    (perhaps an el7 curl bug) but using curl on my Ubuntu 18 laptop I was
    able to authenticate with SPNEGO.
    
    Change-Id: Ife2b04310e1571d231bf8ee1bcfd3b7afc2edd8f
    Reviewed-on: http://gerrit.cloudera.org:8080/13774
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/exec/kudu-util.h          |  34 +-------
 be/src/gutil/strings/escaping.cc |   8 +-
 be/src/util/CMakeLists.txt       |   1 +
 be/src/util/kudu-status-util.h   |  59 ++++++++++++++
 be/src/util/webserver-test.cc    |  33 +++++++-
 be/src/util/webserver.cc         | 165 ++++++++++++++++++++++++++++++++++++++-
 be/src/util/webserver.h          |   7 ++
 7 files changed, 266 insertions(+), 41 deletions(-)

diff --git a/be/src/exec/kudu-util.h b/be/src/exec/kudu-util.h
index 37755bd..b6c049f 100644
--- a/be/src/exec/kudu-util.h
+++ b/be/src/exec/kudu-util.h
@@ -21,33 +21,16 @@
 // TODO: Remove when toolchain callbacks.h properly defines ::tm.
 struct tm;
 
-#include <gutil/strings/substitute.h>
 #include <kudu/client/callbacks.h>
 #include <kudu/client/client.h>
 #include <kudu/client/value.h>
 
-#include "common/status.h"
+#include "util/kudu-status-util.h"
 #include "runtime/string-value.h"
 #include "runtime/types.h"
 
 namespace impala {
 
-/// Takes a Kudu status and returns an impala one, if it's not OK.
-/// Evaluates the prepend argument only if the status is not OK.
-#define KUDU_RETURN_IF_ERROR(expr, prepend) \
-  do { \
-    const kudu::Status& _s = (expr); \
-    if (UNLIKELY(!_s.ok())) {                                      \
-      return Status(strings::Substitute("$0: $1", prepend, _s.ToString())); \
-    } \
-  } while (0)
-
-#define KUDU_ASSERT_OK(status)                                     \
-  do {                                                             \
-    const Status& _s = FromKuduStatus(status);                     \
-    ASSERT_TRUE(_s.ok()) << "Error: " << _s.GetDetail();           \
-  } while (0)
-
 class TimestampValue;
 
 /// Returns false when running on an operating system that Kudu doesn't support. If this
@@ -94,21 +77,6 @@ Status CreateKuduValue(const ColumnType& col_type, void* value,
 ColumnType KuduDataTypeToColumnType(kudu::client::KuduColumnSchema::DataType type,
     const kudu::client::KuduColumnTypeAttributes& type_attributes);
 
-/// Utility function for creating an Impala Status object based on a kudu::Status object.
-/// 'k_status' is the kudu::Status object.
-/// 'prepend' is a string to be prepended to details of 'k_status' when creating the
-/// Impala Status object.
-/// Note that we don't translate the kudu::Status error code to Impala error code
-/// so the returned status' type is always of TErrorCode::GENERAL.
-inline Status FromKuduStatus(
-    const kudu::Status& k_status, const std::string prepend = "") {
-  if (LIKELY(k_status.ok())) return Status::OK();
-  const std::string& err_msg = prepend.empty() ? k_status.ToString() :
-      strings::Substitute("$0: $1", prepend, k_status.ToString());
-  VLOG(1) << err_msg;
-  return Status::Expected(err_msg);
-}
-
 /// Converts 'mode' to its equivalent ReadMode, stored in 'out'. Possible values for
 /// 'mode' are 'READ_LATEST' and 'READ_AT_SNAPSHOT'. If 'mode' is invalid, an error is
 /// returned.
diff --git a/be/src/gutil/strings/escaping.cc b/be/src/gutil/strings/escaping.cc
index adb8093..aaae9bf 100644
--- a/be/src/gutil/strings/escaping.cc
+++ b/be/src/gutil/strings/escaping.cc
@@ -895,10 +895,10 @@ int Base64UnescapeInternal(const char *src, int szsrc,
       // szsrc claims the string is).
 
       if (!src[0] || !src[1] || !src[2] ||
-          (temp = ((unbase64[src[0]] << 18) |
-                   (unbase64[src[1]] << 12) |
-                   (unbase64[src[2]] << 6) |
-                   (unbase64[src[3]]))) & 0x80000000) {
+          (temp = ((unsigned(unbase64[src[0]]) << 18) |
+                   (unsigned(unbase64[src[1]]) << 12) |
+                   (unsigned(unbase64[src[2]]) << 6) |
+                   (unsigned(unbase64[src[3]])))) & 0x80000000) {
         // Iff any of those four characters was bad (null, illegal,
         // whitespace, padding), then temp's high bit will be set
         // (because unbase64[] is -1 for all bad characters).
diff --git a/be/src/util/CMakeLists.txt b/be/src/util/CMakeLists.txt
index b12ae2d..605060e 100644
--- a/be/src/util/CMakeLists.txt
+++ b/be/src/util/CMakeLists.txt
@@ -201,4 +201,5 @@ ADD_UNIFIED_BE_LSAN_TEST(time-test "TimeTest.*")
 ADD_UNIFIED_BE_LSAN_TEST(uid-util-test "UidUtil.*")
 # Using standalone webserver-test for now, nonstandard main() passes in a port.
 ADD_BE_LSAN_TEST(webserver-test)
+TARGET_LINK_LIBRARIES(webserver-test mini_kdc)
 ADD_UNIFIED_BE_LSAN_TEST(zip-util-test "ZipUtilTest.*")
diff --git a/be/src/util/kudu-status-util.h b/be/src/util/kudu-status-util.h
new file mode 100644
index 0000000..5b9484b
--- /dev/null
+++ b/be/src/util/kudu-status-util.h
@@ -0,0 +1,59 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "common/status.h"
+#include "gutil/strings/substitute.h"
+#include "kudu/util/status.h"
+
+/// Takes a Kudu status and returns an impala one, if it's not OK.
+/// Evaluates the prepend argument only if the status is not OK.
+#define KUDU_RETURN_IF_ERROR(expr, prepend)                        \
+  do {                                                             \
+    const kudu::Status& _s = (expr);                               \
+    if (UNLIKELY(!_s.ok())) {                                      \
+      return impala::Status(strings::Substitute(                   \
+          "$0: $1", prepend, _s.ToString()));                      \
+    }                                                              \
+  } while (0)
+
+#define KUDU_ASSERT_OK(status)                                     \
+  do {                                                             \
+    const impala::Status& _s = FromKuduStatus(status);             \
+    ASSERT_TRUE(_s.ok()) << "Error: " << _s.GetDetail();           \
+  } while (0)
+
+
+namespace impala {
+
+/// Utility function for creating an Impala Status object based on a kudu::Status object.
+/// 'k_status' is the kudu::Status object.
+/// 'prepend' is a string to be prepended to details of 'k_status' when creating the
+/// Impala Status object.
+/// Note that we don't translate the kudu::Status error code to Impala error code
+/// so the returned status' type is always of TErrorCode::GENERAL.
+inline Status FromKuduStatus(
+    const kudu::Status& k_status, const std::string prepend = "") {
+  if (LIKELY(k_status.ok())) return Status::OK();
+  const std::string& err_msg = prepend.empty() ? k_status.ToString() :
+      strings::Substitute("$0: $1", prepend, k_status.ToString());
+  VLOG(1) << err_msg;
+  return Status::Expected(err_msg);
+}
+
+} // namespace impala
diff --git a/be/src/util/webserver-test.cc b/be/src/util/webserver-test.cc
index c0c542a..f56bc68 100644
--- a/be/src/util/webserver-test.cc
+++ b/be/src/util/webserver-test.cc
@@ -25,9 +25,14 @@
 #include "common/init.h"
 #include "testutil/gtest-util.h"
 #include "testutil/scoped-flag-setter.h"
-#include "util/webserver.h"
+
 #include "util/default-path-handlers.h"
+#include "util/kudu-status-util.h"
+#include "util/webserver.h"
+
+#include "kudu/security/test/mini_kdc.h"
 
+DECLARE_bool(webserver_require_spnego);
 DECLARE_int32(webserver_port);
 DECLARE_string(webserver_password_file);
 DECLARE_string(webserver_certificate_file);
@@ -320,6 +325,32 @@ TEST(Webserver, SslGoodTlsVersion) {
   }
 }
 
+using kudu::MiniKdc;
+using kudu::MiniKdcOptions;
+
+TEST(Webserver, TestWithSpnego) {
+  MiniKdc kdc(MiniKdcOptions{});
+  KUDU_ASSERT_OK(kdc.Start());
+  kdc.SetKrb5Environment();
+
+  string kt_path;
+  KUDU_ASSERT_OK(kdc.CreateServiceKeytab("HTTP/127.0.0.1", &kt_path));
+  CHECK_ERR(setenv("KRB5_KTNAME", kt_path.c_str(), 1));
+  KUDU_ASSERT_OK(kdc.CreateUserPrincipal("alice"));
+
+  gflags::FlagSaver saver;
+  FLAGS_webserver_require_spnego = true;
+
+  Webserver webserver(FLAGS_webserver_port);
+  ASSERT_OK(webserver.Start());
+
+  // Don't expect HTTP requests to work without Kerberos credentials.
+  stringstream contents;
+  ASSERT_FALSE(HttpGet("localhost", FLAGS_webserver_port, "/", &contents).ok());
+
+  // TODO(todd): import curl into native-toolchain and test this with
+  // authentication.
+}
 
 TEST(Webserver, StartWithPasswordFileTest) {
   stringstream password_file;
diff --git a/be/src/util/webserver.cc b/be/src/util/webserver.cc
index caf45d3..f222bdf 100644
--- a/be/src/util/webserver.cc
+++ b/be/src/util/webserver.cc
@@ -28,12 +28,18 @@
 #include <boost/lexical_cast.hpp>
 #include <boost/mem_fn.hpp>
 #include <boost/thread/locks.hpp>
-#include <gutil/strings/substitute.h>
 #include <rapidjson/document.h>
 #include <rapidjson/prettywriter.h>
 #include <rapidjson/stringbuffer.h>
 
 #include "common/logging.h"
+#include "gutil/endian.h"
+#include "gutil/strings/substitute.h"
+#include "gutil/strings/strip.h"
+#include "kudu/util/env.h"
+#include "kudu/util/logging.h"
+#include "kudu/util/net/sockaddr.h"
+#include "kudu/security/gssapi.h"
 #include "rpc/thrift-util.h"
 #include "runtime/exec-env.h"
 #include "service/impala-server.h"
@@ -41,13 +47,13 @@
 #include "util/asan.h"
 #include "util/coding-util.h"
 #include "util/cpu-info.h"
+#include "util/debug-util.h"
 #include "util/disk-info.h"
 #include "util/mem-info.h"
 #include "util/os-info.h"
 #include "util/os-util.h"
-#include "util/process-state-info.h"
-#include "util/debug-util.h"
 #include "util/pretty-printer.h"
+#include "util/process-state-info.h"
 #include "util/stopwatch.h"
 
 #include "common/names.h"
@@ -105,6 +111,10 @@ DEFINE_int32(webserver_max_post_length_bytes, 1024 * 1024,
              "The maximum length of a POST request that will be accepted by "
              "the embedded web server.");
 
+DEFINE_bool(webserver_require_spnego, false,
+            "Require connections to the web server to authenticate via Kerberos "
+            "using SPNEGO.");
+
 DECLARE_bool(is_coordinator);
 DECLARE_string(ssl_minimum_version);
 DECLARE_string(ssl_cipher_list);
@@ -163,6 +173,53 @@ string HttpStatusCodeToString(HttpStatusCode code) {
   LOG(FATAL) << "Unexpected HTTP response code";
   return "";
 }
+
+
+void SendPlainResponse(struct sq_connection* connection,
+                       const string& response_code_line,
+                       const string& content,
+                       const vector<string>& header_lines) {
+  sq_printf(connection, "HTTP/1.1 %s\r\n", response_code_line.c_str());
+  for (const auto& h : header_lines) {
+    sq_printf(connection, "%s\r\n", h.c_str());
+  }
+  sq_printf(connection, "Content-Type: text/plain\r\n");
+  sq_printf(connection, "Content-Length: %zd\r\n\r\n", content.size());
+  sq_printf(connection, "%s", content.c_str());
+}
+
+// Return the address of the remote user from the squeasel request info.
+kudu::Sockaddr GetRemoteAddress(const struct sq_request_info* req) {
+  struct sockaddr_in addr;
+  addr.sin_family = AF_INET;
+  addr.sin_port = NetworkByteOrder::FromHost16(req->remote_port);
+  addr.sin_addr.s_addr = NetworkByteOrder::FromHost32(req->remote_ip);
+  return kudu::Sockaddr(addr);
+}
+
+
+// Performs a step of SPNEGO authentication by parsing the HTTP Authorization header
+// 'authz_header' and running it through GSSAPI. If authentication fails or the header
+// is invalid, a bad Status will be returned (and the other out-parameters left
+// untouched).
+kudu::Status RunSpnegoStep(const char* authz_header, string* resp_header,
+                     string* authn_user) {
+  string neg_token;
+  if (authz_header && !TryStripPrefixString(authz_header, "Negotiate ", &neg_token)) {
+    return kudu::Status::InvalidArgument("bad Negotiate header");
+  }
+
+  string resp_token_b64;
+  bool is_complete;
+  RETURN_NOT_OK(kudu::gssapi::SpnegoStep(
+      neg_token, &resp_token_b64, &is_complete, authn_user));
+
+  if (!resp_token_b64.empty()) {
+    *resp_header = Substitute("WWW-Authenticate: Negotiate $0", resp_token_b64);
+  }
+   return is_complete ? kudu::Status::OK() : kudu::Status::Incomplete("authn incomplete");
+}
+
 } // anonymous namespace
 
 // Builds a valid HTTP header given the response code and a content type.
@@ -310,6 +367,19 @@ Status Webserver::Start() {
     options.push_back(FLAGS_webserver_password_file.c_str());
   }
 
+  if (FLAGS_webserver_require_spnego) {
+    // If Kerberos has been configured, security::InitKerberosForServer() will
+    // already have been called, ensuring that the keytab path has been
+    // propagated into this environment variable where the GSSAPI calls will
+    // pick it up. In other words, we aren't expecting users to pass in this
+    // environment variable specifically.
+    const char* kt_file = getenv("KRB5_KTNAME");
+    if (!kt_file || !kudu::Env::Default()->FileExists(kt_file)) {
+      return Status("Unable to configure web server for SPNEGO authentication: "
+                    "must configure a keytab file for the server");
+    }
+  }
+
   options.push_back("listening_ports");
   options.push_back(listening_str.c_str());
 
@@ -399,6 +469,13 @@ sq_callback_result_t Webserver::BeginRequestCallbackStatic(
 
 sq_callback_result_t Webserver::BeginRequestCallback(struct sq_connection* connection,
     struct sq_request_info* request_info) {
+  if (FLAGS_webserver_require_spnego){
+    sq_callback_result_t spnego_result = HandleSpnego(connection, request_info);
+    if (spnego_result != SQ_CONTINUE_HANDLING) {
+      return spnego_result;
+    }
+  }
+
   if (!FLAGS_webserver_doc_root.empty() && FLAGS_enable_webserver_doc_root) {
     if (strncmp(DOC_FOLDER, request_info->uri, DOC_FOLDER_LEN) == 0) {
       VLOG(2) << "HTTP File access: " << request_info->uri;
@@ -501,6 +578,88 @@ sq_callback_result_t Webserver::BeginRequestCallback(struct sq_connection* conne
   return SQ_HANDLED_OK;
 }
 
+sq_callback_result_t Webserver::HandleSpnego(struct sq_connection* connection,
+    struct sq_request_info* request_info) {
+  const char* authz_header = sq_get_header(connection, "Authorization");
+  string resp_header, authn_princ;
+  kudu::Status s = RunSpnegoStep(authz_header, &resp_header, &authn_princ);
+  // Negotiation not finished: answer 401 with whatever challenge header was produced.
+  if (s.IsIncomplete()) {
+    SendPlainResponse(connection, "401 Authentication Required",
+                      "Must authenticate with SPNEGO.",
+                      { resp_header });
+    return SQ_HANDLED_OK;
+  }
+  if (s.ok() && authn_princ.empty()) {
+    s = kudu::Status::RuntimeError("SPNEGO indicated complete, but got empty principal");
+    // Crash in debug builds, but fall through to treating as an error 500 in
+    // release.
+    LOG(DFATAL) << "Got no authenticated principal for SPNEGO-authenticated "
+                << "connection from "
+                << GetRemoteAddress(request_info).ToString()
+                << ": " << s.ToString();
+  }
+  if (!s.ok()) {
+    LOG(WARNING) << "Failed to authenticate request from "
+                 << GetRemoteAddress(request_info).ToString()
+                 << " via SPNEGO: " << s.ToString();
+    const char* http_status = s.IsNotAuthorized() ? "401 Authentication Required" :
+        "500 Internal Server Error";
+
+    SendPlainResponse(connection, http_status, s.ToString(), {});
+    return SQ_HANDLED_OK;
+  }
+
+  // Authentication succeeded: record the principal on the request info.
+  request_info->remote_user = strdup(authn_princ.c_str());
+
+  // NOTE: According to the SPNEGO RFC (https://tools.ietf.org/html/rfc4559) it
+  // is possible that a non-empty token will be returned along with the HTTP 200
+  // response:
+  //
+  //     A status code 200 status response can also carry a "WWW-Authenticate"
+  //     response header containing the final leg of an authentication.  In
+  //     this case, the gssapi-data will be present.  Before using the
+  //     contents of the response, the gssapi-data should be processed by
+  //     gss_init_security_context to determine the state of the security
+  //     context.  If this function indicates success, the response can be
+  //     used by the application.  Otherwise, an appropriate action, based on
+  //     the authentication status, should be taken.
+  //
+  //     For example, the authentication could have failed on the final leg if
+  //     mutual authentication was requested and the server was not able to
+  //     prove its identity.  In this case, the returned results are suspect.
+  //     It is not always possible to mutually authenticate the server before
+  //     the HTTP operation.  POST methods are in this category.
+  //
+  // In fact, from inspecting the MIT krb5 source code, it appears that this
+  // only happens when the client requests mutual authentication by passing
+  // 'GSS_C_MUTUAL_FLAG' when establishing its side of the protocol. In practice,
+  // this seems to be widely unimplemented:
+  //
+  // - curl has some source code to support GSS_C_MUTUAL_FLAG, but in order to
+  //   enable it, you have to modify a FALSE constant to TRUE and recompile curl.
+  //   In fact, it was broken for all of 2015 without anyone noticing (see curl
+  //   commit 73f1096335d468b5be7c3cc99045479c3314f433)
+  //
+  // - Chrome doesn't support mutual auth at all -- see DelegationTypeToFlag(...)
+  //   in src/net/http/http_auth_gssapi_posix.cc.
+  //
+  // In practice, users depend on TLS to authenticate the server, and SPNEGO
+  // is used to authenticate the client.
+  //
+  // Given this, and because actually sending back the token on an OK response
+  // would require significant code restructuring (eg buffering the header until
+  // after the response handler has run) we just ignore any response token, but
+  // log a periodic warning just in case it turns out we're wrong about the above.
+  if (!resp_header.empty()) {
+    KLOG_EVERY_N_SECS(WARNING, 5) << "ignoring SPNEGO token on HTTP 200 response "
+                                  << "for user " << authn_princ << " at host "
+                                  << GetRemoteAddress(request_info).ToString();
+  }
+  return SQ_CONTINUE_HANDLING;
+}
+
 void Webserver::RenderUrlWithTemplate(const WebRequest& req,
     const UrlHandler& url_handler, stringstream* output, ContentType* content_type) {
   Document document;
diff --git a/be/src/util/webserver.h b/be/src/util/webserver.h
index 21b85be..7d36894 100644
--- a/be/src/util/webserver.h
+++ b/be/src/util/webserver.h
@@ -163,6 +163,13 @@ class Webserver {
   sq_callback_result_t BeginRequestCallback(struct sq_connection* connection,
       struct sq_request_info* request_info);
 
+  // Handle SPNEGO authentication for this request. Returns SQ_CONTINUE_HANDLING
+  // if authentication was successful, otherwise responds to the request and
+  // returns SQ_HANDLED_OK.
+  sq_callback_result_t HandleSpnego(
+      struct sq_connection* connection,
+      struct sq_request_info* request_info);
+
   /// Renders URLs through the Mustache templating library.
   /// - Default ContentType is HTML.
   /// - Argument 'raw' renders the page with PLAIN ContentType.


[impala] 02/06: IMPALA-8486: fix stale libCache entries in LocalCatalog mode coordinators

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 1cd85d1f8a0d772a4cab263cce4f41728f6ebac7
Author: stiga-huang <hu...@gmail.com>
AuthorDate: Fri Jul 12 00:20:36 2019 +0000

    IMPALA-8486: fix stale libCache entries in LocalCatalog mode coordinators
    
    In LocalCatalog mode, after a function is dropped, statestored will
    broadcast the update to invalidate the cached CatalogObject in each
    coordinator (if it has one). However, the current code path does not
    trigger libCache to remove the cached JAR/SO file. If we replace the
    function file in HDFS with a new one and create the function again
    using the same HDFS path, the SELECT statements in other coordinators
    won't trigger libCache to refresh the local cached file, so they still
    use the old cached file, which causes errors.
    
    When a coordinator invalidates its cached CatalogObject of a function,
    it should also mark the corresponding libCache entry as "needs refresh",
    so that later uses of this function will check the last modified time of
    the HDFS file and refresh it if needed. To achieve this, we have to
    propagate the HDFS path of the function along with the full function
    name in the minimal topic, so libCache can target the cached entry.
    
    Note that this does not prevent the dedicated executors from having
    stale libCache entries. Fixing that needs some architectural changes,
    which will be tracked in IMPALA-8763.
    
    Tests
     - Re-enable test_udf_update_via_drop and test_udf_update_via_create for
    LocalCatalog mode.
    
    Change-Id: Ie4812fb8737de3ba6074ffeb9007927bfbbbaf9b
    Reviewed-on: http://gerrit.cloudera.org:8080/13849
    Reviewed-by: Tim Armstrong <ta...@cloudera.com>
    Reviewed-by: Bharath Vissapragada <bh...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../java/org/apache/impala/catalog/CatalogServiceCatalog.java    | 6 +++++-
 .../org/apache/impala/catalog/local/CatalogdMetaProvider.java    | 9 +++++++--
 tests/common/skip.py                                             | 7 -------
 tests/query_test/test_udfs.py                                    | 4 +---
 4 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java b/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java
index 0c8ea59..b881b60 100644
--- a/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java
+++ b/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java
@@ -642,7 +642,11 @@ public class CatalogServiceCatalog extends Catalog {
         // the full information rather than doing fetch-on-demand.
         return obj;
       case FUNCTION:
-        min.setFn(new TFunction(obj.fn.getName()));
+        TFunction fnObject = new TFunction(obj.fn.getName());
+        // IMPALA-8486: add the hdfs location so coordinators can mark their libCache
+        // entry for this function to be stale.
+        if (obj.fn.hdfs_location != null) fnObject.setHdfs_location(obj.fn.hdfs_location);
+        min.setFn(fnObject);
         break;
       case DATA_SOURCE:
       case HDFS_CACHE_POOL:
diff --git a/fe/src/main/java/org/apache/impala/catalog/local/CatalogdMetaProvider.java b/fe/src/main/java/org/apache/impala/catalog/local/CatalogdMetaProvider.java
index 9a36bb4..81432de 100644
--- a/fe/src/main/java/org/apache/impala/catalog/local/CatalogdMetaProvider.java
+++ b/fe/src/main/java/org/apache/impala/catalog/local/CatalogdMetaProvider.java
@@ -260,8 +260,6 @@ public class CatalogdMetaProvider implements MetaProvider {
    *
    * For details of the usage of Futures within the cache, see
    * {@link #loadWithCaching(String, String, Object, Callable).
-   *
-
    */
   final Cache<Object,Object> cache_;
 
@@ -1213,6 +1211,13 @@ public class CatalogdMetaProvider implements MetaProvider {
           invalidated);
       invalidateCacheForFunction(obj.fn.name.db_name, obj.fn.name.function_name,
           invalidated);
+      if (obj.fn.hdfs_location != null) {
+        // After the coordinator creates a function, it will also receive an invalidation
+        // update for this function from the statestored's broadcast. We shouldn't remove
+        // the libcache entry for this case, just mark it as needs refresh. LibCache will
+        // refresh the cached file if its mtime changes in HDFS.
+        FeSupport.NativeLibCacheSetNeedsRefresh(obj.fn.hdfs_location);
+      }
       break;
     case DATABASE:
       if (cache_.asMap().remove(DB_LIST_CACHE_KEY) != null) {
diff --git a/tests/common/skip.py b/tests/common/skip.py
index c4d03ca..151f2c8 100644
--- a/tests/common/skip.py
+++ b/tests/common/skip.py
@@ -232,13 +232,6 @@ class SkipIfCatalogV2:
       IMPALA_TEST_CLUSTER_PROPERTIES.is_catalog_v2_cluster(),
       reason="Test is specific to old implementation of catalog.")
 
-  # TODO: IMPALA-8486: fix invalidation or update tests to reflect expected behaviour.
-  @classmethod
-  def lib_cache_invalidation_broken(self):
-    return pytest.mark.skipif(
-      IMPALA_TEST_CLUSTER_PROPERTIES.is_catalog_v2_cluster(),
-      reason="IMPALA-8486: LibCache isn't invalidated by function DDL.")
-
   # TODO: IMPALA-7131: add support or update tests to reflect expected behaviour.
   @classmethod
   def data_sources_unsupported(self):
diff --git a/tests/query_test/test_udfs.py b/tests/query_test/test_udfs.py
index 5823ad3..3839044 100644
--- a/tests/query_test/test_udfs.py
+++ b/tests/query_test/test_udfs.py
@@ -24,7 +24,7 @@ from subprocess import call, check_call
 from tests.beeswax.impala_beeswax import ImpalaBeeswaxException
 from tests.common.impala_cluster import ImpalaCluster
 from tests.common.impala_test_suite import ImpalaTestSuite
-from tests.common.skip import SkipIfLocal, SkipIfCatalogV2
+from tests.common.skip import SkipIfLocal
 from tests.common.test_dimensions import (
     create_exec_option_dimension,
     create_exec_option_dimension_from_dict,
@@ -507,7 +507,6 @@ class TestUdfTargeted(TestUdfBase):
   def test_libs_with_same_filenames(self, vector, unique_database):
     self.run_test_case('QueryTest/libs_with_same_filenames', vector, use_db=unique_database)
 
-  @SkipIfCatalogV2.lib_cache_invalidation_broken()
   def test_udf_update_via_drop(self, vector, unique_database):
     """Test updating the UDF binary without restarting Impala. Dropping
     the function should remove the binary from the local cache."""
@@ -541,7 +540,6 @@ class TestUdfTargeted(TestUdfBase):
     self.execute_query_expect_success(self.client, create_fn_stmt, exec_options)
     self._run_query_all_impalads(exec_options, query_stmt, ["New UDF"])
 
-  @SkipIfCatalogV2.lib_cache_invalidation_broken()
   def test_udf_update_via_create(self, vector, unique_database):
     """Test updating the UDF binary without restarting Impala. Creating a new function
     from the library should refresh the cache."""