You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2019/08/13 18:37:16 UTC

[impala] branch master updated (a2f1ba1 -> 8db7f27)

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git.


    from a2f1ba1  Clean up stress tests in core
     new e1b93f2  IMPALA-8823: DROP TABLE support for insert-only ACID tables
     new 8db7f27  IMPALA-8752: Added Jaro-Winkler edit distance and similarity built-in function

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 be/src/exprs/expr-test.cc                          | 108 +++++++++++++
 be/src/exprs/string-functions-ir.cc                | 173 +++++++++++++++++++++
 be/src/exprs/string-functions.h                    |  26 ++++
 common/function-registry/impala_functions.py       |  16 ++
 .../org/apache/impala/compat/MetastoreShim.java    |  10 +-
 .../org/apache/impala/compat/MetastoreShim.java    |  44 +++++-
 .../impala/analysis/DropTableOrViewStmt.java       |   2 +-
 .../java/org/apache/impala/catalog/Catalog.java    |  54 +++++++
 .../{service => common}/TransactionKeepalive.java  |  39 +++--
 .../apache/impala/service/CatalogOpExecutor.java   |  25 +++
 .../java/org/apache/impala/service/Frontend.java   |  14 +-
 .../org/apache/impala/analysis/AnalyzerTest.java   |   3 +-
 .../functional-query/queries/QueryTest/acid.test   |  34 +++-
 tests/metadata/test_hms_integration.py             |  18 +++
 tests/query_test/test_acid.py                      |   2 +-
 15 files changed, 528 insertions(+), 40 deletions(-)
 rename fe/src/main/java/org/apache/impala/{service => common}/TransactionKeepalive.java (89%)


[impala] 02/02: IMPALA-8752: Added Jaro-Winkler edit distance and similarity built-in function

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 8db7f27ddde226f3efd3bddcc00665d0d9b99ef0
Author: luksan47 <no...@gmail.com>
AuthorDate: Wed Jul 17 02:17:20 2019 -0700

    IMPALA-8752: Added Jaro-Winkler edit distance and similarity built-in function
    
    The added functions return the Jaro/Jaro-Winkler similarity/distance
    of two strings. The algorithm calcuates the Jaro-Similarity of the
    strings, then adds more weight to the result if there are
    common prefixes. (Jaro-Winkler)
    For more detail, see:
    https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance
    
    Extended the algorithm with another optional parameter: boost threshold
    The prefix weight will only be applied if the Jaro-similarity
    exceeds the given threshold. By default, its value is 0.7.
    
    The new built-in functions are:
     * jaro_distance, jaro_dst
     * jaro_similarity, jaro_sim
     * jaro_winkler_distance, jw_dst
     * jaro_winkler_similarity, jw_sim
    
    Testing:
     * Added unit tests to expr-test.cc
     * Manual testing over 1400 word pairs from
       http://marvin.cs.uidaho.edu/misspell.html
       Results match Apache commons
    
    Change-Id: I64d7f461516c5e66cc27d62612bc8cc0e8f0178c
    Reviewed-on: http://gerrit.cloudera.org:8080/13870
    Reviewed-by: Zoltan Borok-Nagy <bo...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/exprs/expr-test.cc                    | 108 +++++++++++++++++
 be/src/exprs/string-functions-ir.cc          | 173 +++++++++++++++++++++++++++
 be/src/exprs/string-functions.h              |  26 ++++
 common/function-registry/impala_functions.py |  16 +++
 4 files changed, 323 insertions(+)

diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc
index 9cb4899..be80c2b 100644
--- a/be/src/exprs/expr-test.cc
+++ b/be/src/exprs/expr-test.cc
@@ -4013,6 +4013,114 @@ TEST_P(ExprTest, StringFunctions) {
   TestErrorString("le_dst(repeat('x', 256), 'z')",
       "levenshtein argument exceeds maximum length of 255 characters\n");
 
+  for (const string fn_name: { "jaro_dst", "jaro_distance" }) {
+    TestIsNull(fn_name + "('foo', NULL)", TYPE_DOUBLE);
+    TestIsNull(fn_name + "(NULL, 'foo')", TYPE_DOUBLE);
+    TestIsNull(fn_name + "(NULL, NULL)", TYPE_DOUBLE);
+    TestValue(fn_name + "('foo', 'foo')", TYPE_DOUBLE, 0.0);
+    TestValue(fn_name + "('foo', 'bar')", TYPE_DOUBLE, 1.0);
+    TestValue(fn_name + "('', '')", TYPE_DOUBLE, 0.0);
+    TestValue(fn_name + "('', 'jaro')", TYPE_DOUBLE, 1.0);
+    TestValue(fn_name + "('jaro', '')", TYPE_DOUBLE, 1.0);
+    TestValue(fn_name + "('crate', 'trace')", TYPE_DOUBLE, 0.2666666666666666);
+    TestValue(fn_name + "('dwayne', 'duane')", TYPE_DOUBLE, 0.1777777777777778);
+    TestValue(fn_name + "('martha', 'marhta')", TYPE_DOUBLE, 0.05555555555555558);
+    TestValue(fn_name + "('frog', 'fog')", TYPE_DOUBLE, 0.08333333333333337);
+    TestValue(fn_name + "('hello', 'haloa')", TYPE_DOUBLE, 0.2666666666666666);
+    TestValue(fn_name + "('atcg', 'tagc')", TYPE_DOUBLE, 0.1666666666666667);
+    TestErrorString(fn_name + "('z', repeat('x', 256))",
+        "jaro argument exceeds maximum length of 255 characters\n");
+    TestErrorString(fn_name + "(repeat('x', 256), 'z')",
+        "jaro argument exceeds maximum length of 255 characters\n");
+  }
+
+  for (const string fn_name: { "jaro_sim", "jaro_similarity" }) {
+    TestIsNull(fn_name + "('foo', NULL)", TYPE_DOUBLE);
+    TestIsNull(fn_name + "(NULL, 'foo')", TYPE_DOUBLE);
+    TestIsNull(fn_name + "(NULL, NULL)", TYPE_DOUBLE);
+    TestValue(fn_name + "('foo', 'foo')", TYPE_DOUBLE, 1.0);
+    TestValue(fn_name + "('foo', 'bar')", TYPE_DOUBLE, 0.0);
+    TestValue(fn_name + "('', '')", TYPE_DOUBLE, 1.0);
+    TestValue(fn_name + "('', 'jaro')", TYPE_DOUBLE, 0.0);
+    TestValue(fn_name + "('jaro', '')", TYPE_DOUBLE, 0.0);
+    TestValue(fn_name + "('crate', 'trace')", TYPE_DOUBLE, 0.7333333333333334);
+    TestValue(fn_name + "('dwayne', 'duane')", TYPE_DOUBLE, 0.82222222222222222);
+    TestValue(fn_name + "('martha', 'marhta')", TYPE_DOUBLE, 0.944444444444444444);
+    TestValue(fn_name + "('frog', 'fog')", TYPE_DOUBLE, 0.9166666666666666);
+    TestValue(fn_name + "('hello', 'haloa')", TYPE_DOUBLE, 0.73333333333333334);
+    TestValue(fn_name + "('atcg', 'tagc')", TYPE_DOUBLE, 0.8333333333333333);
+    TestErrorString(fn_name + "('z', repeat('x', 256))",
+        "jaro argument exceeds maximum length of 255 characters\n");
+    TestErrorString(fn_name + "(repeat('x', 256), 'z')",
+        "jaro argument exceeds maximum length of 255 characters\n");
+  }
+
+  for (const string fn_name: { "jaro_winkler_distance", "jw_dst" }) {
+    TestIsNull(fn_name + "('foo', NULL)", TYPE_DOUBLE);
+    TestIsNull(fn_name + "(NULL, 'foo')", TYPE_DOUBLE);
+    TestIsNull(fn_name + "(NULL, NULL)", TYPE_DOUBLE);
+    TestValue(fn_name + "('foo', 'foo')", TYPE_DOUBLE, 0.0);
+    TestValue(fn_name + "('foo', 'bar')", TYPE_DOUBLE, 1.0);
+    TestValue(fn_name + "('', '')", TYPE_DOUBLE, 0.0);
+    TestValue(fn_name + "('', 'jaro')", TYPE_DOUBLE, 1.0);
+    TestValue(fn_name + "('jaro', '')", TYPE_DOUBLE, 1.0);
+    TestValue(fn_name + "('crate', 'trace')", TYPE_DOUBLE, 0.2666666666666666);
+    TestValue(fn_name + "('crate', 'trace', 0.2)", TYPE_DOUBLE, 0.2666666666666666);
+    TestValue(fn_name + "('dwayne', 'duane')", TYPE_DOUBLE, 0.16);
+    TestValue(fn_name + "('martha', 'marhta', 0.0)", TYPE_DOUBLE, 0.05555555555555558);
+    TestValue(fn_name + "('martha', 'marhta')", TYPE_DOUBLE, 0.03888888888888886);
+    TestValue(fn_name + "('martha', 'marhta', 0.2)", TYPE_DOUBLE, 0.02222222222222225);
+    TestValue(fn_name + "('atcg', 'tagc')", TYPE_DOUBLE, 0.1666666666666667);
+    TestValue(fn_name + "('martha', 'marhta', 0.1, 0.99)", TYPE_DOUBLE,
+        0.05555555555555558);
+    TestValue(fn_name + "('dwayne', 'duane', 0.1, 0.9)", TYPE_DOUBLE, 0.1777777777777778);
+    TestErrorString(fn_name + "('z', repeat('x', 256))",
+        "jaro-winkler argument exceeds maximum length of 255 characters\n");
+    TestErrorString(fn_name + "(repeat('x', 256), 'z')",
+        "jaro-winkler argument exceeds maximum length of 255 characters\n");
+    TestErrorString(fn_name + "('foo', 'bar', 0.26)",
+        "jaro-winkler scaling factor values can range between 0.0 and 0.25\n");
+    TestErrorString(fn_name + "('foo', 'bar', -0.01)",
+        "jaro-winkler scaling factor values can range between 0.0 and 0.25\n");
+    TestErrorString(fn_name + "('foo', 'bar', 0.1, -0.01)",
+        "jaro-winkler boost threshold values can range between 0.0 and 1.0\n");
+    TestErrorString(fn_name + "('foo', 'bar', 0.1, 1.01)",
+        "jaro-winkler boost threshold values can range between 0.0 and 1.0\n");
+  }
+  for (const string fn_name: { "jaro_winkler_similarity", "jw_sim"}) {
+    TestIsNull(fn_name + "('foo', NULL)", TYPE_DOUBLE);
+    TestIsNull(fn_name + "(NULL, 'foo')", TYPE_DOUBLE);
+    TestIsNull(fn_name + "(NULL, NULL)", TYPE_DOUBLE);
+    TestValue(fn_name + "('foo', 'foo')", TYPE_DOUBLE, 1.0);
+    TestValue(fn_name + "('foo', 'bar')", TYPE_DOUBLE, 0.0);
+    TestValue(fn_name + "('', '')", TYPE_DOUBLE, 1.0);
+    TestValue(fn_name + "('', 'jaro')", TYPE_DOUBLE, 0.0);
+    TestValue(fn_name + "('jaro', '')", TYPE_DOUBLE, 0.0);
+    TestValue(fn_name + "('crate', 'trace')", TYPE_DOUBLE, 0.7333333333333334);
+    TestValue(fn_name + "('crate', 'trace', 0.2)", TYPE_DOUBLE, 0.7333333333333334);
+    TestValue(fn_name + "('dwayne', 'duane')", TYPE_DOUBLE, 0.84);
+    TestValue(fn_name + "('martha', 'marhta', 0.0)", TYPE_DOUBLE, 0.94444444444444442);
+    TestValue(fn_name + "('martha', 'marhta', 0.1)", TYPE_DOUBLE, 0.96111111111111111);
+    TestValue(fn_name + "('martha', 'marhta', 0.2)", TYPE_DOUBLE, 0.97777777777777777);
+    TestValue(fn_name + "('atcg', 'tagc')", TYPE_DOUBLE, 0.8333333333333333);;
+    TestValue(fn_name + "('martha', 'marhta', 0.1, 0.99)", TYPE_DOUBLE,
+        0.94444444444444442);
+    TestValue(fn_name + "('dwayne', 'duane', 0.1, 0.9)", TYPE_DOUBLE,
+        0.82222222222222222);
+    TestErrorString(fn_name + "('z', repeat('x', 256))",
+        "jaro-winkler argument exceeds maximum length of 255 characters\n");
+    TestErrorString(fn_name + "(repeat('x', 256), 'z')",
+        "jaro-winkler argument exceeds maximum length of 255 characters\n");
+    TestErrorString(fn_name + "('foo', 'bar', 0.26)",
+        "jaro-winkler scaling factor values can range between 0.0 and 0.25\n");
+    TestErrorString(fn_name + "('foo', 'bar', -0.01)",
+        "jaro-winkler scaling factor values can range between 0.0 and 0.25\n");
+    TestErrorString(fn_name + "('foo', 'bar', 0.1, -0.01)",
+        "jaro-winkler boost threshold values can range between 0.0 and 1.0\n");
+    TestErrorString(fn_name + "('foo', 'bar', 0.1, 1.01)",
+        "jaro-winkler boost threshold values can range between 0.0 and 1.0\n");
+  }
+
   TestStringValue("substring('Hello', 1)", "Hello");
   TestStringValue("substring('Hello', -2)", "lo");
   TestStringValue("substring('Hello', cast(0 as bigint))", "");
diff --git a/be/src/exprs/string-functions-ir.cc b/be/src/exprs/string-functions-ir.cc
index 67b7ce1..5606fcb 100644
--- a/be/src/exprs/string-functions-ir.cc
+++ b/be/src/exprs/string-functions-ir.cc
@@ -1167,4 +1167,177 @@ IntVal StringFunctions::Levenshtein(
 
   return IntVal(result);
 }
+
+// Based on https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance
+// Implements Jaro similarity
+DoubleVal StringFunctions::JaroSimilarity(
+    FunctionContext* ctx, const StringVal& s1, const StringVal& s2) {
+
+  int s1len = s1.len;
+  int s2len = s2.len;
+
+  // error if either input exceeds 255 characters
+  if (s1len > 255 || s2len > 255) {
+    ctx->SetError("jaro argument exceeds maximum length of 255 characters");
+    return DoubleVal(-1.0);
+  }
+
+  // short cut cases:
+  // - null strings
+  // - zero length strings
+  // - identical length and value strings
+  if (s1.is_null || s2.is_null) return DoubleVal::null();
+  if (s1len == 0 && s2len == 0) return DoubleVal(1.0);
+  if (s1len == 0 || s2len == 0) return DoubleVal(0.0);
+  if (s1len == s2len && memcmp(s1.ptr, s2.ptr, s1len) == 0) return DoubleVal(1.0);
+
+  // the window size to search for matches in the other string
+  int max_range = std::max(0, std::max(s1len, s2len) / 2 - 1);
+
+  int s1_matching[s1len];
+  int s2_matching[s2len];
+  std::fill_n(s1_matching, s1len, -1);
+  std::fill_n(s2_matching, s2len, -1);
+
+  // calculate matching characters
+  int matching_characters = 0;
+  for (int i = 0; i < s1len; i++) {
+    // matching window
+    int min_index = std::max(i - max_range, 0);
+    int max_index = std::min(i + max_range + 1, s2len);
+    if (min_index >= max_index) break;
+
+    for (int j = min_index; j < max_index; j++) {
+      if (s2_matching[j] == -1 && s1.ptr[i] == s2.ptr[j]) {
+        s1_matching[i] = i;
+        s2_matching[j] = j;
+        matching_characters++;
+        break;
+      }
+    }
+  }
+
+  if (matching_characters == 0) return DoubleVal(0.0);
+
+  // transpositions (one-way only)
+  double transpositions = 0.0;
+  for (int i = 0, s1i = 0, s2i = 0; i < matching_characters; i++) {
+    while (s1_matching[s1i] == -1) {
+      s1i++;
+    }
+    while (s2_matching[s2i] == -1) {
+      s2i++;
+    }
+    if (s1.ptr[s1i] != s2.ptr[s2i]) {
+      transpositions += 0.5;
+    }
+    s1i++;
+    s2i++;
+  }
+  double m = static_cast<double>(matching_characters);
+  double jaro_similarity = 1.0 / 3.0  * ( m / static_cast<double>(s1len)
+                                        + m / static_cast<double>(s2len)
+                                        + (m - transpositions) / m );
+
+  return DoubleVal(jaro_similarity);
+}
+
+DoubleVal StringFunctions::JaroDistance(
+    FunctionContext* ctx, const StringVal& s1, const StringVal& s2) {
+
+  DoubleVal jaro_similarity = StringFunctions::JaroSimilarity(ctx, s1, s2);
+  if (jaro_similarity.is_null) return DoubleVal::null();
+  if (jaro_similarity.val == -1.0) return DoubleVal(-1.0);
+  return DoubleVal(1.0 - jaro_similarity.val);
+}
+
+DoubleVal StringFunctions::JaroWinklerDistance(FunctionContext* ctx,
+      const StringVal& s1, const StringVal& s2) {
+  return StringFunctions::JaroWinklerDistance(ctx, s1, s2,
+    DoubleVal(0.1), DoubleVal(0.7));
+}
+
+DoubleVal StringFunctions::JaroWinklerDistance(FunctionContext* ctx,
+      const StringVal& s1, const StringVal& s2,
+      const DoubleVal& scaling_factor) {
+  return StringFunctions::JaroWinklerDistance(ctx, s1, s2,
+    scaling_factor, DoubleVal(0.7));
+}
+
+// Based on https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance
+// Implements Jaro-Winkler distance
+// Extended with boost_theshold: Winkler's modification only applies if Jaro exceeds it
+DoubleVal StringFunctions::JaroWinklerDistance(FunctionContext* ctx,
+      const StringVal& s1, const StringVal& s2,
+      const DoubleVal& scaling_factor, const DoubleVal& boost_threshold) {
+
+  DoubleVal jaro_winkler_similarity = StringFunctions::JaroWinklerSimilarity(
+    ctx, s1, s2, scaling_factor, boost_threshold);
+
+  if (jaro_winkler_similarity.is_null) return DoubleVal::null();
+  if (jaro_winkler_similarity.val == -1.0) return DoubleVal(-1.0);
+  return DoubleVal(1.0 - jaro_winkler_similarity.val);
+}
+
+DoubleVal StringFunctions::JaroWinklerSimilarity(FunctionContext* ctx,
+      const StringVal& s1, const StringVal& s2) {
+  return StringFunctions::JaroWinklerSimilarity(ctx, s1, s2,
+    DoubleVal(0.1), DoubleVal(0.7));
+}
+
+DoubleVal StringFunctions::JaroWinklerSimilarity(FunctionContext* ctx,
+      const StringVal& s1, const StringVal& s2,
+      const DoubleVal& scaling_factor) {
+  return StringFunctions::JaroWinklerSimilarity(ctx, s1, s2,
+    scaling_factor, DoubleVal(0.7));
+}
+
+// Based on https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance
+// Implements Jaro-Winkler similarity
+// Extended with boost_theshold: Winkler's modification only applies if Jaro exceeds it
+DoubleVal StringFunctions::JaroWinklerSimilarity(FunctionContext* ctx,
+      const StringVal& s1, const StringVal& s2,
+      const DoubleVal& scaling_factor, const DoubleVal& boost_threshold) {
+
+  constexpr int MAX_PREFIX_LENGTH = 4;
+  int s1len = s1.len;
+  int s2len = s2.len;
+
+  // error if either input exceeds 255 characters
+  if (s1len > 255 || s2len > 255) {
+    ctx->SetError("jaro-winkler argument exceeds maximum length of 255 characters");
+    return DoubleVal(-1.0);
+  }
+  // scaling factor has to be between 0.0 and 0.25
+  if (scaling_factor.val < 0.0 || scaling_factor.val > 0.25) {
+    ctx->SetError("jaro-winkler scaling factor values can range between 0.0 and 0.25");
+    return DoubleVal(-1.0);
+  }
+  // error if boost threshold is out of range 0.0..1.0
+  if (boost_threshold.val < 0.0 || boost_threshold.val > 1.0) {
+    ctx->SetError("jaro-winkler boost threshold values can range between 0.0 and 1.0");
+    return DoubleVal(-1.0);
+  }
+
+  if (s1.is_null || s2.is_null) return DoubleVal::null();
+
+  DoubleVal jaro_similarity = StringFunctions::JaroSimilarity(ctx, s1, s2);
+  if (jaro_similarity.is_null) return DoubleVal::null();
+  if (jaro_similarity.val == -1.0) return DoubleVal(-1.0);
+
+  double jaro_winkler_similarity = jaro_similarity.val;
+
+  if (jaro_similarity.val > boost_threshold.val) {
+    int common_length = std::min(MAX_PREFIX_LENGTH, std::min(s1len, s2len));
+    int common_prefix = 0;
+    while (common_prefix < common_length &&
+           s1.ptr[common_prefix] == s2.ptr[common_prefix]) {
+      common_prefix++;
+    }
+
+    jaro_winkler_similarity += common_prefix * scaling_factor.val *
+      (1.0 - jaro_similarity.val);
+  }
+  return DoubleVal(jaro_winkler_similarity);
+}
 }
diff --git a/be/src/exprs/string-functions.h b/be/src/exprs/string-functions.h
index 84ee595..8386461 100644
--- a/be/src/exprs/string-functions.h
+++ b/be/src/exprs/string-functions.h
@@ -158,6 +158,32 @@ class StringFunctions {
   static IntVal Levenshtein(
       FunctionContext* context, const StringVal& s1, const StringVal& s2);
 
+  static DoubleVal JaroDistance(
+      FunctionContext* ctx, const StringVal& s1, const StringVal& s2);
+
+  static DoubleVal JaroSimilarity(
+      FunctionContext* ctx, const StringVal& s1, const StringVal& s2);
+
+  static DoubleVal JaroWinklerDistance(FunctionContext* ctx, const StringVal& s1,
+      const StringVal& s2);
+
+  static DoubleVal JaroWinklerDistance(FunctionContext* ctx, const StringVal& s1,
+      const StringVal& s2, const DoubleVal& scaling_factor);
+
+  static DoubleVal JaroWinklerDistance(FunctionContext* ctx, const StringVal& s1,
+      const StringVal& s2, const DoubleVal& scaling_factor,
+      const DoubleVal& boost_threshold);
+
+  static DoubleVal JaroWinklerSimilarity(FunctionContext* ctx, const StringVal& s1,
+      const StringVal& s2);
+
+  static DoubleVal JaroWinklerSimilarity(FunctionContext* ctx, const StringVal& s1,
+      const StringVal& s2, const DoubleVal& scaling_factor);
+
+  static DoubleVal JaroWinklerSimilarity(FunctionContext* ctx, const StringVal& s1,
+      const StringVal& s2, const DoubleVal& scaling_factor,
+      const DoubleVal& boost_threshold);
+
  private:
   /// Templatized implementation of the actual string trimming function.
   /// The first parameter, 'D', is one of StringFunctions::TrimPosition values.
diff --git a/common/function-registry/impala_functions.py b/common/function-registry/impala_functions.py
index 06bf8ce..d7d1ceb 100644
--- a/common/function-registry/impala_functions.py
+++ b/common/function-registry/impala_functions.py
@@ -586,6 +586,22 @@ visible_functions = [
    'impala::StringFunctions::GetJsonObject'],
   [['levenshtein', 'le_dst'], 'INT', ['STRING', 'STRING'],
    '_ZN6impala15StringFunctions11LevenshteinEPN10impala_udf15FunctionContextERKNS1_9StringValES6_'],
+  [['jaro_distance', 'jaro_dst'], 'DOUBLE', ['STRING', 'STRING'],
+   '_ZN6impala15StringFunctions12JaroDistanceEPN10impala_udf15FunctionContextERKNS1_9StringValES6_'],
+  [['jaro_similarity', 'jaro_sim'], 'DOUBLE', ['STRING', 'STRING'],
+   '_ZN6impala15StringFunctions14JaroSimilarityEPN10impala_udf15FunctionContextERKNS1_9StringValES6_'],
+  [['jaro_winkler_distance', 'jw_dst'], 'DOUBLE', ['STRING', 'STRING'],
+   '_ZN6impala15StringFunctions19JaroWinklerDistanceEPN10impala_udf15FunctionContextERKNS1_9StringValES6_'],
+  [['jaro_winkler_distance', 'jw_dst'], 'DOUBLE', ['STRING', 'STRING', 'DOUBLE'],
+   '_ZN6impala15StringFunctions19JaroWinklerDistanceEPN10impala_udf15FunctionContextERKNS1_9StringValES6_RKNS1_9DoubleValE'],
+  [['jaro_winkler_distance', 'jw_dst'], 'DOUBLE', ['STRING', 'STRING', 'DOUBLE', 'DOUBLE'],
+   '_ZN6impala15StringFunctions19JaroWinklerDistanceEPN10impala_udf15FunctionContextERKNS1_9StringValES6_RKNS1_9DoubleValES9_'],
+  [['jaro_winkler_similarity', 'jw_sim'], 'DOUBLE', ['STRING', 'STRING'],
+   '_ZN6impala15StringFunctions21JaroWinklerSimilarityEPN10impala_udf15FunctionContextERKNS1_9StringValES6_'],
+  [['jaro_winkler_similarity', 'jw_sim'], 'DOUBLE', ['STRING', 'STRING', 'DOUBLE'],
+   '_ZN6impala15StringFunctions21JaroWinklerSimilarityEPN10impala_udf15FunctionContextERKNS1_9StringValES6_RKNS1_9DoubleValE'],
+  [['jaro_winkler_similarity', 'jw_sim'], 'DOUBLE', ['STRING', 'STRING', 'DOUBLE', 'DOUBLE'],
+   '_ZN6impala15StringFunctions21JaroWinklerSimilarityEPN10impala_udf15FunctionContextERKNS1_9StringValES6_RKNS1_9DoubleValES9_'],
 
   # Conditional Functions
   # Some of these have empty symbols because the BE special-cases them based on the


[impala] 01/02: IMPALA-8823: DROP TABLE support for insert-only ACID tables

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit e1b93f27f3fb2a0f3d1c754f8e0e988bad838f11
Author: Gabor Kaszab <ga...@cloudera.com>
AuthorDate: Wed Aug 7 15:02:30 2019 +0200

    IMPALA-8823: DROP TABLE support for insert-only ACID tables
    
    Enhances Impala to be able to drop insert-only transactional tables.
    In order to do this Impala acquires an exclusive table lock in HMS
    before performing the drop operation and releases the lock once
    dropping the table finished.
    INSERT statement does the locking and heartbeating on coordinator
    side but for DROP TABLE all of these are done from Catalog side. This
    means that alongside Impala coordinators now Catalog also does
    heartbeating towards HMS.
    
    Testing:
     - E2E test: Dropped a table, re-created it and dropped again to
       check if no locks remained in HMS.
     - E2E test: After dropping a table from Impala checked if Hive also
       sees it being dropped.
     - Manual test: With a hacked Impala that runs a drop table long
       enough I checked that there is a table lock entry in HMS during the
       execution and disappears once the query finishes.
    
    Change-Id: Ic41ca73268c4b75af5a08fe3dd1ada1df3f6fd34
    Reviewed-on: http://gerrit.cloudera.org:8080/14038
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../org/apache/impala/compat/MetastoreShim.java    | 10 +++-
 .../org/apache/impala/compat/MetastoreShim.java    | 44 +++++++++++++++---
 .../impala/analysis/DropTableOrViewStmt.java       |  2 +-
 .../java/org/apache/impala/catalog/Catalog.java    | 54 ++++++++++++++++++++++
 .../{service => common}/TransactionKeepalive.java  | 39 ++++++++++------
 .../apache/impala/service/CatalogOpExecutor.java   | 25 ++++++++++
 .../java/org/apache/impala/service/Frontend.java   | 14 ++----
 .../org/apache/impala/analysis/AnalyzerTest.java   |  3 +-
 .../functional-query/queries/QueryTest/acid.test   | 34 ++++++++++++--
 tests/metadata/test_hms_integration.py             | 18 ++++++++
 tests/query_test/test_acid.py                      |  2 +-
 11 files changed, 205 insertions(+), 40 deletions(-)

diff --git a/fe/src/compat-hive-2/java/org/apache/impala/compat/MetastoreShim.java b/fe/src/compat-hive-2/java/org/apache/impala/compat/MetastoreShim.java
index 93437e9..96bd997 100644
--- a/fe/src/compat-hive-2/java/org/apache/impala/compat/MetastoreShim.java
+++ b/fe/src/compat-hive-2/java/org/apache/impala/compat/MetastoreShim.java
@@ -329,6 +329,14 @@ public class MetastoreShim {
   /**
    * Hive-3 only function
    */
+  public static void releaseLock(IMetaStoreClient client, long lockId)
+      throws TransactionException {
+    throw new UnsupportedOperationException("releaseLock is not supported.");
+  }
+
+  /**
+   * Hive-3 only function
+   */
   public static boolean heartbeat(IMetaStoreClient client,
       long txnId, long lockId) throws TransactionException {
     throw new UnsupportedOperationException("heartbeat is not supported.");
@@ -338,7 +346,7 @@ public class MetastoreShim {
    * Hive-3 only function
    */
   public static long acquireLock(IMetaStoreClient client, long txnId,
-      List<LockComponent> lockComponents, int lockRetries, int retryWaitSeconds)
+      List<LockComponent> lockComponents)
       throws TransactionException {
     throw new UnsupportedOperationException("acquireLock is not supported.");
   }
diff --git a/fe/src/compat-hive-3/java/org/apache/impala/compat/MetastoreShim.java b/fe/src/compat-hive-3/java/org/apache/impala/compat/MetastoreShim.java
index 954327a..18cd0a7 100644
--- a/fe/src/compat-hive-3/java/org/apache/impala/compat/MetastoreShim.java
+++ b/fe/src/compat-hive-3/java/org/apache/impala/compat/MetastoreShim.java
@@ -72,11 +72,11 @@ import org.apache.impala.authorization.User;
 import org.apache.impala.common.ImpalaException;
 import org.apache.impala.common.Pair;
 import org.apache.impala.common.TransactionException;
+import org.apache.impala.common.TransactionKeepalive;
 import org.apache.impala.compat.HiveMetadataFormatUtils;
 import org.apache.impala.service.BackendConfig;
 import org.apache.impala.service.Frontend;
 import org.apache.impala.service.MetadataOp;
-import org.apache.impala.service.TransactionKeepalive;
 import org.apache.impala.thrift.TMetadataOpRequest;
 import org.apache.impala.thrift.TResultSet;
 import org.apache.log4j.Logger;
@@ -104,6 +104,13 @@ public class MetastoreShim {
   private static final String HIVESQL = "HIVESQL";
   private static final long MAJOR_VERSION = 3;
   private static boolean capabilitiestSet_ = false;
+
+  // Number of retries to acquire an HMS ACID lock.
+  private static final int LOCK_RETRIES = 10;
+
+  // Time interval between retries of acquiring an HMS ACID lock
+  private static final int LOCK_RETRY_WAIT_SECONDS = 3;
+
   /**
    * Wrapper around MetaStoreUtils.validateName() to deal with added arguments.
    */
@@ -539,18 +546,18 @@ public class MetastoreShim {
    * Creates a lock for the given lock components. Returns the acquired lock, this
    * might involve some waiting.
    * @param client is the HMS client to be used.
+   * @param txnId The transaction ID associated with the lock. Zero if the lock doesn't
+   * belong to a transaction.
    * @param lockComponents the lock components to include in this lock.
-   * @param lockRetries the number of retries to acquire the lock.
-   * @param retryWaitSeconds wait interval between retries.
    * @return the lock id
    * @throws TransactionException in case of failure
    */
   public static long acquireLock(IMetaStoreClient client, long txnId,
-      List<LockComponent> lockComponents, int lockRetries, int retryWaitSeconds)
+      List<LockComponent> lockComponents)
           throws TransactionException {
     LockRequestBuilder lockRequestBuilder = new LockRequestBuilder();
     lockRequestBuilder.setUser("Impala");
-    lockRequestBuilder.setTransactionId(txnId);
+    if (txnId > 0) lockRequestBuilder.setTransactionId(txnId);
     for (LockComponent lockComponent : lockComponents) {
       lockRequestBuilder.addLockComponent(lockComponent);
     }
@@ -559,9 +566,9 @@ public class MetastoreShim {
       LockResponse lockResponse = client.lock(lockRequest);
       long lockId = lockResponse.getLockid();
       int retries = 0;
-      while (lockResponse.getState() == LockState.WAITING && retries < lockRetries) {
+      while (lockResponse.getState() == LockState.WAITING && retries < LOCK_RETRIES) {
         try {
-          Thread.sleep(retryWaitSeconds * 1000);
+          Thread.sleep(LOCK_RETRY_WAIT_SECONDS * 1000);
           ++retries;
           lockResponse = client.checkLock(lockId);
         } catch (InterruptedException e) {
@@ -571,6 +578,14 @@ public class MetastoreShim {
         }
       }
       if (lockResponse.getState() == LockState.ACQUIRED) return lockId;
+      if (lockId > 0) {
+        try {
+          releaseLock(client, lockId);
+        } catch (TransactionException te) {
+          LOG.error("Failed to release lock as a cleanup step after acquiring a lock " +
+              "has failed: " + lockId + " " + te.getMessage());
+        }
+      }
       throw new TransactionException("Failed to acquire lock for transaction " +
           String.valueOf(txnId));
     } catch (TException e) {
@@ -579,6 +594,21 @@ public class MetastoreShim {
   }
 
   /**
+   * Releases a lock in HMS.
+   * @param client is the HMS client to be used.
+   * @param lockId is the lock ID to be released.
+   * @throws TransactionException
+   */
+  public static void releaseLock(IMetaStoreClient client, long lockId)
+      throws TransactionException {
+    try {
+      client.unlock(lockId);
+    } catch (Exception e) {
+      throw new TransactionException(e.getMessage());
+    }
+  }
+
+  /**
    * Allocates a write id for the given table.
    * @param client is the HMS client to be used.
    * @param txnId is the transaction id.
diff --git a/fe/src/main/java/org/apache/impala/analysis/DropTableOrViewStmt.java b/fe/src/main/java/org/apache/impala/analysis/DropTableOrViewStmt.java
index 047e285..86da283 100644
--- a/fe/src/main/java/org/apache/impala/analysis/DropTableOrViewStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/DropTableOrViewStmt.java
@@ -121,7 +121,7 @@ public class DropTableOrViewStmt extends StatementBase {
       if (dropTable_) {
         // To drop a view needs not write capabilities, only checks for tables.
         analyzer.checkTableCapability(table, Analyzer.OperationType.WRITE);
-        analyzer.ensureTableNotTransactional(table);
+        analyzer.ensureTableNotFullAcid(table);
       }
 
     } catch (TableLoadingException e) {
diff --git a/fe/src/main/java/org/apache/impala/catalog/Catalog.java b/fe/src/main/java/org/apache/impala/catalog/Catalog.java
index 67441dc..e73de73 100644
--- a/fe/src/main/java/org/apache/impala/catalog/Catalog.java
+++ b/fe/src/main/java/org/apache/impala/catalog/Catalog.java
@@ -18,6 +18,7 @@
 package org.apache.impala.catalog;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.List;
@@ -25,9 +26,18 @@ import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.atomic.AtomicReference;
 
+import org.apache.hadoop.hive.metastore.api.DataOperationType;
+import org.apache.hadoop.hive.metastore.api.LockComponent;
+import org.apache.hadoop.hive.metastore.api.LockLevel;
+import org.apache.hadoop.hive.metastore.api.LockType;
+
 import org.apache.impala.analysis.FunctionName;
 import org.apache.impala.authorization.AuthorizationPolicy;
 import org.apache.impala.catalog.MetaStoreClientPool.MetaStoreClient;
+import org.apache.impala.common.TransactionException;
+import org.apache.impala.common.TransactionKeepalive;
+import org.apache.impala.common.TransactionKeepalive.HeartbeatContext;
+import org.apache.impala.compat.MetastoreShim;
 import org.apache.impala.thrift.TCatalogObject;
 import org.apache.impala.thrift.TFunction;
 import org.apache.impala.thrift.TPartitionKeyValue;
@@ -88,12 +98,20 @@ public abstract class Catalog implements AutoCloseable {
   protected final CatalogObjectCache<AuthzCacheInvalidation> authzCacheInvalidation_ =
       new CatalogObjectCache<>();
 
+  // This member is responsible for heartbeating HMS locks and transactions.
+  private TransactionKeepalive transactionKeepalive_;
+
   /**
    * Creates a new instance of Catalog backed by a given MetaStoreClientPool.
    */
   public Catalog(MetaStoreClientPool metaStoreClientPool) {
     dataSources_ = new CatalogObjectCache<DataSource>();
     metaStoreClientPool_ = Preconditions.checkNotNull(metaStoreClientPool);
+    if (MetastoreShim.getMajorVersion() > 2) {
+      transactionKeepalive_ = new TransactionKeepalive(metaStoreClientPool_);
+    } else {
+      transactionKeepalive_ = null;
+    }
   }
 
   /**
@@ -647,4 +665,40 @@ public abstract class Catalog implements AutoCloseable {
   public static boolean keyEquals(TCatalogObject first, TCatalogObject second) {
     return toCatalogObjectKey(first).equals(toCatalogObjectKey(second));
   }
+
+  /**
+   * Creates an exclusive lock for a particular table and acquires it in the HMS. Starts
+   * heartbeating the lock. This function is for locks that doesn't belong to a
+   * transaction.
+   * @param dbName Name of the DB where the particular table is.
+   * @param tableName Name of the table where the lock is acquired.
+   * @throws TransactionException
+   */
+  public long lockTable(String dbName, String tableName, HeartbeatContext ctx)
+      throws TransactionException {
+    LockComponent lockComponent = new LockComponent();
+    lockComponent.setDbname(dbName);
+    lockComponent.setTablename(tableName);
+    lockComponent.setLevel(LockLevel.TABLE);
+    lockComponent.setType(LockType.EXCLUSIVE);
+    lockComponent.setOperationType(DataOperationType.NO_TXN);
+    List<LockComponent> lockComponents = Arrays.asList(lockComponent);
+    long lockId = -1L;
+    try (MetaStoreClient client = metaStoreClientPool_.getClient()) {
+      lockId = MetastoreShim.acquireLock(client.getHiveClient(), 0L, lockComponents);
+      transactionKeepalive_.addLock(lockId, ctx);
+    }
+    return lockId;
+  }
+
+  /**
+   * Releases a lock based on its ID from HMS and stops heartbeating it.
+   * @param lockId is the ID of the lock to clear.
+   */
+  public void releaseTableLock(long lockId) throws TransactionException {
+    try (MetaStoreClient client = metaStoreClientPool_.getClient()) {
+      transactionKeepalive_.deleteLock(lockId);
+      MetastoreShim.releaseLock(client.getHiveClient(), lockId);
+    }
+  }
 }
diff --git a/fe/src/main/java/org/apache/impala/service/TransactionKeepalive.java b/fe/src/main/java/org/apache/impala/common/TransactionKeepalive.java
similarity index 89%
rename from fe/src/main/java/org/apache/impala/service/TransactionKeepalive.java
rename to fe/src/main/java/org/apache/impala/common/TransactionKeepalive.java
index 21ccbce..f7166f8 100644
--- a/fe/src/main/java/org/apache/impala/service/TransactionKeepalive.java
+++ b/fe/src/main/java/org/apache/impala/common/TransactionKeepalive.java
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-package org.apache.impala.service;
+package org.apache.impala.common;
 
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -54,14 +54,29 @@ public class TransactionKeepalive {
 
   private final MetaStoreClientPool metaStoreClientPool_;
 
+  // Stores information for logging purposes. Stores either a TQueryCtx or a cause
+  // string. toString() returns the stored TQueryCtx if it is set or the string cause
+  // otherwise.
   public static class HeartbeatContext {
-    public TQueryCtx queryCtx;
-    public long creationTime;
+    private TQueryCtx queryCtx;
+    private String cause;
+    private long creationTime;
 
     public HeartbeatContext(TQueryCtx queryCtx, long creationTime) {
       this.queryCtx = queryCtx;
       this.creationTime = creationTime;
     }
+
+    public HeartbeatContext(String cause, long creationTime) {
+      this.queryCtx = null;
+      this.cause = "'" + cause + "'";
+      this.creationTime = creationTime;
+    }
+
+    public String toString() {
+      if (queryCtx != null) return queryCtx.query_id.toString();
+      return cause;
+    }
   }
 
   // Map of transactions
@@ -170,21 +185,19 @@ public class TransactionKeepalive {
           // Transaction or lock doesn't exist anymore, let's remove them.
           if (transactionId != 0) {
             LOG.warn("Transaction " + String.valueOf(transactionId) + " of query " +
-                context.queryCtx.query_id.toString() + " doesn't exist anymore. Stop " +
-                "heartbeating it.");
+                context.toString() + " doesn't exist anymore. Stop heartbeating it.");
             TransactionKeepalive.this.deleteTransaction(transactionId);
           }
           if (lockId != 0) {
             LOG.warn("Lock " + String.valueOf(lockId) + " of query " +
-                context.queryCtx.query_id.toString() + " doesn't exist anymore. Stop " +
-                "heartbeating it.");
+                context.toString() + " doesn't exist anymore. Stop heartbeating it.");
             TransactionKeepalive.this.deleteLock(lockId);
           }
         }
       } catch (TransactionException e) {
         LOG.warn("Caught exception during heartbeating transaction " +
             String.valueOf(transactionId) + " lock " + String.valueOf(lockId) +
-            " for query " + context.queryCtx.query_id.toString(), e);
+            " for query " + context.toString(), e);
       }
     }
   }
@@ -204,22 +217,20 @@ public class TransactionKeepalive {
   /**
    * Add transaction to heartbeat. Associated locks shouldn't be added.
    */
-  synchronized public void addTransaction(Long transactionId, TQueryCtx queryCtx) {
+  synchronized public void addTransaction(Long transactionId, HeartbeatContext ctx) {
     Preconditions.checkNotNull(transactionId);
-    Preconditions.checkNotNull(queryCtx);
+    Preconditions.checkNotNull(ctx);
     Preconditions.checkState(!transactions_.containsKey(transactionId));
-    HeartbeatContext ctx = new HeartbeatContext(queryCtx, System.nanoTime());
     transactions_.put(transactionId, ctx);
   }
 
   /**
    * Add lock to heartbeat. This should be a lock without a transaction context.
    */
-  synchronized public void addLock(Long lockId, TQueryCtx queryCtx) {
+  synchronized public void addLock(Long lockId, HeartbeatContext ctx) {
     Preconditions.checkNotNull(lockId);
-    Preconditions.checkNotNull(queryCtx);
+    Preconditions.checkNotNull(ctx);
     Preconditions.checkState(!locks_.containsKey(lockId));
-    HeartbeatContext ctx = new HeartbeatContext(queryCtx, System.nanoTime());
     locks_.put(lockId, ctx);
   }
 
diff --git a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
index 7ad8b36..643a551 100644
--- a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
+++ b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
@@ -100,6 +100,7 @@ import org.apache.impala.common.JniUtil;
 import org.apache.impala.common.Pair;
 import org.apache.impala.common.Reference;
 import org.apache.impala.common.TransactionException;
+import org.apache.impala.common.TransactionKeepalive.HeartbeatContext;
 import org.apache.impala.compat.MetastoreShim;
 import org.apache.impala.thrift.JniCatalogConstants;
 import org.apache.impala.thrift.TAlterDbParams;
@@ -1546,6 +1547,8 @@ public class CatalogOpExecutor {
    * Also drops all associated caching requests on the table and/or table's partitions,
    * uncaching all table data. If params.purge is true, table data is permanently
    * deleted.
+   * In case of transactional tables acquires an exclusive HMS table lock before
+   * executing the drop operation.
    */
   private void dropTableOrView(TDropTableOrViewParams params, TDdlExecResponse resp)
       throws ImpalaException {
@@ -1571,6 +1574,28 @@ public class CatalogOpExecutor {
       // or non-existence of the database will be handled down below.
     }
 
+    Table tbl = catalog_.getTableIfCachedNoThrow(tableName.getDb(), tableName.getTbl());
+    long lockId = -1;
+    if (tbl != null && !(tbl instanceof IncompleteTable) &&
+        AcidUtils.isTransactionalTable(tbl.getMetaStoreTable().getParameters())) {
+      HeartbeatContext ctx = new HeartbeatContext(
+          String.format("Drop table/view %s.%s", tableName.getDb(), tableName.getTbl()),
+          System.nanoTime());
+      lockId = catalog_.lockTable(tableName.getDb(), tableName.getTbl(), ctx);
+    }
+
+    try {
+      dropTableOrViewInternal(params, tableName, resp);
+    } finally {
+      if (lockId > 0) catalog_.releaseTableLock(lockId);
+    }
+  }
+
+  /**
+   * Helper function for dropTableOrView().
+   */
+  private void dropTableOrViewInternal(TDropTableOrViewParams params,
+      TableName tableName, TDdlExecResponse resp) throws ImpalaException {
     TCatalogObject removedObject = new TCatalogObject();
     synchronized (metastoreDdlLock_) {
       Db db = catalog_.getDb(params.getTable_name().db_name);
diff --git a/fe/src/main/java/org/apache/impala/service/Frontend.java b/fe/src/main/java/org/apache/impala/service/Frontend.java
index 72c8764..e09164b 100644
--- a/fe/src/main/java/org/apache/impala/service/Frontend.java
+++ b/fe/src/main/java/org/apache/impala/service/Frontend.java
@@ -103,6 +103,8 @@ import org.apache.impala.common.ImpalaException;
 import org.apache.impala.common.InternalException;
 import org.apache.impala.common.NotImplementedException;
 import org.apache.impala.common.TransactionException;
+import org.apache.impala.common.TransactionKeepalive;
+import org.apache.impala.common.TransactionKeepalive.HeartbeatContext;
 import org.apache.impala.compat.MetastoreShim;
 import org.apache.impala.hooks.QueryCompleteContext;
 import org.apache.impala.hooks.QueryEventHook;
@@ -188,12 +190,6 @@ public class Frontend {
   private static final int INCONSISTENT_METADATA_NUM_RETRIES =
       BackendConfig.INSTANCE.getLocalCatalogMaxFetchRetries();
 
-  // Number of retries to acquire an HMS ACID lock.
-  private static final int LOCK_RETRIES = 10;
-
-  // Time interval between retries of acquiring an HMS ACID lock
-  private static final int LOCK_RETRY_WAIT_SECONDS = 3;
-
   /**
    * Plan-time context that allows capturing various artifacts created
    * during the process.
@@ -1675,7 +1671,8 @@ public class Frontend {
     try (MetaStoreClient client = metaStoreClientPool_.getClient()) {
       IMetaStoreClient hmsClient = client.getHiveClient();
       long transactionId = MetastoreShim.openTransaction(hmsClient, "Impala");
-      transactionKeepalive_.addTransaction(transactionId, queryCtx);
+      HeartbeatContext ctx = new HeartbeatContext(queryCtx, System.nanoTime());
+      transactionKeepalive_.addTransaction(transactionId, ctx);
       return transactionId;
     }
   }
@@ -1763,8 +1760,7 @@ public class Frontend {
     }
     try (MetaStoreClient client = metaStoreClientPool_.getClient()) {
       IMetaStoreClient hmsClient = client.getHiveClient();
-      MetastoreShim.acquireLock(hmsClient, txnId, lockComponents, LOCK_RETRIES,
-          LOCK_RETRY_WAIT_SECONDS);
+      MetastoreShim.acquireLock(hmsClient, txnId, lockComponents);
     }
   }
 }
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzerTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzerTest.java
index 57803f1..85a977f 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzerTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzerTest.java
@@ -583,8 +583,7 @@ public class AnalyzerTest extends FrontendTestBase {
     AnalysisError(
         "drop table functional_orc_def.full_transactional_table",
         insertOnlyErrorForFullMsg);
-    AnalysisError("drop table functional.insert_only_transactional_table",
-        insertOnlyErrorMsg);
+    AnalyzesOk("drop table functional.insert_only_transactional_table");
 
     AnalysisError(
         "truncate table functional_orc_def.full_transactional_table",
diff --git a/testdata/workloads/functional-query/queries/QueryTest/acid.test b/testdata/workloads/functional-query/queries/QueryTest/acid.test
index dac75f4..2939685 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/acid.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/acid.test
@@ -1,5 +1,6 @@
 ====
 ---- HIVE_QUERY
+# Create a table with Hive and run insert, select, and drop from Impala on it.
 use $DATABASE;
 create table tt (x int) tblproperties (
   'transactional'='true',
@@ -14,6 +15,7 @@ select * from tt
 1
 ====
 ---- HIVE_QUERY
+# Insert from Hive to test refresh table from Impala in the below test.
 use $DATABASE;
 insert into tt values (2);
 ====
@@ -32,8 +34,7 @@ select * from tt order by x;
 1
 2
 ====
----- HIVE_QUERY
-use $DATABASE;
+---- QUERY
 insert overwrite table tt values (3);
 insert into tt values (4);
 ====
@@ -44,8 +45,7 @@ select * from tt order by x;
 3
 4
 ====
----- HIVE_QUERY
-use $DATABASE;
+---- QUERY
 create table upgraded_table (x int);
 insert into upgraded_table values (1);
 # Upgrade to the table to insert only acid when there are already values in it.
@@ -55,10 +55,34 @@ insert into upgraded_table values (2);
 insert into upgraded_table values (3);
 ====
 ---- QUERY
-invalidate metadata upgraded_table;
 select * from upgraded_table;
 ---- RESULTS
 1
 2
 3
 ====
+---- QUERY
+drop table tt;
+show tables;
+---- RESULTS
+'upgraded_table'
+====
+---- QUERY
+# After dropping the table I re-create and drop it again to check that all the locks
+# are released properly from HMS.
+create table tt (x int) tblproperties (
+  'transactional'='true',
+  'transactional_properties'='insert_only');
+====
+---- QUERY
+show tables;
+---- RESULTS
+'upgraded_table'
+'tt'
+====
+---- QUERY
+drop table tt;
+show tables;
+---- RESULTS
+'upgraded_table'
+====
diff --git a/tests/metadata/test_hms_integration.py b/tests/metadata/test_hms_integration.py
index 9b1fd04..8df23cb 100644
--- a/tests/metadata/test_hms_integration.py
+++ b/tests/metadata/test_hms_integration.py
@@ -726,6 +726,24 @@ class TestHmsIntegration(ImpalaTestSuite):
     assert '3,30' == hive_result[3]
     assert '4,41' == hive_result[4]
 
+  @SkipIfHive2.acid
+  def test_drop_acid_table(self, vector, unique_database):
+    """
+    Tests that a transactional table dropped by Impala is also dropped if we check from
+    Hive.
+    """
+    table_name = "%s.acid_insert" % unique_database
+    self.client.execute(
+      "create table %s (i int) "
+      "TBLPROPERTIES('transactional'='true', "
+      "'transactional_properties'='insert_only')" % table_name)
+    show_tables_result = self.run_stmt_in_hive("show tables in %s" % unique_database)
+    assert "acid_insert" in show_tables_result
+    self.client.execute("drop table %s" % table_name)
+    show_tables_result_after_drop = self.run_stmt_in_hive(
+        "show tables in %s" % unique_database)
+    assert "acid_insert" not in show_tables_result_after_drop
+
   @pytest.mark.execute_serially
   def test_change_table_name(self, vector):
     """
diff --git a/tests/query_test/test_acid.py b/tests/query_test/test_acid.py
index 6ac72dd..8b3ef75 100644
--- a/tests/query_test/test_acid.py
+++ b/tests/query_test/test_acid.py
@@ -46,7 +46,7 @@ class TestAcid(ImpalaTestSuite):
   @SkipIfADLS.hive
   @SkipIfIsilon.hive
   @SkipIfLocal.hive
-  def test_acid(self, vector, unique_database):
+  def test_acid_basic(self, vector, unique_database):
     self.run_test_case('QueryTest/acid', vector, use_db=unique_database)
 
   @SkipIfHive2.acid