You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2019/08/13 18:37:18 UTC
[impala] 02/02: IMPALA-8752: Added Jaro-Winkler edit distance and
similarity built-in function
This is an automated email from the ASF dual-hosted git repository.
tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 8db7f27ddde226f3efd3bddcc00665d0d9b99ef0
Author: luksan47 <no...@gmail.com>
AuthorDate: Wed Jul 17 02:17:20 2019 -0700
IMPALA-8752: Added Jaro-Winkler edit distance and similarity built-in function
The added functions return the Jaro/Jaro-Winkler similarity/distance
of two strings. The algorithm calculates the Jaro-Similarity of the
strings, then adds more weight to the result if there are
common prefixes. (Jaro-Winkler)
For more detail, see:
https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance
Extended the algorithm with another optional parameter: boost threshold.
The prefix weight will only be applied if the Jaro-similarity
exceeds the given threshold. By default, its value is 0.7.
The new built-in functions are:
* jaro_distance, jaro_dst
* jaro_similarity, jaro_sim
* jaro_winkler_distance, jw_dst
* jaro_winkler_similarity, jw_sim
Testing:
* Added unit tests to expr-test.cc
* Manual testing over 1400 word pairs from
http://marvin.cs.uidaho.edu/misspell.html
Results match Apache commons
Change-Id: I64d7f461516c5e66cc27d62612bc8cc0e8f0178c
Reviewed-on: http://gerrit.cloudera.org:8080/13870
Reviewed-by: Zoltan Borok-Nagy <bo...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
be/src/exprs/expr-test.cc | 108 +++++++++++++++++
be/src/exprs/string-functions-ir.cc | 173 +++++++++++++++++++++++++++
be/src/exprs/string-functions.h | 26 ++++
common/function-registry/impala_functions.py | 16 +++
4 files changed, 323 insertions(+)
diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc
index 9cb4899..be80c2b 100644
--- a/be/src/exprs/expr-test.cc
+++ b/be/src/exprs/expr-test.cc
@@ -4013,6 +4013,114 @@ TEST_P(ExprTest, StringFunctions) {
TestErrorString("le_dst(repeat('x', 256), 'z')",
"levenshtein argument exceeds maximum length of 255 characters\n");
+ for (const string fn_name: { "jaro_dst", "jaro_distance" }) {
+ TestIsNull(fn_name + "('foo', NULL)", TYPE_DOUBLE);
+ TestIsNull(fn_name + "(NULL, 'foo')", TYPE_DOUBLE);
+ TestIsNull(fn_name + "(NULL, NULL)", TYPE_DOUBLE);
+ TestValue(fn_name + "('foo', 'foo')", TYPE_DOUBLE, 0.0);
+ TestValue(fn_name + "('foo', 'bar')", TYPE_DOUBLE, 1.0);
+ TestValue(fn_name + "('', '')", TYPE_DOUBLE, 0.0);
+ TestValue(fn_name + "('', 'jaro')", TYPE_DOUBLE, 1.0);
+ TestValue(fn_name + "('jaro', '')", TYPE_DOUBLE, 1.0);
+ TestValue(fn_name + "('crate', 'trace')", TYPE_DOUBLE, 0.2666666666666666);
+ TestValue(fn_name + "('dwayne', 'duane')", TYPE_DOUBLE, 0.1777777777777778);
+ TestValue(fn_name + "('martha', 'marhta')", TYPE_DOUBLE, 0.05555555555555558);
+ TestValue(fn_name + "('frog', 'fog')", TYPE_DOUBLE, 0.08333333333333337);
+ TestValue(fn_name + "('hello', 'haloa')", TYPE_DOUBLE, 0.2666666666666666);
+ TestValue(fn_name + "('atcg', 'tagc')", TYPE_DOUBLE, 0.1666666666666667);
+ TestErrorString(fn_name + "('z', repeat('x', 256))",
+ "jaro argument exceeds maximum length of 255 characters\n");
+ TestErrorString(fn_name + "(repeat('x', 256), 'z')",
+ "jaro argument exceeds maximum length of 255 characters\n");
+ }
+
+ for (const string fn_name: { "jaro_sim", "jaro_similarity" }) {
+ TestIsNull(fn_name + "('foo', NULL)", TYPE_DOUBLE);
+ TestIsNull(fn_name + "(NULL, 'foo')", TYPE_DOUBLE);
+ TestIsNull(fn_name + "(NULL, NULL)", TYPE_DOUBLE);
+ TestValue(fn_name + "('foo', 'foo')", TYPE_DOUBLE, 1.0);
+ TestValue(fn_name + "('foo', 'bar')", TYPE_DOUBLE, 0.0);
+ TestValue(fn_name + "('', '')", TYPE_DOUBLE, 1.0);
+ TestValue(fn_name + "('', 'jaro')", TYPE_DOUBLE, 0.0);
+ TestValue(fn_name + "('jaro', '')", TYPE_DOUBLE, 0.0);
+ TestValue(fn_name + "('crate', 'trace')", TYPE_DOUBLE, 0.7333333333333334);
+ TestValue(fn_name + "('dwayne', 'duane')", TYPE_DOUBLE, 0.82222222222222222);
+ TestValue(fn_name + "('martha', 'marhta')", TYPE_DOUBLE, 0.944444444444444444);
+ TestValue(fn_name + "('frog', 'fog')", TYPE_DOUBLE, 0.9166666666666666);
+ TestValue(fn_name + "('hello', 'haloa')", TYPE_DOUBLE, 0.73333333333333334);
+ TestValue(fn_name + "('atcg', 'tagc')", TYPE_DOUBLE, 0.8333333333333333);
+ TestErrorString(fn_name + "('z', repeat('x', 256))",
+ "jaro argument exceeds maximum length of 255 characters\n");
+ TestErrorString(fn_name + "(repeat('x', 256), 'z')",
+ "jaro argument exceeds maximum length of 255 characters\n");
+ }
+
+ for (const string fn_name: { "jaro_winkler_distance", "jw_dst" }) {
+ TestIsNull(fn_name + "('foo', NULL)", TYPE_DOUBLE);
+ TestIsNull(fn_name + "(NULL, 'foo')", TYPE_DOUBLE);
+ TestIsNull(fn_name + "(NULL, NULL)", TYPE_DOUBLE);
+ TestValue(fn_name + "('foo', 'foo')", TYPE_DOUBLE, 0.0);
+ TestValue(fn_name + "('foo', 'bar')", TYPE_DOUBLE, 1.0);
+ TestValue(fn_name + "('', '')", TYPE_DOUBLE, 0.0);
+ TestValue(fn_name + "('', 'jaro')", TYPE_DOUBLE, 1.0);
+ TestValue(fn_name + "('jaro', '')", TYPE_DOUBLE, 1.0);
+ TestValue(fn_name + "('crate', 'trace')", TYPE_DOUBLE, 0.2666666666666666);
+ TestValue(fn_name + "('crate', 'trace', 0.2)", TYPE_DOUBLE, 0.2666666666666666);
+ TestValue(fn_name + "('dwayne', 'duane')", TYPE_DOUBLE, 0.16);
+ TestValue(fn_name + "('martha', 'marhta', 0.0)", TYPE_DOUBLE, 0.05555555555555558);
+ TestValue(fn_name + "('martha', 'marhta')", TYPE_DOUBLE, 0.03888888888888886);
+ TestValue(fn_name + "('martha', 'marhta', 0.2)", TYPE_DOUBLE, 0.02222222222222225);
+ TestValue(fn_name + "('atcg', 'tagc')", TYPE_DOUBLE, 0.1666666666666667);
+ TestValue(fn_name + "('martha', 'marhta', 0.1, 0.99)", TYPE_DOUBLE,
+ 0.05555555555555558);
+ TestValue(fn_name + "('dwayne', 'duane', 0.1, 0.9)", TYPE_DOUBLE, 0.1777777777777778);
+ TestErrorString(fn_name + "('z', repeat('x', 256))",
+ "jaro-winkler argument exceeds maximum length of 255 characters\n");
+ TestErrorString(fn_name + "(repeat('x', 256), 'z')",
+ "jaro-winkler argument exceeds maximum length of 255 characters\n");
+ TestErrorString(fn_name + "('foo', 'bar', 0.26)",
+ "jaro-winkler scaling factor values can range between 0.0 and 0.25\n");
+ TestErrorString(fn_name + "('foo', 'bar', -0.01)",
+ "jaro-winkler scaling factor values can range between 0.0 and 0.25\n");
+ TestErrorString(fn_name + "('foo', 'bar', 0.1, -0.01)",
+ "jaro-winkler boost threshold values can range between 0.0 and 1.0\n");
+ TestErrorString(fn_name + "('foo', 'bar', 0.1, 1.01)",
+ "jaro-winkler boost threshold values can range between 0.0 and 1.0\n");
+ }
+ for (const string fn_name: { "jaro_winkler_similarity", "jw_sim"}) {
+ TestIsNull(fn_name + "('foo', NULL)", TYPE_DOUBLE);
+ TestIsNull(fn_name + "(NULL, 'foo')", TYPE_DOUBLE);
+ TestIsNull(fn_name + "(NULL, NULL)", TYPE_DOUBLE);
+ TestValue(fn_name + "('foo', 'foo')", TYPE_DOUBLE, 1.0);
+ TestValue(fn_name + "('foo', 'bar')", TYPE_DOUBLE, 0.0);
+ TestValue(fn_name + "('', '')", TYPE_DOUBLE, 1.0);
+ TestValue(fn_name + "('', 'jaro')", TYPE_DOUBLE, 0.0);
+ TestValue(fn_name + "('jaro', '')", TYPE_DOUBLE, 0.0);
+ TestValue(fn_name + "('crate', 'trace')", TYPE_DOUBLE, 0.7333333333333334);
+ TestValue(fn_name + "('crate', 'trace', 0.2)", TYPE_DOUBLE, 0.7333333333333334);
+ TestValue(fn_name + "('dwayne', 'duane')", TYPE_DOUBLE, 0.84);
+ TestValue(fn_name + "('martha', 'marhta', 0.0)", TYPE_DOUBLE, 0.94444444444444442);
+ TestValue(fn_name + "('martha', 'marhta', 0.1)", TYPE_DOUBLE, 0.96111111111111111);
+ TestValue(fn_name + "('martha', 'marhta', 0.2)", TYPE_DOUBLE, 0.97777777777777777);
+ TestValue(fn_name + "('atcg', 'tagc')", TYPE_DOUBLE, 0.8333333333333333);;
+ TestValue(fn_name + "('martha', 'marhta', 0.1, 0.99)", TYPE_DOUBLE,
+ 0.94444444444444442);
+ TestValue(fn_name + "('dwayne', 'duane', 0.1, 0.9)", TYPE_DOUBLE,
+ 0.82222222222222222);
+ TestErrorString(fn_name + "('z', repeat('x', 256))",
+ "jaro-winkler argument exceeds maximum length of 255 characters\n");
+ TestErrorString(fn_name + "(repeat('x', 256), 'z')",
+ "jaro-winkler argument exceeds maximum length of 255 characters\n");
+ TestErrorString(fn_name + "('foo', 'bar', 0.26)",
+ "jaro-winkler scaling factor values can range between 0.0 and 0.25\n");
+ TestErrorString(fn_name + "('foo', 'bar', -0.01)",
+ "jaro-winkler scaling factor values can range between 0.0 and 0.25\n");
+ TestErrorString(fn_name + "('foo', 'bar', 0.1, -0.01)",
+ "jaro-winkler boost threshold values can range between 0.0 and 1.0\n");
+ TestErrorString(fn_name + "('foo', 'bar', 0.1, 1.01)",
+ "jaro-winkler boost threshold values can range between 0.0 and 1.0\n");
+ }
+
TestStringValue("substring('Hello', 1)", "Hello");
TestStringValue("substring('Hello', -2)", "lo");
TestStringValue("substring('Hello', cast(0 as bigint))", "");
diff --git a/be/src/exprs/string-functions-ir.cc b/be/src/exprs/string-functions-ir.cc
index 67b7ce1..5606fcb 100644
--- a/be/src/exprs/string-functions-ir.cc
+++ b/be/src/exprs/string-functions-ir.cc
@@ -1167,4 +1167,177 @@ IntVal StringFunctions::Levenshtein(
return IntVal(result);
}
+
+// Based on https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance
+// Implements Jaro similarity: 1.0 for identical inputs, 0.0 when nothing matches.
+// Returns NULL if either input is NULL. If either input is longer than 255, an error
+// is set on 'ctx' and the sentinel DoubleVal(-1.0) is returned; the wrappers in this
+// file test for that sentinel and pass it through.
+DoubleVal StringFunctions::JaroSimilarity(
+ FunctionContext* ctx, const StringVal& s1, const StringVal& s2) {
+
+ int s1len = s1.len;
+ int s2len = s2.len;
+
+ // error if either input exceeds 255 characters
+ // NOTE(review): the limit is applied to StringVal::len, presumably a byte count,
+ // so multi-byte characters would reach it sooner - confirm
+ if (s1len > 255 || s2len > 255) {
+ ctx->SetError("jaro argument exceeds maximum length of 255 characters");
+ return DoubleVal(-1.0);
+ }
+
+ // short cut cases:
+ // - null strings
+ // - zero length strings
+ // - identical length and value strings
+ // (note that the length check above runs before the NULL check)
+ if (s1.is_null || s2.is_null) return DoubleVal::null();
+ if (s1len == 0 && s2len == 0) return DoubleVal(1.0);
+ if (s1len == 0 || s2len == 0) return DoubleVal(0.0);
+ if (s1len == s2len && memcmp(s1.ptr, s2.ptr, s1len) == 0) return DoubleVal(1.0);
+
+ // the window size to search for matches in the other string
+ int max_range = std::max(0, std::max(s1len, s2len) / 2 - 1);
+
+ // s1_matching[i] / s2_matching[j] stay -1 while the position is unmatched and hold
+ // the position's own index once it has been paired with the other string.
+ // Both lengths are at most 255 here because of the check above.
+ int s1_matching[s1len];
+ int s2_matching[s2len];
+ std::fill_n(s1_matching, s1len, -1);
+ std::fill_n(s2_matching, s2len, -1);
+
+ // calculate matching characters
+ int matching_characters = 0;
+ for (int i = 0; i < s1len; i++) {
+ // matching window
+ int min_index = std::max(i - max_range, 0);
+ int max_index = std::min(i + max_range + 1, s2len);
+ // the window now starts at or past the end of s2, and it only moves further
+ // right as i grows, so no later position of s1 can match either
+ if (min_index >= max_index) break;
+
+ for (int j = min_index; j < max_index; j++) {
+ // a position of s2 is paired at most once; s1[i] takes the first free match
+ if (s2_matching[j] == -1 && s1.ptr[i] == s2.ptr[j]) {
+ s1_matching[i] = i;
+ s2_matching[j] = j;
+ matching_characters++;
+ break;
+ }
+ }
+ }
+
+ // also guards the division by m below
+ if (matching_characters == 0) return DoubleVal(0.0);
+
+ // transpositions (one-way only)
+ // walk the matched positions of both strings in order; each pair that differs adds
+ // 0.5, so two swapped characters count as a single transposition
+ double transpositions = 0.0;
+ for (int i = 0, s1i = 0, s2i = 0; i < matching_characters; i++) {
+ // skip to the next matched position in s1
+ while (s1_matching[s1i] == -1) {
+ s1i++;
+ }
+ // skip to the next matched position in s2
+ while (s2_matching[s2i] == -1) {
+ s2i++;
+ }
+ if (s1.ptr[s1i] != s2.ptr[s2i]) {
+ transpositions += 0.5;
+ }
+ s1i++;
+ s2i++;
+ }
+ // jaro = 1/3 * (m / |s1| + m / |s2| + (m - t) / m)
+ // where m = number of matches and t = number of transpositions
+ double m = static_cast<double>(matching_characters);
+ double jaro_similarity = 1.0 / 3.0 * ( m / static_cast<double>(s1len)
+ + m / static_cast<double>(s2len)
+ + (m - transpositions) / m );
+
+ return DoubleVal(jaro_similarity);
+}
+
+// Implements Jaro distance, defined as 1.0 - JaroSimilarity(s1, s2).
+// Returns NULL when JaroSimilarity() returns NULL.
+DoubleVal StringFunctions::JaroDistance(
+ FunctionContext* ctx, const StringVal& s1, const StringVal& s2) {
+
+ DoubleVal jaro_similarity = StringFunctions::JaroSimilarity(ctx, s1, s2);
+ if (jaro_similarity.is_null) return DoubleVal::null();
+ // -1.0 is the sentinel JaroSimilarity() returns after setting an error on 'ctx';
+ // pass it through unchanged instead of turning it into a distance of 2.0
+ if (jaro_similarity.val == -1.0) return DoubleVal(-1.0);
+ return DoubleVal(1.0 - jaro_similarity.val);
+}
+
+// Two-argument overload: Jaro-Winkler distance with the default scaling factor (0.1)
+// and the default boost threshold (0.7).
+DoubleVal StringFunctions::JaroWinklerDistance(FunctionContext* ctx,
+ const StringVal& s1, const StringVal& s2) {
+ return StringFunctions::JaroWinklerDistance(ctx, s1, s2,
+ DoubleVal(0.1), DoubleVal(0.7));
+}
+
+// Three-argument overload: Jaro-Winkler distance with a caller-supplied scaling
+// factor and the default boost threshold (0.7).
+DoubleVal StringFunctions::JaroWinklerDistance(FunctionContext* ctx,
+ const StringVal& s1, const StringVal& s2,
+ const DoubleVal& scaling_factor) {
+ return StringFunctions::JaroWinklerDistance(ctx, s1, s2,
+ scaling_factor, DoubleVal(0.7));
+}
+
+// Based on https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance
+// Implements Jaro-Winkler distance
+// Extended with boost_threshold: Winkler's modification only applies if Jaro exceeds it
+// Computed as 1.0 - JaroWinklerSimilarity(...); all argument validation is done by
+// JaroWinklerSimilarity().
+DoubleVal StringFunctions::JaroWinklerDistance(FunctionContext* ctx,
+ const StringVal& s1, const StringVal& s2,
+ const DoubleVal& scaling_factor, const DoubleVal& boost_threshold) {
+
+ DoubleVal jaro_winkler_similarity = StringFunctions::JaroWinklerSimilarity(
+ ctx, s1, s2, scaling_factor, boost_threshold);
+
+ if (jaro_winkler_similarity.is_null) return DoubleVal::null();
+ // -1.0 is the sentinel returned after an error was set on 'ctx'; pass it through
+ // unchanged instead of turning it into a distance of 2.0
+ if (jaro_winkler_similarity.val == -1.0) return DoubleVal(-1.0);
+ return DoubleVal(1.0 - jaro_winkler_similarity.val);
+}
+
+// Two-argument overload: Jaro-Winkler similarity with the default scaling factor
+// (0.1) and the default boost threshold (0.7).
+DoubleVal StringFunctions::JaroWinklerSimilarity(FunctionContext* ctx,
+ const StringVal& s1, const StringVal& s2) {
+ return StringFunctions::JaroWinklerSimilarity(ctx, s1, s2,
+ DoubleVal(0.1), DoubleVal(0.7));
+}
+
+// Three-argument overload: Jaro-Winkler similarity with a caller-supplied scaling
+// factor and the default boost threshold (0.7).
+DoubleVal StringFunctions::JaroWinklerSimilarity(FunctionContext* ctx,
+ const StringVal& s1, const StringVal& s2,
+ const DoubleVal& scaling_factor) {
+ return StringFunctions::JaroWinklerSimilarity(ctx, s1, s2,
+ scaling_factor, DoubleVal(0.7));
+}
+
+// Based on https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance
+// Implements Jaro-Winkler similarity
+// Extended with boost_threshold: Winkler's modification only applies if Jaro exceeds it
+// Result: jaro + common_prefix * scaling_factor * (1.0 - jaro), where common_prefix
+// is the length of the shared prefix, capped at MAX_PREFIX_LENGTH.
+// On invalid arguments an error is set on 'ctx' and the sentinel DoubleVal(-1.0) is
+// returned. Returns NULL if either string is NULL.
+DoubleVal StringFunctions::JaroWinklerSimilarity(FunctionContext* ctx,
+ const StringVal& s1, const StringVal& s2,
+ const DoubleVal& scaling_factor, const DoubleVal& boost_threshold) {
+
+ // at most this many leading characters contribute to the prefix boost
+ constexpr int MAX_PREFIX_LENGTH = 4;
+ int s1len = s1.len;
+ int s2len = s2.len;
+
+ // error if either input exceeds 255 characters
+ if (s1len > 255 || s2len > 255) {
+ ctx->SetError("jaro-winkler argument exceeds maximum length of 255 characters");
+ return DoubleVal(-1.0);
+ }
+ // scaling factor has to be between 0.0 and 0.25
+ // (with a prefix of at most 4, the boost term is then at most 1.0 - jaro, so the
+ // result cannot exceed 1.0)
+ // NOTE(review): scaling_factor.is_null and boost_threshold.is_null are never
+ // checked, so a NULL argument is validated and used through whatever 'val' holds
+ // - confirm this is intended
+ if (scaling_factor.val < 0.0 || scaling_factor.val > 0.25) {
+ ctx->SetError("jaro-winkler scaling factor values can range between 0.0 and 0.25");
+ return DoubleVal(-1.0);
+ }
+ // error if boost threshold is out of range 0.0..1.0
+ if (boost_threshold.val < 0.0 || boost_threshold.val > 1.0) {
+ ctx->SetError("jaro-winkler boost threshold values can range between 0.0 and 1.0");
+ return DoubleVal(-1.0);
+ }
+
+ // argument validation above runs before the NULL check on the strings
+ if (s1.is_null || s2.is_null) return DoubleVal::null();
+
+ DoubleVal jaro_similarity = StringFunctions::JaroSimilarity(ctx, s1, s2);
+ if (jaro_similarity.is_null) return DoubleVal::null();
+ // pass through the error sentinel from JaroSimilarity()
+ if (jaro_similarity.val == -1.0) return DoubleVal(-1.0);
+
+ double jaro_winkler_similarity = jaro_similarity.val;
+
+ // the prefix boost applies only when the plain Jaro similarity is strictly greater
+ // than the threshold
+ if (jaro_similarity.val > boost_threshold.val) {
+ // never read past the end of the shorter string
+ int common_length = std::min(MAX_PREFIX_LENGTH, std::min(s1len, s2len));
+ int common_prefix = 0;
+ while (common_prefix < common_length &&
+ s1.ptr[common_prefix] == s2.ptr[common_prefix]) {
+ common_prefix++;
+ }
+
+ jaro_winkler_similarity += common_prefix * scaling_factor.val *
+ (1.0 - jaro_similarity.val);
+ }
+ return DoubleVal(jaro_winkler_similarity);
+}
}
diff --git a/be/src/exprs/string-functions.h b/be/src/exprs/string-functions.h
index 84ee595..8386461 100644
--- a/be/src/exprs/string-functions.h
+++ b/be/src/exprs/string-functions.h
@@ -158,6 +158,32 @@ class StringFunctions {
static IntVal Levenshtein(
FunctionContext* context, const StringVal& s1, const StringVal& s2);
+ /// Returns the Jaro distance of 's1' and 's2', i.e. 1.0 - JaroSimilarity(s1, s2).
+ /// Returns NULL if either input is NULL. Sets an error on 'ctx' if either input is
+ /// longer than 255.
+ static DoubleVal JaroDistance(
+ FunctionContext* ctx, const StringVal& s1, const StringVal& s2);
+
+ /// Returns the Jaro similarity of 's1' and 's2': 1.0 for identical inputs, 0.0 when
+ /// no characters match. Returns NULL if either input is NULL. Sets an error on
+ /// 'ctx' and returns -1.0 if either input is longer than 255.
+ static DoubleVal JaroSimilarity(
+ FunctionContext* ctx, const StringVal& s1, const StringVal& s2);
+
+ /// Returns the Jaro-Winkler distance of 's1' and 's2', i.e.
+ /// 1.0 - JaroWinklerSimilarity(...). This overload uses the default scaling factor
+ /// (0.1) and the default boost threshold (0.7).
+ static DoubleVal JaroWinklerDistance(FunctionContext* ctx, const StringVal& s1,
+ const StringVal& s2);
+
+ /// As above, with a caller-supplied 'scaling_factor' (valid range 0.0..0.25) and the
+ /// default boost threshold (0.7).
+ static DoubleVal JaroWinklerDistance(FunctionContext* ctx, const StringVal& s1,
+ const StringVal& s2, const DoubleVal& scaling_factor);
+
+ /// As above, with a caller-supplied 'scaling_factor' (valid range 0.0..0.25) and
+ /// 'boost_threshold' (valid range 0.0..1.0). Out-of-range values set an error on
+ /// 'ctx'.
+ static DoubleVal JaroWinklerDistance(FunctionContext* ctx, const StringVal& s1,
+ const StringVal& s2, const DoubleVal& scaling_factor,
+ const DoubleVal& boost_threshold);
+
+ /// Returns the Jaro-Winkler similarity of 's1' and 's2': the Jaro similarity plus a
+ /// bonus for a shared prefix of up to 4 characters. This overload uses the default
+ /// scaling factor (0.1) and the default boost threshold (0.7).
+ static DoubleVal JaroWinklerSimilarity(FunctionContext* ctx, const StringVal& s1,
+ const StringVal& s2);
+
+ /// As above, with a caller-supplied 'scaling_factor' (valid range 0.0..0.25) and the
+ /// default boost threshold (0.7).
+ static DoubleVal JaroWinklerSimilarity(FunctionContext* ctx, const StringVal& s1,
+ const StringVal& s2, const DoubleVal& scaling_factor);
+
+ /// As above, with a caller-supplied 'scaling_factor' (valid range 0.0..0.25) and
+ /// 'boost_threshold' (valid range 0.0..1.0). The prefix bonus is applied only when
+ /// the Jaro similarity is greater than 'boost_threshold'. Out-of-range values set an
+ /// error on 'ctx'.
+ static DoubleVal JaroWinklerSimilarity(FunctionContext* ctx, const StringVal& s1,
+ const StringVal& s2, const DoubleVal& scaling_factor,
+ const DoubleVal& boost_threshold);
+
private:
/// Templatized implementation of the actual string trimming function.
/// The first parameter, 'D', is one of StringFunctions::TrimPosition values.
diff --git a/common/function-registry/impala_functions.py b/common/function-registry/impala_functions.py
index 06bf8ce..d7d1ceb 100644
--- a/common/function-registry/impala_functions.py
+++ b/common/function-registry/impala_functions.py
@@ -586,6 +586,22 @@ visible_functions = [
'impala::StringFunctions::GetJsonObject'],
[['levenshtein', 'le_dst'], 'INT', ['STRING', 'STRING'],
'_ZN6impala15StringFunctions11LevenshteinEPN10impala_udf15FunctionContextERKNS1_9StringValES6_'],
+ [['jaro_distance', 'jaro_dst'], 'DOUBLE', ['STRING', 'STRING'],
+ '_ZN6impala15StringFunctions12JaroDistanceEPN10impala_udf15FunctionContextERKNS1_9StringValES6_'],
+ [['jaro_similarity', 'jaro_sim'], 'DOUBLE', ['STRING', 'STRING'],
+ '_ZN6impala15StringFunctions14JaroSimilarityEPN10impala_udf15FunctionContextERKNS1_9StringValES6_'],
+ [['jaro_winkler_distance', 'jw_dst'], 'DOUBLE', ['STRING', 'STRING'],
+ '_ZN6impala15StringFunctions19JaroWinklerDistanceEPN10impala_udf15FunctionContextERKNS1_9StringValES6_'],
+ [['jaro_winkler_distance', 'jw_dst'], 'DOUBLE', ['STRING', 'STRING', 'DOUBLE'],
+ '_ZN6impala15StringFunctions19JaroWinklerDistanceEPN10impala_udf15FunctionContextERKNS1_9StringValES6_RKNS1_9DoubleValE'],
+ [['jaro_winkler_distance', 'jw_dst'], 'DOUBLE', ['STRING', 'STRING', 'DOUBLE', 'DOUBLE'],
+ '_ZN6impala15StringFunctions19JaroWinklerDistanceEPN10impala_udf15FunctionContextERKNS1_9StringValES6_RKNS1_9DoubleValES9_'],
+ [['jaro_winkler_similarity', 'jw_sim'], 'DOUBLE', ['STRING', 'STRING'],
+ '_ZN6impala15StringFunctions21JaroWinklerSimilarityEPN10impala_udf15FunctionContextERKNS1_9StringValES6_'],
+ [['jaro_winkler_similarity', 'jw_sim'], 'DOUBLE', ['STRING', 'STRING', 'DOUBLE'],
+ '_ZN6impala15StringFunctions21JaroWinklerSimilarityEPN10impala_udf15FunctionContextERKNS1_9StringValES6_RKNS1_9DoubleValE'],
+ [['jaro_winkler_similarity', 'jw_sim'], 'DOUBLE', ['STRING', 'STRING', 'DOUBLE', 'DOUBLE'],
+ '_ZN6impala15StringFunctions21JaroWinklerSimilarityEPN10impala_udf15FunctionContextERKNS1_9StringValES6_RKNS1_9DoubleValES9_'],
# Conditional Functions
# Some of these have empty symbols because the BE special-cases them based on the