You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2019/02/13 23:14:23 UTC
[impala] branch master updated: IMPALA-8187: UDF samples hide
symbols by default
This is an automated email from the ASF dual-hosted git repository.
tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new 55b9c89 IMPALA-8187: UDF samples hide symbols by default
55b9c89 is described below
commit 55b9c899b1c1f5d0e6d7fde5eff677bb015c2e54
Author: Tim Armstrong <ta...@cloudera.com>
AuthorDate: Mon Feb 11 15:02:31 2019 -0800
IMPALA-8187: UDF samples hide symbols by default
Testing:
Ran UDF tests, confirmed that changing the compile flags without adding
IMPALA_UDF_EXPORT caused them to fail because they couldn't find
the symbols.
Added a test to check that symbols are actually hidden.
Change-Id: Ie17b74b9bce9cc6962393017879b65409ce23b28
Reviewed-on: http://gerrit.cloudera.org:8080/12451
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
be/CMakeLists.txt | 10 +++++-
be/src/testutil/CMakeLists.txt | 2 ++
be/src/testutil/test-udas.cc | 57 +++++++++++++++++++++++++++++
be/src/testutil/test-udfs.cc | 67 ++++++++++++++++++++++++++++++++++-
be/src/udf/udf.h | 6 ++++
be/src/udf_samples/CMakeLists.txt | 9 ++++-
be/src/udf_samples/hyperloglog-uda.cc | 5 +++
be/src/udf_samples/uda-sample.cc | 15 ++++++++
be/src/udf_samples/udf-sample.cc | 1 +
tests/query_test/test_udfs.py | 19 ++++++++++
10 files changed, 188 insertions(+), 3 deletions(-)
diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt
index ad163f2..0405b5c 100644
--- a/be/CMakeLists.txt
+++ b/be/CMakeLists.txt
@@ -256,6 +256,13 @@ endif()
# -inline: inline with low threshold to get rid of trivial accessor functions.
set(LLVM_OPT_IR_FLAGS "-inline" "-inlinehint-threshold=10" "-inline-threshold=10")
+# Additional compile flags that will hide symbols by default, e.g. for building
+# UDFs. HIDE_SYMBOLS is a single space-separated string (for COMPILE_FLAGS) and
+# HIDE_SYMBOLS_ARGS is the same flags split into a list (for custom command args).
+set(HIDE_SYMBOLS "-fvisibility=hidden -fvisibility-inlines-hidden")
+set(HIDE_SYMBOLS_ARGS "${HIDE_SYMBOLS}")
+separate_arguments(HIDE_SYMBOLS_ARGS)
+
# setup doc generation with Doxygen
find_package(Doxygen)
if (DOXYGEN_FOUND)
@@ -650,7 +657,8 @@ function(COMPILE_TO_IR SRC_FILE)
set(OUTPUT_FILE "${LIBRARY_OUTPUT_PATH}/${BASE_NAME}.ll")
add_custom_command(
OUTPUT ${OUTPUT_FILE}
- COMMAND ${LLVM_CLANG_EXECUTABLE} ${CLANG_IR_CXX_FLAGS} ${CLANG_INCLUDE_FLAGS} ${SRC_FILE} -o ${OUTPUT_FILE}
+ COMMAND ${LLVM_CLANG_EXECUTABLE} ${CLANG_IR_CXX_FLAGS} ${HIDE_SYMBOLS_ARGS}
+ ${CLANG_INCLUDE_FLAGS} ${SRC_FILE} -o ${OUTPUT_FILE}
DEPENDS ${SRC_FILE})
add_custom_target(${BASE_NAME}-ir ALL DEPENDS ${OUTPUT_FILE})
endfunction(COMPILE_TO_IR)
diff --git a/be/src/testutil/CMakeLists.txt b/be/src/testutil/CMakeLists.txt
index d95c914..828cf6a 100644
--- a/be/src/testutil/CMakeLists.txt
+++ b/be/src/testutil/CMakeLists.txt
@@ -37,10 +37,12 @@ add_dependencies(TestUtil gen-deps)
target_link_libraries(TestUtil security-test-for-impala)
add_library(TestUdfs SHARED test-udfs.cc)
+set_target_properties(TestUdfs PROPERTIES COMPILE_FLAGS "${HIDE_SYMBOLS}")
add_dependencies(TestUdfs gen-deps)
COMPILE_TO_IR(test-udfs.cc)
add_dependencies(test-udfs-ir gen-deps)
add_library(TestUdas SHARED test-udas.cc)
+set_target_properties(TestUdas PROPERTIES COMPILE_FLAGS "${HIDE_SYMBOLS}")
add_dependencies(TestUdas gen-deps)
diff --git a/be/src/testutil/test-udas.cc b/be/src/testutil/test-udas.cc
index c77ae9f..c0b0552 100644
--- a/be/src/testutil/test-udas.cc
+++ b/be/src/testutil/test-udas.cc
@@ -24,38 +24,59 @@
using namespace impala_udf;
+IMPALA_UDF_EXPORT
void TwoArgInit(FunctionContext*, IntVal*) {}
+IMPALA_UDF_EXPORT
void TwoArgUpdate(FunctionContext*, const IntVal&, const StringVal&, IntVal*) {}
+IMPALA_UDF_EXPORT
void TwoArgMerge(FunctionContext*, const IntVal&, IntVal*) {}
+IMPALA_UDF_EXPORT
void VarArgInit(FunctionContext*, IntVal*) {}
+IMPALA_UDF_EXPORT
void VarArgUpdate(FunctionContext*, const DoubleVal&, int, const StringVal*, IntVal*) {}
+IMPALA_UDF_EXPORT
void VarArgMerge(FunctionContext*, const IntVal&, IntVal*) {}
// Defines Agg(<some args>) returns int
+IMPALA_UDF_EXPORT
void AggUpdate(FunctionContext*, const IntVal&, IntVal*) {}
+IMPALA_UDF_EXPORT
void AggUpdate(FunctionContext*, const IntVal&, const IntVal&, IntVal*) {}
// Update function intentionally not called *Update for FE testing.
+IMPALA_UDF_EXPORT
void AggFn(FunctionContext*, const IntVal&, IntVal*) {}
+IMPALA_UDF_EXPORT
void AggInit(FunctionContext*, IntVal*) {}
+IMPALA_UDF_EXPORT
void AggMerge(FunctionContext*, const IntVal&, IntVal*) {}
+IMPALA_UDF_EXPORT
IntVal AggSerialize(FunctionContext*, const IntVal& i) { return i; }
+IMPALA_UDF_EXPORT
IntVal AggFinalize(FunctionContext*, const IntVal&i) { return i; }
// Defines Agg(<some args>) returns string intermediate string
+IMPALA_UDF_EXPORT
void AggUpdate(FunctionContext*, const StringVal&, const DoubleVal&, StringVal*) {}
+IMPALA_UDF_EXPORT
void Agg2Update(FunctionContext*, const StringVal&, const DoubleVal&, StringVal*) {}
+IMPALA_UDF_EXPORT
void Agg(FunctionContext*, const StringVal&, const DoubleVal&, StringVal*) {}
+IMPALA_UDF_EXPORT
void AggInit(FunctionContext*, StringVal*){}
+IMPALA_UDF_EXPORT
void AggMerge(FunctionContext*, const StringVal&, StringVal*) {}
+IMPALA_UDF_EXPORT
StringVal AggSerialize(FunctionContext*, const StringVal& v) { return v;}
+IMPALA_UDF_EXPORT
StringVal AggFinalize(FunctionContext*, const StringVal& v) {
return v;
}
// Defines AggIntermediate(int) returns BIGINT intermediate STRING
+IMPALA_UDF_EXPORT
void AggIntermediate(FunctionContext* context, const IntVal&, StringVal*) {}
static void ValidateFunctionContext(const FunctionContext* context) {
assert(context->GetNumArgs() == 1);
@@ -63,15 +84,19 @@ static void ValidateFunctionContext(const FunctionContext* context) {
assert(context->GetIntermediateType().type == FunctionContext::TYPE_STRING);
assert(context->GetReturnType().type == FunctionContext::TYPE_BIGINT);
}
+IMPALA_UDF_EXPORT
void AggIntermediateUpdate(FunctionContext* context, const IntVal&, StringVal*) {
ValidateFunctionContext(context);
}
+IMPALA_UDF_EXPORT
void AggIntermediateInit(FunctionContext* context, StringVal*) {
ValidateFunctionContext(context);
}
+IMPALA_UDF_EXPORT
void AggIntermediateMerge(FunctionContext* context, const StringVal&, StringVal*) {
ValidateFunctionContext(context);
}
+IMPALA_UDF_EXPORT
BigIntVal AggIntermediateFinalize(FunctionContext* context, const StringVal&) {
ValidateFunctionContext(context);
return BigIntVal::null();
@@ -93,17 +118,21 @@ static void ValidateFunctionContext2(const FunctionContext* context) {
assert(context->GetReturnType().precision == 6);
assert(context->GetReturnType().scale == 5);
}
+IMPALA_UDF_EXPORT
void AggDecimalIntermediateUpdate(FunctionContext* context, const DecimalVal&,
const IntVal&, DecimalVal*) {
ValidateFunctionContext2(context);
}
+IMPALA_UDF_EXPORT
void AggDecimalIntermediateInit(FunctionContext* context, DecimalVal*) {
ValidateFunctionContext2(context);
}
+IMPALA_UDF_EXPORT
void AggDecimalIntermediateMerge(FunctionContext* context, const DecimalVal&,
DecimalVal*) {
ValidateFunctionContext2(context);
}
+IMPALA_UDF_EXPORT
DecimalVal AggDecimalIntermediateFinalize(FunctionContext* context, const DecimalVal&) {
ValidateFunctionContext2(context);
return DecimalVal::null();
@@ -124,16 +153,20 @@ static void ValidateFunctionContext3(const FunctionContext* context) {
assert(context->GetReturnType().precision == 20);
assert(context->GetReturnType().scale == 0);
}
+IMPALA_UDF_EXPORT
void AggStringIntermediateUpdate(FunctionContext* context, const DecimalVal&,
const BigIntVal&, const StringVal&, StringVal*) {
ValidateFunctionContext3(context);
}
+IMPALA_UDF_EXPORT
void AggStringIntermediateInit(FunctionContext* context, StringVal*) {
ValidateFunctionContext3(context);
}
+IMPALA_UDF_EXPORT
void AggStringIntermediateMerge(FunctionContext* context, const StringVal&, StringVal*) {
ValidateFunctionContext3(context);
}
+IMPALA_UDF_EXPORT
DecimalVal AggStringIntermediateFinalize(FunctionContext* context, const StringVal&) {
ValidateFunctionContext3(context);
return DecimalVal(100);
@@ -142,16 +175,19 @@ DecimalVal AggStringIntermediateFinalize(FunctionContext* context, const StringV
// Defines MemTest(bigint) return bigint
// "Allocates" the specified number of bytes in the update function and frees them in the
// serialize function. Useful for testing mem limits.
+IMPALA_UDF_EXPORT
void MemTestInit(FunctionContext*, BigIntVal* total) {
*total = BigIntVal(0);
}
+IMPALA_UDF_EXPORT
void MemTestUpdate(FunctionContext* context, const BigIntVal& bytes, BigIntVal* total) {
if (bytes.is_null) return;
context->TrackAllocation(bytes.val); // freed by serialize()
total->val += bytes.val;
}
+IMPALA_UDF_EXPORT
void MemTestMerge(FunctionContext* context, const BigIntVal& src, BigIntVal* dst) {
if (src.is_null) return;
context->TrackAllocation(src.val); // freed by finalize()
@@ -162,12 +198,14 @@ void MemTestMerge(FunctionContext* context, const BigIntVal& src, BigIntVal* dst
dst->val += src.val;
}
+IMPALA_UDF_EXPORT
BigIntVal MemTestSerialize(FunctionContext* context, const BigIntVal& total) {
if (total.is_null) return BigIntVal(0);
context->Free(total.val);
return total;
}
+IMPALA_UDF_EXPORT
BigIntVal MemTestFinalize(FunctionContext* context, const BigIntVal& total) {
if (total.is_null) return BigIntVal(0);
context->Free(total.val);
@@ -176,6 +214,7 @@ BigIntVal MemTestFinalize(FunctionContext* context, const BigIntVal& total) {
// Defines aggregate function for testing different intermediate/output types that
// computes the truncated bigint sum of many floats.
+IMPALA_UDF_EXPORT
void TruncSumInit(FunctionContext* context, DoubleVal* total) {
// Arg types should be logical input types of UDA.
assert(context->GetNumArgs() == 1);
@@ -187,6 +226,7 @@ void TruncSumInit(FunctionContext* context, DoubleVal* total) {
*total = DoubleVal(0);
}
+IMPALA_UDF_EXPORT
void TruncSumUpdate(FunctionContext* context, const DoubleVal& val, DoubleVal* total) {
// Arg types should be logical input types of UDA.
assert(context->GetNumArgs() == 1);
@@ -198,6 +238,7 @@ void TruncSumUpdate(FunctionContext* context, const DoubleVal& val, DoubleVal* t
total->val += val.val;
}
+IMPALA_UDF_EXPORT
void TruncSumMerge(FunctionContext* context, const DoubleVal& src, DoubleVal* dst) {
// Arg types should be logical input types of UDA.
assert(context->GetNumArgs() == 1);
@@ -209,6 +250,7 @@ void TruncSumMerge(FunctionContext* context, const DoubleVal& src, DoubleVal* ds
dst->val += src.val;
}
+IMPALA_UDF_EXPORT
const DoubleVal TruncSumSerialize(FunctionContext* context, const DoubleVal& total) {
// Arg types should be logical input types of UDA.
assert(context->GetNumArgs() == 1);
@@ -220,6 +262,7 @@ const DoubleVal TruncSumSerialize(FunctionContext* context, const DoubleVal& tot
return total;
}
+IMPALA_UDF_EXPORT
BigIntVal TruncSumFinalize(FunctionContext* context, const DoubleVal& total) {
// Arg types should be logical input types of UDA.
assert(context->GetNumArgs() == 1);
@@ -233,23 +276,28 @@ BigIntVal TruncSumFinalize(FunctionContext* context, const DoubleVal& total) {
// Defines aggregate function for testing constant argument handling. The UDA returns
// true if its second argument is constant for all calls to Update().
+IMPALA_UDF_EXPORT
void ArgIsConstInit(FunctionContext* context, BooleanVal* is_const) {
*is_const = BooleanVal(context->IsArgConstant(1));
}
+IMPALA_UDF_EXPORT
void ArgIsConstUpdate(FunctionContext* context, const IntVal& val,
const IntVal& const_arg, BooleanVal* is_const) {}
+IMPALA_UDF_EXPORT
void ArgIsConstMerge(FunctionContext* context, const BooleanVal& src, BooleanVal* dst) {
dst->val |= src.val;
}
// Defines aggregate function for testing NULL handling. Returns NULL if an even number
// of non-NULL inputs are consumed or 1 if an odd number of non-NULL inputs are consumed.
+IMPALA_UDF_EXPORT
void ToggleNullInit(FunctionContext* context, IntVal* total) {
*total = IntVal::null();
}
+IMPALA_UDF_EXPORT
void ToggleNullUpdate(FunctionContext* context, const IntVal& val, IntVal* total) {
if (total->is_null) {
*total = IntVal(1);
@@ -258,6 +306,7 @@ void ToggleNullUpdate(FunctionContext* context, const IntVal& val, IntVal* total
}
}
+IMPALA_UDF_EXPORT
void ToggleNullMerge(FunctionContext* context, const IntVal& src, IntVal* dst) {
if (src.is_null != dst->is_null) {
*dst = IntVal(1);
@@ -268,14 +317,17 @@ void ToggleNullMerge(FunctionContext* context, const IntVal& src, IntVal* dst) {
// Defines aggregate function for testing input NULL handling. Returns the number of NULL
// input values.
+IMPALA_UDF_EXPORT
void CountNullsInit(FunctionContext* context, BigIntVal* total) {
*total = BigIntVal(0);
}
+IMPALA_UDF_EXPORT
void CountNullsUpdate(FunctionContext* context, const BigIntVal& val, BigIntVal* total) {
if (val.is_null) ++total->val;
}
+IMPALA_UDF_EXPORT
void CountNullsMerge(FunctionContext* context, const BigIntVal& src, BigIntVal* dst) {
dst->val += src.val;
}
@@ -289,11 +341,13 @@ static void ValidateCharIntermediateFunctionContext(const FunctionContext* conte
assert(context->GetIntermediateType().len == 10);
assert(context->GetReturnType().type == FunctionContext::TYPE_INT);
}
+IMPALA_UDF_EXPORT
void AggCharIntermediateInit(FunctionContext* context, StringVal* dst) {
ValidateCharIntermediateFunctionContext(context);
assert(dst->len == 10);
memset(dst->ptr, 0, 10);
}
+IMPALA_UDF_EXPORT
void AggCharIntermediateUpdate(
FunctionContext* context, const IntVal& val, StringVal* dst) {
ValidateCharIntermediateFunctionContext(context);
@@ -301,15 +355,18 @@ void AggCharIntermediateUpdate(
int* dst_val = reinterpret_cast<int*>(dst->ptr);
if (!val.is_null) *dst_val += val.val;
}
+IMPALA_UDF_EXPORT
void AggCharIntermediateMerge(FunctionContext* context, const StringVal& src, StringVal* dst) {
ValidateCharIntermediateFunctionContext(context);
int* dst_val = reinterpret_cast<int*>(dst->ptr);
*dst_val += *reinterpret_cast<int*>(src.ptr);
}
+IMPALA_UDF_EXPORT
StringVal AggCharIntermediateSerialize(FunctionContext* context, const StringVal& in) {
ValidateCharIntermediateFunctionContext(context);
return in;
}
+IMPALA_UDF_EXPORT
IntVal AggCharIntermediateFinalize(FunctionContext* context, const StringVal& src) {
ValidateCharIntermediateFunctionContext(context);
return IntVal(*reinterpret_cast<int*>(src.ptr));
diff --git a/be/src/testutil/test-udfs.cc b/be/src/testutil/test-udfs.cc
index a539c87..8adf423 100644
--- a/be/src/testutil/test-udfs.cc
+++ b/be/src/testutil/test-udfs.cc
@@ -26,26 +26,37 @@ using namespace impala_udf;
// These functions are intended to test the "glue" that runs UDFs. Thus, the UDFs
// themselves are kept very simple.
+IMPALA_UDF_EXPORT
BooleanVal Identity(FunctionContext* context, const BooleanVal& arg) { return arg; }
+IMPALA_UDF_EXPORT
TinyIntVal Identity(FunctionContext* context, const TinyIntVal& arg) { return arg; }
+IMPALA_UDF_EXPORT
SmallIntVal Identity(FunctionContext* context, const SmallIntVal& arg) { return arg; }
+IMPALA_UDF_EXPORT
IntVal Identity(FunctionContext* context, const IntVal& arg) { return arg; }
+IMPALA_UDF_EXPORT
BigIntVal Identity(FunctionContext* context, const BigIntVal& arg) { return arg; }
+IMPALA_UDF_EXPORT
FloatVal Identity(FunctionContext* context, const FloatVal& arg) { return arg; }
+IMPALA_UDF_EXPORT
DoubleVal Identity(FunctionContext* context, const DoubleVal& arg) { return arg; }
+IMPALA_UDF_EXPORT
StringVal Identity(FunctionContext* context, const StringVal& arg) { return arg; }
+IMPALA_UDF_EXPORT
TimestampVal Identity(FunctionContext* context, const TimestampVal& arg) { return arg; }
+IMPALA_UDF_EXPORT
DecimalVal Identity(FunctionContext* context, const DecimalVal& arg) { return arg; }
+IMPALA_UDF_EXPORT
IntVal AllTypes(
FunctionContext* context, const StringVal& string, const BooleanVal& boolean,
const TinyIntVal& tiny_int, const SmallIntVal& small_int, const IntVal& int_val,
@@ -57,6 +68,7 @@ IntVal AllTypes(
return IntVal(result);
}
+IMPALA_UDF_EXPORT
StringVal NoArgs(FunctionContext* context) {
const char* result = "string";
StringVal ret(context, strlen(result));
@@ -64,6 +76,7 @@ StringVal NoArgs(FunctionContext* context) {
return ret;
}
+IMPALA_UDF_EXPORT
BooleanVal VarAnd(FunctionContext* context, int n, const BooleanVal* args) {
bool result = true;
for (int i = 0; i < n; ++i) {
@@ -73,6 +86,7 @@ BooleanVal VarAnd(FunctionContext* context, int n, const BooleanVal* args) {
return BooleanVal(result);
}
+IMPALA_UDF_EXPORT
IntVal VarSum(FunctionContext* context, int n, const IntVal* args) {
int result = 0;
bool is_null = true;
@@ -85,6 +99,7 @@ IntVal VarSum(FunctionContext* context, int n, const IntVal* args) {
return IntVal(result);
}
+IMPALA_UDF_EXPORT
DoubleVal VarSum(FunctionContext* context, int n, const DoubleVal* args) {
double result = 0;
bool is_null = true;
@@ -99,6 +114,7 @@ DoubleVal VarSum(FunctionContext* context, int n, const DoubleVal* args) {
// TODO: have this return a StringVal (make sure not to use functions defined in other
// compilation units, or change how this is built).
+IMPALA_UDF_EXPORT
IntVal VarSum(FunctionContext* context, int n, const StringVal* args) {
int total_len = 0;
for (int i = 0; i < n; ++i) {
@@ -109,6 +125,7 @@ IntVal VarSum(FunctionContext* context, int n, const StringVal* args) {
}
// Decimal4Value... => Decimal8Value
+IMPALA_UDF_EXPORT
DecimalVal VarSum(FunctionContext* context, int n, const DecimalVal* args) {
int64_t result = 0;
bool is_null = true;
@@ -126,6 +143,7 @@ DecimalVal VarSum(FunctionContext* context, int n, const DecimalVal* args) {
return DecimalVal(result);
}
+IMPALA_UDF_EXPORT
DoubleVal NO_INLINE VarSumMultiply(FunctionContext* context,
const DoubleVal& d, int n, const IntVal* args) {
if (d.is_null) return DoubleVal::null();
@@ -142,6 +160,7 @@ DoubleVal NO_INLINE VarSumMultiply(FunctionContext* context,
}
// Call the non-inlined function in the same module to make sure linking works correctly.
+IMPALA_UDF_EXPORT
DoubleVal VarSumMultiply2(FunctionContext* context,
const DoubleVal& d, int n, const IntVal* args) {
return VarSumMultiply(context, d, n, args);
@@ -152,6 +171,7 @@ extern "C" StringVal
_ZN6impala15StringFunctions5LowerEPN10impala_udf15FunctionContextERKNS1_9StringValE(
FunctionContext* context, const StringVal& str);
+IMPALA_UDF_EXPORT
StringVal ToLower(FunctionContext* context, const StringVal& str) {
// StringVal::null() doesn't inline its callee when compiled without optimization.
// Useful for testing cases such as IMPALA-4595.
@@ -168,10 +188,12 @@ extern "C" StringVal
typedef StringVal (*ToUpperFn)(FunctionContext* context, const StringVal& str);
+IMPALA_UDF_EXPORT
StringVal ToUpperWork(FunctionContext* context, const StringVal& str, ToUpperFn fn) {
return fn(context, str);
}
+IMPALA_UDF_EXPORT
StringVal ToUpper(FunctionContext* context, const StringVal& str) {
// StringVal::null() doesn't inline its callee when compiled without optimization.
// Useful for testing cases such as IMPALA-4595.
@@ -216,6 +238,7 @@ namespace impala {
// This function has the same signature as a built-in function (pow()) in Impalad.
// It has a weak linkage type so it can be overridden at linking when tested as IR UDF.
+IMPALA_UDF_EXPORT
DoubleVal WEAK_SYM impala::MathFunctions::Pow(FunctionContext* context,
const DoubleVal& base, const DoubleVal& exp) {
// Just references 'global_array' to stop the compiler from complaining.
@@ -225,12 +248,14 @@ DoubleVal WEAK_SYM impala::MathFunctions::Pow(FunctionContext* context,
return PrivateFn1(base, exp);
}
+IMPALA_UDF_EXPORT
BooleanVal TestError(FunctionContext* context) {
context->SetError("test UDF error");
context->SetError("this shouldn't show up");
return BooleanVal(false);
}
+IMPALA_UDF_EXPORT
BooleanVal TestWarnings(FunctionContext* context) {
context->AddWarning("test UDF warning 1");
context->AddWarning("test UDF warning 2");
@@ -238,17 +263,25 @@ BooleanVal TestWarnings(FunctionContext* context) {
}
// Dummy functions to test ddl.
+IMPALA_UDF_EXPORT
IntVal Fn(FunctionContext*) { return IntVal::null(); }
+IMPALA_UDF_EXPORT
IntVal Fn(FunctionContext*, const IntVal&) { return IntVal::null(); }
+IMPALA_UDF_EXPORT
IntVal Fn(FunctionContext*, const IntVal&, const StringVal&) { return IntVal::null(); }
+IMPALA_UDF_EXPORT
IntVal Fn(FunctionContext*, const StringVal&, const IntVal&) { return IntVal::null(); }
+IMPALA_UDF_EXPORT
IntVal Fn2(FunctionContext*, const IntVal&) { return IntVal::null(); }
+IMPALA_UDF_EXPORT
IntVal Fn2(FunctionContext*, const IntVal&, const StringVal&) { return IntVal::null(); }
+IMPALA_UDF_EXPORT
TimestampVal ConstantTimestamp(FunctionContext* context) {
return TimestampVal(2456575, 1); // 2013-10-09 00:00:00.000000001
}
+IMPALA_UDF_EXPORT
BooleanVal ValidateArgType(FunctionContext* context, const StringVal& dummy) {
if (context->GetArgType(0)->type != FunctionContext::TYPE_STRING) {
return BooleanVal(false);
@@ -259,6 +292,7 @@ BooleanVal ValidateArgType(FunctionContext* context, const StringVal& dummy) {
}
// Count UDF: counts the number of input rows per thread-local FunctionContext
+IMPALA_UDF_EXPORT
void CountPrepare(FunctionContext* context, FunctionContext::FunctionStateScope scope) {
if (scope == FunctionContext::THREAD_LOCAL) {
uint64_t* state = reinterpret_cast<uint64_t*>(context->Allocate(sizeof(uint64_t)));
@@ -267,12 +301,14 @@ void CountPrepare(FunctionContext* context, FunctionContext::FunctionStateScope
}
}
+IMPALA_UDF_EXPORT
BigIntVal Count(FunctionContext* context) {
uint64_t* state = reinterpret_cast<uint64_t*>(
context->GetFunctionState(FunctionContext::THREAD_LOCAL));
return BigIntVal(++(*state));
}
+IMPALA_UDF_EXPORT
void CountClose(FunctionContext* context, FunctionContext::FunctionStateScope scope) {
if (scope == FunctionContext::THREAD_LOCAL) {
void* state = context->GetFunctionState(scope);
@@ -282,6 +318,7 @@ void CountClose(FunctionContext* context, FunctionContext::FunctionStateScope sc
}
// ConstantArg UDF: returns the first argument if it's constant, otherwise returns NULL.
+IMPALA_UDF_EXPORT
void ConstantArgPrepare(
FunctionContext* context, FunctionContext::FunctionStateScope scope) {
if (scope == FunctionContext::THREAD_LOCAL) {
@@ -295,12 +332,14 @@ void ConstantArgPrepare(
}
}
+IMPALA_UDF_EXPORT
IntVal ConstantArg(FunctionContext* context, const IntVal& const_val) {
IntVal* state = reinterpret_cast<IntVal*>(
context->GetFunctionState(FunctionContext::THREAD_LOCAL));
return *state;
}
+IMPALA_UDF_EXPORT
void ConstantArgClose(
FunctionContext* context, FunctionContext::FunctionStateScope scope) {
if (scope == FunctionContext::THREAD_LOCAL) {
@@ -312,6 +351,7 @@ void ConstantArgClose(
// ValidateOpen UDF: returns true if the UDF was opened, false otherwise. Can also be
// used to validate close since it will leak if it's not closed.
+IMPALA_UDF_EXPORT
void ValidateOpenPrepare(
FunctionContext* context, FunctionContext::FunctionStateScope scope) {
if (scope == FunctionContext::THREAD_LOCAL) {
@@ -320,11 +360,13 @@ void ValidateOpenPrepare(
}
}
+IMPALA_UDF_EXPORT
BooleanVal ValidateOpen(FunctionContext* context, const IntVal& dummy) {
void* state = context->GetFunctionState(FunctionContext::THREAD_LOCAL);
return BooleanVal(state != NULL);
}
+IMPALA_UDF_EXPORT
void ValidateOpenClose(
FunctionContext* context, FunctionContext::FunctionStateScope scope) {
if (scope == FunctionContext::THREAD_LOCAL) {
@@ -335,6 +377,7 @@ void ValidateOpenClose(
}
// This prepare function always fails to make sure clean up is done afterwards.
+IMPALA_UDF_EXPORT
void BadExprPrepare(FunctionContext* context, FunctionContext::FunctionStateScope scope) {
if (scope == FunctionContext::FRAGMENT_LOCAL) {
int32_t* state = reinterpret_cast<int32_t*>(context->Allocate(sizeof(int32_t)));
@@ -347,6 +390,7 @@ void BadExprPrepare(FunctionContext* context, FunctionContext::FunctionStateScop
// This prepare function always fails for cloned evaluators to exercise IMPALA-6184.
// It does so by detecting whether the caller is a cloned evaluator and inserts an error
// in FunctionContext if that's the case.
+IMPALA_UDF_EXPORT
void BadExpr2Prepare(FunctionContext* context,
FunctionContext::FunctionStateScope scope) {
if (scope == FunctionContext::FRAGMENT_LOCAL) {
@@ -363,6 +407,7 @@ void BadExpr2Prepare(FunctionContext* context,
}
// Used by both BadExprPrepare() and BadExpr2Prepare() above.
+IMPALA_UDF_EXPORT
BooleanVal BadExpr(FunctionContext* context, const DoubleVal& slot) {
static int32_t count = 0;
if (slot.is_null) return BooleanVal(false);
@@ -374,6 +419,7 @@ BooleanVal BadExpr(FunctionContext* context, const DoubleVal& slot) {
}
// Used by both BadExprPrepare() and BadExpr2Prepare() above.
+IMPALA_UDF_EXPORT
void BadExprClose(FunctionContext* context, FunctionContext::FunctionStateScope scope) {
if (scope == FunctionContext::FRAGMENT_LOCAL) {
int32_t* state = reinterpret_cast<int32_t*>(context->GetFunctionState(scope));
@@ -384,6 +430,7 @@ void BadExprClose(FunctionContext* context, FunctionContext::FunctionStateScope
}
// MemTest UDF: "Allocates" the specified number of bytes per call.
+IMPALA_UDF_EXPORT
void MemTestPrepare(FunctionContext* context, FunctionContext::FunctionStateScope scope) {
if (scope == FunctionContext::THREAD_LOCAL) {
int64_t* total =
@@ -393,6 +440,7 @@ void MemTestPrepare(FunctionContext* context, FunctionContext::FunctionStateScop
}
}
+IMPALA_UDF_EXPORT
BigIntVal MemTest(FunctionContext* context, const BigIntVal& bytes) {
int64_t* total = reinterpret_cast<int64_t*>(
context->GetFunctionState(FunctionContext::THREAD_LOCAL));
@@ -401,6 +449,7 @@ BigIntVal MemTest(FunctionContext* context, const BigIntVal& bytes) {
return bytes;
}
+IMPALA_UDF_EXPORT
void MemTestClose(FunctionContext* context, FunctionContext::FunctionStateScope scope) {
if (scope == FunctionContext::THREAD_LOCAL) {
int64_t* total = reinterpret_cast<int64_t*>(
@@ -413,6 +462,7 @@ void MemTestClose(FunctionContext* context, FunctionContext::FunctionStateScope
}
}
+IMPALA_UDF_EXPORT
BigIntVal DoubleFreeTest(FunctionContext* context, BigIntVal bytes) {
context->TrackAllocation(bytes.val);
context->Free(bytes.val);
@@ -420,38 +470,51 @@ BigIntVal DoubleFreeTest(FunctionContext* context, BigIntVal bytes) {
return bytes;
}
-extern "C" BigIntVal UnmangledSymbol(FunctionContext* context) {
+extern "C"
+IMPALA_UDF_EXPORT
+BigIntVal UnmangledSymbol(FunctionContext* context) {
+ return BigIntVal(5);
+}
+
+// Test that unexported symbols can't be found.
+BigIntVal UnexportedSymbol(FunctionContext* context) {
return BigIntVal(5);
}
// Functions to test interpreted path
+IMPALA_UDF_EXPORT
IntVal FourArgs(FunctionContext* context, const IntVal& v1, const IntVal& v2,
const IntVal& v3, const IntVal& v4) {
return IntVal(v1.val + v2.val + v3.val + v4.val);
}
+IMPALA_UDF_EXPORT
IntVal FiveArgs(FunctionContext* context, const IntVal& v1, const IntVal& v2,
const IntVal& v3, const IntVal& v4, const IntVal& v5) {
return IntVal(v1.val + v2.val + v3.val + v4.val + v5.val);
}
+IMPALA_UDF_EXPORT
IntVal SixArgs(FunctionContext* context, const IntVal& v1, const IntVal& v2,
const IntVal& v3, const IntVal& v4, const IntVal& v5, const IntVal& v6) {
return IntVal(v1.val + v2.val + v3.val + v4.val + v5.val + v6.val);
}
+IMPALA_UDF_EXPORT
IntVal SevenArgs(FunctionContext* context, const IntVal& v1, const IntVal& v2,
const IntVal& v3, const IntVal& v4, const IntVal& v5, const IntVal& v6,
const IntVal& v7) {
return IntVal(v1.val + v2.val + v3.val + v4.val + v5.val + v6.val + v7.val);
}
+IMPALA_UDF_EXPORT
IntVal EightArgs(FunctionContext* context, const IntVal& v1, const IntVal& v2,
const IntVal& v3, const IntVal& v4, const IntVal& v5, const IntVal& v6,
const IntVal& v7, const IntVal& v8) {
return IntVal(v1.val + v2.val + v3.val + v4.val + v5.val + v6.val + v7.val + v8.val);
}
+IMPALA_UDF_EXPORT
IntVal NineArgs(FunctionContext* context, const IntVal& v1, const IntVal& v2,
const IntVal& v3, const IntVal& v4, const IntVal& v5, const IntVal& v6,
const IntVal& v7, const IntVal& v8, const IntVal& v9) {
@@ -459,6 +522,7 @@ IntVal NineArgs(FunctionContext* context, const IntVal& v1, const IntVal& v2,
v9.val);
}
+IMPALA_UDF_EXPORT
IntVal TwentyArgs(FunctionContext* context, const IntVal& v1, const IntVal& v2,
const IntVal& v3, const IntVal& v4, const IntVal& v5, const IntVal& v6,
const IntVal& v7, const IntVal& v8, const IntVal& v9, const IntVal& v10,
@@ -470,6 +534,7 @@ IntVal TwentyArgs(FunctionContext* context, const IntVal& v1, const IntVal& v2,
v17.val + v18.val + v19.val + v20.val);
}
+IMPALA_UDF_EXPORT
IntVal TwentyOneArgs(FunctionContext* context, const IntVal& v1, const IntVal& v2,
const IntVal& v3, const IntVal& v4, const IntVal& v5, const IntVal& v6,
const IntVal& v7, const IntVal& v8, const IntVal& v9, const IntVal& v10,
diff --git a/be/src/udf/udf.h b/be/src/udf/udf.h
index 62a1576..0774d48 100644
--- a/be/src/udf/udf.h
+++ b/be/src/udf/udf.h
@@ -34,6 +34,12 @@
#define NOEXCEPT
#endif
+// Macro to prepend to function definitions that will export the symbols to be visible
+// for loading by Impala. It is recommended that UDFs be built with the compiler flags
+// "-fvisibility=hidden -fvisibility-inlines-hidden" and only functions that are entry
+// points for UDFs be exported with this macro.
+#define IMPALA_UDF_EXPORT __attribute__ ((visibility ("default")))
+
/// This is the only Impala header required to develop UDFs and UDAs. This header
/// contains the types that need to be used and the FunctionContext object. The context
/// object serves as the interface object between the UDF/UDA and the impala process.
diff --git a/be/src/udf_samples/CMakeLists.txt b/be/src/udf_samples/CMakeLists.txt
index b07b9d7..e4aaf8d 100644
--- a/be/src/udf_samples/CMakeLists.txt
+++ b/be/src/udf_samples/CMakeLists.txt
@@ -20,11 +20,16 @@ set(LIBRARY_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}/udf_samples")
# where to put generated binaries
set(EXECUTABLE_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}/udf_samples")
+set(HIDE_SYMBOLS "-fvisibility=hidden -fvisibility-inlines-hidden")
+set(HIDE_SYMBOLS_ARGS "${HIDE_SYMBOLS}")
+separate_arguments(HIDE_SYMBOLS_ARGS)
+
# Function to generate rule to cross compile a source file to an IR module.
# This should be called with the .cc src file and it will generate a
# src-file-ir target that can be built.
# e.g. COMPILE_TO_IR(test.cc) generates the "test-ir" make target.
-set(IR_COMPILE_FLAGS "-emit-llvm" "-O3" "-std=c++14" "-c" "-I../" ${CLANG_BASE_FLAGS})
+set(IR_COMPILE_FLAGS "-emit-llvm" "-O3" "-std=c++14" "-c" "-I../" ${HIDE_SYMBOLS_ARGS})
+set(IR_COMPILE_FLAGS ${IR_COMPILE_FLAGS} ${CLANG_BASE_FLAGS})
function(COMPILE_TO_IR SRC_FILE)
get_filename_component(BASE_NAME ${SRC_FILE} NAME_WE)
set(OUTPUT_FILE "${LIBRARY_OUTPUT_PATH}/${BASE_NAME}.ll")
@@ -37,8 +42,10 @@ endfunction(COMPILE_TO_IR)
# Build the UDA/UDFs into a shared library.
add_library(udfsample SHARED udf-sample.cc)
+set_target_properties(udfsample PROPERTIES COMPILE_FLAGS "${HIDE_SYMBOLS}")
add_dependencies(udfsample gen-deps)
add_library(udasample SHARED uda-sample.cc hyperloglog-uda.cc)
+set_target_properties(udasample PROPERTIES COMPILE_FLAGS "${HIDE_SYMBOLS}")
add_dependencies(udasample gen-deps)
# Custom targest to cross compile UDA/UDF to ir
diff --git a/be/src/udf_samples/hyperloglog-uda.cc b/be/src/udf_samples/hyperloglog-uda.cc
index d6db712..5f00d88 100644
--- a/be/src/udf_samples/hyperloglog-uda.cc
+++ b/be/src/udf_samples/hyperloglog-uda.cc
@@ -37,6 +37,7 @@ using namespace impala_udf;
// Precision taken from the paper. Doesn't seem to matter very much when between [6,12]
const int HLL_PRECISION = 10;
+IMPALA_UDF_EXPORT
void HllInit(FunctionContext* ctx, StringVal* dst) {
int str_len = pow(2, HLL_PRECISION);
uint8_t* ptr = ctx->Allocate(str_len);
@@ -66,6 +67,7 @@ static uint64_t Hash(const IntVal& v) {
return FnvHash(&v.val, sizeof(int32_t), FNV64_SEED);
}
+IMPALA_UDF_EXPORT
void HllUpdate(FunctionContext* ctx, const IntVal& src, StringVal* dst) {
if (src.is_null) return;
assert(dst != NULL);
@@ -82,6 +84,7 @@ void HllUpdate(FunctionContext* ctx, const IntVal& src, StringVal* dst) {
dst->ptr[idx] = ::max(dst->ptr[idx], first_one_bit);
}
+IMPALA_UDF_EXPORT
void HllMerge(FunctionContext* ctx, const StringVal& src, StringVal* dst) {
assert(dst != NULL);
assert(!dst->is_null);
@@ -93,6 +96,7 @@ void HllMerge(FunctionContext* ctx, const StringVal& src, StringVal* dst) {
}
}
+IMPALA_UDF_EXPORT
StringVal HllSerialize(FunctionContext* ctx, const StringVal& src) {
if (src.is_null) return src;
StringVal result(ctx, src.len);
@@ -101,6 +105,7 @@ StringVal HllSerialize(FunctionContext* ctx, const StringVal& src) {
return result;
}
+IMPALA_UDF_EXPORT
StringVal HllFinalize(FunctionContext* ctx, const StringVal& src) {
assert(!src.is_null);
assert(src.len == pow(2, HLL_PRECISION));
diff --git a/be/src/udf_samples/uda-sample.cc b/be/src/udf_samples/uda-sample.cc
index e8ff4fa..7dfddbe 100644
--- a/be/src/udf_samples/uda-sample.cc
+++ b/be/src/udf_samples/uda-sample.cc
@@ -23,20 +23,24 @@ using namespace impala_udf;
// ---------------------------------------------------------------------------
// This is a sample of implementing a COUNT aggregate function.
// ---------------------------------------------------------------------------
+IMPALA_UDF_EXPORT
void CountInit(FunctionContext* context, BigIntVal* val) {
val->is_null = false;
val->val = 0;
}
+IMPALA_UDF_EXPORT
void CountUpdate(FunctionContext* context, const IntVal& input, BigIntVal* val) {
if (input.is_null) return;
++val->val;
}
+IMPALA_UDF_EXPORT
void CountMerge(FunctionContext* context, const BigIntVal& src, BigIntVal* dst) {
dst->val += src.val;
}
+IMPALA_UDF_EXPORT
BigIntVal CountFinalize(FunctionContext* context, const BigIntVal& val) {
return val;
}
@@ -49,11 +53,13 @@ struct AvgStruct {
int64_t count;
};
+IMPALA_UDF_EXPORT
void AvgInit(FunctionContext* context, BufferVal* val) {
static_assert(sizeof(AvgStruct) == 16, "AvgStruct is an unexpected size");
memset(*val, 0, sizeof(AvgStruct));
}
+IMPALA_UDF_EXPORT
void AvgUpdate(FunctionContext* context, const DoubleVal& input, BufferVal* val) {
if (input.is_null) return;
AvgStruct* avg = reinterpret_cast<AvgStruct*>(*val);
@@ -61,6 +67,7 @@ void AvgUpdate(FunctionContext* context, const DoubleVal& input, BufferVal* val)
++avg->count;
}
+IMPALA_UDF_EXPORT
void AvgMerge(FunctionContext* context, const BufferVal& src, BufferVal* dst) {
if (src == NULL) return;
const AvgStruct* src_struct = reinterpret_cast<const AvgStruct*>(src);
@@ -69,6 +76,7 @@ void AvgMerge(FunctionContext* context, const BufferVal& src, BufferVal* dst) {
dst_struct->count += src_struct->count;
}
+IMPALA_UDF_EXPORT
DoubleVal AvgFinalize(FunctionContext* context, const BufferVal& val) {
if (val == NULL) return DoubleVal::null();
AvgStruct* val_struct = reinterpret_cast<AvgStruct*>(val);
@@ -79,10 +87,12 @@ DoubleVal AvgFinalize(FunctionContext* context, const BufferVal& val) {
// This is a sample of implementing the STRING_CONCAT aggregate function.
// Example: select string_concat(string_col, ",") from table
// ---------------------------------------------------------------------------
+IMPALA_UDF_EXPORT
void StringConcatInit(FunctionContext* context, StringVal* val) {
val->is_null = true;
}
+IMPALA_UDF_EXPORT
void StringConcatUpdate(FunctionContext* context, const StringVal& arg1,
const StringVal& arg2, StringVal* val) {
if (val->is_null) {
@@ -100,11 +110,13 @@ void StringConcatUpdate(FunctionContext* context, const StringVal& arg1,
}
}
+IMPALA_UDF_EXPORT
void StringConcatMerge(FunctionContext* context, const StringVal& src, StringVal* dst) {
if (src.is_null) return;
StringConcatUpdate(context, src, ",", dst);
}
+IMPALA_UDF_EXPORT
StringVal StringConcatFinalize(FunctionContext* context, const StringVal& val) {
return val;
}
@@ -115,11 +127,13 @@ StringVal StringConcatFinalize(FunctionContext* context, const StringVal& val) {
// It is different than the builtin sum since it can easily overflow but can
// be faster for small tables.
// ---------------------------------------------------------------------------
+IMPALA_UDF_EXPORT
void SumSmallDecimalInit(FunctionContext*, DecimalVal* val) {
val->is_null = true;
val->val4 = 0;
}
+IMPALA_UDF_EXPORT
void SumSmallDecimalUpdate(FunctionContext* ctx,
const DecimalVal& src, DecimalVal* dst) {
assert(ctx->GetArgType(0)->scale == 2);
@@ -129,6 +143,7 @@ void SumSmallDecimalUpdate(FunctionContext* ctx,
dst->val4 += src.val4;
}
+IMPALA_UDF_EXPORT
void SumSmallDecimalMerge(FunctionContext*, const DecimalVal& src, DecimalVal* dst) {
if (src.is_null) return;
dst->is_null = false;
diff --git a/be/src/udf_samples/udf-sample.cc b/be/src/udf_samples/udf-sample.cc
index 3aad2f7..0aca164 100644
--- a/be/src/udf_samples/udf-sample.cc
+++ b/be/src/udf_samples/udf-sample.cc
@@ -18,6 +18,7 @@
#include "udf-sample.h"
// In this sample we are declaring a UDF that adds two ints and returns an int.
+IMPALA_UDF_EXPORT
IntVal AddUdf(FunctionContext* context, const IntVal& arg1, const IntVal& arg2) {
if (arg1.is_null || arg2.is_null) return IntVal::null();
return IntVal(arg1.val + arg2.val);
diff --git a/tests/query_test/test_udfs.py b/tests/query_test/test_udfs.py
index 757c171..ab62df3 100644
--- a/tests/query_test/test_udfs.py
+++ b/tests/query_test/test_udfs.py
@@ -432,6 +432,25 @@ class TestUdfTargeted(TestUdfBase):
assert "Unable to find class" in str(ex)
self.client.execute(drop_fn_stmt)
+ def test_hidden_symbol(self, vector, unique_database):
+ """Test that symbols in the test UDFs are hidden by default and that therefore
+ they cannot be used as a UDF entry point."""
+ symbol = "_Z16UnexportedSymbolPN10impala_udf15FunctionContextE"
+ ex = self.execute_query_expect_failure(self.client, """
+ create function `{0}`.unexported() returns BIGINT LOCATION '{1}'
+ SYMBOL='{2}'""".format(
+ unique_database, get_fs_path('/test-warehouse/libTestUdfs.so'), symbol))
+ assert "Could not find symbol '{0}'".format(symbol) in str(ex), str(ex)
+ # IMPALA-8196: IR UDFs ignore whether symbol is hidden or not. Exercise the current
+ # behaviour, where the UDF can be created and executed.
+ result = self.execute_query_expect_success(self.client, """
+ create function `{0}`.unexported() returns BIGINT LOCATION '{1}'
+ SYMBOL='{2}'""".format(
+ unique_database, get_fs_path('/test-warehouse/test-udfs.ll'), symbol))
+ result = self.execute_query_expect_success(self.client,
+ "select `{0}`.unexported()".format(unique_database))
+ assert result.data[0][0] == '5'
+
@SkipIfLocal.multiple_impalad
def test_hive_udfs_missing_jar(self, vector, unique_database):
""" IMPALA-2365: Impalad shouldn't crash if the udf jar isn't present