You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by bo...@apache.org on 2022/02/02 09:12:45 UTC
[impala] 02/04: IMPALA-11056: Create option to fail query on Java UDF exceptions
This is an automated email from the ASF dual-hosted git repository.
boroknagyz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 504e0d00121bb11b54bbd754d52c1f3f66d29d20
Author: Steve Carlin <sc...@cloudera.com>
AuthorDate: Wed Dec 8 11:37:53 2021 -0800
IMPALA-11056: Create option to fail query on Java UDF exceptions
This commit will create a new query option,
"abort_java_udf_on_exception". The current and default behavior
is that when the Java UDF throws an exception, a warning is logged
and the function returns NULL. If the query option is set to
true, the query will fail.
Change-Id: Ifece20cf16a6575f1c498238f754440e870e2ce9
Reviewed-on: http://gerrit.cloudera.org:8080/18080
Reviewed-by: Kurt Deschler <kd...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Reviewed-by: Aman Sinha <am...@cloudera.com>
---
be/src/exprs/hive-udf-call-ir.cc | 8 ++++++--
be/src/exprs/hive-udf-call.cc | 8 ++++++--
be/src/runtime/runtime-state.h | 3 +++
be/src/service/query-options.cc | 4 ++++
be/src/service/query-options.h | 4 +++-
common/thrift/ImpalaService.thrift | 4 ++++
common/thrift/Query.thrift | 4 ++++
.../workloads/functional-query/queries/QueryTest/java-udf.test | 6 ++++++
8 files changed, 36 insertions(+), 5 deletions(-)
diff --git a/be/src/exprs/hive-udf-call-ir.cc b/be/src/exprs/hive-udf-call-ir.cc
index b4e88b7..99f1a2a 100644
--- a/be/src/exprs/hive-udf-call-ir.cc
+++ b/be/src/exprs/hive-udf-call-ir.cc
@@ -64,8 +64,12 @@ AnyVal* HiveUdfCall::CallJavaAndStoreResult(const ColumnType* type,
std::stringstream ss;
ss << "Hive UDF path=" << jni_ctx->hdfs_location << " class="
<< jni_ctx->scalar_fn_symbol << " failed due to: " << status.GetDetail();
- fn_ctx->AddWarning(ss.str().c_str());
- jni_ctx->warning_logged = true;
+ if (fn_ctx->impl()->state()->abort_java_udf_on_exception()) {
+ fn_ctx->SetError(ss.str().c_str());
+ } else {
+ fn_ctx->AddWarning(ss.str().c_str());
+ jni_ctx->warning_logged = true;
+ }
}
jni_ctx->output_anyval->is_null = true;
return jni_ctx->output_anyval;
diff --git a/be/src/exprs/hive-udf-call.cc b/be/src/exprs/hive-udf-call.cc
index a200d37..b5e21c8 100644
--- a/be/src/exprs/hive-udf-call.cc
+++ b/be/src/exprs/hive-udf-call.cc
@@ -115,8 +115,12 @@ AnyVal* HiveUdfCall::Evaluate(ScalarExprEvaluator* eval, const TupleRow* row) co
stringstream ss;
ss << "Hive UDF path=" << fn_.hdfs_location << " class=" << fn_.scalar_fn.symbol
<< " failed due to: " << status.GetDetail();
- fn_ctx->AddWarning(ss.str().c_str());
- jni_ctx->warning_logged = true;
+ if (fn_ctx->impl()->state()->abort_java_udf_on_exception()) {
+ fn_ctx->SetError(ss.str().c_str());
+ } else {
+ fn_ctx->AddWarning(ss.str().c_str());
+ jni_ctx->warning_logged = true;
+ }
}
jni_ctx->output_anyval->is_null = true;
return jni_ctx->output_anyval;
diff --git a/be/src/runtime/runtime-state.h b/be/src/runtime/runtime-state.h
index 948fb52..7d2d57b 100644
--- a/be/src/runtime/runtime-state.h
+++ b/be/src/runtime/runtime-state.h
@@ -104,6 +104,9 @@ class RuntimeState {
bool strict_mode() const { return query_options().strict_mode; }
bool utf8_mode() const { return query_options().utf8_mode; }
bool decimal_v2() const { return query_options().decimal_v2; }
+ bool abort_java_udf_on_exception() const {
+ return query_options().abort_java_udf_on_exception;
+ }
const TQueryCtx& query_ctx() const;
const TPlanFragment& fragment() const { return *fragment_; }
const TPlanFragmentInstanceCtx& instance_ctx() const { return *instance_ctx_; }
diff --git a/be/src/service/query-options.cc b/be/src/service/query-options.cc
index 3dab0d8..cc06645 100644
--- a/be/src/service/query-options.cc
+++ b/be/src/service/query-options.cc
@@ -1152,6 +1152,10 @@ Status impala::SetQueryOption(const string& key, const string& value,
query_options->__set_enable_async_load_data_execution(IsTrue(value));
break;
}
+ case TImpalaQueryOptions::ABORT_JAVA_UDF_ON_EXCEPTION: {
+ query_options->__set_abort_java_udf_on_exception(IsTrue(value));
+ break;
+ }
default:
if (IsRemovedQueryOption(key)) {
LOG(WARNING) << "Ignoring attempt to set removed query option '" << key << "'";
diff --git a/be/src/service/query-options.h b/be/src/service/query-options.h
index 14bd3ee..4c471c3 100644
--- a/be/src/service/query-options.h
+++ b/be/src/service/query-options.h
@@ -47,7 +47,7 @@ typedef std::unordered_map<string, beeswax::TQueryOptionLevel::type>
// time we add or remove a query option to/from the enum TImpalaQueryOptions.
#define QUERY_OPTS_TABLE\
DCHECK_EQ(_TImpalaQueryOptions_VALUES_TO_NAMES.size(),\
- TImpalaQueryOptions::PARQUET_DICTIONARY_RUNTIME_FILTER_ENTRY_LIMIT+ 1);\
+ TImpalaQueryOptions::ABORT_JAVA_UDF_ON_EXCEPTION + 1);\
REMOVED_QUERY_OPT_FN(abort_on_default_limit_exceeded, ABORT_ON_DEFAULT_LIMIT_EXCEEDED)\
QUERY_OPT_FN(abort_on_error, ABORT_ON_ERROR, TQueryOptionLevel::REGULAR)\
REMOVED_QUERY_OPT_FN(allow_unsupported_formats, ALLOW_UNSUPPORTED_FORMATS)\
@@ -269,6 +269,8 @@ typedef std::unordered_map<string, beeswax::TQueryOptionLevel::type>
PARQUET_LATE_MATERIALIZATION_THRESHOLD, TQueryOptionLevel::ADVANCED)\
QUERY_OPT_FN(parquet_dictionary_runtime_filter_entry_limit,\
PARQUET_DICTIONARY_RUNTIME_FILTER_ENTRY_LIMIT, TQueryOptionLevel::ADVANCED)\
+ QUERY_OPT_FN(abort_java_udf_on_exception,\
+ ABORT_JAVA_UDF_ON_EXCEPTION, TQueryOptionLevel::ADVANCED)\
;
/// Enforce practical limits on some query options to avoid undesired query state.
diff --git a/common/thrift/ImpalaService.thrift b/common/thrift/ImpalaService.thrift
index d16edc2..e2a0705 100644
--- a/common/thrift/ImpalaService.thrift
+++ b/common/thrift/ImpalaService.thrift
@@ -714,6 +714,10 @@ enum TImpalaQueryOptions {
// enable runtime filtering on the row group. For example, 2 means that runtime filter
// will be evaluated when the dictionary size is smaller or equal to 2.
PARQUET_DICTIONARY_RUNTIME_FILTER_ENTRY_LIMIT = 139;
+
+ // Abort the Java UDF if an exception is thrown. Default is that only a
+ // warning will be logged if the Java UDF throws an exception.
+ ABORT_JAVA_UDF_ON_EXCEPTION = 140;
}
// The summary of a DML statement.
diff --git a/common/thrift/Query.thrift b/common/thrift/Query.thrift
index fa2f187..0313b85 100644
--- a/common/thrift/Query.thrift
+++ b/common/thrift/Query.thrift
@@ -567,6 +567,10 @@ struct TQueryOptions {
// enable runtime filtering on the row group. For example, 2 means that runtime filter
// will be evaluated when the dictionary size is smaller or equal to 2.
140: optional i32 parquet_dictionary_runtime_filter_entry_limit = 1024;
+
+ // Abort the Java UDF if an exception is thrown. Default is that only a
+ // warning will be logged if the Java UDF throws an exception.
+ 141: optional bool abort_java_udf_on_exception = false;
}
// Impala currently has three types of sessions: Beeswax, HiveServer2 and external
diff --git a/testdata/workloads/functional-query/queries/QueryTest/java-udf.test b/testdata/workloads/functional-query/queries/QueryTest/java-udf.test
index 152ba79..82a7fe9 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/java-udf.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/java-udf.test
@@ -104,6 +104,12 @@ boolean
NULL
====
---- QUERY
+set abort_java_udf_on_exception=true;
+select throws_exception() from functional.alltypestiny;
+---- CATCH
+Test exception
+====
+---- QUERY
select throws_exception() from functional.alltypestiny;
---- TYPES
boolean