You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by bo...@apache.org on 2022/02/02 09:12:45 UTC

[impala] 02/04: IMPALA-11056: Create option to fail query on Java UDF exceptions

This is an automated email from the ASF dual-hosted git repository.

boroknagyz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 504e0d00121bb11b54bbd754d52c1f3f66d29d20
Author: Steve Carlin <sc...@cloudera.com>
AuthorDate: Wed Dec 8 11:37:53 2021 -0800

    IMPALA-11056: Create option to fail query on Java UDF exceptions
    
    This commit will create a new query option,
    "abort_java_udf_on_exception".  The current and default behavior
    is that when the Java UDF throws an exception, a warning is logged
    and the function returns NULL. If the query option is set to
    true, the query will fail.
    
    Change-Id: Ifece20cf16a6575f1c498238f754440e870e2ce9
    Reviewed-on: http://gerrit.cloudera.org:8080/18080
    Reviewed-by: Kurt Deschler <kd...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Aman Sinha <am...@cloudera.com>
---
 be/src/exprs/hive-udf-call-ir.cc                                  | 8 ++++++--
 be/src/exprs/hive-udf-call.cc                                     | 8 ++++++--
 be/src/runtime/runtime-state.h                                    | 3 +++
 be/src/service/query-options.cc                                   | 4 ++++
 be/src/service/query-options.h                                    | 4 +++-
 common/thrift/ImpalaService.thrift                                | 4 ++++
 common/thrift/Query.thrift                                        | 4 ++++
 .../workloads/functional-query/queries/QueryTest/java-udf.test    | 6 ++++++
 8 files changed, 36 insertions(+), 5 deletions(-)

diff --git a/be/src/exprs/hive-udf-call-ir.cc b/be/src/exprs/hive-udf-call-ir.cc
index b4e88b7..99f1a2a 100644
--- a/be/src/exprs/hive-udf-call-ir.cc
+++ b/be/src/exprs/hive-udf-call-ir.cc
@@ -64,8 +64,12 @@ AnyVal* HiveUdfCall::CallJavaAndStoreResult(const ColumnType* type,
       std::stringstream ss;
       ss << "Hive UDF path=" << jni_ctx->hdfs_location << " class="
           << jni_ctx->scalar_fn_symbol << " failed due to: " << status.GetDetail();
-      fn_ctx->AddWarning(ss.str().c_str());
-      jni_ctx->warning_logged = true;
+      if (fn_ctx->impl()->state()->abort_java_udf_on_exception()) {
+        fn_ctx->SetError(ss.str().c_str());
+      } else {
+        fn_ctx->AddWarning(ss.str().c_str());
+        jni_ctx->warning_logged = true;
+      }
     }
     jni_ctx->output_anyval->is_null = true;
     return jni_ctx->output_anyval;
diff --git a/be/src/exprs/hive-udf-call.cc b/be/src/exprs/hive-udf-call.cc
index a200d37..b5e21c8 100644
--- a/be/src/exprs/hive-udf-call.cc
+++ b/be/src/exprs/hive-udf-call.cc
@@ -115,8 +115,12 @@ AnyVal* HiveUdfCall::Evaluate(ScalarExprEvaluator* eval, const TupleRow* row) co
       stringstream ss;
       ss << "Hive UDF path=" << fn_.hdfs_location << " class=" << fn_.scalar_fn.symbol
         << " failed due to: " << status.GetDetail();
-      fn_ctx->AddWarning(ss.str().c_str());
-      jni_ctx->warning_logged = true;
+      if (fn_ctx->impl()->state()->abort_java_udf_on_exception()) {
+        fn_ctx->SetError(ss.str().c_str());
+      } else {
+        fn_ctx->AddWarning(ss.str().c_str());
+        jni_ctx->warning_logged = true;
+      }
     }
     jni_ctx->output_anyval->is_null = true;
     return jni_ctx->output_anyval;
diff --git a/be/src/runtime/runtime-state.h b/be/src/runtime/runtime-state.h
index 948fb52..7d2d57b 100644
--- a/be/src/runtime/runtime-state.h
+++ b/be/src/runtime/runtime-state.h
@@ -104,6 +104,9 @@ class RuntimeState {
   bool strict_mode() const { return query_options().strict_mode; }
   bool utf8_mode() const { return query_options().utf8_mode; }
   bool decimal_v2() const { return query_options().decimal_v2; }
+  bool abort_java_udf_on_exception() const {
+     return query_options().abort_java_udf_on_exception;
+  }
   const TQueryCtx& query_ctx() const;
   const TPlanFragment& fragment() const { return *fragment_; }
   const TPlanFragmentInstanceCtx& instance_ctx() const { return *instance_ctx_; }
diff --git a/be/src/service/query-options.cc b/be/src/service/query-options.cc
index 3dab0d8..cc06645 100644
--- a/be/src/service/query-options.cc
+++ b/be/src/service/query-options.cc
@@ -1152,6 +1152,10 @@ Status impala::SetQueryOption(const string& key, const string& value,
         query_options->__set_enable_async_load_data_execution(IsTrue(value));
         break;
       }
+      case TImpalaQueryOptions::ABORT_JAVA_UDF_ON_EXCEPTION: {
+        query_options->__set_abort_java_udf_on_exception(IsTrue(value));
+        break;
+      }
       default:
         if (IsRemovedQueryOption(key)) {
           LOG(WARNING) << "Ignoring attempt to set removed query option '" << key << "'";
diff --git a/be/src/service/query-options.h b/be/src/service/query-options.h
index 14bd3ee..4c471c3 100644
--- a/be/src/service/query-options.h
+++ b/be/src/service/query-options.h
@@ -47,7 +47,7 @@ typedef std::unordered_map<string, beeswax::TQueryOptionLevel::type>
 // time we add or remove a query option to/from the enum TImpalaQueryOptions.
 #define QUERY_OPTS_TABLE\
   DCHECK_EQ(_TImpalaQueryOptions_VALUES_TO_NAMES.size(),\
-      TImpalaQueryOptions::PARQUET_DICTIONARY_RUNTIME_FILTER_ENTRY_LIMIT+ 1);\
+      TImpalaQueryOptions::ABORT_JAVA_UDF_ON_EXCEPTION + 1);\
   REMOVED_QUERY_OPT_FN(abort_on_default_limit_exceeded, ABORT_ON_DEFAULT_LIMIT_EXCEEDED)\
   QUERY_OPT_FN(abort_on_error, ABORT_ON_ERROR, TQueryOptionLevel::REGULAR)\
   REMOVED_QUERY_OPT_FN(allow_unsupported_formats, ALLOW_UNSUPPORTED_FORMATS)\
@@ -269,6 +269,8 @@ typedef std::unordered_map<string, beeswax::TQueryOptionLevel::type>
       PARQUET_LATE_MATERIALIZATION_THRESHOLD, TQueryOptionLevel::ADVANCED)\
   QUERY_OPT_FN(parquet_dictionary_runtime_filter_entry_limit,\
       PARQUET_DICTIONARY_RUNTIME_FILTER_ENTRY_LIMIT, TQueryOptionLevel::ADVANCED)\
+  QUERY_OPT_FN(abort_java_udf_on_exception,\
+      ABORT_JAVA_UDF_ON_EXCEPTION, TQueryOptionLevel::ADVANCED)\
   ;
 
 /// Enforce practical limits on some query options to avoid undesired query state.
diff --git a/common/thrift/ImpalaService.thrift b/common/thrift/ImpalaService.thrift
index d16edc2..e2a0705 100644
--- a/common/thrift/ImpalaService.thrift
+++ b/common/thrift/ImpalaService.thrift
@@ -714,6 +714,10 @@ enum TImpalaQueryOptions {
   // enable runtime filtering on the row group. For example, 2 means that runtime filter
   // will be evaluated when the dictionary size is smaller or equal to 2.
   PARQUET_DICTIONARY_RUNTIME_FILTER_ENTRY_LIMIT = 139;
+
+  // Abort the Java UDF if an exception is thrown. Default is that only a
+  // warning will be logged if the Java UDF throws an exception.
+  ABORT_JAVA_UDF_ON_EXCEPTION = 140;
 }
 
 // The summary of a DML statement.
diff --git a/common/thrift/Query.thrift b/common/thrift/Query.thrift
index fa2f187..0313b85 100644
--- a/common/thrift/Query.thrift
+++ b/common/thrift/Query.thrift
@@ -567,6 +567,10 @@ struct TQueryOptions {
   // enable runtime filtering on the row group. For example, 2 means that runtime filter
   // will be evaluated when the dictionary size is smaller or equal to 2.
   140: optional i32 parquet_dictionary_runtime_filter_entry_limit = 1024;
+
+  // Abort the Java UDF if an exception is thrown. Default is that only a
+  // warning will be logged if the Java UDF throws an exception.
+  141: optional bool abort_java_udf_on_exception = false;
 }
 
 // Impala currently has three types of sessions: Beeswax, HiveServer2 and external
diff --git a/testdata/workloads/functional-query/queries/QueryTest/java-udf.test b/testdata/workloads/functional-query/queries/QueryTest/java-udf.test
index 152ba79..82a7fe9 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/java-udf.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/java-udf.test
@@ -104,6 +104,12 @@ boolean
 NULL
 ====
 ---- QUERY
+set abort_java_udf_on_exception=true;
+select throws_exception() from functional.alltypestiny;
+---- CATCH
+Test exception
+====
+---- QUERY
 select throws_exception() from functional.alltypestiny;
 ---- TYPES
 boolean