You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2021/10/28 12:56:13 UTC
[spark] branch master updated: [SPARK-37136][SQL] Remove code about
hive buildin function but not implement in spark
This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 7ed7afe [SPARK-37136][SQL] Remove code about hive buildin function but not implement in spark
7ed7afe is described below
commit 7ed7afecd44d546391794d2ad2b2e7ea7a8accf7
Author: Angerszhuuuu <an...@gmail.com>
AuthorDate: Thu Oct 28 20:55:24 2021 +0800
[SPARK-37136][SQL] Remove code about hive buildin function but not implement in spark
### What changes were proposed in this pull request?
Since `histogram_numeric` is now implemented natively in Spark, we can remove the code that checks which functions are passed over to Hive.
### Why are the changes needed?
Remove unused code
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Existing UTs.
Closes #34410 from AngersZhuuuu/SPARK-37136.
Authored-by: Angerszhuuuu <an...@gmail.com>
Signed-off-by: Wenchen Fan <we...@databricks.com>
---
.../sql/catalyst/catalog/SessionCatalog.scala | 6 +-
.../apache/spark/sql/hive/HiveSessionCatalog.scala | 87 ----------------------
2 files changed, 1 insertion(+), 92 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index 141de75..12a5cbc 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -1507,16 +1507,12 @@ class SessionCatalog(
* Returns whether it is a temporary function. If not existed, returns false.
*/
def isTemporaryFunction(name: FunctionIdentifier): Boolean = {
- // copied from HiveSessionCatalog
- val hiveFunctions = Seq()
-
// A temporary function is a function that has been registered in functionRegistry
// without a database name, and is neither a built-in function nor a Hive function
name.database.isEmpty &&
(functionRegistry.functionExists(name) || tableFunctionRegistry.functionExists(name)) &&
!FunctionRegistry.builtin.functionExists(name) &&
- !TableFunctionRegistry.builtin.functionExists(name) &&
- !hiveFunctions.contains(name.funcName.toLowerCase(Locale.ROOT))
+ !TableFunctionRegistry.builtin.functionExists(name)
}
def isTempFunction(name: String): Boolean = {
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
index b11774b..1cc0314 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
@@ -17,20 +17,11 @@
package org.apache.spark.sql.hive
-import java.util.Locale
-
-import scala.util.{Failure, Success, Try}
-import scala.util.control.NonFatal
-
import org.apache.hadoop.conf.Configuration
-import org.apache.hadoop.hive.ql.exec.{FunctionRegistry => HiveFunctionRegistry}
-import org.apache.spark.sql.catalyst.FunctionIdentifier
import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, TableFunctionRegistry}
import org.apache.spark.sql.catalyst.catalog._
-import org.apache.spark.sql.catalyst.expressions.{Cast, Expression}
import org.apache.spark.sql.catalyst.parser.ParserInterface
-import org.apache.spark.sql.types.{DecimalType, DoubleType}
private[sql] class HiveSessionCatalog(
externalCatalogBuilder: () => ExternalCatalog,
@@ -51,82 +42,4 @@ private[sql] class HiveSessionCatalog(
parser,
functionResourceLoader,
functionExpressionBuilder) {
-
- override def lookupFunction(name: FunctionIdentifier, children: Seq[Expression]): Expression = {
- try {
- lookupFunction0(name, children)
- } catch {
- case NonFatal(_) if children.exists(_.dataType.isInstanceOf[DecimalType]) =>
- // SPARK-16228 ExternalCatalog may recognize `double`-type only.
- val newChildren = children.map { child =>
- if (child.dataType.isInstanceOf[DecimalType]) Cast(child, DoubleType) else child
- }
- lookupFunction0(name, newChildren)
- }
- }
-
- private def lookupFunction0(name: FunctionIdentifier, children: Seq[Expression]): Expression = {
- val database = name.database.map(formatDatabaseName)
- val funcName = name.copy(database = database)
- Try(super.lookupFunction(funcName, children)) match {
- case Success(expr) => expr
- case Failure(error) =>
- if (super.functionExists(name)) {
- // If the function exists (either in functionRegistry or externalCatalog),
- // it means that there is an error when we create the Expression using the given children.
- // We need to throw the original exception.
- throw error
- } else {
- // This function does not exist (neither in functionRegistry or externalCatalog),
- // let's try to load it as a Hive's built-in function.
- // Hive is case insensitive.
- val functionName = funcName.unquotedString.toLowerCase(Locale.ROOT)
- if (!hiveFunctions.contains(functionName)) {
- failFunctionLookup(funcName, Some(error))
- }
-
- // TODO: Remove this fallback path once we implement the list of fallback functions
- // defined below in hiveFunctions.
- val functionInfo = {
- try {
- Option(HiveFunctionRegistry.getFunctionInfo(functionName)).getOrElse(
- failFunctionLookup(funcName, Some(error)))
- } catch {
- // If HiveFunctionRegistry.getFunctionInfo throws an exception,
- // we are failing to load a Hive builtin function, which means that
- // the given function is not a Hive builtin function.
- case NonFatal(e) => failFunctionLookup(funcName, Some(e))
- }
- }
- val className = functionInfo.getFunctionClass.getName
- val functionIdentifier =
- FunctionIdentifier(functionName.toLowerCase(Locale.ROOT), database)
- val func = CatalogFunction(functionIdentifier, className, Nil)
- // Put this Hive built-in function to our function registry.
- registerFunction(func, overrideIfExists = false)
- // Now, we need to create the Expression.
- functionRegistry.lookupFunction(functionIdentifier, children)
- }
- }
- }
-
- // TODO Removes this method after implementing Spark native "histogram_numeric".
- override def functionExists(name: FunctionIdentifier): Boolean = {
- super.functionExists(name) || hiveFunctions.contains(name.funcName)
- }
-
- override def isPersistentFunction(name: FunctionIdentifier): Boolean = {
- super.isPersistentFunction(name) || hiveFunctions.contains(name.funcName)
- }
-
- /** List of functions we pass over to Hive. Note that over time this list should go to 0. */
- // We have a list of Hive built-in functions that we do not support. So, we will check
- // Hive's function registry and lazily load needed functions into our own function registry.
- // List of functions we are explicitly not supporting are:
- // compute_stats, context_ngrams, create_union,
- // current_user, ewah_bitmap, ewah_bitmap_and, ewah_bitmap_empty, ewah_bitmap_or, field,
- // in_file, index, matchpath, ngrams, noop, noopstreaming, noopwithmap,
- // noopwithmapstreaming, parse_url_tuple, reflect2, windowingtablefunction.
- // Note: don't forget to update SessionCatalog.isTemporaryFunction
- private val hiveFunctions = Seq()
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org