Posted to commits@spark.apache.org by we...@apache.org on 2021/10/28 12:56:13 UTC

[spark] branch master updated: [SPARK-37136][SQL] Remove code for Hive built-in functions not implemented in Spark

This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 7ed7afe  [SPARK-37136][SQL] Remove code for Hive built-in functions not implemented in Spark
7ed7afe is described below

commit 7ed7afecd44d546391794d2ad2b2e7ea7a8accf7
Author: Angerszhuuuu <an...@gmail.com>
AuthorDate: Thu Oct 28 20:55:24 2021 +0800

    [SPARK-37136][SQL] Remove code for Hive built-in functions not implemented in Spark
    
    ### What changes were proposed in this pull request?
    Since we have implemented `histogram_numeric` natively in Spark, we can now remove the code that checked which function lookups should be passed through to Hive.
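    
    For illustration, a minimal sketch of exercising `histogram_numeric` as a native function (the local session, sample values, and bin count of 3 are arbitrary illustration choices, not part of this patch):
    
    ```scala
    import org.apache.spark.sql.SparkSession
    
    val spark = SparkSession.builder()
      .appName("histogram-numeric-demo")
      .master("local[*]")
      .getOrCreate()
    
    // histogram_numeric(col, nb) approximates the distribution of col with
    // nb bins, returning an array of (x, y) structs.
    spark.sql(
      "SELECT histogram_numeric(value, 3) FROM VALUES (1), (2), (3), (10) AS t(value)"
    ).show(false)
    ```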
    
    ### Why are the changes needed?
    Removes the now-dead Hive fallback code.
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    Existing UTs.
    
    Closes #34410 from AngersZhuuuu/SPARK-37136.
    
    Authored-by: Angerszhuuuu <an...@gmail.com>
    Signed-off-by: Wenchen Fan <we...@databricks.com>
---
 .../sql/catalyst/catalog/SessionCatalog.scala      |  6 +-
 .../apache/spark/sql/hive/HiveSessionCatalog.scala | 87 ----------------------
 2 files changed, 1 insertion(+), 92 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index 141de75..12a5cbc 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -1507,16 +1507,12 @@ class SessionCatalog(
    * Returns whether it is a temporary function. If not existed, returns false.
    */
   def isTemporaryFunction(name: FunctionIdentifier): Boolean = {
-    // copied from HiveSessionCatalog
-    val hiveFunctions = Seq()
-
     // A temporary function is a function that has been registered in functionRegistry
     // without a database name, and is neither a built-in function nor a Hive function
     name.database.isEmpty &&
       (functionRegistry.functionExists(name) || tableFunctionRegistry.functionExists(name)) &&
       !FunctionRegistry.builtin.functionExists(name) &&
-      !TableFunctionRegistry.builtin.functionExists(name) &&
-      !hiveFunctions.contains(name.funcName.toLowerCase(Locale.ROOT))
+      !TableFunctionRegistry.builtin.functionExists(name)
   }
 
   def isTempFunction(name: String): Boolean = {
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
index b11774b..1cc0314 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
@@ -17,20 +17,11 @@
 
 package org.apache.spark.sql.hive
 
-import java.util.Locale
-
-import scala.util.{Failure, Success, Try}
-import scala.util.control.NonFatal
-
 import org.apache.hadoop.conf.Configuration
-import org.apache.hadoop.hive.ql.exec.{FunctionRegistry => HiveFunctionRegistry}
 
-import org.apache.spark.sql.catalyst.FunctionIdentifier
 import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, TableFunctionRegistry}
 import org.apache.spark.sql.catalyst.catalog._
-import org.apache.spark.sql.catalyst.expressions.{Cast, Expression}
 import org.apache.spark.sql.catalyst.parser.ParserInterface
-import org.apache.spark.sql.types.{DecimalType, DoubleType}
 
 private[sql] class HiveSessionCatalog(
     externalCatalogBuilder: () => ExternalCatalog,
@@ -51,82 +42,4 @@ private[sql] class HiveSessionCatalog(
     parser,
     functionResourceLoader,
     functionExpressionBuilder) {
-
-  override def lookupFunction(name: FunctionIdentifier, children: Seq[Expression]): Expression = {
-    try {
-      lookupFunction0(name, children)
-    } catch {
-      case NonFatal(_) if children.exists(_.dataType.isInstanceOf[DecimalType]) =>
-        // SPARK-16228 ExternalCatalog may recognize `double`-type only.
-        val newChildren = children.map { child =>
-          if (child.dataType.isInstanceOf[DecimalType]) Cast(child, DoubleType) else child
-        }
-        lookupFunction0(name, newChildren)
-    }
-  }
-
-  private def lookupFunction0(name: FunctionIdentifier, children: Seq[Expression]): Expression = {
-    val database = name.database.map(formatDatabaseName)
-    val funcName = name.copy(database = database)
-    Try(super.lookupFunction(funcName, children)) match {
-      case Success(expr) => expr
-      case Failure(error) =>
-        if (super.functionExists(name)) {
-          // If the function exists (either in functionRegistry or externalCatalog),
-          // it means that there is an error when we create the Expression using the given children.
-          // We need to throw the original exception.
-          throw error
-        } else {
-          // This function does not exist (neither in functionRegistry or externalCatalog),
-          // let's try to load it as a Hive's built-in function.
-          // Hive is case insensitive.
-          val functionName = funcName.unquotedString.toLowerCase(Locale.ROOT)
-          if (!hiveFunctions.contains(functionName)) {
-            failFunctionLookup(funcName, Some(error))
-          }
-
-          // TODO: Remove this fallback path once we implement the list of fallback functions
-          // defined below in hiveFunctions.
-          val functionInfo = {
-            try {
-              Option(HiveFunctionRegistry.getFunctionInfo(functionName)).getOrElse(
-                failFunctionLookup(funcName, Some(error)))
-            } catch {
-              // If HiveFunctionRegistry.getFunctionInfo throws an exception,
-              // we are failing to load a Hive builtin function, which means that
-              // the given function is not a Hive builtin function.
-              case NonFatal(e) => failFunctionLookup(funcName, Some(e))
-            }
-          }
-          val className = functionInfo.getFunctionClass.getName
-          val functionIdentifier =
-            FunctionIdentifier(functionName.toLowerCase(Locale.ROOT), database)
-          val func = CatalogFunction(functionIdentifier, className, Nil)
-          // Put this Hive built-in function to our function registry.
-          registerFunction(func, overrideIfExists = false)
-          // Now, we need to create the Expression.
-          functionRegistry.lookupFunction(functionIdentifier, children)
-        }
-    }
-  }
-
-  // TODO Removes this method after implementing Spark native "histogram_numeric".
-  override def functionExists(name: FunctionIdentifier): Boolean = {
-    super.functionExists(name) || hiveFunctions.contains(name.funcName)
-  }
-
-  override def isPersistentFunction(name: FunctionIdentifier): Boolean = {
-    super.isPersistentFunction(name) || hiveFunctions.contains(name.funcName)
-  }
-
-  /** List of functions we pass over to Hive. Note that over time this list should go to 0. */
-  // We have a list of Hive built-in functions that we do not support. So, we will check
-  // Hive's function registry and lazily load needed functions into our own function registry.
-  // List of functions we are explicitly not supporting are:
-  // compute_stats, context_ngrams, create_union,
-  // current_user, ewah_bitmap, ewah_bitmap_and, ewah_bitmap_empty, ewah_bitmap_or, field,
-  // in_file, index, matchpath, ngrams, noop, noopstreaming, noopwithmap,
-  // noopwithmapstreaming, parse_url_tuple, reflect2, windowingtablefunction.
-  // Note: don't forget to update SessionCatalog.isTemporaryFunction
-  private val hiveFunctions = Seq()
 }
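
For reference, a minimal sketch of why the fallback can go away (assumptions: a Spark build that already contains the native `histogram_numeric` plus this patch; `FunctionIdentifier` and `FunctionRegistry` are internal catalyst APIs, used here only to illustrate): the function now resolves from Spark's own built-in registry, so `SessionCatalog.isTemporaryFunction` no longer needs the `hiveFunctions` escape hatch.

```scala
import org.apache.spark.sql.catalyst.FunctionIdentifier
import org.apache.spark.sql.catalyst.analysis.FunctionRegistry

// histogram_numeric is registered as a Spark built-in, so the plain
// built-in registry check performed by SessionCatalog.isTemporaryFunction
// covers it without consulting Hive's FunctionRegistry.
val name = FunctionIdentifier("histogram_numeric")
assert(FunctionRegistry.builtin.functionExists(name))
```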

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org