You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by rx...@apache.org on 2016/11/07 09:16:41 UTC
spark git commit: [SPARK-16904][SQL] Removal of Hive Built-in Hash
Functions and TestHiveFunctionRegistry
Repository: spark
Updated Branches:
refs/heads/master 9db06c442 -> 57626a557
[SPARK-16904][SQL] Removal of Hive Built-in Hash Functions and TestHiveFunctionRegistry
### What changes were proposed in this pull request?
Currently, the Hive built-in `hash` function has not been used in Spark since Spark 2.0. The public interface does not allow users to unregister the Spark built-in functions. Thus, users will never use Hive's built-in `hash` function.
The only exception here is `TestHiveFunctionRegistry`, which allows users to unregister the built-in functions. Thus, we can load Hive's hash function in the test cases. If we disable it, 10+ test cases will fail because the results are different from the Hive golden answer files.
This PR removes `hash` from the list of `hiveFunctions` in `HiveSessionCatalog`. It also removes `TestHiveFunctionRegistry`. This removal makes it easier for us to remove `TestHiveSessionState` in the future.
### How was this patch tested?
N/A
Author: gatorsmile <ga...@gmail.com>
Closes #14498 from gatorsmile/removeHash.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/57626a55
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/57626a55
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/57626a55
Branch: refs/heads/master
Commit: 57626a55703a189e03148398f67c36cd0e557044
Parents: 9db06c4
Author: gatorsmile <ga...@gmail.com>
Authored: Mon Nov 7 01:16:37 2016 -0800
Committer: Reynold Xin <rx...@databricks.com>
Committed: Mon Nov 7 01:16:37 2016 -0800
----------------------------------------------------------------------
.../hive/execution/HiveCompatibilitySuite.scala | 41 ++++++++++----------
.../spark/sql/hive/HiveSessionCatalog.scala | 1 -
.../apache/spark/sql/hive/test/TestHive.scala | 28 -------------
3 files changed, 20 insertions(+), 50 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/57626a55/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
index f5d10de..5cd4935 100644
--- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
+++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
@@ -57,8 +57,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
TestHive.setConf(SQLConf.COLUMN_BATCH_SIZE, 5)
// Enable in-memory partition pruning for testing purposes
TestHive.setConf(SQLConf.IN_MEMORY_PARTITION_PRUNING, true)
- // Use Hive hash expression instead of the native one
- TestHive.sessionState.functionRegistry.unregisterFunction("hash")
// Ensures that the plans generation use metastore relation and not OrcRelation
// Was done because SqlBuilder does not work with plans having logical relation
TestHive.setConf(HiveUtils.CONVERT_METASTORE_ORC, false)
@@ -76,7 +74,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
TestHive.setConf(SQLConf.IN_MEMORY_PARTITION_PRUNING, originalInMemoryPartitionPruning)
TestHive.setConf(HiveUtils.CONVERT_METASTORE_ORC, originalConvertMetastoreOrc)
TestHive.setConf(SQLConf.CROSS_JOINS_ENABLED, originalCrossJoinEnabled)
- TestHive.sessionState.functionRegistry.restore()
// For debugging dump some statistics about how much time was spent in various optimizer rules
logWarning(RuleExecutor.dumpTimeSpent())
@@ -581,7 +578,26 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"auto_join6",
"auto_join7",
"auto_join8",
- "auto_join9"
+ "auto_join9",
+
+ // These tests are based on the Hive's hash function, which is different from Spark
+ "auto_join19",
+ "auto_join22",
+ "auto_join25",
+ "auto_join26",
+ "auto_join27",
+ "auto_join28",
+ "auto_join30",
+ "auto_join31",
+ "auto_join_nulls",
+ "auto_join_reordering_values",
+ "correlationoptimizer1",
+ "correlationoptimizer2",
+ "correlationoptimizer3",
+ "correlationoptimizer4",
+ "multiMapJoin1",
+ "orc_dictionary_threshold",
+ "udf_hash"
)
/**
@@ -601,16 +617,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"annotate_stats_part",
"annotate_stats_table",
"annotate_stats_union",
- "auto_join19",
- "auto_join22",
- "auto_join25",
- "auto_join26",
- "auto_join27",
- "auto_join28",
- "auto_join30",
- "auto_join31",
- "auto_join_nulls",
- "auto_join_reordering_values",
"binary_constant",
"binarysortable_1",
"cast1",
@@ -623,15 +629,11 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"compute_stats_long",
"compute_stats_string",
"convert_enum_to_string",
- "correlationoptimizer1",
"correlationoptimizer10",
"correlationoptimizer11",
"correlationoptimizer13",
"correlationoptimizer14",
"correlationoptimizer15",
- "correlationoptimizer2",
- "correlationoptimizer3",
- "correlationoptimizer4",
"correlationoptimizer6",
"correlationoptimizer7",
"correlationoptimizer8",
@@ -871,7 +873,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"merge2",
"merge4",
"mergejoins",
- "multiMapJoin1",
"multiMapJoin2",
"multi_insert_gby",
"multi_insert_gby3",
@@ -893,7 +894,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"nullinput2",
"nullscript",
"optional_outer",
- "orc_dictionary_threshold",
"order",
"order2",
"outer_join_ppr",
@@ -1026,7 +1026,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"udf_from_unixtime",
"udf_greaterthan",
"udf_greaterthanorequal",
- "udf_hash",
"udf_hex",
"udf_if",
"udf_index",
http://git-wip-us.apache.org/repos/asf/spark/blob/57626a55/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
index 4f2910a..9df20ce 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
@@ -233,7 +233,6 @@ private[sql] class HiveSessionCatalog(
// in_file, index, matchpath, ngrams, noop, noopstreaming, noopwithmap,
// noopwithmapstreaming, parse_url_tuple, reflect2, windowingtablefunction.
private val hiveFunctions = Seq(
- "hash",
"histogram_numeric",
"percentile"
)
http://git-wip-us.apache.org/repos/asf/spark/blob/57626a55/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
index 9000044..a8dd510 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
@@ -492,24 +492,6 @@ private[hive] class TestHiveQueryExecution(
}
}
-
-private[hive] class TestHiveFunctionRegistry extends SimpleFunctionRegistry {
-
- private val removedFunctions =
- collection.mutable.ArrayBuffer.empty[(String, (ExpressionInfo, FunctionBuilder))]
-
- def unregisterFunction(name: String): Unit = synchronized {
- functionBuilders.remove(name).foreach(f => removedFunctions += name -> f)
- }
-
- def restore(): Unit = synchronized {
- removedFunctions.foreach {
- case (name, (info, builder)) => registerFunction(name, info, builder)
- }
- }
-}
-
-
private[hive] class TestHiveSessionState(
sparkSession: TestHiveSparkSession)
extends HiveSessionState(sparkSession) { self =>
@@ -525,16 +507,6 @@ private[hive] class TestHiveSessionState(
}
}
- override lazy val functionRegistry: TestHiveFunctionRegistry = {
- // We use TestHiveFunctionRegistry at here to track functions that have been explicitly
- // unregistered (through TestHiveFunctionRegistry.unregisterFunction method).
- val fr = new TestHiveFunctionRegistry
- org.apache.spark.sql.catalyst.analysis.FunctionRegistry.expressions.foreach {
- case (name, (info, builder)) => fr.registerFunction(name, info, builder)
- }
- fr
- }
-
override def executePlan(plan: LogicalPlan): TestHiveQueryExecution = {
new TestHiveQueryExecution(sparkSession, plan)
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org