Posted to commits@spark.apache.org by gu...@apache.org on 2020/04/24 02:20:27 UTC

[spark] branch master updated: [SPARK-31526][SQL][TESTS] Add a new test suite for ExpressionInfo

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 42f496f  [SPARK-31526][SQL][TESTS] Add a new test suite for ExpressionInfo
42f496f is described below

commit 42f496f6ac82e51b5ce2463a375f78cb263f1e32
Author: Takeshi Yamamuro <ya...@apache.org>
AuthorDate: Fri Apr 24 11:19:20 2020 +0900

    [SPARK-31526][SQL][TESTS] Add a new test suite for ExpressionInfo
    
    ### What changes were proposed in this pull request?
    
    This PR adds a new test suite for `ExpressionInfo`. The major changes are as follows:
    
     - Added a new test suite named `ExpressionInfoSuite`
     - To improve test coverage, added a test for error handling in `ExpressionInfoSuite`
     - Moved the `ExpressionInfo`-related tests from `UDFSuite` to `ExpressionInfoSuite`
     - Moved the related tests from `SQLQuerySuite` to `ExpressionInfoSuite`
     - Added a comment in `ExpressionInfoSuite` (followup of https://github.com/apache/spark/pull/28224)
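
    As a minimal sketch of the kind of lookup the new suite performs,
    assuming a `spark` session is in scope (the suite itself mixes in
    `SharedSparkSession`); the `upper` assertions are taken from the
    tests in this commit:

        import org.apache.spark.sql.catalyst.FunctionIdentifier

        val info = spark.sessionState.catalog
          .lookupFunctionInfo(FunctionIdentifier("upper"))
        assert(info.getName == "upper")
        assert(info.getSince == "1.0.1")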
    
    ### Why are the changes needed?
    
    To improve test suites/coverage.
    
    ### Does this PR introduce any user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Added tests.
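
    For reference, the new suite can be run on its own with something like
    the following (a typical sbt invocation for this module; adjust to your
    local setup):

        build/sbt "sql/testOnly *ExpressionInfoSuite"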
    
    Closes #28308 from maropu/SPARK-31526.
    
    Authored-by: Takeshi Yamamuro <ya...@apache.org>
    Signed-off-by: HyukjinKwon <gu...@apache.org>
---
 .../expressions/ExpressionDescription.java         |   6 +
 .../scala/org/apache/spark/sql/SQLQuerySuite.scala |  80 -----------
 .../test/scala/org/apache/spark/sql/UDFSuite.scala |  31 ----
 .../sql/expressions/ExpressionInfoSuite.scala      | 156 +++++++++++++++++++++
 4 files changed, 162 insertions(+), 111 deletions(-)

diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionDescription.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionDescription.java
index 089fbe5..579f4b3 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionDescription.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionDescription.java
@@ -103,6 +103,12 @@ public @interface ExpressionDescription {
     String arguments() default "";
     String examples() default "";
     String note() default "";
+    /**
+     * Valid group names are almost the same as those defined as `groupname` in
+     * `sql/functions.scala`, except that `collection_funcs` is split into three
+     * fine-grained groups: `array_funcs`, `map_funcs`, and `json_funcs`. See
+     * `ExpressionInfo` for the detailed group names.
+     */
     String group() default "";
     String since() default "";
     String deprecated() default "";
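
For illustration, here is a minimal sketch of the group check this comment
documents, mirroring the constructor arguments used by the new
ExpressionInfoSuite below (the `scala.util.Try` wrapper is only for the
sketch):

    import scala.util.Try
    import org.apache.spark.sql.catalyst.expressions.ExpressionInfo

    // A known group name is accepted and round-trips through getGroup.
    val ok = new ExpressionInfo(
      "testClass", null, "testName", null, "", "", "", "array_funcs", "", "")
    assert(ok.getGroup == "array_funcs")

    // An unknown group name is rejected with an IllegalArgumentException.
    val bad = Try(new ExpressionInfo(
      "testClass", null, "testName", null, "", "", "", "invalid_group_funcs", "", ""))
    assert(bad.isFailure)
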
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index e199dcc..a958ab8 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -22,8 +22,6 @@ import java.net.{MalformedURLException, URL}
 import java.sql.{Date, Timestamp}
 import java.util.concurrent.atomic.AtomicBoolean
 
-import scala.collection.parallel.immutable.ParVector
-
 import org.apache.spark.{AccumulatorSuite, SparkException}
 import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart}
 import org.apache.spark.sql.catalyst.expressions.GenericRow
@@ -31,7 +29,6 @@ import org.apache.spark.sql.catalyst.expressions.aggregate.{Complete, Partial}
 import org.apache.spark.sql.catalyst.optimizer.{ConvertToLocalRelation, NestedColumnAliasingSuite}
 import org.apache.spark.sql.catalyst.plans.logical.Project
 import org.apache.spark.sql.catalyst.util.StringUtils
-import org.apache.spark.sql.execution.HiveResult.hiveResultString
 import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
 import org.apache.spark.sql.execution.aggregate.{HashAggregateExec, ObjectHashAggregateExec, SortAggregateExec}
 import org.apache.spark.sql.execution.columnar.InMemoryTableScanExec
@@ -126,83 +123,6 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
     }
   }
 
-  test("using _FUNC_ instead of function names in examples") {
-    val exampleRe = "(>.*;)".r
-    val setStmtRe = "(?i)^(>\\s+set\\s+).+".r
-    val ignoreSet = Set(
-      // Examples for CaseWhen show simpler syntax:
-      // `CASE WHEN ... THEN ... WHEN ... THEN ... END`
-      "org.apache.spark.sql.catalyst.expressions.CaseWhen",
-      // _FUNC_ is replaced by `locate` but `locate(... IN ...)` is not supported
-      "org.apache.spark.sql.catalyst.expressions.StringLocate",
-      // _FUNC_ is replaced by `%` which causes a parsing error on `SELECT %(2, 1.8)`
-      "org.apache.spark.sql.catalyst.expressions.Remainder",
-      // Examples demonstrate alternative names, see SPARK-20749
-      "org.apache.spark.sql.catalyst.expressions.Length")
-    spark.sessionState.functionRegistry.listFunction().foreach { funcId =>
-      val info = spark.sessionState.catalog.lookupFunctionInfo(funcId)
-      val className = info.getClassName
-      withClue(s"Expression class '$className'") {
-        val exprExamples = info.getOriginalExamples
-        if (!exprExamples.isEmpty && !ignoreSet.contains(className)) {
-          assert(exampleRe.findAllIn(exprExamples).toIterable
-            .filter(setStmtRe.findFirstIn(_).isEmpty) // Ignore SET commands
-            .forall(_.contains("_FUNC_")))
-        }
-      }
-    }
-  }
-
-  test("check outputs of expression examples") {
-    def unindentAndTrim(s: String): String = {
-      s.replaceAll("\n\\s+", "\n").trim
-    }
-    val beginSqlStmtRe = "  > ".r
-    val endSqlStmtRe = ";\n".r
-    def checkExampleSyntax(example: String): Unit = {
-      val beginStmtNum = beginSqlStmtRe.findAllIn(example).length
-      val endStmtNum = endSqlStmtRe.findAllIn(example).length
-      assert(beginStmtNum === endStmtNum,
-        "The number of ` > ` does not match to the number of `;`")
-    }
-    val exampleRe = """^(.+);\n(?s)(.+)$""".r
-    val ignoreSet = Set(
-      // One of examples shows getting the current timestamp
-      "org.apache.spark.sql.catalyst.expressions.UnixTimestamp",
-      // Random output without a seed
-      "org.apache.spark.sql.catalyst.expressions.Rand",
-      "org.apache.spark.sql.catalyst.expressions.Randn",
-      "org.apache.spark.sql.catalyst.expressions.Shuffle",
-      "org.apache.spark.sql.catalyst.expressions.Uuid",
-      // The example calls methods that return unstable results.
-      "org.apache.spark.sql.catalyst.expressions.CallMethodViaReflection")
-
-    val parFuncs = new ParVector(spark.sessionState.functionRegistry.listFunction().toVector)
-    parFuncs.foreach { funcId =>
-      // Examples can change settings. We clone the session to prevent tests clashing.
-      val clonedSpark = spark.cloneSession()
-      // Coalescing partitions can change result order, so disable it.
-      clonedSpark.sessionState.conf.setConf(SQLConf.COALESCE_PARTITIONS_ENABLED, false)
-      val info = clonedSpark.sessionState.catalog.lookupFunctionInfo(funcId)
-      val className = info.getClassName
-      if (!ignoreSet.contains(className)) {
-        withClue(s"Function '${info.getName}', Expression class '$className'") {
-          val example = info.getExamples
-          checkExampleSyntax(example)
-          example.split("  > ").toList.foreach(_ match {
-            case exampleRe(sql, output) =>
-              val df = clonedSpark.sql(sql)
-              val actual = unindentAndTrim(
-                hiveResultString(df.queryExecution.executedPlan).mkString("\n"))
-              val expected = unindentAndTrim(output)
-              assert(actual === expected)
-            case _ =>
-          })
-        }
-      }
-    }
-  }
-
   test("SPARK-6743: no columns from cache") {
     Seq(
       (83, 0, 38),
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala
index 92ea0ce..e2747d7 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala
@@ -20,8 +20,6 @@ package org.apache.spark.sql
 import java.math.BigDecimal
 
 import org.apache.spark.sql.api.java._
-import org.apache.spark.sql.catalyst.FunctionIdentifier
-import org.apache.spark.sql.catalyst.expressions.ExpressionInfo
 import org.apache.spark.sql.catalyst.plans.logical.Project
 import org.apache.spark.sql.execution.{QueryExecution, SimpleMode}
 import org.apache.spark.sql.execution.columnar.InMemoryRelation
@@ -534,35 +532,6 @@ class UDFSuite extends QueryTest with SharedSparkSession {
     assert(spark.range(2).select(nonDeterministicJavaUDF()).distinct().count() == 2)
   }
 
-  test("Replace _FUNC_ in UDF ExpressionInfo") {
-    val info = spark.sessionState.catalog.lookupFunctionInfo(FunctionIdentifier("upper"))
-    assert(info.getName === "upper")
-    assert(info.getClassName === "org.apache.spark.sql.catalyst.expressions.Upper")
-    assert(info.getUsage === "upper(str) - Returns `str` with all characters changed to uppercase.")
-    assert(info.getExamples.contains("> SELECT upper('SparkSql');"))
-    assert(info.getSince === "1.0.1")
-    assert(info.getNote === "")
-    assert(info.getExtended.contains("> SELECT upper('SparkSql');"))
-  }
-
-  test("group info in ExpressionInfo") {
-    val info = spark.sessionState.catalog.lookupFunctionInfo(FunctionIdentifier("sum"))
-    assert(info.getGroup === "agg_funcs")
-
-    Seq("agg_funcs", "array_funcs", "datetime_funcs", "json_funcs", "map_funcs", "window_funcs")
-        .foreach { groupName =>
-      val info = new ExpressionInfo(
-        "testClass", null, "testName", null, "", "", "", groupName, "", "")
-      assert(info.getGroup === groupName)
-    }
-
-    val errMsg = intercept[IllegalArgumentException] {
-      val invalidGroupName = "invalid_group_funcs"
-      new ExpressionInfo("testClass", null, "testName", null, "", "", "", invalidGroupName, "", "")
-    }.getMessage
-    assert(errMsg.contains("'group' is malformed in the expression [testName]."))
-  }
-
   test("SPARK-28521 error message for CAST(parameter types contains DataType)") {
     val e = intercept[AnalysisException] {
       spark.sql("SELECT CAST(1)")
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala
new file mode 100644
index 0000000..9a6fe46
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.expressions
+
+import scala.collection.parallel.immutable.ParVector
+
+import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.catalyst.FunctionIdentifier
+import org.apache.spark.sql.catalyst.expressions.ExpressionInfo
+import org.apache.spark.sql.execution.HiveResult.hiveResultString
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.test.SharedSparkSession
+
+class ExpressionInfoSuite extends SparkFunSuite with SharedSparkSession {
+
+  test("Replace _FUNC_ in ExpressionInfo") {
+    val info = spark.sessionState.catalog.lookupFunctionInfo(FunctionIdentifier("upper"))
+    assert(info.getName === "upper")
+    assert(info.getClassName === "org.apache.spark.sql.catalyst.expressions.Upper")
+    assert(info.getUsage === "upper(str) - Returns `str` with all characters changed to uppercase.")
+    assert(info.getExamples.contains("> SELECT upper('SparkSql');"))
+    assert(info.getSince === "1.0.1")
+    assert(info.getNote === "")
+    assert(info.getExtended.contains("> SELECT upper('SparkSql');"))
+  }
+
+  test("group info in ExpressionInfo") {
+    val info = spark.sessionState.catalog.lookupFunctionInfo(FunctionIdentifier("sum"))
+    assert(info.getGroup === "agg_funcs")
+
+    Seq("agg_funcs", "array_funcs", "datetime_funcs", "json_funcs", "map_funcs", "window_funcs")
+        .foreach { groupName =>
+      val info = new ExpressionInfo(
+        "testClass", null, "testName", null, "", "", "", groupName, "", "")
+      assert(info.getGroup === groupName)
+    }
+
+    val errMsg = intercept[IllegalArgumentException] {
+      val invalidGroupName = "invalid_group_funcs"
+      new ExpressionInfo("testClass", null, "testName", null, "", "", "", invalidGroupName, "", "")
+    }.getMessage
+    assert(errMsg.contains("'group' is malformed in the expression [testName]."))
+  }
+
+  test("error handling in ExpressionInfo") {
+    val errMsg1 = intercept[IllegalArgumentException] {
+      val invalidNote = "  invalid note"
+      new ExpressionInfo("testClass", null, "testName", null, "", "", invalidNote, "", "", "")
+    }.getMessage
+    assert(errMsg1.contains("'note' is malformed in the expression [testName]."))
+
+    val errMsg2 = intercept[IllegalArgumentException] {
+      val invalidSince = "-3.0.0"
+      new ExpressionInfo("testClass", null, "testName", null, "", "", "", "", invalidSince, "")
+    }.getMessage
+    assert(errMsg2.contains("'since' is malformed in the expression [testName]."))
+
+    val errMsg3 = intercept[IllegalArgumentException] {
+      val invalidDeprecated = "  invalid deprecated"
+      new ExpressionInfo("testClass", null, "testName", null, "", "", "", "", "", invalidDeprecated)
+    }.getMessage
+    assert(errMsg3.contains("'deprecated' is malformed in the expression [testName]."))
+  }
+
+  test("using _FUNC_ instead of function names in examples") {
+    val exampleRe = "(>.*;)".r
+    val setStmtRe = "(?i)^(>\\s+set\\s+).+".r
+    val ignoreSet = Set(
+      // Examples for CaseWhen show simpler syntax:
+      // `CASE WHEN ... THEN ... WHEN ... THEN ... END`
+      "org.apache.spark.sql.catalyst.expressions.CaseWhen",
+      // _FUNC_ is replaced by `locate` but `locate(... IN ...)` is not supported
+      "org.apache.spark.sql.catalyst.expressions.StringLocate",
+      // _FUNC_ is replaced by `%` which causes a parsing error on `SELECT %(2, 1.8)`
+      "org.apache.spark.sql.catalyst.expressions.Remainder",
+      // Examples demonstrate alternative names, see SPARK-20749
+      "org.apache.spark.sql.catalyst.expressions.Length")
+    spark.sessionState.functionRegistry.listFunction().foreach { funcId =>
+      val info = spark.sessionState.catalog.lookupFunctionInfo(funcId)
+      val className = info.getClassName
+      withClue(s"Expression class '$className'") {
+        val exprExamples = info.getOriginalExamples
+        if (!exprExamples.isEmpty && !ignoreSet.contains(className)) {
+          assert(exampleRe.findAllIn(exprExamples).toIterable
+            .filter(setStmtRe.findFirstIn(_).isEmpty) // Ignore SET commands
+            .forall(_.contains("_FUNC_")))
+        }
+      }
+    }
+  }
+
+  test("check outputs of expression examples") {
+    def unindentAndTrim(s: String): String = {
+      s.replaceAll("\n\\s+", "\n").trim
+    }
+    val beginSqlStmtRe = "  > ".r
+    val endSqlStmtRe = ";\n".r
+    def checkExampleSyntax(example: String): Unit = {
+      val beginStmtNum = beginSqlStmtRe.findAllIn(example).length
+      val endStmtNum = endSqlStmtRe.findAllIn(example).length
+      assert(beginStmtNum === endStmtNum,
+        "The number of ` > ` does not match to the number of `;`")
+    }
+    val exampleRe = """^(.+);\n(?s)(.+)$""".r
+    val ignoreSet = Set(
+      // One of the examples shows getting the current timestamp
+      "org.apache.spark.sql.catalyst.expressions.UnixTimestamp",
+      // Random output without a seed
+      "org.apache.spark.sql.catalyst.expressions.Rand",
+      "org.apache.spark.sql.catalyst.expressions.Randn",
+      "org.apache.spark.sql.catalyst.expressions.Shuffle",
+      "org.apache.spark.sql.catalyst.expressions.Uuid",
+      // The example calls methods that return unstable results.
+      "org.apache.spark.sql.catalyst.expressions.CallMethodViaReflection")
+
+    val parFuncs = new ParVector(spark.sessionState.functionRegistry.listFunction().toVector)
+    parFuncs.foreach { funcId =>
+      // Examples can change settings. We clone the session to prevent tests clashing.
+      val clonedSpark = spark.cloneSession()
+      // Coalescing partitions can change result order, so disable it.
+      clonedSpark.sessionState.conf.setConf(SQLConf.COALESCE_PARTITIONS_ENABLED, false)
+      val info = clonedSpark.sessionState.catalog.lookupFunctionInfo(funcId)
+      val className = info.getClassName
+      if (!ignoreSet.contains(className)) {
+        withClue(s"Function '${info.getName}', Expression class '$className'") {
+          val example = info.getExamples
+          checkExampleSyntax(example)
+          example.split("  > ").toList.foreach {
+            case exampleRe(sql, output) =>
+              val df = clonedSpark.sql(sql)
+              val actual = unindentAndTrim(
+                hiveResultString(df.queryExecution.executedPlan).mkString("\n"))
+              val expected = unindentAndTrim(output)
+              assert(actual === expected)
+            case _ =>
+          }
+        }
+      }
+    }
+  }
+}

