You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2020/09/14 13:44:23 UTC

[GitHub] [spark] maropu commented on a change in pull request #29743: [SPARK-32870][DOCS][SQL] Make sure that all expressions have their ExpressionDescription filled

maropu commented on a change in pull request #29743:
URL: https://github.com/apache/spark/pull/29743#discussion_r487886628



##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SparkPartitionID.scala
##########
@@ -26,7 +26,13 @@ import org.apache.spark.sql.types.{DataType, IntegerType}
  * Expression that returns the current partition id.
  */
 @ExpressionDescription(
-  usage = "_FUNC_() - Returns the current partition id.")
+  usage = "_FUNC_() - Returns the current partition id.",
+  examples = """
+    Examples:
+      > SELECT _FUNC_();
+       0

Review comment:
       Can you add this nondeterministic function to the `ignoreSet` of `test("check outputs of expression examples")`?

##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
##########
@@ -660,8 +662,13 @@ object FunctionRegistry {
     }
     val clazz = scala.reflect.classTag[Cast].runtimeClass
     val usage = "_FUNC_(expr) - Casts the value `expr` to the target data type `_FUNC_`."
-    val expressionInfo =
-      new ExpressionInfo(clazz.getCanonicalName, null, name, usage, "", "", "", "", "", "")
+    val examples = s"""
+    Examples:
+      > SELECT _FUNC_($exampleIn);
+       $exampleOut
+  """

Review comment:
       nit: wrong indents.

##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/inputFileBlock.scala
##########
@@ -74,7 +87,13 @@ case class InputFileBlockStart() extends LeafExpression with Nondeterministic {
 
 
 @ExpressionDescription(
-  usage = "_FUNC_() - Returns the length of the block being read, or -1 if not available.")
+  usage = "_FUNC_() - Returns the length of the block being read, or -1 if not available.",
+  examples = """
+    Examples:
+      > SELECT _FUNC_();
+       -1
+  """,
+  since = "2.2.0")
 case class InputFileBlockLength() extends LeafExpression with Nondeterministic {

Review comment:
       ditto: please add it to the `ignoreSet`.

##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala
##########
@@ -346,10 +349,14 @@ object CreateStruct {
       "struct",
       "_FUNC_(col1, col2, col3, ...) - Creates a struct with the given field values.",
       "",
+      """
+    Examples:
+      > SELECT _FUNC_(1, 2, 3);
+       {"col1":1,"col2":2,"col3":3}
+  """,

Review comment:
       nit: wrong indents.

##########
File path: sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala
##########
@@ -105,11 +105,28 @@ class ExpressionInfoSuite extends SparkFunSuite with SharedSparkSession {
     }
   }
 
+  test("SPARK-32870: Default expressions in FunctionRegistry should have their " +
+    "usage, examples and since filled") {
+    val ignoreSet = Set(
+      "org.apache.spark.sql.catalyst.expressions.TimeWindow")

Review comment:
       Why did you put `TimeWindow` in this list?

##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/inputFileBlock.scala
##########
@@ -51,7 +58,13 @@ case class InputFileName() extends LeafExpression with Nondeterministic {
 
 
 @ExpressionDescription(
-  usage = "_FUNC_() - Returns the start offset of the block being read, or -1 if not available.")
+  usage = "_FUNC_() - Returns the start offset of the block being read, or -1 if not available.",
+  examples = """
+    Examples:
+      > SELECT _FUNC_();
+       -1
+  """,
+  since = "2.2.0")
 case class InputFileBlockStart() extends LeafExpression with Nondeterministic {

Review comment:
       ditto: please add it to the `ignoreSet`.

##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/inputFileBlock.scala
##########
@@ -24,9 +24,16 @@ import org.apache.spark.sql.catalyst.expressions.codegen.Block._
 import org.apache.spark.sql.types.{DataType, LongType, StringType}
 import org.apache.spark.unsafe.types.UTF8String
 
-
+// scalastyle:off whitespace.end.of.line
 @ExpressionDescription(
-  usage = "_FUNC_() - Returns the name of the file being read, or empty string if not available.")
+  usage = "_FUNC_() - Returns the name of the file being read, or empty string if not available.",
+  examples = """
+    Examples:
+      > SELECT _FUNC_();
+
+  """,
+  since = "1.5.0")
+// scalastyle:on whitespace.end.of.line
 case class InputFileName() extends LeafExpression with Nondeterministic {

Review comment:
       ditto: please add it to the `ignoreSet`.

##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/MonotonicallyIncreasingID.scala
##########
@@ -40,7 +40,13 @@ import org.apache.spark.sql.types.{DataType, LongType}
       within each partition. The assumption is that the data frame has less than 1 billion
       partitions, and each partition has less than 8 billion records.
       The function is non-deterministic because its result depends on partition IDs.
-  """)
+  """,
+  examples = """
+    Examples:
+      > SELECT _FUNC_();
+       0

Review comment:
       Can you add this nondeterministic function to the `ignoreSet` of `test("check outputs of expression examples")`?

##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala
##########
@@ -647,7 +652,8 @@ object XxHash64Function extends InterpretedHashFunction {
  * we can guarantee shuffle and bucketing have same data distribution
  */
 @ExpressionDescription(
-  usage = "_FUNC_(expr1, expr2, ...) - Returns a hash value of the arguments.")
+  usage = "_FUNC_(expr1, expr2, ...) - Returns a hash value of the arguments.",
+  since = "2.2.0")
 case class HiveHash(children: Seq[Expression]) extends HashExpression[Int] {

Review comment:
       Yea, I think yes.

##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala
##########
@@ -36,7 +36,8 @@ import org.apache.spark.unsafe.types.UTF8String
     Examples:
       > SELECT _FUNC_(1, 2, 3);
        [1,2,3]
-  """)
+  """,
+  since = "1.1.0")
 case class CreateArray(children: Seq[Expression], useStringTypeWhenEmpty: Boolean)

Review comment:
       The decision looks okay to me.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org