You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2022/02/10 15:20:45 UTC

[GitHub] [spark] gengliangwang commented on a change in pull request #35415: [SPARK-37507][SQL] Add a new SQL function to_binary

gengliangwang commented on a change in pull request #35415:
URL: https://github.com/apache/spark/pull/35415#discussion_r803787959



##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
##########
@@ -2538,6 +2538,78 @@ case class Encode(value: Expression, charset: Expression)
     newLeft: Expression, newRight: Expression): Encode = copy(value = newLeft, charset = newRight)
 }
 
+/**
+ * Converts the input expression to a binary value based on the supplied format.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = """
+            |_FUNC_(str[, fmt]) - Converts the input `str` to a binary value based on the supplied `fmt`.
+            |  `fmt` can be a case-insensitive string literal of "hex", "utf-8", "base2", or "base64".
+            |  By default, the binary format for conversion is "hex" if `fmt` is omitted.
+            |
+            |The function returns NULL if at least one of the input parameters is NULL.
+          """,
+  examples = """
+    Examples:
+      > SELECT _FUNC_('abc', 'utf-8');
+       abc
+  """,
+  since = "3.3.0",
+  group = "string_funcs")
+// scalastyle:on line.size.limit
+case class ToBinary(expr: Expression, format: Option[Expression], child: Expression)
+  extends RuntimeReplaceable {
+
+  def this(expr: Expression, format: Expression) = this(expr, Option(format),
+    format match {
+      case lit if lit.foldable =>
+        val value = lit.eval()
+        if (value == null) lit
+        else {
+          value.asInstanceOf[UTF8String].toString.toLowerCase(Locale.ROOT) match {
+            case "hex" => Unhex(expr)
+            case "utf-8" => Encode(expr, Literal("UTF-8"))
+            case "base64" => UnBase64(expr)
+            case "base2" => Cast(expr, BinaryType)
+            case _ => lit
+          }
+        }
+
+      case other => other
+    }
+  )
+
+  def this(expr: Expression) = this(expr, None, Unhex(expr))
+
+  override def flatArguments: Iterator[Any] = Iterator(expr, format)
+  override def exprsReplaced: Seq[Expression] = expr +: format.toSeq
+
+  override def prettyName: String = "to_binary"
+  override def dataType: DataType = BinaryType
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+    def checkFormat(lit: Expression) = {
+      if (lit.foldable) {
+        val value = lit.eval()
+        value == null || Seq("hex", "utf-8", "base64", "base2").contains(

Review comment:
       Why do we allow `value` (the evaluated `fmt` literal) to be null in this check?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org