You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2021/03/10 02:51:54 UTC

[GitHub] [spark] beliefer commented on a change in pull request #31448: [SPARK-28137][SQL] Data Type Formatting Functions: `to_number`.

beliefer commented on a change in pull request #31448:
URL: https://github.com/apache/spark/pull/31448#discussion_r590958799



##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
##########
@@ -2422,6 +2423,163 @@ case class FormatNumber(x: Expression, d: Expression)
   override def prettyName: String = "format_number"
 }
 
+/**
+ * Helpers backing the `to_number` expression: recognising the sign characters of a format
+ * string, normalising a user-facing format into a `DecimalFormat` pattern, deriving the
+ * precision/scale implied by a format, and converting an input string into a `Decimal`.
+ */
+object ToNumber {
+  val pointSign = '.'
+  val commaSign = ','
+  val minusSign = '-'
+  val dollarSign = '$'
+
+  /** Returns true if `c` is the decimal point character. */
+  def isDecimalPoint(c: Char): Boolean = c == pointSign
+
+  /** Returns true if `c` is the minus sign character. */
+  def isMinusSign(c: Char): Boolean = c == minusSign
+
+  /** Returns true if `c` is the dollar sign character. */
+  def isDollarSign(c: Char): Boolean = c == dollarSign
+
+  /**
+   * Returns true if the first occurrence of `c` in `format` is neither the first nor the
+   * last character. Returns false when `c` does not occur at all.
+   *
+   * NOTE(review): only the first occurrence is inspected, so this presumably relies on the
+   * caller rejecting formats with repeated signs beforehand - confirm against the caller.
+   */
+  def invalidSignPosition(format: String, c: Char): Boolean = {
+    val signIndex = format.indexOf(c)
+    signIndex > 0 && signIndex < format.length - 1
+  }
+
+  /**
+   * Rewrites the format characters '9', 'D', 'G' and 'S' into '#', '.', ',' and '-'
+   * respectively; every other character is kept as is.
+   */
+  def normalize(format: String): String = {
+    format.map {
+      case '9' => '#'
+      case 'D' => pointSign
+      case 'G' => commaSign
+      case 'S' => minusSign
+      case other => other
+    }
+  }
+
+  /** Returns true if `c` is one of the point, comma, minus or dollar sign characters. */
+  def isSign(c: Char): Boolean =
+    c == pointSign || c == commaSign || c == minusSign || c == dollarSign
+
+  /**
+   * Derives `(precision, scale)` from a normalized format.
+   *
+   * Precision is the number of non-sign characters in the whole format. Scale is the number
+   * of non-sign characters in the segment following the first decimal point, or 0 when
+   * there is no such segment.
+   */
+  def parsePrecisionAndScale(format: String): (Int, Int) = {
+    val arr = format.split(pointSign)
+    val filteredFormat = format.filterNot(isSign)
+    // `split` drops trailing empty segments, so a format made only of decimal points yields
+    // an empty array; `lift` treats every "no fraction segment" case as scale 0 instead of
+    // indexing out of bounds.
+    val scale = arr.lift(1).fold(0)(_.filterNot(isSign).length)
+    (filteredFormat.length, scale)
+  }
+
+  /**
+   * If the format contains a minus sign, returns a two-part pattern
+   * `<format without minus signs>;<format>`, i.e. an explicit positive and negative
+   * sub-pattern separated by ';'. Otherwise returns the format unchanged.
+   */
+  def transform(format: String): String = {
+    if (format.contains(minusSign)) {
+      val positiveFormatString = format.filterNot(isMinusSign)
+      s"$positiveFormatString;$format"
+    } else {
+      format
+    }
+  }
+
+  /**
+   * Parses `input` (trimmed) according to the `DecimalFormat` pattern `format`.
+   *
+   * Throws `QueryExecutionErrors.invalidToNumberFormatError(format)` when the input has more
+   * integer digits than `precision - scale`, more fraction digits than `scale`, or cannot be
+   * parsed into a decimal at all.
+   *
+   * NOTE(review): `NumberFormat.getInstance()` uses the JVM default locale while the digit
+   * checks below split on a literal '.'; confirm the intended behaviour under locales whose
+   * decimal separator is not '.'.
+   */
+  def convert(input: UTF8String, format: String, precision: Int, scale: Int): Decimal = {
+    val numberFormat = NumberFormat.getInstance()
+    val numberDecimalFormat = numberFormat.asInstanceOf[DecimalFormat]
+    numberDecimalFormat.setParseBigDecimal(true)
+    numberDecimalFormat.applyPattern(format)
+    val parsePosition = new ParsePosition(0)
+    val inputStr = input.toString.trim
+    val arr = inputStr.split(pointSign)
+    val maxIntegerDigits = precision - scale
+    // `arr` is empty when the input consists solely of decimal points; treat that like the
+    // "no fraction part" case rather than indexing into an empty array.
+    val tooManyDigits = if (arr.length <= 1) {
+      inputStr.filterNot(isSign).length > maxIntegerDigits
+    } else {
+      arr(0).filterNot(isSign).length > maxIntegerDigits ||
+        arr(1).filterNot(isSign).length > scale
+    }
+    if (tooManyDigits) {
+      throw QueryExecutionErrors.invalidToNumberFormatError(format)
+    }
+    // `parse` returns null when nothing could be parsed and may return a non-BigDecimal
+    // value (e.g. for infinity); report both through the regular error path instead of
+    // failing on a blind cast.
+    numberDecimalFormat.parse(inputStr, parsePosition) match {
+      case parsed: BigDecimal => Decimal.apply(new scala.math.BigDecimal(parsed))
+      case _ => throw QueryExecutionErrors.invalidToNumberFormatError(format)
+    }
+  }
+}
+
+/**
+ * A function that converts string to numeric.
+ */
+@ExpressionDescription(
+  usage = """
+    _FUNC_(strExpr, formatExpr) - Convert `strExpr` to a number based on the `formatExpr`.
+    The format can consist of the following characters:
+      '9':  digit position (can be dropped if insignificant)
+      '0':  digit position (will not be dropped, even if insignificant)
+      '.':  decimal point (only allowed once)
+      ',':  group (thousands) separator
+      'S':  sign anchored to number (uses locale)
+      'D':  decimal point (uses locale)
+      'G':  group separator (uses locale)
+      '$':  specifies that the input value has a leading $ (Dollar) sign.

Review comment:
       I got it.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org