You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@daffodil.apache.org by GitBox <gi...@apache.org> on 2022/12/23 02:36:50 UTC

[GitHub] [daffodil] mbeckerle commented on a diff in pull request #900: Draft: Work in progress - Implement textNumberPattern 'V' virtual decimal point.

mbeckerle commented on code in PR #900:
URL: https://github.com/apache/daffodil/pull/900#discussion_r1055992518


##########
daffodil-core/src/main/scala/org/apache/daffodil/grammar/primitives/PrimitivesTextNumber.scala:
##########
@@ -63,36 +142,72 @@ case class ConvertTextNumberPrim(e: ElementBase)
     EntityReplacer { _.replaceForUnparse(zr) }
   }
 
-  val textNumberFormatEv: TextNumberFormatEv = {
-    val (pattern, patternStripped) = {
-      val p = e.textNumberPattern
-
-      if (p.startsWith(";")) {
-        e.SDE("Invalid textNumberPattern: The postive number pattern is mandatory")
-      }
+  private val pattern = {
+    val p = e.textNumberPattern
+    if (p.startsWith(";")) {

Review Comment:
   e.SDEUnless would be cleaner



##########
daffodil-core/src/main/scala/org/apache/daffodil/grammar/primitives/PrimitivesTextNumber.scala:
##########
@@ -63,36 +142,72 @@ case class ConvertTextNumberPrim(e: ElementBase)
     EntityReplacer { _.replaceForUnparse(zr) }
   }
 
-  val textNumberFormatEv: TextNumberFormatEv = {
-    val (pattern, patternStripped) = {
-      val p = e.textNumberPattern
-
-      if (p.startsWith(";")) {
-        e.SDE("Invalid textNumberPattern: The postive number pattern is mandatory")
-      }
+  private val pattern = {
+    val p = e.textNumberPattern
+    if (p.startsWith(";")) {
+      e.SDE("Invalid textNumberPattern: The postive number pattern is mandatory")
+    }
+    p
+  }
 
-      val noEscapedTicksRegex = """''""".r
-      val patternNoEscapedTicks = noEscapedTicksRegex.replaceAllIn(p, "")
-      val noQuotedRegex = """'[^']+'""".r
-      val patternNoQuoted = noQuotedRegex.replaceAllIn(patternNoEscapedTicks, "")
+  private val patternStripped = {
+    val noEscapedTicksRegex = """''""".r
+    val patternNoEscapedTicks = noEscapedTicksRegex.replaceAllIn(pattern, "")
+    val noQuotedRegex = """'[^']+'""".r
+    val patternNoQuoted = noQuotedRegex.replaceAllIn(patternNoEscapedTicks, "")
+    patternNoQuoted
+  }
 
-      if (patternNoQuoted.contains("V")) {
-        e.notYetImplemented("textNumberPattern with V symbol")
-      }
+  private val hasV = patternStripped.contains("V")
+  private val hasP = patternStripped.contains("P")
 
-      if (patternNoQuoted.contains("P")) {
+  private val textDecimalVirtualPoint = { // analogous to the property dfdl:binaryDecimalVirtualPoint
+    if (!hasV && !hasP) 0 // no virtual point
+    else {
+      if (hasP) {
         e.notYetImplemented("textNumberPattern with P symbol")
       }
+      Assert.invariant(hasV)
+      Assert.invariant(e.textNumberRep eq TextNumberRep.Standard)
+      //
+      // check for things incompatible with "V"
+      //
+      e.schemaDefinitionUnless(e.textStandardBase == 10,
+        "The dfdl:textNumberPattern V (virtual decimal point) requires that " +
+          "dfdl:textStandardBase is 10, but its value was %s",
+        e.textStandardBase)
+      //
+      val virtualPoint = TextNumberPatternUtils.textDecimalVirtualPointForStandard(e, patternStripped)
+
+      Assert.invariant(virtualPoint >= 1) // if this fails the regex is broken.
+      virtualPoint
+    }
+  }
+
+  private val patternWithoutPV =
+    pattern.replaceAll("P", "").replaceAll("V", "")
+
+  private val runtimePattern =
+    if (hasV || hasP) patternWithoutPV else pattern
+
+  val textNumberFormatEv: TextNumberFormatEv = {
 
       // Load the pattern to make sure it is valid
       try {
-        new DecimalFormat(p)
+        if (!hasV && !hasP)
+          new DecimalFormat(pattern) // But only if it doesn't have P nor V in it, since ICU doesn't support those
       } catch {
         case ex: IllegalArgumentException => e.SDE("Invalid textNumberPattern: " + ex.getMessage())
       }
 
-      (p, patternNoQuoted)
-    }
+      // At this point we have 2 patterns
+      // 1. The original textNumberPattern string from the schema
+      // 2. The original but with the P or V removed (since they dont correspond to anything in the data)
+      //
+      // We need to use the original textNumberPattern in diagnostic messages
+      // since that's what the schema author wrote.
+      // But if the pattern has P or V, then we need ICU at runtime to use the original
+      // but with the P or V removed since the P and V don't actually represent anything in the data.

Review Comment:
   Move this comment up to the val runtimePattern



##########
daffodil-core/src/main/scala/org/apache/daffodil/grammar/primitives/PrimitivesTextNumber.scala:
##########
@@ -63,36 +142,72 @@ case class ConvertTextNumberPrim(e: ElementBase)
     EntityReplacer { _.replaceForUnparse(zr) }
   }
 
-  val textNumberFormatEv: TextNumberFormatEv = {
-    val (pattern, patternStripped) = {
-      val p = e.textNumberPattern
-
-      if (p.startsWith(";")) {
-        e.SDE("Invalid textNumberPattern: The postive number pattern is mandatory")
-      }
+  private val pattern = {
+    val p = e.textNumberPattern
+    if (p.startsWith(";")) {
+      e.SDE("Invalid textNumberPattern: The postive number pattern is mandatory")
+    }
+    p
+  }
 
-      val noEscapedTicksRegex = """''""".r
-      val patternNoEscapedTicks = noEscapedTicksRegex.replaceAllIn(p, "")
-      val noQuotedRegex = """'[^']+'""".r
-      val patternNoQuoted = noQuotedRegex.replaceAllIn(patternNoEscapedTicks, "")
+  private val patternStripped = {
+    val noEscapedTicksRegex = """''""".r
+    val patternNoEscapedTicks = noEscapedTicksRegex.replaceAllIn(pattern, "")
+    val noQuotedRegex = """'[^']+'""".r
+    val patternNoQuoted = noQuotedRegex.replaceAllIn(patternNoEscapedTicks, "")
+    patternNoQuoted
+  }
 
-      if (patternNoQuoted.contains("V")) {
-        e.notYetImplemented("textNumberPattern with V symbol")
-      }
+  private val hasV = patternStripped.contains("V")
+  private val hasP = patternStripped.contains("P")
 
-      if (patternNoQuoted.contains("P")) {
+  private val textDecimalVirtualPoint = { // analogous to the property dfdl:binaryDecimalVirtualPoint
+    if (!hasV && !hasP) 0 // no virtual point
+    else {
+      if (hasP) {
         e.notYetImplemented("textNumberPattern with P symbol")
       }
+      Assert.invariant(hasV)
+      Assert.invariant(e.textNumberRep eq TextNumberRep.Standard)
+      //
+      // check for things incompatible with "V"
+      //
+      e.schemaDefinitionUnless(e.textStandardBase == 10,
+        "The dfdl:textNumberPattern V (virtual decimal point) requires that " +
+          "dfdl:textStandardBase is 10, but its value was %s",
+        e.textStandardBase)
+      //
+      val virtualPoint = TextNumberPatternUtils.textDecimalVirtualPointForStandard(e, patternStripped)
+
+      Assert.invariant(virtualPoint >= 1) // if this fails the regex is broken.
+      virtualPoint
+    }
+  }
+
+  private val patternWithoutPV =
+    pattern.replaceAll("P", "").replaceAll("V", "")
+
+  private val runtimePattern =
+    if (hasV || hasP) patternWithoutPV else pattern
+
+  val textNumberFormatEv: TextNumberFormatEv = {
 
       // Load the pattern to make sure it is valid
       try {
-        new DecimalFormat(p)
+        if (!hasV && !hasP)
+          new DecimalFormat(pattern) // But only if it doesn't have P nor V in it, since ICU doesn't support those

Review Comment:
   Add comment about this check being unnecessary for P, V case since regex checking of pattern allows only a tiny subset of all that ICU's patterns can do, and we're certain this will be error free. 



##########
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/PrimitivesTextNumber1.scala:
##########
@@ -99,11 +102,30 @@ case class ConvertTextNumberParser(
 
         // Verify that what was parsed was what was passed exactly in byte count.
         // Use pos to verify all characters consumed & check for errors!
-        if (num == null) {
+        if (num1 == null) {

Review Comment:
   Move this null check above the icuNum match. 



##########
daffodil-core/src/main/scala/org/apache/daffodil/grammar/primitives/PrimitivesTextNumber.scala:
##########
@@ -63,36 +142,72 @@ case class ConvertTextNumberPrim(e: ElementBase)
     EntityReplacer { _.replaceForUnparse(zr) }
   }
 
-  val textNumberFormatEv: TextNumberFormatEv = {
-    val (pattern, patternStripped) = {
-      val p = e.textNumberPattern
-
-      if (p.startsWith(";")) {
-        e.SDE("Invalid textNumberPattern: The postive number pattern is mandatory")
-      }
+  private val pattern = {
+    val p = e.textNumberPattern
+    if (p.startsWith(";")) {
+      e.SDE("Invalid textNumberPattern: The postive number pattern is mandatory")
+    }
+    p
+  }
 
-      val noEscapedTicksRegex = """''""".r
-      val patternNoEscapedTicks = noEscapedTicksRegex.replaceAllIn(p, "")
-      val noQuotedRegex = """'[^']+'""".r
-      val patternNoQuoted = noQuotedRegex.replaceAllIn(patternNoEscapedTicks, "")
+  private val patternStripped = {

Review Comment:
   Add comments about what this is for. 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@daffodil.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org