You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@daffodil.apache.org by GitBox <gi...@apache.org> on 2021/03/18 16:35:59 UTC

[GitHub] [daffodil] stevedlawrence commented on a change in pull request #481: ADD test for "%%" to bypass existing test for "%NL"

stevedlawrence commented on a change in pull request #481:
URL: https://github.com/apache/daffodil/pull/481#discussion_r597032815



##########
File path: daffodil-lib/src/main/scala/org/apache/daffodil/cookers/EntityReplacer.scala
##########
@@ -281,7 +281,7 @@ final class EntityReplacer {
    * this form, but allow the others.
    */
   private def replaceEntity(proposedEntity: String, orig: String, context: Option[ThrowsSDE], forUnparse: Boolean,
-    allowByteEntity: Boolean): String = {
+                            allowByteEntity: Boolean): String = {

Review comment:
       Check your IDE settings for indentation. You've got a few places wher indentation was changed, but shouldn't be. 

##########
File path: daffodil-lib/src/main/scala/org/apache/daffodil/cookers/EntityReplacer.scala
##########
@@ -835,4 +850,4 @@ class DelimiterCooker(pn: String) extends ListOfStringLiteralBase(pn, true) {
    */
   override protected def oneLiteralCooker: StringLiteralBase = Assert.usageError("not to be used.")
   override protected def cook(raw: String, context: ThrowsSDE, forUnparse: Boolean): List[String] = Assert.usageError("not to be used")
-}
+}

Review comment:
       Check your IDE settings to make sure it ends files with a newline.

##########
File path: daffodil-lib/src/main/scala/org/apache/daffodil/cookers/EntityReplacer.scala
##########
@@ -560,48 +560,68 @@ sealed trait SingleCharacterMixin { self: StringLiteralBase =>
 class SingleCharacterLiteral(pn: String, allowByteEntities: Boolean)
   extends StringLiteralBase(pn, allowByteEntities) with SingleCharacterMixin
 
-trait NoCharClassEntitiesMixin {
+trait DisallowedCharClassEntitiesMixin {
 
   protected def propName: String
+  protected def disallowedCharClassEntities: Seq[String]
 
   /**
-   * Override if set of prohibited class entities is different
+   * The raw string to test is supplied as the first parameter
+   * A list of disallowed character class entities is supplied as the 2nd parameter
+   * This list is added to the regex string below
+   * Ex regex: (?:^|[^%])(?:%%)*(%(?:NL|ES);)
+   * (?:^|[^%]) look for the beginning of the string or a non-% character
+   * (?:%%)* look for an even number of %'s
+   * (%(?:NL|ES);) look for any single % left after previous match along with and NL or ES followed by a ;
+   *
    */
-  protected def noCharClassEntities(raw: String, context: ThrowsSDE): Unit = {
-    context.schemaDefinitionUnless(!raw.contains("%NL;"), "Property dfdl:%s cannot contain %%NL;", propName)
-    context.schemaDefinitionUnless(!raw.contains("%ES;"), "Property dfdl:%s cannot contain %%ES;", propName)
-    context.schemaDefinitionUnless(!raw.contains("%WSP;"), "Property dfdl:%s cannot contain %%WSP;", propName)
-    context.schemaDefinitionUnless(!raw.contains("%WSP+;"), "Property dfdl:%s cannot contain %%WSP+;", propName)
-    context.schemaDefinitionUnless(!raw.contains("%WSP*;"), "Property dfdl:%s cannot contain %%WSP*;", propName)
+  private lazy val disallowedRegex = {
+//    val disallowedCharClassMatch = disallowedCharClassEntities.mkString("|")

Review comment:
       Makesure to remove commented out code.

##########
File path: daffodil-lib/src/main/scala/org/apache/daffodil/cookers/EntityReplacer.scala
##########
@@ -806,17 +816,22 @@ class NonEmptyListOfStringLiteralCharClass_ES_WithByteEntities(pn: String)
 class DelimiterCookerNoES(pn: String) extends ListOfString1OrMoreLiteral(pn, true) {
 
   override val oneLiteralCooker: StringLiteralBase =
-    new StringLiteralNoCharClassEntities(propName, true) {
-
-      override protected def noCharClassEntities(raw: String, context: ThrowsSDE): Unit = {
-        // TODO: this isn't quite right, as it will allow combined delimiters
-        // that still match the empty string, e.g. "%ES;%WSP*;". We could check
-        // if raw.contains("%WSP*;"), but that is too general, preventing valid
-        // delimiters like "foo%WSP*;bar". Although the below matches the
-        // specification, it's probably not the intended behavior.
-        context.schemaDefinitionUnless(raw != "%WSP*;", "Property dfdl:%s cannot contain %%WSP*; when dfdl:lengthKind=\"delimited\".", propName)
-        context.schemaDefinitionUnless(raw != "%ES;", "Property dfdl:%s cannot contain %%ES; when dfdl:lengthKind=\"delimited\".", propName)
+    new StringLiteralNoCharClassEntities(propName, true) with DisallowedCharClassEntitiesMixin {
+
+      //override protected def noCharClassEntities(raw: String, context: ThrowsSDE): Unit = {
+      // TODO: this isn't quite right, as it will allow combined delimiters
+      // that still match the empty string, e.g. "%ES;%WSP*;". We could check
+      // if raw.contains("%WSP*;"), but that is too general, preventing valid
+      // delimiters like "foo%WSP*;bar". Although the below matches the
+      // specification, it's probably not the intended behavior.
+      override val disallowedCharClassEntities = Seq("ES")
+      override def testRaw(raw: String, context: ThrowsSDE) = {
+        context.schemaDefinitionUnless(raw != "%WSP*;", """The WSP* entity cannot appear on it's own when dfdl:lengthKind="delimited".""")
+        super[DisallowedCharClassEntitiesMixin].testRaw(raw, context)
       }
+      //context.schemaDefinitionUnless(raw != "%WSP*;", "Property dfdl:%s cannot contain %%WSP*; when dfdl:lengthKind=\"delimited\".", propName)
+      //context.schemaDefinitionUnless(raw != "%ES;", "Property dfdl:%s cannot contain %%ES; when dfdl:lengthKind=\"delimited\".", propName)
+      //}
     }
 }
 

Review comment:
       I think what you have here for DelimiterCookerNoES is correct for the dfdl separator property, but is not correct for dfdl:terminator. ES is allowed in the dfdl:terminator property, so we need a different cooker that has different checks.
   
   For the dfdl:terminator proeprty, there are no disallowed character class entities, and both WSP* and ES canot appear on their own when lengthKind is delimited. This logic is different from this ``DelimiterCookerNoES``. So you need a new class that is similar to this cooker (maybe call it ``DelimiterCookerNoSoleES``), but it should not mixin the ``DisallowedCharClassEntitiesMixin`` and it should not have ``disallowedCharClassEntities``. All it has is ``testRaw`` that checks for the same WSP* check, but also does the same check for ES.
   
   Then, in Cookers.scala, ``object TerminatorCookerNoES extends DelimiterCookerNoES("terminator")`` extends this new ``DelimiterCokerNoSoleES``.

##########
File path: daffodil-lib/src/main/scala/org/apache/daffodil/cookers/EntityReplacer.scala
##########
@@ -560,48 +560,68 @@ sealed trait SingleCharacterMixin { self: StringLiteralBase =>
 class SingleCharacterLiteral(pn: String, allowByteEntities: Boolean)
   extends StringLiteralBase(pn, allowByteEntities) with SingleCharacterMixin
 
-trait NoCharClassEntitiesMixin {
+trait DisallowedCharClassEntitiesMixin {
 
   protected def propName: String
+  protected def disallowedCharClassEntities: Seq[String]
 
   /**
-   * Override if set of prohibited class entities is different
+   * The raw string to test is supplied as the first parameter
+   * A list of disallowed character class entities is supplied as the 2nd parameter
+   * This list is added to the regex string below
+   * Ex regex: (?:^|[^%])(?:%%)*(%(?:NL|ES);)
+   * (?:^|[^%]) look for the beginning of the string or a non-% character
+   * (?:%%)* look for an even number of %'s
+   * (%(?:NL|ES);) look for any single % left after previous match along with and NL or ES followed by a ;
+   *
    */
-  protected def noCharClassEntities(raw: String, context: ThrowsSDE): Unit = {
-    context.schemaDefinitionUnless(!raw.contains("%NL;"), "Property dfdl:%s cannot contain %%NL;", propName)
-    context.schemaDefinitionUnless(!raw.contains("%ES;"), "Property dfdl:%s cannot contain %%ES;", propName)
-    context.schemaDefinitionUnless(!raw.contains("%WSP;"), "Property dfdl:%s cannot contain %%WSP;", propName)
-    context.schemaDefinitionUnless(!raw.contains("%WSP+;"), "Property dfdl:%s cannot contain %%WSP+;", propName)
-    context.schemaDefinitionUnless(!raw.contains("%WSP*;"), "Property dfdl:%s cannot contain %%WSP*;", propName)
+  private lazy val disallowedRegex = {
+//    val disallowedCharClassMatch = disallowedCharClassEntities.mkString("|")
+    val disallowedCharClassMatch = disallowedCharClassEntities.map(Pattern.quote).mkString("|")
+    val regexString = "(?:^|[^%])(?:%%)*(%(?:" + disallowedCharClassMatch + ");)"
+    regexString.r
   }
 
   protected def testRaw(raw: String, context: ThrowsSDE): Unit = {
-    noCharClassEntities(raw, context)
+    val matchedGroups = disallowedRegex.findAllMatchIn(raw).map {_.group(1)}
+    context.schemaDefinitionUnless(
+      matchedGroups.isEmpty,
+      "Property dfdl:%s cannot contain %s" ,
+      propName,
+      matchedGroups.mkString(", "))
   }
 }
 
 class StringLiteralNoCharClassEntities(pn: String, allowByteEntities: Boolean)
   extends StringLiteralBase(pn, allowByteEntities)
-  with NoCharClassEntitiesMixin {
+    with DisallowedCharClassEntitiesMixin {
 
+  // Check for all NL|ES|WSP|WSP+|WSP*
+  override val disallowedCharClassEntities = Seq("NL", "ES", "WSP", "WSP+", "WSP*")
   override def testRaw(raw: String, context: ThrowsSDE) =
-    super[NoCharClassEntitiesMixin].testRaw(raw, context)
+    super[DisallowedCharClassEntitiesMixin].testRaw(raw,context)
 }
 
 class SingleCharacterLiteralNoCharClassEntities(pn: String, allowByteEntities: Boolean)
   extends StringLiteralBase(pn, allowByteEntities)
-  with NoCharClassEntitiesMixin
-  with SingleCharacterMixin {
+    with DisallowedCharClassEntitiesMixin
+    with SingleCharacterMixin {
 
-  override protected def testRaw(raw: String, context: ThrowsSDE) = super.testRaw(raw, context)
+  // Check for all NL|ES|WSP|WSP+|WSP*

Review comment:
       These ``// CHeck for `` comments dont' really add much. The code ``disalloweCharClassEntites = ...`` is self documenting enough. Suggest removing these // Check comments.

##########
File path: daffodil-lib/src/main/scala/org/apache/daffodil/cookers/EntityReplacer.scala
##########
@@ -560,48 +560,68 @@ sealed trait SingleCharacterMixin { self: StringLiteralBase =>
 class SingleCharacterLiteral(pn: String, allowByteEntities: Boolean)
   extends StringLiteralBase(pn, allowByteEntities) with SingleCharacterMixin
 
-trait NoCharClassEntitiesMixin {
+trait DisallowedCharClassEntitiesMixin {
 
   protected def propName: String
+  protected def disallowedCharClassEntities: Seq[String]
 
   /**
-   * Override if set of prohibited class entities is different
+   * The raw string to test is supplied as the first parameter
+   * A list of disallowed character class entities is supplied as the 2nd parameter
+   * This list is added to the regex string below
+   * Ex regex: (?:^|[^%])(?:%%)*(%(?:NL|ES);)
+   * (?:^|[^%]) look for the beginning of the string or a non-% character
+   * (?:%%)* look for an even number of %'s
+   * (%(?:NL|ES);) look for any single % left after previous match along with and NL or ES followed by a ;
+   *
    */
-  protected def noCharClassEntities(raw: String, context: ThrowsSDE): Unit = {
-    context.schemaDefinitionUnless(!raw.contains("%NL;"), "Property dfdl:%s cannot contain %%NL;", propName)
-    context.schemaDefinitionUnless(!raw.contains("%ES;"), "Property dfdl:%s cannot contain %%ES;", propName)
-    context.schemaDefinitionUnless(!raw.contains("%WSP;"), "Property dfdl:%s cannot contain %%WSP;", propName)
-    context.schemaDefinitionUnless(!raw.contains("%WSP+;"), "Property dfdl:%s cannot contain %%WSP+;", propName)
-    context.schemaDefinitionUnless(!raw.contains("%WSP*;"), "Property dfdl:%s cannot contain %%WSP*;", propName)
+  private lazy val disallowedRegex = {
+//    val disallowedCharClassMatch = disallowedCharClassEntities.mkString("|")
+    val disallowedCharClassMatch = disallowedCharClassEntities.map(Pattern.quote).mkString("|")
+    val regexString = "(?:^|[^%])(?:%%)*(%(?:" + disallowedCharClassMatch + ");)"
+    regexString.r
   }
 
   protected def testRaw(raw: String, context: ThrowsSDE): Unit = {
-    noCharClassEntities(raw, context)
+    val matchedGroups = disallowedRegex.findAllMatchIn(raw).map {_.group(1)}
+    context.schemaDefinitionUnless(
+      matchedGroups.isEmpty,
+      "Property dfdl:%s cannot contain %s" ,
+      propName,
+      matchedGroups.mkString(", "))

Review comment:
       If there are multiple disallowed character class found, the error message would look something like ``Property dfdl:foo cannot contain ES, WSP*, WSP``, which I personally find a little odd and maybe inconsistent. Suggest rewording it, I personaly liked what you had before, and we can just update the expected error messages in the TDML files.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org