You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@daffodil.apache.org by GitBox <gi...@apache.org> on 2018/04/05 16:04:01 UTC

[GitHub] stevedlawrence closed pull request #56: Improved toString of grammar and parser/unparser objects.

stevedlawrence closed pull request #56: Improved toString of grammar and parser/unparser objects.
URL: https://github.com/apache/incubator-daffodil/pull/56
 
 
   

This pull request was merged from a forked repository (a "foreign" pull
request). Because GitHub hides the original diff of such a pull request
once it is merged, the diff is reproduced below for the sake of provenance:

diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/grammar/Grammar.scala b/daffodil-core/src/main/scala/org/apache/daffodil/grammar/Grammar.scala
index 7f022599a..eeb7bc22e 100644
--- a/daffodil-core/src/main/scala/org/apache/daffodil/grammar/Grammar.scala
+++ b/daffodil-core/src/main/scala/org/apache/daffodil/grammar/Grammar.scala
@@ -180,8 +180,13 @@ abstract class NamedGram(context: SchemaComponent) extends Gram(context) {
   // Note: keep the toString really simple.
   // It causes much grief if toString uses complicated things that can fail or
   // that end up needing the name of this NamedGram again.
-  override def toString = name // + "(" + context.scPath.last + ")" //+ (if (isEmpty) "(Empty)" else "")
 
+  override def name = context match {
+    case nm: NamedMixin => nm.name
+    case _ => super.name
+  }
+
+  override def toString = "<" + name + ">" + super.name + "</" + name + ">"
 }
 
 /**
@@ -192,9 +197,4 @@ abstract class Terminal(contextArg: SchemaComponent, guard: Boolean)
 
   override def isEmpty = !guard
 
-  private lazy val realSC = context.asInstanceOf[SchemaComponent]
-  final override lazy val path = realSC.path + "@@" + diagnosticDebugName
-
-  override def toString = path // dangerous. What if realSC.path fails?
-
 }
diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/grammar/Production.scala b/daffodil-core/src/main/scala/org/apache/daffodil/grammar/Production.scala
index 43dd12ba9..40a458033 100644
--- a/daffodil-core/src/main/scala/org/apache/daffodil/grammar/Production.scala
+++ b/daffodil-core/src/main/scala/org/apache/daffodil/grammar/Production.scala
@@ -48,6 +48,8 @@ final class Prod(nameArg: String, val sc: SchemaComponent, guard: Boolean, gramA
 
   final override def name = nameArg
 
+  override def toString() = "<" + name + ">" + gram.toString + "</" + name + ">"
+
   final override lazy val path = sc.path + "@@Prod(" + diagnosticDebugName + ")"
 
   final override lazy val gram: Gram = {
diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/grammar/primitives/ElementCombinator.scala b/daffodil-core/src/main/scala/org/apache/daffodil/grammar/primitives/ElementCombinator.scala
index 930ebdfad..7dcc7b1fa 100644
--- a/daffodil-core/src/main/scala/org/apache/daffodil/grammar/primitives/ElementCombinator.scala
+++ b/daffodil-core/src/main/scala/org/apache/daffodil/grammar/primitives/ElementCombinator.scala
@@ -76,6 +76,8 @@ class ElementCombinator(context: ElementBase,
   extends NamedGram(context)
   with Padded {
 
+  override def toString = subComb.toString() // parse centric view of the world. Unparser doesn't use subComb at all.
+
   private lazy val subComb = {
     if (context.isParentUnorderedSequence) {
       new ChoiceElementCombinator(context, eBeforeContent,
@@ -407,6 +409,8 @@ class ChoiceElementCombinator(context: ElementBase, eGramBefore: Gram, eGram: Gr
 abstract class ElementCombinatorBase(context: ElementBase, eGramBefore: Gram, eGram: Gram, eGramAfter: Gram)
   extends NamedGram(context) {
 
+  override def toString() = "<element name='" + name + "'>" + eGram.toString() + "</element>"
+
   // The order of things matters in some cases, so to be consistent we'll always use the
   // same order even when it doesn't matter
 
diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/grammar/primitives/PrimitivesElementKinds.scala b/daffodil-core/src/main/scala/org/apache/daffodil/grammar/primitives/PrimitivesElementKinds.scala
index f8f2dcf74..549c4a05e 100644
--- a/daffodil-core/src/main/scala/org/apache/daffodil/grammar/primitives/PrimitivesElementKinds.scala
+++ b/daffodil-core/src/main/scala/org/apache/daffodil/grammar/primitives/PrimitivesElementKinds.scala
@@ -45,6 +45,8 @@ import org.apache.daffodil.exceptions.Assert
 import org.apache.daffodil.util.Maybe._
 import org.apache.daffodil.cookers.ChoiceBranchKeyCooker
 import org.apache.daffodil.api.WarnID
+import org.apache.daffodil.util.Misc
+import org.apache.daffodil.xml.XMLUtils
 
 object ENoWarn3 { EqualitySuppressUnusedImportWarning() }
 
@@ -81,7 +83,15 @@ case class DelimiterStackCombinatorElement(e: ElementBase, body: Gram) extends T
   lazy val uInit = if (e.initiatorParseEv.isKnownNonEmpty) One(e.initiatorUnparseEv) else Nope
   lazy val uTerm = if (e.terminatorParseEv.isKnownNonEmpty) One(e.terminatorUnparseEv) else Nope
 
-  lazy val parser: DaffodilParser = new DelimiterStackParser((pInit.toList ++ pTerm.toList).toArray, e.termRuntimeData, body.parser)
+  lazy val delims = (pInit.toList ++ pTerm.toList)
+
+  override def toString() = {
+    val delimAttrib = delims.map { _.toString }.map { XMLUtils.escape(_).toString() }.mkString(" ")
+    "<" + Misc.getNameFromClass(this) + " delims='" + delimAttrib + "'>" +
+      body.toString() +
+      "</" + Misc.getNameFromClass(this) + ">"
+  }
+  lazy val parser: DaffodilParser = new DelimiterStackParser(delims.toArray, e.termRuntimeData, body.parser)
 
   override lazy val unparser: DaffodilUnparser = new DelimiterStackUnparser(uInit, None, uTerm, e.termRuntimeData, body.unparser)
 }
@@ -103,6 +113,11 @@ case class ComplexTypeCombinator(ct: ComplexTypeBase, body: Gram) extends Termin
 
   override def isEmpty = body.isEmpty
 
+  override def toString() =
+    "<" + Misc.getNameFromClass(this) + ">" +
+      body.toString() +
+      "</" + Misc.getNameFromClass(this) + ">"
+
   lazy val parser: DaffodilParser = new ComplexTypeParser(ct.runtimeData, body.parser)
 
   override lazy val unparser: DaffodilUnparser =
@@ -119,6 +134,11 @@ case class SequenceCombinator(sq: SequenceTermBase, rawTerms: Seq[Gram])
     res
   }
 
+  override def toString() =
+    "<" + Misc.getNameFromClass(this) + ">" +
+      terms.map { _.toString() }.mkString +
+      "</" + Misc.getNameFromClass(this) + ">"
+
   private val mt: Gram = EmptyGram
   lazy val body = rawTerms.foldRight(mt) { _ ~ _ }
 
@@ -142,6 +162,7 @@ case class UnorderedSequenceCombinator(s: Sequence, terms: Seq[Gram])
 }
 
 case class ArrayCombinator(e: ElementBase, body: Gram) extends Terminal(e, !body.isEmpty) {
+  override def toString() = "<Array>" + body.toString + "</Array>"
 
   lazy val parser: DaffodilParser = new ArrayCombinatorParser(e.elementRuntimeData, body.parser)
   override lazy val unparser: Unparser = new ArrayCombinatorUnparser(e.elementRuntimeData, body.unparser)
@@ -149,6 +170,7 @@ case class ArrayCombinator(e: ElementBase, body: Gram) extends Terminal(e, !body
 
 case class OptionalCombinator(e: ElementBase, body: Gram) extends Terminal(e, !body.isEmpty) {
 
+  override def toString() = "<Optional>" + body.toString + "</Optional>"
   lazy val parser: DaffodilParser = new OptionalCombinatorParser(e.elementRuntimeData, body.parser)
   override lazy val unparser: Unparser = new OptionalCombinatorUnparser(e.elementRuntimeData, body.unparser)
 }
diff --git a/daffodil-lib/src/main/scala/org/apache/daffodil/xml/XMLUtils.scala b/daffodil-lib/src/main/scala/org/apache/daffodil/xml/XMLUtils.scala
index c363f6782..c186212df 100644
--- a/daffodil-lib/src/main/scala/org/apache/daffodil/xml/XMLUtils.scala
+++ b/daffodil-lib/src/main/scala/org/apache/daffodil/xml/XMLUtils.scala
@@ -50,23 +50,25 @@ object XMLUtils {
   /**
    * Legal XML v1.0 chars are #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
    */
-  def remapXMLIllegalCharToPUA(checkForExistingPUA: Boolean = true)(c: Char): Char = {
+  def remapXMLIllegalCharToPUA(checkForExistingPUA: Boolean = true, replaceCRWithLF: Boolean = true)(c: Char): Char = {
     val cInt = c.toInt
     val res = cInt match {
       case 0x9 => c
       case 0xA => c
-      case 0xD => 0xA.toChar // Map CR to LF. That's what XML does.
-      case _ if (c < 0x20) => (c + 0xE000).toChar
-      case _ if (c > 0xD7FF && c < 0xE000) => (c + 0x1000).toChar
-      case _ if (c >= 0xE000 && c <= 0xF8FF) => {
+      case 0xD =>
+        if (replaceCRWithLF) 0xA.toChar // Map CR to LF. That's what XML does.
+        else 0xE00D.toChar // or remap it to PUA so it is non-whitespace, and preserved.
+      case _ if (cInt < 0x20) => (cInt + 0xE000).toChar
+      case _ if (cInt > 0xD7FF && cInt < 0xE000) => (cInt + 0x1000).toChar
+      case _ if (cInt >= 0xE000 && cInt <= 0xF8FF) => {
         if (checkForExistingPUA)
           Assert.usageError("Pre-existing Private Use Area (PUA) character found in data: '%s'".format(c))
         else c
       }
       case 0xFFFE => 0xF0FE.toChar
       case 0xFFFF => 0xF0FF.toChar
-      case _ if (c > 0x10FFFF) => {
-        Assert.invariantFailed("Character code beyond U+10FFFF found in data. Codepoint: %s".format(c.toInt))
+      case _ if (cInt > 0x10FFFF) => {
+        Assert.invariantFailed("Character code beyond U+10FFFF found in data. Codepoint: %s".format(cInt))
       }
       case _ => c
 
@@ -356,7 +358,7 @@ object XMLUtils {
   val EXT_NS_APACHE = NS(DAFFODIL_EXTENSION_NAMESPACE_APACHE.uri)
 
   private val DAFFODIL_INTERNAL_NAMESPACE = NS(DAFFODIL_EXTENSIONS_NAMESPACE_ROOT_APACHE + ":int")
-  val INT_PREFIX= "dafint"
+  val INT_PREFIX = "dafint"
   val INT_NS = NS(DAFFODIL_INTERNAL_NAMESPACE.uri)
 
   val FILE_ATTRIBUTE_NAME = "file"
@@ -423,7 +425,7 @@ object XMLUtils {
   def dafAttributes(n: Node) = {
     n.attributes.filter { a =>
       a.getNamespace(n) == XMLUtils.EXT_NS_NCSA.toString ||
-      a.getNamespace(n) == XMLUtils.EXT_NS_APACHE.toString
+        a.getNamespace(n) == XMLUtils.EXT_NS_APACHE.toString
     }
   }
 
@@ -905,6 +907,71 @@ Differences were (path, expected, actual):
     tmpSchemaFile
   }
 
+  /**
+   * Strong escaping that never loses information, handles apos and CR right.
+   *
+   * Escapes apostrophe (single quote) as well as the other XML escaped chars.
+   * Remaps CR and any other XML-illegals into PUA. Replaces whitespace with
+   * numeric character entities for additional safety.
+   *
+   * This is needed since XML may be using single quotes to surround a string which
+   * might contain single quotes.
+   *
+   * The reason basic scala.xml.Utility.escape doesn't escape single-quotes is
+   * HTML compatibility. HTML doesn't define an "&apos;" entity.
+   *
+   * Furthermore, since some potentially illegal XML characters may be used here, we
+   * are going to remap all the illegal XML characters to their corresponding PUA characters.
+   *
+   * Lastly, all whitespace chars are replaced by numeric character entities, and
+   * anything above 0xFF that is not considered letter or digit, is also replaced
+   * by a numeric character entity.
+   *
+   * The result is a string which can be displayed as an XML attribute value, is
+   * invertible back to the original string.
+   *
+   * Finally, CRLF and CR will come through as &#xE00D;&#xA; that's because
+   * if we used &#xD; for the CR, it might be converted to a LF by XML readers.
+   * We have to use our own PUA remapping trick if we want to be sure to preserve
+   * CR in XML.
+   */
+  def escape(str: String, sb: StringBuilder = new StringBuilder()): StringBuilder = {
+    var i = 0
+    while (i < str.length) {
+      val x = str(i)
+      val c = escapeMapper(x)
+      i += 1
+      c match {
+        case '\'' => sb.append("&#x27;") // don't use "&apos;" because it's not universally accepted (HTML doesn't have it in early versions)
+        case '"' => sb.append("&quot;")
+        case '&' => sb.append("&amp;")
+        case '<' => sb.append("&lt;")
+        case '>' => sb.append("&gt;")
+        case _ if (c.isLetterOrDigit) => sb.append(c)
+        case _ if (c.isWhitespace || c.isControl) => toNumericCharacterEntity(c, sb)
+        // A0 is the NBSP character - not considered whitespace, but no glyph, so we need it numeric
+        case _ if (c.toInt == 0xA0) => toNumericCharacterEntity(c, sb)
+        // Any other char < 256 is punctuation or other glyph char
+        case _ if (c.toInt < 0xFF) => sb.append(c)
+        case _ => toNumericCharacterEntity(c, sb)
+      }
+    }
+    sb
+  }
+
+  private val escapeMapper =
+    remapXMLIllegalCharToPUA(
+      checkForExistingPUA = false,
+      replaceCRWithLF = false) _
+
+  def toNumericCharacterEntity(c: Char, sb: StringBuilder) = {
+    val i = c.toInt
+    Assert.usage(i > 0) // NUL cannot be represented at all in XML.
+    val s = Integer.toHexString(i).toUpperCase()
+    sb.append("&#x")
+    sb.append(s)
+    sb.append(";")
+  }
 }
 
 trait GetAttributesMixin extends ThrowsSDE {
diff --git a/daffodil-lib/src/test/scala/org/apache/daffodil/xml/test/unit/TestXMLUtils.scala b/daffodil-lib/src/test/scala/org/apache/daffodil/xml/test/unit/TestXMLUtils.scala
index 6d1ab2342..dc9f04e80 100644
--- a/daffodil-lib/src/test/scala/org/apache/daffodil/xml/test/unit/TestXMLUtils.scala
+++ b/daffodil-lib/src/test/scala/org/apache/daffodil/xml/test/unit/TestXMLUtils.scala
@@ -231,4 +231,36 @@ class TestXMLUtils {
     assertEquals("&&&", res(0).text)
   }
 
+  @Test def testEscapeLineEndings() {
+    val input = "abc\r\ndef\rghi\njkl\tmno\u0085pqr"
+    val actual = XMLUtils.escape(input).toString()
+    assertEquals("abc&#xE00D;&#xA;def&#xE00D;ghi&#xA;jkl&#x9;mno&#x85;pqr", actual)
+  }
+
+  @Test def testEscape0To127() {
+    val input = (0 to 127).map { _.toChar }.mkString
+    val actual = XMLUtils.escape(input).toString()
+    val expected = "&#xE000;&#xE001;&#xE002;&#xE003;&#xE004;&#xE005;&#xE006;&#xE007;&#xE008;" + // first batch of C0 controls
+      "&#x9;&#xA;" + // Tab and LF
+      "&#xE00B;&#xE00C;" + // more C0 controls
+      "&#xE00D;" + // CR
+      // Even more of the C0 controls.
+      "&#xE00E;&#xE00F;&#xE010;&#xE011;&#xE012;&#xE013;&#xE014;&#xE015;&#xE016;&#xE017;&#xE018;&#xE019;&#xE01A;&#xE01B;&#xE01C;&#xE01D;&#xE01E;&#xE01F;" +
+      "&#x20;" + // space is whitespace comes through numeric.
+      "!&quot;#$%&amp;" + // XML Entities for quot, amp
+      "&#x27;" + // numeric entity for apos aka single quote (because &apos; is not universal, i.e., not in HTML
+      "()*+,-./0123456789:;&lt;=&gt" + // XML entities for lt, gt
+      ";?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[" + // all printing characters
+      "\\" + // backslash char needs escape. This is ONE character
+      "]^_`abcdefghijklmnopqrstuvwxyz{|}~" + // all printing characters
+      "&#x7F;" // DEL is a control char, so numeric entity for that too.
+    assertEquals(expected, actual)
+  }
+
+  @Test def testEscape128To255() {
+    val input = (128 to 255).map { _.toChar }.mkString
+    val actual = XMLUtils.escape(input).toString()
+    val expected = "&#x80;&#x81;&#x82;&#x83;&#x84;&#x85;&#x86;&#x87;&#x88;&#x89;&#x8A;&#x8B;&#x8C;&#x8D;&#x8E;&#x8F;&#x90;&#x91;&#x92;&#x93;&#x94;&#x95;&#x96;&#x97;&#x98;&#x99;&#x9A;&#x9B;&#x9C;&#x9D;&#x9E;&#x9F;&#xA0;¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ"
+    assertEquals(expected, actual)
+  }
 }
diff --git a/daffodil-runtime1-unparser/src/main/scala/org/apache/daffodil/processors/unparsers/ElementKindUnparsers.scala b/daffodil-runtime1-unparser/src/main/scala/org/apache/daffodil/processors/unparsers/ElementKindUnparsers.scala
index 088df469a..71ff9ca1a 100644
--- a/daffodil-runtime1-unparser/src/main/scala/org/apache/daffodil/processors/unparsers/ElementKindUnparsers.scala
+++ b/daffodil-runtime1-unparser/src/main/scala/org/apache/daffodil/processors/unparsers/ElementKindUnparsers.scala
@@ -72,7 +72,6 @@ class SequenceCombinatorUnparser(ctxt: ModelGroupRuntimeData, childUnparsers: Ve
     var index = 0
     var doUnparser = false
     val limit = childUnparsers.length
-
     while (index < limit) {
       doUnparser = false
       val childUnparser = childUnparsers(index)


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services