You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@daffodil.apache.org by sl...@apache.org on 2018/04/05 16:04:05 UTC
[incubator-daffodil] branch master updated: Improved toString of
grammar and parser/unparser objects.
This is an automated email from the ASF dual-hosted git repository.
slawrence pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-daffodil.git
The following commit(s) were added to refs/heads/master by this push:
new 8a29c60 Improved toString of grammar and parser/unparser objects.
8a29c60 is described below
commit 8a29c60b5c5857f17bc63298345fbde24a6d0a8c
Author: Michael Beckerle <mb...@tresys.com>
AuthorDate: Thu Mar 29 12:24:50 2018 -0400
Improved toString of grammar and parser/unparser objects.
This is just to assist in debugging some thorny problems.
DAFFODIL-1920
---
.../org/apache/daffodil/grammar/Grammar.scala | 12 +--
.../org/apache/daffodil/grammar/Production.scala | 2 +
.../grammar/primitives/ElementCombinator.scala | 4 +
.../primitives/PrimitivesElementKinds.scala | 24 +++++-
.../scala/org/apache/daffodil/xml/XMLUtils.scala | 85 +++++++++++++++++++---
.../daffodil/xml/test/unit/TestXMLUtils.scala | 32 ++++++++
.../unparsers/ElementKindUnparsers.scala | 1 -
7 files changed, 143 insertions(+), 17 deletions(-)
diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/grammar/Grammar.scala b/daffodil-core/src/main/scala/org/apache/daffodil/grammar/Grammar.scala
index 7f02259..eeb7bc2 100644
--- a/daffodil-core/src/main/scala/org/apache/daffodil/grammar/Grammar.scala
+++ b/daffodil-core/src/main/scala/org/apache/daffodil/grammar/Grammar.scala
@@ -180,8 +180,13 @@ abstract class NamedGram(context: SchemaComponent) extends Gram(context) {
// Note: keep the toString really simple.
// It causes much grief if toString uses complicated things that can fail or
// that end up needing the name of this NamedGram again.
- override def toString = name // + "(" + context.scPath.last + ")" //+ (if (isEmpty) "(Empty)" else "")
+ override def name = context match {
+ case nm: NamedMixin => nm.name
+ case _ => super.name
+ }
+
+ override def toString = "<" + name + ">" + super.name + "</" + name + ">"
}
/**
@@ -192,9 +197,4 @@ abstract class Terminal(contextArg: SchemaComponent, guard: Boolean)
override def isEmpty = !guard
- private lazy val realSC = context.asInstanceOf[SchemaComponent]
- final override lazy val path = realSC.path + "@@" + diagnosticDebugName
-
- override def toString = path // dangerous. What if realSC.path fails?
-
}
diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/grammar/Production.scala b/daffodil-core/src/main/scala/org/apache/daffodil/grammar/Production.scala
index 43dd12b..40a4580 100644
--- a/daffodil-core/src/main/scala/org/apache/daffodil/grammar/Production.scala
+++ b/daffodil-core/src/main/scala/org/apache/daffodil/grammar/Production.scala
@@ -48,6 +48,8 @@ final class Prod(nameArg: String, val sc: SchemaComponent, guard: Boolean, gramA
final override def name = nameArg
+ override def toString() = "<" + name + ">" + gram.toString + "</" + name + ">"
+
final override lazy val path = sc.path + "@@Prod(" + diagnosticDebugName + ")"
final override lazy val gram: Gram = {
diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/grammar/primitives/ElementCombinator.scala b/daffodil-core/src/main/scala/org/apache/daffodil/grammar/primitives/ElementCombinator.scala
index 930ebdf..7dcc7b1 100644
--- a/daffodil-core/src/main/scala/org/apache/daffodil/grammar/primitives/ElementCombinator.scala
+++ b/daffodil-core/src/main/scala/org/apache/daffodil/grammar/primitives/ElementCombinator.scala
@@ -76,6 +76,8 @@ class ElementCombinator(context: ElementBase,
extends NamedGram(context)
with Padded {
+ override def toString = subComb.toString() // parse centric view of the world. Unparser doesn't use subComb at all.
+
private lazy val subComb = {
if (context.isParentUnorderedSequence) {
new ChoiceElementCombinator(context, eBeforeContent,
@@ -407,6 +409,8 @@ class ChoiceElementCombinator(context: ElementBase, eGramBefore: Gram, eGram: Gr
abstract class ElementCombinatorBase(context: ElementBase, eGramBefore: Gram, eGram: Gram, eGramAfter: Gram)
extends NamedGram(context) {
+ override def toString() = "<element name='" + name + "'>" + eGram.toString() + "</element>"
+
// The order of things matters in some cases, so to be consistent we'll always use the
// same order even when it doesn't matter
diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/grammar/primitives/PrimitivesElementKinds.scala b/daffodil-core/src/main/scala/org/apache/daffodil/grammar/primitives/PrimitivesElementKinds.scala
index f8f2dcf..549c4a0 100644
--- a/daffodil-core/src/main/scala/org/apache/daffodil/grammar/primitives/PrimitivesElementKinds.scala
+++ b/daffodil-core/src/main/scala/org/apache/daffodil/grammar/primitives/PrimitivesElementKinds.scala
@@ -45,6 +45,8 @@ import org.apache.daffodil.exceptions.Assert
import org.apache.daffodil.util.Maybe._
import org.apache.daffodil.cookers.ChoiceBranchKeyCooker
import org.apache.daffodil.api.WarnID
+import org.apache.daffodil.util.Misc
+import org.apache.daffodil.xml.XMLUtils
object ENoWarn3 { EqualitySuppressUnusedImportWarning() }
@@ -81,7 +83,15 @@ case class DelimiterStackCombinatorElement(e: ElementBase, body: Gram) extends T
lazy val uInit = if (e.initiatorParseEv.isKnownNonEmpty) One(e.initiatorUnparseEv) else Nope
lazy val uTerm = if (e.terminatorParseEv.isKnownNonEmpty) One(e.terminatorUnparseEv) else Nope
- lazy val parser: DaffodilParser = new DelimiterStackParser((pInit.toList ++ pTerm.toList).toArray, e.termRuntimeData, body.parser)
+ lazy val delims = (pInit.toList ++ pTerm.toList)
+
+ override def toString() = {
+ val delimAttrib = delims.map { _.toString }.map { XMLUtils.escape(_).toString() }.mkString(" ")
+ "<" + Misc.getNameFromClass(this) + " delims='" + delimAttrib + "'>" +
+ body.toString() +
+ "</" + Misc.getNameFromClass(this) + ">"
+ }
+ lazy val parser: DaffodilParser = new DelimiterStackParser(delims.toArray, e.termRuntimeData, body.parser)
override lazy val unparser: DaffodilUnparser = new DelimiterStackUnparser(uInit, None, uTerm, e.termRuntimeData, body.unparser)
}
@@ -103,6 +113,11 @@ case class ComplexTypeCombinator(ct: ComplexTypeBase, body: Gram) extends Termin
override def isEmpty = body.isEmpty
+ override def toString() =
+ "<" + Misc.getNameFromClass(this) + ">" +
+ body.toString() +
+ "</" + Misc.getNameFromClass(this) + ">"
+
lazy val parser: DaffodilParser = new ComplexTypeParser(ct.runtimeData, body.parser)
override lazy val unparser: DaffodilUnparser =
@@ -119,6 +134,11 @@ case class SequenceCombinator(sq: SequenceTermBase, rawTerms: Seq[Gram])
res
}
+ override def toString() =
+ "<" + Misc.getNameFromClass(this) + ">" +
+ terms.map { _.toString() }.mkString +
+ "</" + Misc.getNameFromClass(this) + ">"
+
private val mt: Gram = EmptyGram
lazy val body = rawTerms.foldRight(mt) { _ ~ _ }
@@ -142,6 +162,7 @@ case class UnorderedSequenceCombinator(s: Sequence, terms: Seq[Gram])
}
case class ArrayCombinator(e: ElementBase, body: Gram) extends Terminal(e, !body.isEmpty) {
+ override def toString() = "<Array>" + body.toString + "</Array>"
lazy val parser: DaffodilParser = new ArrayCombinatorParser(e.elementRuntimeData, body.parser)
override lazy val unparser: Unparser = new ArrayCombinatorUnparser(e.elementRuntimeData, body.unparser)
@@ -149,6 +170,7 @@ case class ArrayCombinator(e: ElementBase, body: Gram) extends Terminal(e, !body
case class OptionalCombinator(e: ElementBase, body: Gram) extends Terminal(e, !body.isEmpty) {
+ override def toString() = "<Optional>" + body.toString + "</Optional>"
lazy val parser: DaffodilParser = new OptionalCombinatorParser(e.elementRuntimeData, body.parser)
override lazy val unparser: Unparser = new OptionalCombinatorUnparser(e.elementRuntimeData, body.unparser)
}
diff --git a/daffodil-lib/src/main/scala/org/apache/daffodil/xml/XMLUtils.scala b/daffodil-lib/src/main/scala/org/apache/daffodil/xml/XMLUtils.scala
index c363f67..c186212 100644
--- a/daffodil-lib/src/main/scala/org/apache/daffodil/xml/XMLUtils.scala
+++ b/daffodil-lib/src/main/scala/org/apache/daffodil/xml/XMLUtils.scala
@@ -50,23 +50,25 @@ object XMLUtils {
/**
* Legal XML v1.0 chars are #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
*/
- def remapXMLIllegalCharToPUA(checkForExistingPUA: Boolean = true)(c: Char): Char = {
+ def remapXMLIllegalCharToPUA(checkForExistingPUA: Boolean = true, replaceCRWithLF: Boolean = true)(c: Char): Char = {
val cInt = c.toInt
val res = cInt match {
case 0x9 => c
case 0xA => c
- case 0xD => 0xA.toChar // Map CR to LF. That's what XML does.
- case _ if (c < 0x20) => (c + 0xE000).toChar
- case _ if (c > 0xD7FF && c < 0xE000) => (c + 0x1000).toChar
- case _ if (c >= 0xE000 && c <= 0xF8FF) => {
+ case 0xD =>
+ if (replaceCRWithLF) 0xA.toChar // Map CR to LF. That's what XML does.
+ else 0xE00D.toChar // or remap it to PUA so it is non-whitespace, and preserved.
+ case _ if (cInt < 0x20) => (cInt + 0xE000).toChar
+ case _ if (cInt > 0xD7FF && cInt < 0xE000) => (cInt + 0x1000).toChar
+ case _ if (cInt >= 0xE000 && cInt <= 0xF8FF) => {
if (checkForExistingPUA)
Assert.usageError("Pre-existing Private Use Area (PUA) character found in data: '%s'".format(c))
else c
}
case 0xFFFE => 0xF0FE.toChar
case 0xFFFF => 0xF0FF.toChar
- case _ if (c > 0x10FFFF) => {
- Assert.invariantFailed("Character code beyond U+10FFFF found in data. Codepoint: %s".format(c.toInt))
+ case _ if (cInt > 0x10FFFF) => {
+ Assert.invariantFailed("Character code beyond U+10FFFF found in data. Codepoint: %s".format(cInt))
}
case _ => c
@@ -356,7 +358,7 @@ object XMLUtils {
val EXT_NS_APACHE = NS(DAFFODIL_EXTENSION_NAMESPACE_APACHE.uri)
private val DAFFODIL_INTERNAL_NAMESPACE = NS(DAFFODIL_EXTENSIONS_NAMESPACE_ROOT_APACHE + ":int")
- val INT_PREFIX= "dafint"
+ val INT_PREFIX = "dafint"
val INT_NS = NS(DAFFODIL_INTERNAL_NAMESPACE.uri)
val FILE_ATTRIBUTE_NAME = "file"
@@ -423,7 +425,7 @@ object XMLUtils {
def dafAttributes(n: Node) = {
n.attributes.filter { a =>
a.getNamespace(n) == XMLUtils.EXT_NS_NCSA.toString ||
- a.getNamespace(n) == XMLUtils.EXT_NS_APACHE.toString
+ a.getNamespace(n) == XMLUtils.EXT_NS_APACHE.toString
}
}
@@ -905,6 +907,71 @@ Differences were (path, expected, actual):
tmpSchemaFile
}
+ /**
+ * Strong escaping that never loses information, handles apos and CR right.
+ *
+ * Escapes apostrophe (single quote) as well as the other XML escaped chars.
+ * Remaps CR and any other XML-illegals into PUA. Replaces whitespace with
+ * numeric character entities for additional safety.
+ *
+ * This is needed since XML may be using single quotes to surround a string which
+ * might contain single quotes.
+ *
+ * The reason basic scala.xml.Utility.escape doesn't escape single-quotes is
+ * HTML compatibility. HTML doesn't define an "&apos;" entity.
+ *
+ * Furthermore, since some potentially illegal XML characters may be used here, we
+ * are going to remap all the illegal XML characters to their corresponding PUA characters.
+ *
+ * Lastly, all whitespace chars are replaced by numeric character entities, and
+ * anything above 0xFF that is not considered letter or digit, is also replaced
+ * by a numeric character entity.
+ *
+ * The result is a string which can be displayed as an XML attribute value, is
+ * invertible back to the original string.
+ *
+ * Finally, CRLF and CR will come through as &#xE00D;&#xA; that's because
+ * if we used &#xD; for the CR, it might be converted to a LF by XML readers.
+ * We have to use our own PUA remapping trick if we want to be sure to preserve
+ * CR in XML.
+ */
+ def escape(str: String, sb: StringBuilder = new StringBuilder()): StringBuilder = {
+ var i = 0
+ while (i < str.length) {
+ val x = str(i)
+ val c = escapeMapper(x)
+ i += 1
+ c match {
+ case '\'' => sb.append("&#x27;") // don't use "&apos;" because it's not universally accepted (HTML doesn't have it in early versions)
+ case '"' => sb.append("&quot;")
+ case '&' => sb.append("&amp;")
+ case '<' => sb.append("&lt;")
+ case '>' => sb.append("&gt;")
+ case _ if (c.isLetterOrDigit) => sb.append(c)
+ case _ if (c.isWhitespace || c.isControl) => toNumericCharacterEntity(c, sb)
+ // A0 is the NBSP character - not considered whitespace, but no glyph, so we need it numeric
+ case _ if (c.toInt == 0xA0) => toNumericCharacterEntity(c, sb)
+ // Any other char < 256 is punctuation or other glyph char
+ case _ if (c.toInt < 0xFF) => sb.append(c)
+ case _ => toNumericCharacterEntity(c, sb)
+ }
+ }
+ sb
+ }
+
+ private val escapeMapper =
+ remapXMLIllegalCharToPUA(
+ checkForExistingPUA = false,
+ replaceCRWithLF = false) _
+
+ def toNumericCharacterEntity(c: Char, sb: StringBuilder) = {
+ val i = c.toInt
+ Assert.usage(i > 0) // NUL cannot be represented at all in XML.
+ val s = Integer.toHexString(i).toUpperCase()
+ sb.append("&#x")
+ sb.append(s)
+ sb.append(";")
+ }
}
trait GetAttributesMixin extends ThrowsSDE {
diff --git a/daffodil-lib/src/test/scala/org/apache/daffodil/xml/test/unit/TestXMLUtils.scala b/daffodil-lib/src/test/scala/org/apache/daffodil/xml/test/unit/TestXMLUtils.scala
index 6d1ab23..dc9f04e 100644
--- a/daffodil-lib/src/test/scala/org/apache/daffodil/xml/test/unit/TestXMLUtils.scala
+++ b/daffodil-lib/src/test/scala/org/apache/daffodil/xml/test/unit/TestXMLUtils.scala
@@ -231,4 +231,36 @@ class TestXMLUtils {
assertEquals("&&&", res(0).text)
}
+ @Test def testEscapeLineEndings() {
+ val input = "abc\r\ndef\rghi\njkl\tmno\u0085pqr"
+ val actual = XMLUtils.escape(input).toString()
+ assertEquals("abc&#xE00D;&#xA;def&#xE00D;ghi&#xA;jkl&#x9;mno&#x85;pqr", actual)
+ }
+
+ @Test def testEscape0To127() {
+ val input = (0 to 127).map { _.toChar }.mkString
+ val actual = XMLUtils.escape(input).toString()
+ val expected = "&#xE000;&#xE001;&#xE002;&#xE003;&#xE004;&#xE005;&#xE006;&#xE007;&#xE008;" + // first batch of C0 controls
+ "&#x9;&#xA;" + // Tab and LF
+ "&#xE00B;&#xE00C;" + // more C0 controls
+ "&#xE00D;" + // CR
+ // Even more of the C0 controls.
+ "&#xE00E;&#xE00F;&#xE010;&#xE011;&#xE012;&#xE013;&#xE014;&#xE015;&#xE016;&#xE017;&#xE018;&#xE019;&#xE01A;&#xE01B;&#xE01C;&#xE01D;&#xE01E;&#xE01F;" +
+ "&#x20;" + // space is whitespace comes through numeric.
+ "!&quot;#$%&amp;" + // XML Entities for quot, amp
+ "&#x27;" + // numeric entity for apos aka single quote (because &apos; is not universal, i.e., not in HTML
+ "()*+,-./0123456789:;&lt;=&gt" + // XML entities for lt, gt
+ ";?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[" + // all printing characters
+ "\\" + // backslash char needs escape. This is ONE character
+ "]^_`abcdefghijklmnopqrstuvwxyz{|}~" + // all printing characters
+ "&#x7F;" // DEL is a control char, so numeric entity for that too.
+ assertEquals(expected, actual)
+ }
+
+ @Test def testEscape128To255() {
+ val input = (128 to 255).map { _.toChar }.mkString
+ val actual = XMLUtils.escape(input).toString()
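+ val expected = "&#x80;&#x81;&#x82;&#x83;&#x84;&#x85;&#x86;&#x87;&#x88;&#x89;&#x8A;&#x8B;&#x8C;&#x8D;&#x8E;&#x8F;&#x90;&#x91;&#x92;&#x93;&#x94;&#x95;&#x96;&#x97;&#x98;&#x99;&#x9A;&#x9B;&#x9C;&#x9D;&#x9E;&#x9F;&#xA0;¡¢£¤¥¦§¨©ª«¬\u00AD®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ"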
+ assertEquals(expected, actual)
+ }
}
diff --git a/daffodil-runtime1-unparser/src/main/scala/org/apache/daffodil/processors/unparsers/ElementKindUnparsers.scala b/daffodil-runtime1-unparser/src/main/scala/org/apache/daffodil/processors/unparsers/ElementKindUnparsers.scala
index 088df46..71ff9ca 100644
--- a/daffodil-runtime1-unparser/src/main/scala/org/apache/daffodil/processors/unparsers/ElementKindUnparsers.scala
+++ b/daffodil-runtime1-unparser/src/main/scala/org/apache/daffodil/processors/unparsers/ElementKindUnparsers.scala
@@ -72,7 +72,6 @@ class SequenceCombinatorUnparser(ctxt: ModelGroupRuntimeData, childUnparsers: Ve
var index = 0
var doUnparser = false
val limit = childUnparsers.length
-
while (index < limit) {
doUnparser = false
val childUnparser = childUnparsers(index)
--
To stop receiving notification emails like this one, please contact
slawrence@apache.org.