You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@daffodil.apache.org by sl...@apache.org on 2022/08/31 16:01:25 UTC
[daffodil] branch main updated: Update to Scala XML 2.1.0
This is an automated email from the ASF dual-hosted git repository.
slawrence pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/daffodil.git
The following commit(s) were added to refs/heads/main by this push:
new 87ffdf4ab Update to Scala XML 2.1.0
87ffdf4ab is described below
commit 87ffdf4ab8dd95499f2947fee07517d98224d86b
Author: Steve Lawrence <sl...@apache.org>
AuthorDate: Tue Aug 30 10:43:36 2022 -0400
Update to Scala XML 2.1.0
The main difference in this new Scala XML version is changing the
XML.load() function to not ignore comments and processing instructions
and not coalesce cdata. These changes caused some tests to fail which
required minor modifications. There are no changes to transitive
dependencies or license that need to be incorporated.
Additionally, the DaffodilConstructionLoader is modified to drop
processing instructions and comments. This gives a similar behavior to
the old XML.load function, and fixes bugs related to unexpected
comments.
DAFFODIL-2685, DAFFODIL-2237
---
.../apache/daffodil/dsom/TestDsomCompiler.scala | 28 +++++++++++++-----
.../daffodil/xml/DaffodilConstructingLoader.scala | 13 +++++----
.../daffodil/xml/test/unit/TestXMLLoader.scala | 26 +++++++----------
.../daffodil/xml/test/unit/TestXMLUtils.scala | 19 ++++++------
.../section07/escapeScheme/escapeScheme.tdml | 34 ++++++++++++++++++++++
.../section07/escapeScheme/TestEscapeScheme.scala | 2 ++
project/Dependencies.scala | 2 +-
7 files changed, 85 insertions(+), 39 deletions(-)
diff --git a/daffodil-core/src/test/scala/org/apache/daffodil/dsom/TestDsomCompiler.scala b/daffodil-core/src/test/scala/org/apache/daffodil/dsom/TestDsomCompiler.scala
index a8e2cbdd3..227c90e00 100644
--- a/daffodil-core/src/test/scala/org/apache/daffodil/dsom/TestDsomCompiler.scala
+++ b/daffodil-core/src/test/scala/org/apache/daffodil/dsom/TestDsomCompiler.scala
@@ -17,11 +17,19 @@
package org.apache.daffodil.dsom
+import org.junit.Assert._
+import org.junit.Test
+
import scala.xml.Node
import scala.xml.Utility
import scala.xml.XML
-import org.apache.daffodil.compiler._
+
import org.apache.daffodil.Implicits._; object INoWarnDSOM1 { ImplicitsSuppressUnusedImportWarning() }
+import org.apache.daffodil.api.Diagnostic
+import org.apache.daffodil.api.URISchemaSource
+import org.apache.daffodil.compiler._
+import org.apache.daffodil.schema.annotation.props.AlignmentType
+import org.apache.daffodil.schema.annotation.props.Found
import org.apache.daffodil.schema.annotation.props.gen.AlignmentUnits
import org.apache.daffodil.schema.annotation.props.gen.BinaryNumberRep
import org.apache.daffodil.schema.annotation.props.gen.ByteOrder
@@ -33,14 +41,10 @@ import org.apache.daffodil.schema.annotation.props.gen.Representation
import org.apache.daffodil.schema.annotation.props.gen.SeparatorPosition
import org.apache.daffodil.schema.annotation.props.gen.TextNumberRep
import org.apache.daffodil.schema.annotation.props.gen.YesNo
-import org.apache.daffodil.schema.annotation.props.AlignmentType
import org.apache.daffodil.util.Misc
-import org.apache.daffodil.xml.XMLUtils
-import org.junit.Assert._
-import org.apache.daffodil.api.Diagnostic
import org.apache.daffodil.util._
-import org.junit.Test
-import org.apache.daffodil.schema.annotation.props.Found
+import org.apache.daffodil.xml.DaffodilXMLLoader
+import org.apache.daffodil.xml.XMLUtils
class TestDsomCompiler {
@@ -216,7 +220,15 @@ class TestDsomCompiler {
}
@Test def test3(): Unit = {
- val testSchema = XML.load(Misc.getRequiredResource("/test/example-of-most-dfdl-constructs.dfdl.xml").toURL)
+ // Newer versions of the scala-xml library changed the XML.load() function
+ // so that it does not ignore comments/processing instructions. This causes
+ // issues with parts of the schema compilation that expects these to be
+ // removed. So for this particular test, use the DaffodilXMLLoader (which
+ // does remove comments and what is normally used by Daffodil) to load the
+ // XML to a Scala XML Node.
+ val source = URISchemaSource(Misc.getRequiredResource("/test/example-of-most-dfdl-constructs.dfdl.xml"))
+ val loader = new DaffodilXMLLoader(null)
+ val testSchema = loader.load(source, None)
val sset = SchemaSet(testSchema)
val Seq(sch) = sset.schemas
diff --git a/daffodil-lib/src/main/scala/org/apache/daffodil/xml/DaffodilConstructingLoader.scala b/daffodil-lib/src/main/scala/org/apache/daffodil/xml/DaffodilConstructingLoader.scala
index eadb27f19..a06f062f1 100644
--- a/daffodil-lib/src/main/scala/org/apache/daffodil/xml/DaffodilConstructingLoader.scala
+++ b/daffodil-lib/src/main/scala/org/apache/daffodil/xml/DaffodilConstructingLoader.scala
@@ -292,17 +292,20 @@ class DaffodilConstructingLoader private[xml] (uri: URI,
}
/**
- * Same CRLF/CR => LF processing as text gets.
+ * Drops comments
*/
override def comment(pos: Int, s: String): Comment = {
- Comment(text(pos, s).text)
+ // returning null drops comments
+ null
}
/**
- * Same CRLF/CR => LF processing as text gets.
+ * Drops processing instructions
*/
- override def procInstr(pos: Int, target: String, txt: String) =
- ProcInstr(target, text(pos, txt).text)
+ override def procInstr(pos: Int, target: String, txt: String) = {
+ // returning null drops processing instructions
+ null
+ }
private def parseXMLPrologAttributes(m: MetaData): (Option[String], Option[String], Option[Boolean]) = {
diff --git a/daffodil-lib/src/test/scala/org/apache/daffodil/xml/test/unit/TestXMLLoader.scala b/daffodil-lib/src/test/scala/org/apache/daffodil/xml/test/unit/TestXMLLoader.scala
index 9f123f516..1d8ecbc23 100644
--- a/daffodil-lib/src/test/scala/org/apache/daffodil/xml/test/unit/TestXMLLoader.scala
+++ b/daffodil-lib/src/test/scala/org/apache/daffodil/xml/test/unit/TestXMLLoader.scala
@@ -97,11 +97,13 @@ class TestXMLLoader {
*
* At the time of this testing. The basic scala xml loader uses Xerces (java)
* under the hood. It seems hopelessly broken w.r.t CDATA region preservation.
+ *
+ * This bug was fixed in Scala XML 2.1.0. The XML.loadString function now
+ * creates CDATA object.
*/
@Test def test_scala_loader_cdata_bug(): Unit = {
- val data = """<x><![CDATA[a
-b&"<>]]></x>"""
+ val data = "<x><![CDATA[a\nb&\"<>]]></x>"
val node = scala.xml.XML.loadString(data)
val <x>{ xbody @ _* }</x> = node
assertEquals(1, xbody.length)
@@ -109,21 +111,13 @@ b&"<>]]></x>"""
val txt = body.text
assertTrue(txt.contains("a"))
assertTrue(txt.contains("b"))
- //
- // Note to developer - whomewever sees this test failing....
- //
- // IF this test fails, it means that the scala xml loader have been FIXED (hooray!)
- // and our need for the ConstructingParser may have gone away.
- //
- assertFalse(txt.contains("<![CDATA[")) // wrong
- assertFalse(txt.contains("]]>")) // wrong
+ assertFalse(txt.contains("<![CDATA[")) // correct, PCData.text returns only the contents, not the brackets
+ assertFalse(txt.contains("]]>")) // correct, PCData.text returns only the contents, not the brackets
assertTrue(txt.contains("a\nb")) // they preserve the contents
- assertFalse(body.isInstanceOf[scala.xml.PCData]) // wrong - they don't preserve the object properly.
- assertTrue(body.isInstanceOf[scala.xml.Text]) // wrong
+ assertTrue(body.isInstanceOf[scala.xml.PCData])
assertTrue(txt.contains("""&"<>""")) // They get the right characters in there.
- assertTrue(body.toString.contains("""&"<>""")) // wrong
- assertFalse(body.toString.contains("""<![CDATA[a
-b&"<>]]>"""))
+ assertTrue(body.toString.contains("""&"<>""")) // body.toString preserves content
+ assertTrue(body.toString.contains("<![CDATA[a\nb&\"<>]]>"))
}
@@ -265,4 +259,4 @@ class TestErrorHandler extends org.xml.sax.ErrorHandler {
def fatalError(exception: SAXParseException) = {
exceptions += exception
}
-}
\ No newline at end of file
+}
diff --git a/daffodil-lib/src/test/scala/org/apache/daffodil/xml/test/unit/TestXMLUtils.scala b/daffodil-lib/src/test/scala/org/apache/daffodil/xml/test/unit/TestXMLUtils.scala
index 87dc6df99..0b7a67044 100644
--- a/daffodil-lib/src/test/scala/org/apache/daffodil/xml/test/unit/TestXMLUtils.scala
+++ b/daffodil-lib/src/test/scala/org/apache/daffodil/xml/test/unit/TestXMLUtils.scala
@@ -204,9 +204,8 @@ class TestXMLUtils {
import scala.xml.parsing.ConstructingParser
//
// This is the way we load XML for the TDML runner
- // and it creates PCData nodes where Scala's basic loader
- // and literal XML in scala program text, converts
- // PCData to Text nodes (removing the bracket glop)
+ // and it creates PCData nodes. As of Scala XML 2.1.0, Scala's basic loader
+ // and literal XML in scala program text have this same behavior.
//
// We use this in the TDML runner because we can
// preserve whitespace robustly inside CDATA bracketing.
@@ -230,14 +229,16 @@ class TestXMLUtils {
// This is the way we load XML for DFDL Schemas
val xml = scala.xml.XML.loadString(xmlRaw)
//
- // Scala's scala.xml.XML.loaders do a good job
- // coalescing adjacent Texts (of all Atom kinds)
+ // Scala's scala.xml.XML.loaders do a good job coalescing adjacent Texts
+ // (of all Atom kinds). Note that as of Scala XML 2.1.0, this loader does
+ // not convert CDATA to text and instead creates actual CDATA nodes. This
+ // is the same behavior as the ConstructingParser
//
- assertEquals(1, xml.child.length)
+ assertEquals(3, xml.child.length)
val res = XMLUtils.coalesceAdjacentTextNodes(xml.child)
- assertEquals(1, res.length)
- assertEquals("abc&&&def" + 0xE000.toChar + "ghi", res(0).toString)
- assertEquals("abc&&&def" + 0xE000.toChar + "ghi", res(0).text)
+ assertEquals(3, res.length)
+ assertEquals("abc<![CDATA[&&&]]>def" + 0xE000.toChar + "ghi", res.mkString)
+ assertEquals("abc&&&def" + 0xE000.toChar + "ghi", res.text)
}
@Test def testOnePCData(): Unit = {
diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/section07/escapeScheme/escapeScheme.tdml b/daffodil-test/src/test/resources/org/apache/daffodil/section07/escapeScheme/escapeScheme.tdml
index f437b5317..5294330d1 100644
--- a/daffodil-test/src/test/resources/org/apache/daffodil/section07/escapeScheme/escapeScheme.tdml
+++ b/daffodil-test/src/test/resources/org/apache/daffodil/section07/escapeScheme/escapeScheme.tdml
@@ -658,5 +658,39 @@
</infoset>
</parserTestCase>
+ <defineSchema name="es7">
+ <xs:include schemaLocation="org/apache/daffodil/xsd/DFDLGeneralFormat.dfdl.xsd"/>
+
+ <dfdl:defineEscapeScheme name="pound">
+ <!-- comment should be ignored -->
+ <dfdl:escapeScheme escapeCharacter='#'
+ escapeKind="escapeCharacter" escapeEscapeCharacter="" extraEscapedCharacters="" generateEscapeBlock="whenNeeded" />
+ <!-- comment should be ignored -->
+ </dfdl:defineEscapeScheme>
+
+ <dfdl:format ref="tns:GeneralFormat" lengthKind="delimited" escapeSchemeRef="tns:pound" />
+
+ <xs:element name="list">
+ <xs:complexType>
+ <xs:sequence dfdl:separator=",">
+ <xs:element name="field" type="xs:string" maxOccurs="unbounded" />
+ </xs:sequence>
+ </xs:complexType>
+ </xs:element>
+ </defineSchema>
+
+ <parserTestCase name="escapeScheme_with_comment" model="es7"
+ description="Section 7 defineEscapeScheme - DFDL-7-079R" root="list" roundTrip="true">
+ <document>1,2#,2,3</document>
+ <infoset>
+ <dfdlInfoset>
+ <tns:list>
+ <tns:field>1</tns:field>
+ <tns:field>2,2</tns:field>
+ <tns:field>3</tns:field>
+ </tns:list>
+ </dfdlInfoset>
+ </infoset>
+ </parserTestCase>
</testSuite>
diff --git a/daffodil-test/src/test/scala/org/apache/daffodil/section07/escapeScheme/TestEscapeScheme.scala b/daffodil-test/src/test/scala/org/apache/daffodil/section07/escapeScheme/TestEscapeScheme.scala
index 9478fc3bc..a01ce88a3 100644
--- a/daffodil-test/src/test/scala/org/apache/daffodil/section07/escapeScheme/TestEscapeScheme.scala
+++ b/daffodil-test/src/test/scala/org/apache/daffodil/section07/escapeScheme/TestEscapeScheme.scala
@@ -98,4 +98,6 @@ class TestEscapeScheme {
@Test def test_escBlkAllQuotes(): Unit = { runner.runOneTest("escBlkAllQuotes") }
@Test def test_escBlkEndSame(): Unit = { runner.runOneTest("escBlkEndSame") }
//@Test def test_escBlkMultipleEEC() { runner.runOneTest("escBlkMultipleEEC") } // DAFFODIL-1972
+
+ @Test def test_escapeScheme_with_comment(): Unit = { runner.runOneTest("escapeScheme_with_comment") }
}
diff --git a/project/Dependencies.scala b/project/Dependencies.scala
index 53eaa0673..a08a4f216 100644
--- a/project/Dependencies.scala
+++ b/project/Dependencies.scala
@@ -23,7 +23,7 @@ object Dependencies {
lazy val core = Seq(
"com.lihaoyi" %% "os-lib" % "0.8.1", // for writing/compiling C source files
- "org.scala-lang.modules" %% "scala-xml" % "2.0.1",
+ "org.scala-lang.modules" %% "scala-xml" % "2.1.0",
"org.scala-lang.modules" %% "scala-parser-combinators" % "2.1.1",
"com.ibm.icu" % "icu4j" % "71.1",
"xerces" % "xercesImpl" % "2.12.2",