You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@daffodil.apache.org by sl...@apache.org on 2022/08/31 16:01:25 UTC

[daffodil] branch main updated: Update to Scala XML 2.1.0

This is an automated email from the ASF dual-hosted git repository.

slawrence pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/daffodil.git


The following commit(s) were added to refs/heads/main by this push:
     new 87ffdf4ab Update to Scala XML 2.1.0
87ffdf4ab is described below

commit 87ffdf4ab8dd95499f2947fee07517d98224d86b
Author: Steve Lawrence <sl...@apache.org>
AuthorDate: Tue Aug 30 10:43:36 2022 -0400

    Update to Scala XML 2.1.0
    
    The main difference in this new Scala XML version is changing the
    XML.load() function to not ignore comments and processing instructions
    and not coalesce cdata. These changes caused  some tests to fail which
    required minor modifications. There are no changes to transitive
    dependencies or license that need to be incorporated.
    
    Additionally, the DaffodilConstructionLoader is modified to drop
    processing instructions and comments. This gives a similar behavior to
    the old XML.load function, and fixes bugs related to unexpected
    comments.
    
    DAFFODIL-2685, DAFFODIL-2237
---
 .../apache/daffodil/dsom/TestDsomCompiler.scala    | 28 +++++++++++++-----
 .../daffodil/xml/DaffodilConstructingLoader.scala  | 13 +++++----
 .../daffodil/xml/test/unit/TestXMLLoader.scala     | 26 +++++++----------
 .../daffodil/xml/test/unit/TestXMLUtils.scala      | 19 ++++++------
 .../section07/escapeScheme/escapeScheme.tdml       | 34 ++++++++++++++++++++++
 .../section07/escapeScheme/TestEscapeScheme.scala  |  2 ++
 project/Dependencies.scala                         |  2 +-
 7 files changed, 85 insertions(+), 39 deletions(-)

diff --git a/daffodil-core/src/test/scala/org/apache/daffodil/dsom/TestDsomCompiler.scala b/daffodil-core/src/test/scala/org/apache/daffodil/dsom/TestDsomCompiler.scala
index a8e2cbdd3..227c90e00 100644
--- a/daffodil-core/src/test/scala/org/apache/daffodil/dsom/TestDsomCompiler.scala
+++ b/daffodil-core/src/test/scala/org/apache/daffodil/dsom/TestDsomCompiler.scala
@@ -17,11 +17,19 @@
 
 package org.apache.daffodil.dsom
 
+import org.junit.Assert._
+import org.junit.Test
+
 import scala.xml.Node
 import scala.xml.Utility
 import scala.xml.XML
-import org.apache.daffodil.compiler._
+
 import org.apache.daffodil.Implicits._; object INoWarnDSOM1 { ImplicitsSuppressUnusedImportWarning() }
+import org.apache.daffodil.api.Diagnostic
+import org.apache.daffodil.api.URISchemaSource
+import org.apache.daffodil.compiler._
+import org.apache.daffodil.schema.annotation.props.AlignmentType
+import org.apache.daffodil.schema.annotation.props.Found
 import org.apache.daffodil.schema.annotation.props.gen.AlignmentUnits
 import org.apache.daffodil.schema.annotation.props.gen.BinaryNumberRep
 import org.apache.daffodil.schema.annotation.props.gen.ByteOrder
@@ -33,14 +41,10 @@ import org.apache.daffodil.schema.annotation.props.gen.Representation
 import org.apache.daffodil.schema.annotation.props.gen.SeparatorPosition
 import org.apache.daffodil.schema.annotation.props.gen.TextNumberRep
 import org.apache.daffodil.schema.annotation.props.gen.YesNo
-import org.apache.daffodil.schema.annotation.props.AlignmentType
 import org.apache.daffodil.util.Misc
-import org.apache.daffodil.xml.XMLUtils
-import org.junit.Assert._
-import org.apache.daffodil.api.Diagnostic
 import org.apache.daffodil.util._
-import org.junit.Test
-import org.apache.daffodil.schema.annotation.props.Found
+import org.apache.daffodil.xml.DaffodilXMLLoader
+import org.apache.daffodil.xml.XMLUtils
 
 class TestDsomCompiler {
 
@@ -216,7 +220,15 @@ class TestDsomCompiler {
   }
 
   @Test def test3(): Unit = {
-    val testSchema = XML.load(Misc.getRequiredResource("/test/example-of-most-dfdl-constructs.dfdl.xml").toURL)
+    // Newer versions of the scala-xml library changed the XML.load() function
+    // so that it does not ignore comments/processing instructions. This causes
+    // issues with parts of the schema compilation that expects these to be
+    // removed. So for this particular test, use the DaffodilXMLLoader (which
+    // does remove comments and what is normally used by Daffodil) to load the
+    // XML to a Scala XML Node.
+    val source = URISchemaSource(Misc.getRequiredResource("/test/example-of-most-dfdl-constructs.dfdl.xml"))
+    val loader = new DaffodilXMLLoader(null)
+    val testSchema = loader.load(source, None)
 
     val sset = SchemaSet(testSchema)
     val Seq(sch) = sset.schemas
diff --git a/daffodil-lib/src/main/scala/org/apache/daffodil/xml/DaffodilConstructingLoader.scala b/daffodil-lib/src/main/scala/org/apache/daffodil/xml/DaffodilConstructingLoader.scala
index eadb27f19..a06f062f1 100644
--- a/daffodil-lib/src/main/scala/org/apache/daffodil/xml/DaffodilConstructingLoader.scala
+++ b/daffodil-lib/src/main/scala/org/apache/daffodil/xml/DaffodilConstructingLoader.scala
@@ -292,17 +292,20 @@ class DaffodilConstructingLoader private[xml] (uri: URI,
   }
 
   /**
-   * Same CRLF/CR => LF processing as text gets.
+   * Drops comments
    */
   override def comment(pos: Int, s: String): Comment = {
-    Comment(text(pos, s).text)
+    // returning null drops comments
+    null
   }
 
   /**
-   * Same CRLF/CR => LF processing as text gets.
+   * Drops processing instructions
    */
-  override def procInstr(pos: Int, target: String, txt: String) =
-    ProcInstr(target, text(pos, txt).text)
+  override def procInstr(pos: Int, target: String, txt: String) = {
+    // returning null drops processing instructions
+    null
+  }
 
   private def parseXMLPrologAttributes(m: MetaData): (Option[String], Option[String], Option[Boolean]) = {
 
diff --git a/daffodil-lib/src/test/scala/org/apache/daffodil/xml/test/unit/TestXMLLoader.scala b/daffodil-lib/src/test/scala/org/apache/daffodil/xml/test/unit/TestXMLLoader.scala
index 9f123f516..1d8ecbc23 100644
--- a/daffodil-lib/src/test/scala/org/apache/daffodil/xml/test/unit/TestXMLLoader.scala
+++ b/daffodil-lib/src/test/scala/org/apache/daffodil/xml/test/unit/TestXMLLoader.scala
@@ -97,11 +97,13 @@ class TestXMLLoader {
    *
    * At the time of this testing. The basic scala xml loader uses Xerces (java)
    * under the hood. It seems hopelessly broken w.r.t CDATA region preservation.
+   *
+   * This bug was fixed in Scala XML 2.1.0. The XML.loadString function now
+   * creates CDATA object.
    */
   @Test def test_scala_loader_cdata_bug(): Unit = {
 
-    val data = """<x><![CDATA[a
-b&"<>]]></x>"""
+    val data = "<x><![CDATA[a\nb&\"<>]]></x>"
     val node = scala.xml.XML.loadString(data)
     val <x>{ xbody @ _* }</x> = node
     assertEquals(1, xbody.length)
@@ -109,21 +111,13 @@ b&"<>]]></x>"""
     val txt = body.text
     assertTrue(txt.contains("a"))
     assertTrue(txt.contains("b"))
-    //
-    // Note to developer - whomewever sees this test failing....
-    //
-    // IF this test fails, it means that the scala xml loader have been FIXED (hooray!)
-    // and our need for the ConstructingParser may have gone away.
-    //
-    assertFalse(txt.contains("<![CDATA[")) // wrong
-    assertFalse(txt.contains("]]>")) // wrong
+    assertFalse(txt.contains("<![CDATA[")) // correct, PCData.text returns only the contents, not the brackets
+    assertFalse(txt.contains("]]>")) // correct, PCData.text returns only the contents, not the brackets
     assertTrue(txt.contains("a\nb")) // they preserve the contents
-    assertFalse(body.isInstanceOf[scala.xml.PCData]) // wrong - they don't preserve the object properly.
-    assertTrue(body.isInstanceOf[scala.xml.Text]) // wrong
+    assertTrue(body.isInstanceOf[scala.xml.PCData])
     assertTrue(txt.contains("""&"<>""")) // They get the right characters in there.
-    assertTrue(body.toString.contains("""&amp;&quot;&lt;&gt;""")) // wrong
-    assertFalse(body.toString.contains("""<![CDATA[a
-b&"<>]]>"""))
+    assertTrue(body.toString.contains("""&"<>""")) // body.toString preserves content
+    assertTrue(body.toString.contains("<![CDATA[a\nb&\"<>]]>"))
 
   }
 
@@ -265,4 +259,4 @@ class TestErrorHandler extends org.xml.sax.ErrorHandler {
   def fatalError(exception: SAXParseException) = {
     exceptions += exception
   }
-}
\ No newline at end of file
+}
diff --git a/daffodil-lib/src/test/scala/org/apache/daffodil/xml/test/unit/TestXMLUtils.scala b/daffodil-lib/src/test/scala/org/apache/daffodil/xml/test/unit/TestXMLUtils.scala
index 87dc6df99..0b7a67044 100644
--- a/daffodil-lib/src/test/scala/org/apache/daffodil/xml/test/unit/TestXMLUtils.scala
+++ b/daffodil-lib/src/test/scala/org/apache/daffodil/xml/test/unit/TestXMLUtils.scala
@@ -204,9 +204,8 @@ class TestXMLUtils {
     import scala.xml.parsing.ConstructingParser
     //
     // This is the way we load XML for the TDML runner
-    // and it creates PCData nodes where Scala's basic loader
-    // and literal XML in scala program text, converts
-    // PCData to Text nodes (removing the bracket glop)
+    // and it creates PCData nodes. As of Scala XML 2.1.0, Scala's basic loader
+    // and literal XML in scala program text have this same behavior.
     //
     // We use this in the TDML runner because we can
     // preserve whitespace robustly inside CDATA bracketing.
@@ -230,14 +229,16 @@ class TestXMLUtils {
     // This is the way we load XML for DFDL Schemas
     val xml = scala.xml.XML.loadString(xmlRaw)
     //
-    // Scala's scala.xml.XML.loaders do a good job
-    // coalescing adjacent Texts (of all Atom kinds)
+    // Scala's scala.xml.XML.loaders do a good job coalescing adjacent Texts
+    // (of all Atom kinds). Note that as of Scala XML 2.1.0, this loader does
+    // not convert CDATA to text and instead creates actual CDATA nodes. This
+    // is the same behavior as the ConstructingParser
     //
-    assertEquals(1, xml.child.length)
+    assertEquals(3, xml.child.length)
     val res = XMLUtils.coalesceAdjacentTextNodes(xml.child)
-    assertEquals(1, res.length)
-    assertEquals("abc&amp;&amp;&amp;def" + 0xE000.toChar + "ghi", res(0).toString)
-    assertEquals("abc&&&def" + 0xE000.toChar + "ghi", res(0).text)
+    assertEquals(3, res.length)
+    assertEquals("abc<![CDATA[&&&]]>def" + 0xE000.toChar + "ghi", res.mkString)
+    assertEquals("abc&&&def" + 0xE000.toChar + "ghi", res.text)
   }
 
   @Test def testOnePCData(): Unit = {
diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/section07/escapeScheme/escapeScheme.tdml b/daffodil-test/src/test/resources/org/apache/daffodil/section07/escapeScheme/escapeScheme.tdml
index f437b5317..5294330d1 100644
--- a/daffodil-test/src/test/resources/org/apache/daffodil/section07/escapeScheme/escapeScheme.tdml
+++ b/daffodil-test/src/test/resources/org/apache/daffodil/section07/escapeScheme/escapeScheme.tdml
@@ -658,5 +658,39 @@
     </infoset>
   </parserTestCase>
 
+  <defineSchema name="es7">
+    <xs:include schemaLocation="org/apache/daffodil/xsd/DFDLGeneralFormat.dfdl.xsd"/>
+
+    <dfdl:defineEscapeScheme name="pound">
+      <!-- comment should be ignored -->
+      <dfdl:escapeScheme escapeCharacter='#'
+        escapeKind="escapeCharacter" escapeEscapeCharacter="" extraEscapedCharacters="" generateEscapeBlock="whenNeeded" />
+      <!-- comment should be ignored -->
+    </dfdl:defineEscapeScheme>
+
+    <dfdl:format ref="tns:GeneralFormat" lengthKind="delimited" escapeSchemeRef="tns:pound" />
+
+    <xs:element name="list">
+      <xs:complexType>
+        <xs:sequence dfdl:separator=",">
+          <xs:element name="field" type="xs:string" maxOccurs="unbounded" />
+        </xs:sequence>
+      </xs:complexType>
+    </xs:element>
+  </defineSchema>
+
+  <parserTestCase name="escapeScheme_with_comment" model="es7"
+    description="Section 7 defineEscapeScheme - DFDL-7-079R" root="list" roundTrip="true">
+    <document>1,2#,2,3</document>
+    <infoset>
+      <dfdlInfoset>
+        <tns:list>
+          <tns:field>1</tns:field>
+          <tns:field>2,2</tns:field>
+          <tns:field>3</tns:field>
+        </tns:list>
+      </dfdlInfoset>
+    </infoset>
+  </parserTestCase>
 
 </testSuite>
diff --git a/daffodil-test/src/test/scala/org/apache/daffodil/section07/escapeScheme/TestEscapeScheme.scala b/daffodil-test/src/test/scala/org/apache/daffodil/section07/escapeScheme/TestEscapeScheme.scala
index 9478fc3bc..a01ce88a3 100644
--- a/daffodil-test/src/test/scala/org/apache/daffodil/section07/escapeScheme/TestEscapeScheme.scala
+++ b/daffodil-test/src/test/scala/org/apache/daffodil/section07/escapeScheme/TestEscapeScheme.scala
@@ -98,4 +98,6 @@ class TestEscapeScheme {
   @Test def test_escBlkAllQuotes(): Unit = { runner.runOneTest("escBlkAllQuotes") }
   @Test def test_escBlkEndSame(): Unit = { runner.runOneTest("escBlkEndSame") }
   //@Test def test_escBlkMultipleEEC() { runner.runOneTest("escBlkMultipleEEC") } // DAFFODIL-1972
+
+  @Test def test_escapeScheme_with_comment(): Unit = { runner.runOneTest("escapeScheme_with_comment") }
 }
diff --git a/project/Dependencies.scala b/project/Dependencies.scala
index 53eaa0673..a08a4f216 100644
--- a/project/Dependencies.scala
+++ b/project/Dependencies.scala
@@ -23,7 +23,7 @@ object Dependencies {
 
   lazy val core = Seq(
     "com.lihaoyi" %% "os-lib" % "0.8.1", // for writing/compiling C source files
-    "org.scala-lang.modules" %% "scala-xml" % "2.0.1",
+    "org.scala-lang.modules" %% "scala-xml" % "2.1.0",
     "org.scala-lang.modules" %% "scala-parser-combinators" % "2.1.1",
     "com.ibm.icu" % "icu4j" % "71.1",
     "xerces" % "xercesImpl" % "2.12.2",