You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@daffodil.apache.org by sl...@apache.org on 2019/06/17 13:50:37 UTC

[incubator-daffodil] branch master updated: Simplify how we show text differences when a TDML test fails

This is an automated email from the ASF dual-hosted git repository.

slawrence pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-daffodil.git


The following commit(s) were added to refs/heads/master by this push:
     new db7b779  Simplify how we show text differences when a TDML test fails
db7b779 is described below

commit db7b779ff6ce915a0f7c5f84d399f5b5027b2849
Author: Steve Lawrence <sl...@apache.org>
AuthorDate: Tue Jun 11 14:05:38 2019 -0400

    Simplify how we show text differences when a TDML test fails
    
    We previously found every character that differed and showed its path,
    index, and expected/actual characters. But when an XML element contains
    a lot of text (e.g. hexBinary blob) with a lot of differences, this
    character by character diff is not very helpful and consumed a lot of
    memory to build the output string. This easily leads to an OutOfMemory
    exception.
    
    Instead, when showing the diff of two XML text elements, only show the
    index of the first diff plus some following characters for context. This
    should be useful enough in most cases to figure out where things went
    off the rails while keeping memory usage low, even on large diffs.
    
    Also change the diff output to show the actual infoset before the
    differences summary. Displaying the actual infoset after the diff made
    it very difficult to find the summary of differences when big infosets
    were involved.
    
    DAFFODIL-2118
---
 .../scala/org/apache/daffodil/xml/XMLUtils.scala   | 44 +++++++++++-----------
 .../daffodil/xml/test/unit/TestXMLUtils.scala      | 20 +++++-----
 2 files changed, 33 insertions(+), 31 deletions(-)

diff --git a/daffodil-lib/src/main/scala/org/apache/daffodil/xml/XMLUtils.scala b/daffodil-lib/src/main/scala/org/apache/daffodil/xml/XMLUtils.scala
index 91f07dc..e62b0ff 100644
--- a/daffodil-lib/src/main/scala/org/apache/daffodil/xml/XMLUtils.scala
+++ b/daffodil-lib/src/main/scala/org/apache/daffodil/xml/XMLUtils.scala
@@ -748,16 +748,13 @@ object XMLUtils {
 Comparison failed.
 Expected (attributes stripped)
           %s
-Actual (attributes stripped)
+Actual (attributes ignored for diff)
           %s
 Differences were (path, expected, actual):
-%s
-Actual with Attributes (needed for unparse)
 %s""".format(
         removeAttributes(expected).toString,
-        removeAttributes(actual).toString,
-        diffs.map { _.toString }.mkString("- ", "\n- ", "\n"),
-        actual))
+        actual,
+        diffs.map { _.toString }.mkString("- ", "\n- ", "\n")))
     }
   }
 
@@ -899,23 +896,28 @@ Actual with Attributes (needed for unparse)
     dataB: String,
     maybeType: Option[String]): Seq[(String, String, String)] = {
 
-    def quoteIt(str: String) = "'" + str + "'"
-
     if (textIsSame(dataA, dataB, maybeType)) Nil
-    else if (dataA.length != dataB.length) {
-      List((zPath, quoteIt(dataA), quoteIt(dataB)))
-    } else {
-      val ints = Stream.from(1).map { _.toString }
-      val z = dataA zip dataB zip ints
-      val res = z.flatMap {
-        case ((a1, b1), index) =>
-          if (a1 == b1) Nil
-          else {
-            val indexPath = zPath + ".charAt(" + index + ")"
-            List((indexPath, a1.toString + "(%%#x%04X;)".format(a1.toInt), b1.toString + "(%%#x%04X;)".format(b1.toInt)))
-          }
+    else {
+      // There must be some difference, so let's find just the first index of
+      // difference and we'll include that and some following characters for
+      // context.
+      val CHARS_TO_SHOW_AFTER_DIFF = 40
+
+      val lenA = dataA.length
+      val lenB = dataB.length
+      var index = 0
+      while (index < lenA && index < lenB && dataA(index) == dataB(index)) {
+        index += 1
       }
-      res
+
+      // We know there must be a diff once we got here. Either dataA/dataB is a
+      // prefix of the other and index is where the prefix ends, or index is
+      // the first difference found. Either way, we can safely use slice() to
+      // get at most some number of characters at that index for context.
+      val contextA = Misc.remapControlsAndLineEndingsToVisibleGlyphs(dataA.slice(index, index + CHARS_TO_SHOW_AFTER_DIFF))
+      val contextB = Misc.remapControlsAndLineEndingsToVisibleGlyphs(dataB.slice(index, index + CHARS_TO_SHOW_AFTER_DIFF))
+      val path = zPath + ".charAt(" + (index + 1) + ")"
+      Seq((path, contextA, contextB))
     }
   }
 
diff --git a/daffodil-lib/src/test/scala/org/apache/daffodil/xml/test/unit/TestXMLUtils.scala b/daffodil-lib/src/test/scala/org/apache/daffodil/xml/test/unit/TestXMLUtils.scala
index a7568c3..4bf17dc 100644
--- a/daffodil-lib/src/test/scala/org/apache/daffodil/xml/test/unit/TestXMLUtils.scala
+++ b/daffodil-lib/src/test/scala/org/apache/daffodil/xml/test/unit/TestXMLUtils.scala
@@ -33,8 +33,8 @@ class TestXMLUtils {
     val diffs = XMLUtils.computeTextDiff("", d1, d2, None)
     val Seq((p, a, b)) = diffs
     assertEquals(".charAt(1)", p)
-    assertEquals("a(%#x0061;)", a)
-    assertEquals("b(%#x0062;)", b)
+    assertEquals("a", a)
+    assertEquals("b", b)
   }
 
   @Test def testDiff1() {
@@ -43,8 +43,8 @@ class TestXMLUtils {
     val diffs = XMLUtils.computeDiff(d1, d2)
     val Seq((p1, a, b)) = diffs
     assertEquals("d.charAt(1)", p1)
-    assertEquals("a(%#x0061;)", a)
-    assertEquals("b(%#x0062;)", b)
+    assertEquals("a", a)
+    assertEquals("b", b)
   }
 
   @Test def testDiff2() {
@@ -53,8 +53,8 @@ class TestXMLUtils {
     val diffs = XMLUtils.computeDiff(d1, d2)
     val Seq((p1, a, b)) = diffs
     assertEquals("a/d[2].charAt(1)", p1)
-    assertEquals("x(%#x0078;)", a)
-    assertEquals("y(%#x0079;)", b)
+    assertEquals("x", a)
+    assertEquals("y", b)
   }
 
   @Test def testDiff3() {
@@ -63,11 +63,11 @@ class TestXMLUtils {
     val diffs = XMLUtils.computeDiff(d1, d2)
     val Seq((p1, e, f), (p2, x, y)) = diffs
     assertEquals("a/e[1].charAt(1)", p1)
-    assertEquals("e(%#x0065;)", e)
-    assertEquals("f(%#x0066;)", f)
+    assertEquals("e", e)
+    assertEquals("f", f)
     assertEquals("a/d[2].charAt(3)", p2)
-    assertEquals("x(%#x0078;)", x)
-    assertEquals("y(%#x0079;)", y)
+    assertEquals("x", x)
+    assertEquals("y", y)
 
   }