You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@daffodil.apache.org by sl...@apache.org on 2022/08/17 12:26:59 UTC

[daffodil] branch main updated: Set starting absolute bit positions of DOSs more often, avoid deadlocks

This is an automated email from the ASF dual-hosted git repository.

slawrence pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/daffodil.git


The following commit(s) were added to refs/heads/main by this push:
     new bd46fdb16 Set starting absolute bit positions of DOSs more often, avoid deadlocks
bd46fdb16 is described below

commit bd46fdb16bfe3ef34bcb320ae03448137937d7c1
Author: Steve Lawrence <sl...@apache.org>
AuthorDate: Tue Aug 16 10:44:38 2022 -0400

    Set starting absolute bit positions of DOSs more often, avoid deadlocks
    
    Currently we are very pessimistic about specifying a known length of a
    simple type suspension by always returning Nope (i.e. there is no known
    length). This meant that it was impossible to determine the absolute
    starting position of split buffers from those suspensions. And because
    absolute bit positions are so important for allowing suspensions to
    evaluate (e.g. alignment, length calculations) it increased the chance
    that a suspension would deadlock.
    
    To resolve this issue, this modifies the simple type suspension to use
    the actual length where possible. This isn't always possible, like when
    padding or right fill might be needed, but that often isn't the case.
    
    Even with that change, the absolute starting positions could not be
    calculated for a new split in cases where the current DOS did not have
    an absolute position and only had a relative position. This meant
    deadlocks were still fairly likely. So in these cases, we calculate and
    store the length of the DOS (relative bit position + suspension length).
    Once the absolute starting position of the DOS it determined, we can use
    that length to set the starting absolute position of the following DOS.
    
    Also add an assert to ensure that these new length calculations are
    correct by checking that the ending bit position of one DOS matches the
    starting bit position of the next DOS when merging. This discovered unit
    tests with an off by one error.
    
    These changes increase the chance of an absolute starting bit position
    being known and allows more suspensions to be evaluated which should
    lead to less deadlocks.
    
    Unrelated, remove println from a previous commit that was used for
    debugging and accidentally made it into the commit.
    
    DAFFODIL-2717
---
 .../grammar/primitives/ElementCombinator.scala     | 18 ++++++++--
 .../daffodil/io/DataOutputStreamImplMixin.scala    | 38 +++++++++++++++++++++-
 .../io/DirectOrBufferedDataOutputStream.scala      |  6 ++++
 .../apache/daffodil/io/TestDataOutputStream4.scala |  4 +--
 .../org/apache/daffodil/util/MaybeULong.scala      |  2 ++
 .../processors/unparsers/SpecifiedLength2.scala    | 29 +++++++++--------
 .../apache/daffodil/processors/Suspension.scala    | 35 +++++++++++---------
 .../apache/daffodil/infoset/TestStringAsXml.scala  |  1 -
 8 files changed, 97 insertions(+), 36 deletions(-)

diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/grammar/primitives/ElementCombinator.scala b/daffodil-core/src/main/scala/org/apache/daffodil/grammar/primitives/ElementCombinator.scala
index c13256f90..66a5a3d83 100644
--- a/daffodil-core/src/main/scala/org/apache/daffodil/grammar/primitives/ElementCombinator.scala
+++ b/daffodil-core/src/main/scala/org/apache/daffodil/grammar/primitives/ElementCombinator.scala
@@ -218,9 +218,21 @@ case class SimpleTypeRetry(ctxt: ElementBase, v: Gram)
   extends Terminal(ctxt, true) {
   override def parser = v.parser
 
-  override def unparser = new SimpleTypeRetryUnparser(
-    ctxt.erd,
-    ctxt.maybeUnparseTargetLengthInBitsEv, v.unparser)
+  // When unparsing, the target length of this simple type might not actually
+  // match the actual unparsed length due to things like padding or fill. But
+  // if we can statically determine that the target length will be correct
+  // (e.g. there won't be things like padding/fill) then we can use that length
+  // during unparsing to provide more information about buffered data output
+  // streams which can be used to help to avoid deadlocked suspensions.
+  lazy val maybeExactTargetLength = {
+    if (!ctxt.shouldAddPadding && !ctxt.shouldAddFill && !ctxt.shouldCheckExcessLength) {
+      ctxt.maybeUnparseTargetLengthInBitsEv
+    } else {
+      Maybe.Nope
+    }
+  }
+
+  override def unparser = new SimpleTypeRetryUnparser(ctxt.erd, maybeExactTargetLength, v.unparser)
 }
 
 case class CaptureContentLengthStart(ctxt: ElementBase)
diff --git a/daffodil-io/src/main/scala/org/apache/daffodil/io/DataOutputStreamImplMixin.scala b/daffodil-io/src/main/scala/org/apache/daffodil/io/DataOutputStreamImplMixin.scala
index d4a82b3c1..40e2a0df9 100644
--- a/daffodil-io/src/main/scala/org/apache/daffodil/io/DataOutputStreamImplMixin.scala
+++ b/daffodil-io/src/main/scala/org/apache/daffodil/io/DataOutputStreamImplMixin.scala
@@ -149,7 +149,7 @@ trait DataOutputStreamImplMixin extends DataStreamCommonState
    * of the former starting bit pos so as to be able to
    * convert relative positions to absolute positions correctly.
    */
-  protected final def maybeAbsStartingBitPos0b = {
+  final def maybeAbsStartingBitPos0b: MaybeULong = {
     if (this.maybeAbsolutizedRelativeStartingBitPosInBits_.isDefined)
       maybeAbsolutizedRelativeStartingBitPosInBits_
     else
@@ -185,6 +185,20 @@ trait DataOutputStreamImplMixin extends DataStreamCommonState
       Assert.usageError("You cannot set the abs starting bit pos again.")
     }
     checkInvariants()
+
+    // We now know the absolute starting position of this DOS. If we also know its
+    // length, then we can set the absolute starting position of the next DOS as well
+    // (if one exists and if it doesn't already have an absolute starting
+    // position). This can potentially propagate to other DOSs, greatly improving
+    // the chances of suspensions not deadlocking, since starting absolute positions
+    // are often needed for evaluation (e.g. alignment, length calculations).
+    if (this.maybeLengthInBits_.isDefined && this.maybeNextInChain.isDefined) {
+      val nic = this.maybeNextInChain.get.asInstanceOf[DirectOrBufferedDataOutputStream]
+      if (nic.maybeAbsBitPos0b.isEmpty) {
+        val nextAbsPos = this.maybeAbsStartingBitPos0b.getULong + this.maybeLengthInBits_.getULong
+        nic.setAbsStartingBitPos0b(nextAbsPos)
+      }
+    }
   }
 
   protected final def setRelBitPos0b(newRelBitPos0b: ULong): Unit = {
@@ -194,6 +208,14 @@ trait DataOutputStreamImplMixin extends DataStreamCommonState
     checkInvariants()
   }
 
+  def setLengthInBits(newLengthInBits: ULong): Unit = {
+    Assert.invariant(maybeLengthInBits_.isEmpty)
+    Assert.usage(isWritable)
+    checkInvariants()
+    this.maybeLengthInBits_ = MaybeULong(newLengthInBits.longValue)
+    checkInvariants()
+  }
+
   def relBitPos0b = {
     relBitPos0b_
   }
@@ -215,6 +237,20 @@ trait DataOutputStreamImplMixin extends DataStreamCommonState
    */
   private var maybeAbsolutizedRelativeStartingBitPosInBits_ = MaybeULong.Nope
 
+  /**
+   * When we split a stream, we do so because some suspension needs to write to
+   * the end of this DOS. In some cases, we know the number of bits that
+   * suspension will write before it does it, and so we can set the starting
+   * absolute bit position of the new buffered stream that we split into.
+   * However, we can only do that if we know the absolute starting position of
+   * the original DOS when the split occurs. If we do not know that, we can
+   * still calculate the final length of this DOS (i.e. the current relative
+   * position + the length of the suspension) and store that here. Once we learn
+   * the starting absolute position of this DOS, we can then set the absolute
+   * starting position of the next DOS using this length.
+   */
+  private var maybeLengthInBits_ = MaybeULong.Nope
+
   /**
    * Absolute bit limit zero based
    *
diff --git a/daffodil-io/src/main/scala/org/apache/daffodil/io/DirectOrBufferedDataOutputStream.scala b/daffodil-io/src/main/scala/org/apache/daffodil/io/DirectOrBufferedDataOutputStream.scala
index 62bd720b6..0bc073f2a 100644
--- a/daffodil-io/src/main/scala/org/apache/daffodil/io/DirectOrBufferedDataOutputStream.scala
+++ b/daffodil-io/src/main/scala/org/apache/daffodil/io/DirectOrBufferedDataOutputStream.scala
@@ -966,6 +966,12 @@ object DirectOrBufferedDataOutputStream {
     Assert.invariant(bufDOS.isBuffering)
     Assert.invariant(!directDOS.isBuffering)
 
+    // If the buffered DOS already has a starting absolute bit position, ensure
+    // that it matches the ending bit position of the direct DOS we are about to
+    // deliver it to
+    val maybeBufStartPos = bufDOS.maybeAbsStartingBitPos0b
+    Assert.invariant(maybeBufStartPos.isEmpty || maybeBufStartPos.get == directDOS.maybeAbsBitPos0b.get)
+
     val finfoBitOrder = finfo.bitOrder // bit order we are supposed to write with
     val priorBitOrder = directDOS.cst.priorBitOrder // bit order that the directDOS had at last successful unparse. (prior is set after each unparser)
     if (finfoBitOrder ne priorBitOrder) {
diff --git a/daffodil-io/src/test/scala/org/apache/daffodil/io/TestDataOutputStream4.scala b/daffodil-io/src/test/scala/org/apache/daffodil/io/TestDataOutputStream4.scala
index 5a2e5f557..9b02952f8 100644
--- a/daffodil-io/src/test/scala/org/apache/daffodil/io/TestDataOutputStream4.scala
+++ b/daffodil-io/src/test/scala/org/apache/daffodil/io/TestDataOutputStream4.scala
@@ -41,10 +41,10 @@ class TestDataOutputStream4 {
 
     val out = direct.addBuffered
     if (setAbs)
-      out.setAbsStartingBitPos0b(ULong(20))
+      out.setAbsStartingBitPos0b(ULong(19))
     val out2 = out.addBuffered
     if (setAbs)
-      out2.setAbsStartingBitPos0b(ULong(39))
+      out2.setAbsStartingBitPos0b(ULong(38))
 
     out.putLong(0x5a5a5, 19, finfo)
     // 101 1010 0101 1010 0101
diff --git a/daffodil-lib/src/main/scala/org/apache/daffodil/util/MaybeULong.scala b/daffodil-lib/src/main/scala/org/apache/daffodil/util/MaybeULong.scala
index 81de83cea..e5acb7ab8 100644
--- a/daffodil-lib/src/main/scala/org/apache/daffodil/util/MaybeULong.scala
+++ b/daffodil-lib/src/main/scala/org/apache/daffodil/util/MaybeULong.scala
@@ -77,6 +77,8 @@ final class MaybeJULong(mi: MaybeULong)
   @inline final def isDefined = mi.isDefined
   @inline final def isEmpty = !isDefined
   override def toString = mi.toString
+
+  @inline def toMaybeULong = mi
 }
 
 object MaybeJULong {
diff --git a/daffodil-runtime1-unparser/src/main/scala/org/apache/daffodil/processors/unparsers/SpecifiedLength2.scala b/daffodil-runtime1-unparser/src/main/scala/org/apache/daffodil/processors/unparsers/SpecifiedLength2.scala
index 1c53f5ab0..5e4f0954c 100644
--- a/daffodil-runtime1-unparser/src/main/scala/org/apache/daffodil/processors/unparsers/SpecifiedLength2.scala
+++ b/daffodil-runtime1-unparser/src/main/scala/org/apache/daffodil/processors/unparsers/SpecifiedLength2.scala
@@ -177,20 +177,21 @@ class SimpleTypeRetryUnparserSuspendableOperation(
   extends SuspendableOperation {
 
   override protected def maybeKnownLengthInBits(ustate: UState): MaybeULong = {
-    // Note that we cannot use a targetLengthInBitsEv to determine the
-    // knownLengthInBits. This is because even if an OVC/Simple element has fixed
-    // length, the result of the OVC might not actually write that many bits,
-    // relying on padding and/or right fill to fill in the remaining bits
-    //
-    // TODO: The above is too pessimistic. Many formats have no notion of
-    // padding nor filling, so the length could be computed from the targetLengthInBitsEv
-    // just fine
-    //
-    // We could make it the responsibility of the caller of this to supply or not
-    // the maybeUnparserTargetLengthInBitsEv depending on whether it can be
-    // depended upon or not.
-    //
-    MaybeULong.Nope
+    if (maybeUnparserTargetLengthInBitsEv.isDefined) {
+      // maybeUnparserTargetLengthInBitsEv should only be defined if we know at
+      // schema compile time that it will evaluate to a value, and that value
+      // will match the actual unparsed length (e.g. there will not be any
+      // padding/fill). Here we assert that if the target length evaluatable was
+      // passed into this class, then it must evaluate to a value. When we
+      // deliver buffered content, we will also assert that the starting bit
+      // position of the buffered DOS that results from this suspension matches
+      // the direct DOS (i.e. this length is used correctly)
+      val maybeLen = maybeUnparserTargetLengthInBitsEv.get.evaluate(ustate)
+      Assert.invariant(maybeLen.isDefined)
+      maybeLen.toMaybeULong
+    } else {
+      MaybeULong.Nope
+    }
   }
 
   protected def test(state: UState) = {
diff --git a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/Suspension.scala b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/Suspension.scala
index efbe5b859..2f9e80a84 100644
--- a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/Suspension.scala
+++ b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/Suspension.scala
@@ -136,24 +136,29 @@ trait Suspension
     val buffered = original.addBuffered
 
     if (maybeKnownLengthInBits.isDefined) {
-      // since we know the length of the unparsed representation that we're skipping for now,
-      // that means we know the absolute position of the bits in the buffer we're creating
-      // and that means alignment operations don't have to suspend waiting for this knowledge
-      if (original.maybeAbsBitPos0b.isDefined) {
-        // direct streams always know this, but buffered streams may not.
+      // We know the length of the unparsed representation of this suspension that we
+      // are currently skipping. Use that length to give hints to this DOS or the new
+      // split DOS that can used to set absolute bit positions and avoid deadlocks
+      // since absolute bit positions are usually needed to evaluate suspensions
+      // (e.g. alignment, length calculations).
+      val suspensionLength = maybeKnownLengthInBits.getULong
 
+      if (original.maybeAbsBitPos0b.isDefined) {
+        // We know the absolute bitPosition of the original dataOutputStream. That
+        // means we can just add the known length of this suspension to that and set
+        // it as the starting absolute bit position of the new split buffer.
         val originalAbsBitPos0b = original.maybeAbsBitPos0b.getULong
-
-        // we are passed this length (in bits)
-        // and can use it to initialize the absolute bit pos of the buffered output stream.
-        //
-        // This allows us to deal with alignment regions, that is, we can determine
-        // their size since we know the absolute bit position.
-
-        val mkl = maybeKnownLengthInBits.getULong
-        buffered.setAbsStartingBitPos0b(originalAbsBitPos0b + mkl)
+        buffered.setAbsStartingBitPos0b(originalAbsBitPos0b + suspensionLength)
         buffered.setPriorBitOrder(ustate.bitOrder)
-
+      } else {
+        // We do not know the absolute position of the original buffer. This means we
+        // don't yet know where the new buffer starts. However, we can calculate the
+        // final length of the original DOS (relative position + suspension length),
+        // and set that as length of the original DOS. Once that DOS learns its
+        // absolute starting position, the DOS length can be used to set the absolute
+        // starting position of the split DOS.
+        val originalRelBitPos0b = original.relBitPos0b
+        original.setLengthInBits(originalRelBitPos0b + suspensionLength)
       }
     } else {
       Logger.log.debug(s"Buffered DOS created for ${ustate.currentInfosetNode.erd.diagnosticDebugName} without knowning absolute start bit pos: ${buffered}")
diff --git a/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXml.scala b/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXml.scala
index de35a2c7f..fac7580fd 100644
--- a/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXml.scala
+++ b/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXml.scala
@@ -167,7 +167,6 @@ class TestStringAsXml {
     val dp = compileSchema(Misc.getRequiredResource("/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/binMessage.dfdl.xsd"))
     val unparseInfoset = Misc.getRequiredResource("/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_08.dat").toURL.openStream
     val (unparseDiags, _) = doParse(dp, unparseInfoset)
-    unparseDiags.foreach(System.err.println)
     assertTrue(unparseDiags.find(_.contains("Undeclared general entity \"name\"")).isDefined)
   }