You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@daffodil.apache.org by sl...@apache.org on 2021/11/19 13:36:17 UTC

[daffodil] branch main updated: Improve how we capture and restore variable maps in points of uncertainty

This is an automated email from the ASF dual-hosted git repository.

slawrence pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/daffodil.git


The following commit(s) were added to refs/heads/main by this push:
     new fcae7b9  Improve how we capture and restore variable maps in points of uncertainty
fcae7b9 is described below

commit fcae7b978704cbdf5505d5752754e040ddf5e3e8
Author: Steve Lawrence <sl...@apache.org>
AuthorDate: Fri Nov 19 07:45:03 2021 -0500

    Improve how we capture and restore variable maps in points of uncertainty
    
    Currently variable instances maintain their own prior state with special
    "prior" variables. When resetting back to a PoU, we would use this prior
    value to figure out what state to reset to. But this is fragile, and
    really only keeps track of one level of change. This means multiple
    changes to variables within a single point of uncertainly can lead to
    unexpected behavior or assertion failures when backtracking.
    
    This changes how we keep track of variable state. Rather than attempting
    to roll back individual state changes, we insteed just create a deep
    copy of the entire variable map. But to avoid extra overhead, we only
    make a copy right before state is about to change, essentially
    implementing copy-on-write. This greatly simplifies the code while still
    maintain good performance.
    
    DAFFODIL-2580
---
 .../apache/daffodil/processors/VariableMap1.scala  |  47 ++++----
 .../daffodil/processors/parsers/PState.scala       |  76 ++++++++-----
 .../daffodil/section07/variables/variables.tdml    | 118 +++++++++++++++++++++
 .../section07/variables/TestVariables.scala        |   6 ++
 4 files changed, 190 insertions(+), 57 deletions(-)

diff --git a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/VariableMap1.scala b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/VariableMap1.scala
index 80f470d..e31bda2 100644
--- a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/VariableMap1.scala
+++ b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/VariableMap1.scala
@@ -99,65 +99,43 @@ class VariableInstance private (val rd: VariableRuntimeData)
   extends Serializable {
 
   var state: VariableState = VariableUndefined
-  var priorState: VariableState = VariableUndefined
   var value: DataValuePrimitiveNullable = DataValue.NoValue
-  var priorValue: DataValuePrimitiveNullable = DataValue.NoValue
 
   // This represents the default value at the start of processing, provided by
   // either the defaultValue expression or by an external binding
   var firstInstanceInitialValue: DataValuePrimitiveNullable = DataValue.NoValue
 
   def setState(s: VariableState) = {
-    this.priorState = this.state
     this.state = s
   }
 
   def setValue(v: DataValuePrimitiveNullable) = {
-    this.priorValue = this.value
     this.value = v
   }
 
-  /* This is used to set a default value without assigning a priorValue */
+  /* This is used to set a default value with the appropriate state */
   def setDefaultValue(v: DataValuePrimitiveNullable) = {
     Assert.invariant((this.state == VariableUndefined || this.state == VariableInProcess) && v.isDefined)
     this.state = VariableDefined
     this.value = v
   }
 
-  override def toString: String = "VariableInstance(%s,%s,%s,%s,%s,%s)".format(
+  override def toString: String = "VariableInstance(%s,%s,%s,%s)".format(
                                                     state,
                                                     value,
                                                     rd,
-                                                    rd.maybeDefaultValueExpr,
-                                                    priorState,
-                                                    priorValue)
+                                                    rd.maybeDefaultValueExpr)
 
   def copy(
     state: VariableState = state,
     value: DataValuePrimitiveNullable = value,
-    rd: VariableRuntimeData = rd,
-    priorState: VariableState = priorState,
-    priorValue: DataValuePrimitiveNullable = priorValue) = {
+    rd: VariableRuntimeData = rd) = {
       val inst = new VariableInstance(rd)
       inst.state = state
-      inst.priorState = priorState
       inst.value = value
-      inst.priorValue = priorValue
       inst
   }
 
-  def reset() = {
-    Assert.invariant(this.state != VariableUndefined)
-    (this.state, this.priorState, this.rd.maybeDefaultValueExpr.isDefined) match {
-      case (VariableRead, VariableSet, _) => this.setState(VariableSet)
-      case (VariableRead, _, true) => this.setState(VariableDefined)
-      case (VariableSet, _, _) => {
-        this.setState(this.priorState)
-        this.setValue(this.priorValue)
-      }
-      case (_, _, _) => Assert.impossible("Should have SDE before reaching this")
-    }
-  }
 }
 
 object VariableUtils {
@@ -330,10 +308,21 @@ class VariableMap private(vTable: Map[GlobalQName, ArrayBuffer[VariableInstance]
   lazy val context = Assert.invariantFailed("unused.")
 
   /**
-   * Used only for testing.
+   * Determine if a call to readVariable will change any state in this
+   * VariableMap. This should only be used for optimizations and should not
+   * prevent readVariable from actually being called. It does not do any
+   * checking related to SDE's or invariant checking that readVariable might
+   * perform
    */
-  def getVariableBindings(qn: GlobalQName): ArrayBuffer[VariableInstance] = {
-    vTable.get(qn).get
+  def readVariableWillChangeState(vrd: VariableRuntimeData): Boolean = {
+    val variableInstances = vTable.get(vrd.globalQName)
+    if (variableInstances.isDefined) {
+      val variable = variableInstances.get.last
+      val variableRead = (variable.state eq VariableRead) && variable.value.isDefined
+      !variableRead
+    } else {
+      true
+    }
   }
 
   /**
diff --git a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/PState.scala b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/PState.scala
index f525034..b39ad4e 100644
--- a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/PState.scala
+++ b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/PState.scala
@@ -65,7 +65,6 @@ import org.apache.daffodil.infoset.DISimpleState
 import org.apache.daffodil.exceptions.Abort
 import org.apache.daffodil.exceptions.ThrowsSDE
 import org.apache.daffodil.infoset.DataValue.DataValuePrimitive
-import org.apache.daffodil.xml.GlobalQName
 
 object MPState {
 
@@ -210,17 +209,6 @@ final class PState private (
    */
   val pointsOfUncertainty = new MStackOf[PState.Mark]()
 
-  /**
-   * This stack tracks variables that have changed within the current point of
-   * uncertainty. This tracking is necessary to revert changes made to variables
-   * when the parser needs to backtrack. This stack needs to be pushed when a
-   * new mark is created and popped when it is discarded or reset. It is
-   * necessary for every mark to either be discarded or reset to inorder for
-   * this stack to funciton correctly.
-   */
-  private val changedVariablesStack = new MStackOf[mutable.MutableList[GlobalQName]]()
-  changedVariablesStack.push(mutable.MutableList[GlobalQName]())
-
   override def dataStream = One(dataInputStream)
 
   def saveDelimitedParseResult(result: Maybe[dfa.ParseResult]): Unit = {
@@ -244,7 +232,6 @@ final class PState private (
 
   private def mark(requestorID: String, context: RuntimeData): PState.Mark = {
     // threadCheck()
-    changedVariablesStack.push(mutable.MutableList[GlobalQName]())
     val m = markPool.getFromPool(requestorID)
     m.captureFrom(this, requestorID, context)
     m.element.infosetWalkerBlockCount += 1
@@ -263,12 +250,6 @@ final class PState private (
     m.restoreInto(this)
     m.clear()
     markPool.returnToPool(m)
-    changedVariablesStack.top.foreach { v => {
-      val variable = variableMap.find(v)
-      if (variable.isDefined)
-        variable.get.reset
-    }}
-    changedVariablesStack.pop
   }
 
   private def discard(m: PState.Mark): Unit = {
@@ -276,7 +257,6 @@ final class PState private (
     dataInputStream.discard(m.disMark)
     m.clear()
     markPool.returnToPool(m)
-    changedVariablesStack.pop
   }
 
   override def toString() = {
@@ -328,24 +308,57 @@ final class PState private (
     this.infoset = newParent
   }
 
+  /**
+   * When we take marks of this PState when we enter a PoU, we intentionally do
+   * not take a deep copy of the variable map because that is too expensive of
+   * an operation for a data structure that rarely changes. So taking a mark
+   * just makes a shallow copy of the PState variable map.
+   *
+   * But this means modifications of the variable map could potentially affect
+   * PoU marks, which means resetting back to a mark does not work as intended.
+   * To resolve this issue, anytime a function is called that will change the
+   * state of a variable, we must first call this function. This will take a
+   * deep copy of the variable map and set that copy as the variable map for
+   * this PState. This way, the state already captured in the mark will not be
+   * modified and we can correctly reset back to that state when resetting the
+   * PoU. Note that we only do this if we have not already done a deep copy in
+   * this PoU--if we've already done a deep copy, we don't need to do it again
+   * since the PoU copy can't be modified.
+   */
+  @inline
+  private def changingVariable(): Unit = {
+    if (!pointsOfUncertainty.isEmpty) {
+      val curPoU = pointsOfUncertainty.top
+      if (curPoU.variableMap eq this.variableMap) {
+        this.setVariableMap(this.variableMap.copy())
+      }
+    }
+  }
+
   override def setVariable(vrd: VariableRuntimeData, newValue: DataValuePrimitive, referringContext: ThrowsSDE): Unit = {
+    changingVariable()
     variableMap.setVariable(vrd, newValue, referringContext, this)
-    changedVariablesStack.top += vrd.globalQName
   }
 
   override def getVariable(vrd: VariableRuntimeData, referringContext: ThrowsSDE): DataValuePrimitive = {
-    val res = variableMap.readVariable(vrd, referringContext, this)
-    changedVariablesStack.top += vrd.globalQName
-    res
+    // Skip the call to changingVariable if this variable has already been
+    // read, which means another read will not actually change the state. This
+    // potentially avoids an expensive deep copy if we've read a variable,
+    // entered a PoU, and then read that variable again
+    if (variableMap.readVariableWillChangeState(vrd)) {
+      changingVariable()
+    }
+    variableMap.readVariable(vrd, referringContext, this)
   }
 
   def newVariableInstance(vrd: VariableRuntimeData): VariableInstance = {
-    val v = variableMap.newVariableInstance(vrd)
-    changedVariablesStack.top += vrd.globalQName
-    v
+    changingVariable()
+    variableMap.newVariableInstance(vrd)
   }
 
   def removeVariableInstance(vrd: VariableRuntimeData): Unit = {
+    // we do not need to call changingVariable() here even though this changes
+    // variable state, because newVariableInstance would have already called it
     variableMap.removeVariableInstance(vrd)
   }
 
@@ -579,13 +592,20 @@ object PState {
       else
         complexElementState.captureFrom(element)
       this.disMark = ps.dataInputStream.mark(requestorID)
-      this.variableMap = ps.variableMap
       this.processorStatus = ps.processorStatus
       this.validationStatus = ps.validationStatus
       this.diagnostics = ps.diagnostics
       this.mpStateMark.captureFrom(ps.mpstate)
       this.blobPaths = ps.blobPaths
       this.context = context
+
+      // Note that this is intentionally a shallow copy. This normally would
+      // not work because the variable map is mutable so other state changes
+      // could mutate this snapshot. This is avoided by carefully changing the
+      // PState variable map to a deep copy of this variable map right before a
+      // change is made. This essentially makes the PState variable map behave
+      // as copy-on-write.
+      this.variableMap = ps.variableMap
     }
 
     def restoreInfoset(ps: PState) = {
diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/section07/variables/variables.tdml b/daffodil-test/src/test/resources/org/apache/daffodil/section07/variables/variables.tdml
index b56f3d1..5d8d167 100644
--- a/daffodil-test/src/test/resources/org/apache/daffodil/section07/variables/variables.tdml
+++ b/daffodil-test/src/test/resources/org/apache/daffodil/section07/variables/variables.tdml
@@ -2590,4 +2590,122 @@
     </tdml:document>
   </tdml:unparserTestCase>
 
+  <tdml:defineSchema name="escapeCharVars">
+
+    <xs:include schemaLocation="org/apache/daffodil/xsd/DFDLGeneralFormat.dfdl.xsd" />
+
+    <dfdl:defineVariable defaultValue="\" name="EscapeChar" type="xs:string"/>
+
+    <dfdl:format ref="GeneralFormat"
+      escapeSchemeRef="escapeScheme" />
+
+    <dfdl:defineEscapeScheme name="escapeScheme">
+      <dfdl:escapeScheme
+        escapeKind="escapeCharacter"
+        escapeCharacter="{ $EscapeChar }"
+        escapeEscapeCharacter="{ $EscapeChar }"
+        extraEscapedCharacters=""/>
+    </dfdl:defineEscapeScheme>
+
+    <xs:element name="root">
+      <xs:complexType>
+        <xs:sequence>
+          <xs:element name="delims" dfdl:initiator="[" dfdl:terminator="]" dfdl:length="1" dfdl:lengthKind="explicit" minOccurs="0">
+            <xs:complexType>
+              <xs:sequence>
+                <xs:element name="escapeChar" type="xs:string" dfdl:lengthKind="explicit" dfdl:length="1" dfdl:escapeSchemeRef="">
+                  <xs:annotation>
+                    <xs:appinfo source="http://www.ogf.org/dfdl/">
+                      <dfdl:setVariable ref="EscapeChar" value="{ . }" />
+                    </xs:appinfo>
+                  </xs:annotation>
+                </xs:element>
+              </xs:sequence>
+            </xs:complexType>
+          </xs:element>
+          <xs:sequence dfdl:separator=",">
+            <xs:element name="field" type="xs:string" dfdl:lengthKind="delimited" maxOccurs="unbounded" />
+          </xs:sequence>
+        </xs:sequence>
+      </xs:complexType>
+    </xs:element>
+
+  </tdml:defineSchema>
+
+  <tdml:parserTestCase name="escapeCharVars_01" root="root" model="escapeCharVars">
+    <tdml:document>[=]fieldOne,field=,Two,fieldThree</tdml:document>
+    <tdml:infoset>
+      <tdml:dfdlInfoset>
+        <root xmlns="http://example.com">
+          <delims>
+            <escapeChar>=</escapeChar>
+          </delims>
+          <field>fieldOne</field>
+          <field>field,Two</field>
+          <field>fieldThree</field>
+        </root>
+      </tdml:dfdlInfoset>
+    </tdml:infoset>
+  </tdml:parserTestCase>
+
+  <tdml:parserTestCase name="escapeCharVars_02" root="root" model="escapeCharVars">
+    <tdml:document>fieldOne,field\,Two,fieldThree</tdml:document>
+    <tdml:infoset>
+      <tdml:dfdlInfoset>
+        <root xmlns="http://example.com">
+          <field>fieldOne</field>
+          <field>field,Two</field>
+          <field>fieldThree</field>
+        </root>
+      </tdml:dfdlInfoset>
+    </tdml:infoset>
+  </tdml:parserTestCase>
+
+
+  <tdml:defineSchema name="multipleVarReadInPoU">
+
+    <xs:include schemaLocation="org/apache/daffodil/xsd/DFDLGeneralFormat.dfdl.xsd" />
+
+    <dfdl:defineVariable defaultValue="x" name="var1" type="xs:string"/>
+
+    <dfdl:format ref="GeneralFormat" />
+
+    <xs:element name="root">
+      <xs:complexType>
+        <xs:sequence>
+          <xs:element name="opt" minOccurs="0">
+            <xs:complexType>
+              <xs:sequence>
+                <xs:sequence>
+                  <xs:annotation>
+                    <xs:appinfo source="http://www.ogf.org/dfdl/">
+                      <dfdl:assert test="{ $var1 eq 'x' }" />
+                    </xs:appinfo>
+                  </xs:annotation>
+                </xs:sequence>
+                <xs:sequence>
+                  <xs:annotation>
+                    <xs:appinfo source="http://www.ogf.org/dfdl/">
+                      <dfdl:assert test="{ $var1 eq 'y' }" />
+                    </xs:appinfo>
+                  </xs:annotation>
+                </xs:sequence>
+              </xs:sequence>
+            </xs:complexType>
+          </xs:element>
+        </xs:sequence>
+      </xs:complexType>
+    </xs:element>
+
+  </tdml:defineSchema>
+
+  <tdml:parserTestCase name="multipleVarReadInPoU_01" root="root" model="multipleVarReadInPoU">
+    <tdml:document></tdml:document>
+    <tdml:infoset>
+      <tdml:dfdlInfoset>
+        <root xmlns="http://example.com"></root>
+      </tdml:dfdlInfoset>
+    </tdml:infoset>
+  </tdml:parserTestCase>
+
 </tdml:testSuite>
diff --git a/daffodil-test/src/test/scala/org/apache/daffodil/section07/variables/TestVariables.scala b/daffodil-test/src/test/scala/org/apache/daffodil/section07/variables/TestVariables.scala
index 13eb449..fa50051 100644
--- a/daffodil-test/src/test/scala/org/apache/daffodil/section07/variables/TestVariables.scala
+++ b/daffodil-test/src/test/scala/org/apache/daffodil/section07/variables/TestVariables.scala
@@ -132,6 +132,12 @@ class TestVariables {
   @Test def test_setVar1_d_parse(): Unit = { runner_01.runOneTest("setVar1_d_parse") }
   @Test def test_setVar1_d_unparse(): Unit = { runner_01.runOneTest("setVar1_d_unparse") }
 
+  @Test def test_escapeCharVars_01(): Unit = { runner.runOneTest("escapeCharVars_01") }
+  @Test def test_escapeCharVars_02(): Unit = { runner.runOneTest("escapeCharVars_02") }
+
+  @Test def test_multipleVarReadInPoU_01(): Unit = { runner.runOneTest("multipleVarReadInPoU_01") }
+
+
 /*****************************************************************/
   val tdmlVal = XMLUtils.TDML_NAMESPACE
   val dfdl = XMLUtils.DFDL_NAMESPACE