You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@daffodil.apache.org by sl...@apache.org on 2021/11/19 13:36:17 UTC
[daffodil] branch main updated: Improve how we capture and restore variable maps in points of uncertainty
This is an automated email from the ASF dual-hosted git repository.
slawrence pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/daffodil.git
The following commit(s) were added to refs/heads/main by this push:
new fcae7b9 Improve how we capture and restore variable maps in points of uncertainty
fcae7b9 is described below
commit fcae7b978704cbdf5505d5752754e040ddf5e3e8
Author: Steve Lawrence <sl...@apache.org>
AuthorDate: Fri Nov 19 07:45:03 2021 -0500
Improve how we capture and restore variable maps in points of uncertainty
Currently variable instances maintain their own prior state with special
"prior" variables. When resetting back to a PoU, we would use this prior
value to figure out what state to reset to. But this is fragile, and
really only keeps track of one level of change. This means multiple
changes to variables within a single point of uncertainly can lead to
unexpected behavior or assertion failures when backtracking.
This changes how we keep track of variable state. Rather than attempting
to roll back individual state changes, we insteed just create a deep
copy of the entire variable map. But to avoid extra overhead, we only
make a copy right before state is about to change, essentially
implementing copy-on-write. This greatly simplifies the code while still
maintain good performance.
DAFFODIL-2580
---
.../apache/daffodil/processors/VariableMap1.scala | 47 ++++----
.../daffodil/processors/parsers/PState.scala | 76 ++++++++-----
.../daffodil/section07/variables/variables.tdml | 118 +++++++++++++++++++++
.../section07/variables/TestVariables.scala | 6 ++
4 files changed, 190 insertions(+), 57 deletions(-)
diff --git a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/VariableMap1.scala b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/VariableMap1.scala
index 80f470d..e31bda2 100644
--- a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/VariableMap1.scala
+++ b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/VariableMap1.scala
@@ -99,65 +99,43 @@ class VariableInstance private (val rd: VariableRuntimeData)
extends Serializable {
var state: VariableState = VariableUndefined
- var priorState: VariableState = VariableUndefined
var value: DataValuePrimitiveNullable = DataValue.NoValue
- var priorValue: DataValuePrimitiveNullable = DataValue.NoValue
// This represents the default value at the start of processing, provided by
// either the defaultValue expression or by an external binding
var firstInstanceInitialValue: DataValuePrimitiveNullable = DataValue.NoValue
def setState(s: VariableState) = {
- this.priorState = this.state
this.state = s
}
def setValue(v: DataValuePrimitiveNullable) = {
- this.priorValue = this.value
this.value = v
}
- /* This is used to set a default value without assigning a priorValue */
+ /* This is used to set a default value with the appropriate state */
def setDefaultValue(v: DataValuePrimitiveNullable) = {
Assert.invariant((this.state == VariableUndefined || this.state == VariableInProcess) && v.isDefined)
this.state = VariableDefined
this.value = v
}
- override def toString: String = "VariableInstance(%s,%s,%s,%s,%s,%s)".format(
+ override def toString: String = "VariableInstance(%s,%s,%s,%s)".format(
state,
value,
rd,
- rd.maybeDefaultValueExpr,
- priorState,
- priorValue)
+ rd.maybeDefaultValueExpr)
def copy(
state: VariableState = state,
value: DataValuePrimitiveNullable = value,
- rd: VariableRuntimeData = rd,
- priorState: VariableState = priorState,
- priorValue: DataValuePrimitiveNullable = priorValue) = {
+ rd: VariableRuntimeData = rd) = {
val inst = new VariableInstance(rd)
inst.state = state
- inst.priorState = priorState
inst.value = value
- inst.priorValue = priorValue
inst
}
- def reset() = {
- Assert.invariant(this.state != VariableUndefined)
- (this.state, this.priorState, this.rd.maybeDefaultValueExpr.isDefined) match {
- case (VariableRead, VariableSet, _) => this.setState(VariableSet)
- case (VariableRead, _, true) => this.setState(VariableDefined)
- case (VariableSet, _, _) => {
- this.setState(this.priorState)
- this.setValue(this.priorValue)
- }
- case (_, _, _) => Assert.impossible("Should have SDE before reaching this")
- }
- }
}
object VariableUtils {
@@ -330,10 +308,21 @@ class VariableMap private(vTable: Map[GlobalQName, ArrayBuffer[VariableInstance]
lazy val context = Assert.invariantFailed("unused.")
/**
- * Used only for testing.
+ * Determine if a call to readVariable will change any state in this
+ * VariableMap. This should only be used for optimizations and should not
+ * prevent readVariable from actually being called. It does not do any
+ * checking related to SDE's or invariant checking that readVariable might
+ * perform
*/
- def getVariableBindings(qn: GlobalQName): ArrayBuffer[VariableInstance] = {
- vTable.get(qn).get
+ def readVariableWillChangeState(vrd: VariableRuntimeData): Boolean = {
+ val variableInstances = vTable.get(vrd.globalQName)
+ if (variableInstances.isDefined) {
+ val variable = variableInstances.get.last
+ val variableRead = (variable.state eq VariableRead) && variable.value.isDefined
+ !variableRead
+ } else {
+ true
+ }
}
/**
diff --git a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/PState.scala b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/PState.scala
index f525034..b39ad4e 100644
--- a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/PState.scala
+++ b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/PState.scala
@@ -65,7 +65,6 @@ import org.apache.daffodil.infoset.DISimpleState
import org.apache.daffodil.exceptions.Abort
import org.apache.daffodil.exceptions.ThrowsSDE
import org.apache.daffodil.infoset.DataValue.DataValuePrimitive
-import org.apache.daffodil.xml.GlobalQName
object MPState {
@@ -210,17 +209,6 @@ final class PState private (
*/
val pointsOfUncertainty = new MStackOf[PState.Mark]()
- /**
- * This stack tracks variables that have changed within the current point of
- * uncertainty. This tracking is necessary to revert changes made to variables
- * when the parser needs to backtrack. This stack needs to be pushed when a
- * new mark is created and popped when it is discarded or reset. It is
- * necessary for every mark to either be discarded or reset to inorder for
- * this stack to funciton correctly.
- */
- private val changedVariablesStack = new MStackOf[mutable.MutableList[GlobalQName]]()
- changedVariablesStack.push(mutable.MutableList[GlobalQName]())
-
override def dataStream = One(dataInputStream)
def saveDelimitedParseResult(result: Maybe[dfa.ParseResult]): Unit = {
@@ -244,7 +232,6 @@ final class PState private (
private def mark(requestorID: String, context: RuntimeData): PState.Mark = {
// threadCheck()
- changedVariablesStack.push(mutable.MutableList[GlobalQName]())
val m = markPool.getFromPool(requestorID)
m.captureFrom(this, requestorID, context)
m.element.infosetWalkerBlockCount += 1
@@ -263,12 +250,6 @@ final class PState private (
m.restoreInto(this)
m.clear()
markPool.returnToPool(m)
- changedVariablesStack.top.foreach { v => {
- val variable = variableMap.find(v)
- if (variable.isDefined)
- variable.get.reset
- }}
- changedVariablesStack.pop
}
private def discard(m: PState.Mark): Unit = {
@@ -276,7 +257,6 @@ final class PState private (
dataInputStream.discard(m.disMark)
m.clear()
markPool.returnToPool(m)
- changedVariablesStack.pop
}
override def toString() = {
@@ -328,24 +308,57 @@ final class PState private (
this.infoset = newParent
}
+ /**
+ * When we take marks of this PState when we enter a PoU, we intentionally do
+ * not take a deep copy of the variable map because that is too expensive of
+ * an operation for a data structure that rarely changes. So taking a mark
+ * just makes a shallow copy of the PState variable map.
+ *
+ * But this means modifications of the variable map could potentially affect
+ * PoU marks, which means resetting back to a mark does not work as intended.
+ * To resolve this issue, anytime a function is called that will change the
+ * state of a variable, we must first call this function. This will take a
+ * deep copy of the variable map and set that copy as the variable map for
+ * this PState. This way, the state already captured in the mark will not be
+ * modified and we can correctly reset back to that state when resetting the
+ * PoU. Note that we only do this if we have not already done a deep copy in
+ * this PoU--if we've already done a deep copy, we don't need to do it again
+ * since the PoU copy can't be modified.
+ */
+ @inline
+ private def changingVariable(): Unit = {
+ if (!pointsOfUncertainty.isEmpty) {
+ val curPoU = pointsOfUncertainty.top
+ if (curPoU.variableMap eq this.variableMap) {
+ this.setVariableMap(this.variableMap.copy())
+ }
+ }
+ }
+
override def setVariable(vrd: VariableRuntimeData, newValue: DataValuePrimitive, referringContext: ThrowsSDE): Unit = {
+ changingVariable()
variableMap.setVariable(vrd, newValue, referringContext, this)
- changedVariablesStack.top += vrd.globalQName
}
override def getVariable(vrd: VariableRuntimeData, referringContext: ThrowsSDE): DataValuePrimitive = {
- val res = variableMap.readVariable(vrd, referringContext, this)
- changedVariablesStack.top += vrd.globalQName
- res
+ // Skip the call to changingVariable if this variable has already been
+ // read, which means another read will not actually change the state. This
+ // potentially avoids an expensive deep copy if we've read a variable,
+ // entered a PoU, and then read that variable again
+ if (variableMap.readVariableWillChangeState(vrd)) {
+ changingVariable()
+ }
+ variableMap.readVariable(vrd, referringContext, this)
}
def newVariableInstance(vrd: VariableRuntimeData): VariableInstance = {
- val v = variableMap.newVariableInstance(vrd)
- changedVariablesStack.top += vrd.globalQName
- v
+ changingVariable()
+ variableMap.newVariableInstance(vrd)
}
def removeVariableInstance(vrd: VariableRuntimeData): Unit = {
+ // we do not need to call changingVariable() here even though this changes
+ // variable state, because newVariableInstance would have already called it
variableMap.removeVariableInstance(vrd)
}
@@ -579,13 +592,20 @@ object PState {
else
complexElementState.captureFrom(element)
this.disMark = ps.dataInputStream.mark(requestorID)
- this.variableMap = ps.variableMap
this.processorStatus = ps.processorStatus
this.validationStatus = ps.validationStatus
this.diagnostics = ps.diagnostics
this.mpStateMark.captureFrom(ps.mpstate)
this.blobPaths = ps.blobPaths
this.context = context
+
+ // Note that this is intentionally a shallow copy. This normally would
+ // not work because the variable map is mutable so other state changes
+ // could mutate this snapshot. This is avoided by carefully changing the
+ // PState variable map to a deep copy of this variable map right before a
+ // change is made. This essentially makes the PState variable map behave
+ // as copy-on-write.
+ this.variableMap = ps.variableMap
}
def restoreInfoset(ps: PState) = {
diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/section07/variables/variables.tdml b/daffodil-test/src/test/resources/org/apache/daffodil/section07/variables/variables.tdml
index b56f3d1..5d8d167 100644
--- a/daffodil-test/src/test/resources/org/apache/daffodil/section07/variables/variables.tdml
+++ b/daffodil-test/src/test/resources/org/apache/daffodil/section07/variables/variables.tdml
@@ -2590,4 +2590,122 @@
</tdml:document>
</tdml:unparserTestCase>
+ <tdml:defineSchema name="escapeCharVars">
+
+ <xs:include schemaLocation="org/apache/daffodil/xsd/DFDLGeneralFormat.dfdl.xsd" />
+
+ <dfdl:defineVariable defaultValue="\" name="EscapeChar" type="xs:string"/>
+
+ <dfdl:format ref="GeneralFormat"
+ escapeSchemeRef="escapeScheme" />
+
+ <dfdl:defineEscapeScheme name="escapeScheme">
+ <dfdl:escapeScheme
+ escapeKind="escapeCharacter"
+ escapeCharacter="{ $EscapeChar }"
+ escapeEscapeCharacter="{ $EscapeChar }"
+ extraEscapedCharacters=""/>
+ </dfdl:defineEscapeScheme>
+
+ <xs:element name="root">
+ <xs:complexType>
+ <xs:sequence>
+ <xs:element name="delims" dfdl:initiator="[" dfdl:terminator="]" dfdl:length="1" dfdl:lengthKind="explicit" minOccurs="0">
+ <xs:complexType>
+ <xs:sequence>
+ <xs:element name="escapeChar" type="xs:string" dfdl:lengthKind="explicit" dfdl:length="1" dfdl:escapeSchemeRef="">
+ <xs:annotation>
+ <xs:appinfo source="http://www.ogf.org/dfdl/">
+ <dfdl:setVariable ref="EscapeChar" value="{ . }" />
+ </xs:appinfo>
+ </xs:annotation>
+ </xs:element>
+ </xs:sequence>
+ </xs:complexType>
+ </xs:element>
+ <xs:sequence dfdl:separator=",">
+ <xs:element name="field" type="xs:string" dfdl:lengthKind="delimited" maxOccurs="unbounded" />
+ </xs:sequence>
+ </xs:sequence>
+ </xs:complexType>
+ </xs:element>
+
+ </tdml:defineSchema>
+
+ <tdml:parserTestCase name="escapeCharVars_01" root="root" model="escapeCharVars">
+ <tdml:document>[=]fieldOne,field=,Two,fieldThree</tdml:document>
+ <tdml:infoset>
+ <tdml:dfdlInfoset>
+ <root xmlns="http://example.com">
+ <delims>
+ <escapeChar>=</escapeChar>
+ </delims>
+ <field>fieldOne</field>
+ <field>field,Two</field>
+ <field>fieldThree</field>
+ </root>
+ </tdml:dfdlInfoset>
+ </tdml:infoset>
+ </tdml:parserTestCase>
+
+ <tdml:parserTestCase name="escapeCharVars_02" root="root" model="escapeCharVars">
+ <tdml:document>fieldOne,field\,Two,fieldThree</tdml:document>
+ <tdml:infoset>
+ <tdml:dfdlInfoset>
+ <root xmlns="http://example.com">
+ <field>fieldOne</field>
+ <field>field,Two</field>
+ <field>fieldThree</field>
+ </root>
+ </tdml:dfdlInfoset>
+ </tdml:infoset>
+ </tdml:parserTestCase>
+
+
+ <tdml:defineSchema name="multipleVarReadInPoU">
+
+ <xs:include schemaLocation="org/apache/daffodil/xsd/DFDLGeneralFormat.dfdl.xsd" />
+
+ <dfdl:defineVariable defaultValue="x" name="var1" type="xs:string"/>
+
+ <dfdl:format ref="GeneralFormat" />
+
+ <xs:element name="root">
+ <xs:complexType>
+ <xs:sequence>
+ <xs:element name="opt" minOccurs="0">
+ <xs:complexType>
+ <xs:sequence>
+ <xs:sequence>
+ <xs:annotation>
+ <xs:appinfo source="http://www.ogf.org/dfdl/">
+ <dfdl:assert test="{ $var1 eq 'x' }" />
+ </xs:appinfo>
+ </xs:annotation>
+ </xs:sequence>
+ <xs:sequence>
+ <xs:annotation>
+ <xs:appinfo source="http://www.ogf.org/dfdl/">
+ <dfdl:assert test="{ $var1 eq 'y' }" />
+ </xs:appinfo>
+ </xs:annotation>
+ </xs:sequence>
+ </xs:sequence>
+ </xs:complexType>
+ </xs:element>
+ </xs:sequence>
+ </xs:complexType>
+ </xs:element>
+
+ </tdml:defineSchema>
+
+ <tdml:parserTestCase name="multipleVarReadInPoU_01" root="root" model="multipleVarReadInPoU">
+ <tdml:document></tdml:document>
+ <tdml:infoset>
+ <tdml:dfdlInfoset>
+ <root xmlns="http://example.com"></root>
+ </tdml:dfdlInfoset>
+ </tdml:infoset>
+ </tdml:parserTestCase>
+
</tdml:testSuite>
diff --git a/daffodil-test/src/test/scala/org/apache/daffodil/section07/variables/TestVariables.scala b/daffodil-test/src/test/scala/org/apache/daffodil/section07/variables/TestVariables.scala
index 13eb449..fa50051 100644
--- a/daffodil-test/src/test/scala/org/apache/daffodil/section07/variables/TestVariables.scala
+++ b/daffodil-test/src/test/scala/org/apache/daffodil/section07/variables/TestVariables.scala
@@ -132,6 +132,12 @@ class TestVariables {
@Test def test_setVar1_d_parse(): Unit = { runner_01.runOneTest("setVar1_d_parse") }
@Test def test_setVar1_d_unparse(): Unit = { runner_01.runOneTest("setVar1_d_unparse") }
+ @Test def test_escapeCharVars_01(): Unit = { runner.runOneTest("escapeCharVars_01") }
+ @Test def test_escapeCharVars_02(): Unit = { runner.runOneTest("escapeCharVars_02") }
+
+ @Test def test_multipleVarReadInPoU_01(): Unit = { runner.runOneTest("multipleVarReadInPoU_01") }
+
+
/*****************************************************************/
val tdmlVal = XMLUtils.TDML_NAMESPACE
val dfdl = XMLUtils.DFDL_NAMESPACE