You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@daffodil.apache.org by GitBox <gi...@apache.org> on 2018/01/06 16:25:36 UTC
[GitHub] stevedlawrence closed pull request #15: Optimizations with Infoset HashMap lookups/insertions
stevedlawrence closed pull request #15: Optimizations with Infoset HashMap lookups/insertions
URL: https://github.com/apache/incubator-daffodil/pull/15
This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:
As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):
diff --git a/daffodil-core/src/main/scala/edu/illinois/ncsa/daffodil/dpath/Expression.scala b/daffodil-core/src/main/scala/edu/illinois/ncsa/daffodil/dpath/Expression.scala
index a1398b6b3..ce0125bc7 100644
--- a/daffodil-core/src/main/scala/edu/illinois/ncsa/daffodil/dpath/Expression.scala
+++ b/daffodil-core/src/main/scala/edu/illinois/ncsa/daffodil/dpath/Expression.scala
@@ -1123,6 +1123,7 @@ case class NamedStep(s: String, predArg: Option[PredicateExpression])
}.getOrElse(die)
e
}
+ stepElem.isReferencedByExpressions = true
stepElem
}
}
diff --git a/daffodil-core/src/main/scala/edu/illinois/ncsa/daffodil/dsom/ElementBase.scala b/daffodil-core/src/main/scala/edu/illinois/ncsa/daffodil/dsom/ElementBase.scala
index b35a40cc3..160266208 100644
--- a/daffodil-core/src/main/scala/edu/illinois/ncsa/daffodil/dsom/ElementBase.scala
+++ b/daffodil-core/src/main/scala/edu/illinois/ncsa/daffodil/dsom/ElementBase.scala
@@ -557,7 +557,6 @@ trait ElementBase
//
// unparser specific items
//
- false, // !isReferencedByExpressions, // assume it is always to be referenced by expressions
optTruncateSpecifiedLengthString,
if (isOutputValueCalc) Some(ovcCompiledExpression) else None,
maybeBinaryFloatRepEv,
diff --git a/daffodil-core/src/test/scala/edu/illinois/ncsa/daffodil/infoset/TestInfoset.scala b/daffodil-core/src/test/scala/edu/illinois/ncsa/daffodil/infoset/TestInfoset.scala
index a1344824e..6ab72be89 100644
--- a/daffodil-core/src/test/scala/edu/illinois/ncsa/daffodil/infoset/TestInfoset.scala
+++ b/daffodil-core/src/test/scala/edu/illinois/ncsa/daffodil/infoset/TestInfoset.scala
@@ -56,9 +56,11 @@ object TestInfoset {
* classes.
*/
- def elem2Infoset(erd: ElementRuntimeData, xmlElem: scala.xml.Node, tunable: DaffodilTunables = DaffodilTunables()): InfosetElement = {
+ private val tunableForTests = DaffodilTunables("allowExternalPathExpressions", "true")
+
+ def elem2Infoset(erd: ElementRuntimeData, xmlElem: scala.xml.Node): InfosetElement = {
val ic = new ScalaXMLInfosetInputter(xmlElem)
- ic.initialize(erd, tunable)
+ ic.initialize(erd, tunableForTests)
val aacc = ic.advanceAccessor
Assert.invariant(ic.advance == true)
val infosetRootNode = aacc.node
diff --git a/daffodil-core/src/test/scala/edu/illinois/ncsa/daffodil/infoset/TestInfosetCursorFromReader2.scala b/daffodil-core/src/test/scala/edu/illinois/ncsa/daffodil/infoset/TestInfosetCursorFromReader2.scala
index 4932760f9..11412b0bc 100644
--- a/daffodil-core/src/test/scala/edu/illinois/ncsa/daffodil/infoset/TestInfosetCursorFromReader2.scala
+++ b/daffodil-core/src/test/scala/edu/illinois/ncsa/daffodil/infoset/TestInfosetCursorFromReader2.scala
@@ -138,7 +138,6 @@ class TestInfosetInputterFromReader2 {
val arr = bar_s.getChildArray(foo_1_s.runtimeData)
if (arr.length % 100L =#= 0L) {
// println("array length is " + arr.length)
- bar_s.resetChildArray(0)
foo_arr_s.reduceToSize(0)
}
arr.asInstanceOf[DIArray].children
diff --git a/daffodil-lib/src/main/scala/edu/illinois/ncsa/daffodil/api/DaffodilTunables.scala b/daffodil-lib/src/main/scala/edu/illinois/ncsa/daffodil/api/DaffodilTunables.scala
index 2f8b9fa30..36f881c3a 100644
--- a/daffodil-lib/src/main/scala/edu/illinois/ncsa/daffodil/api/DaffodilTunables.scala
+++ b/daffodil-lib/src/main/scala/edu/illinois/ncsa/daffodil/api/DaffodilTunables.scala
@@ -111,7 +111,16 @@ case class DaffodilTunables(
//
val initialElementOccurrencesHint: Long = 10,
val unqualifiedPathStepPolicy: UnqualifiedPathStepPolicy.Type = UnqualifiedPathStepPolicy.NoNamespace,
- val suppressSchemaDefinitionWarnings: List[WarnID] = Nil)
+ val suppressSchemaDefinitionWarnings: List[WarnID] = Nil,
+
+ // By default, path expressions in Daffodil will only work correctly if path
+ // steps are used in an expression defined in the schema when compiled. To
+ // enable the use of other expressions (e.g. during debugging, where not all
+ // expressions are known at schema compile time), set this tunable to true.
+ // This may cause a degredation of performance in path expression evaluation,
+ // so this should be avoided when in production. This flag is automatically
+ // enabled when debugging is enabled.
+ val allowExternalPathExpressions: Boolean = false)
extends Serializable
with Logging
with DataStreamLimits {
@@ -211,6 +220,7 @@ case class DaffodilTunables(
}
this.copy(suppressSchemaDefinitionWarnings = warningsList)
}
+ case "allowexternalpathexpressions" => this.copy(allowExternalPathExpressions = java.lang.Boolean.valueOf(value))
case _ => {
log(LogLevel.Warning, "Ignoring unknown tunable: %s", tunable)
this
diff --git a/daffodil-runtime1/src/main/scala/edu/illinois/ncsa/daffodil/dsom/CompiledExpression1.scala b/daffodil-runtime1/src/main/scala/edu/illinois/ncsa/daffodil/dsom/CompiledExpression1.scala
index cd24bcfbb..51f0ed4d5 100644
--- a/daffodil-runtime1/src/main/scala/edu/illinois/ncsa/daffodil/dsom/CompiledExpression1.scala
+++ b/daffodil-runtime1/src/main/scala/edu/illinois/ncsa/daffodil/dsom/CompiledExpression1.scala
@@ -333,6 +333,15 @@ class DPathElementCompileInfo(
elementChildrenCompileInfo
}
+ /**
+ * Stores whether or not this element is used in any path step expressions
+ * during schema compilation. Note that this needs to be a var since its
+ * value is determined during DPath compilation, which requires that the
+ * DPathElementCompileInfo already exists. So this must be a mutable value
+ * that can be flipped during schema compilation.
+ */
+ var isReferencedByExpressions = false
+
override def toString = "DPathElementCompileInfo(%s)".format(name)
@throws(classOf[java.io.IOException])
diff --git a/daffodil-runtime1/src/main/scala/edu/illinois/ncsa/daffodil/infoset/InfosetImpl.scala b/daffodil-runtime1/src/main/scala/edu/illinois/ncsa/daffodil/infoset/InfosetImpl.scala
index 1458114c0..f73adfe59 100644
--- a/daffodil-runtime1/src/main/scala/edu/illinois/ncsa/daffodil/infoset/InfosetImpl.scala
+++ b/daffodil-runtime1/src/main/scala/edu/illinois/ncsa/daffodil/infoset/InfosetImpl.scala
@@ -1323,7 +1323,7 @@ sealed class DIComplex(override val erd: ElementRuntimeData, val tunable: Daffod
}
}
- lazy val childNodes = new ArrayBuffer[DINode]
+ val childNodes = new ArrayBuffer[DINode]
lazy val nameToChildNodeLookup = new HashMap[NamedQName, ArrayBuffer[DINode]]
override lazy val contents: IndexedSeq[DINode] = childNodes
@@ -1335,8 +1335,9 @@ sealed class DIComplex(override val erd: ElementRuntimeData, val tunable: Daffod
}
final def getChild(info: DPathElementCompileInfo): InfosetElement = {
- if (nameToChildNodeLookup.containsKey(info.namedQName))
- nameToChildNodeLookup.get(info.namedQName)(0).asInstanceOf[InfosetElement]
+ val maybeNode = findChild(info.namedQName)
+ if (maybeNode.isDefined)
+ maybeNode.get.asInstanceOf[InfosetElement]
else
throw new InfosetNoSuchChildElementException(this, info)
}
@@ -1349,48 +1350,12 @@ sealed class DIComplex(override val erd: ElementRuntimeData, val tunable: Daffod
final def getChildArray(info: DPathElementCompileInfo): InfosetArray = {
Assert.usage(info.isArray)
-
- val name = info.namedQName
-
- val array = if (nameToChildNodeLookup.containsKey(name)) {
- val seq = nameToChildNodeLookup.get(name)
- // Don't support query expressions yet, so should only have
- // one item in the list
- //
- seq(0).asInstanceOf[InfosetArray] //.find(node => node.isInstanceOf[DIArray]).getOrElse(Assert.usageError("not an array")).asInstanceOf[InfosetArray]
- } else
+ val maybeNode = findChild(info.namedQName)
+ if (maybeNode.isDefined) {
+ maybeNode.get.asInstanceOf[InfosetArray]
+ } else {
throw new InfosetNoSuchChildElementException(this, info)
-
- array
- }
-
- /**
- * Used to prune parts of the array away to allow streaming. It allows us
- * to not keep all of the children of the array in the infoset tree for the
- * duration of the parse/unparse, once they are no longer needed.
- */
- final def resetChildArray(slot: Int) {
- val numChildrenToRemove = numChildren - slot
-
- var i = numChildrenToRemove
-
- while (i > 0) {
- val childToRemove = childNodes(i)
- if (nameToChildNodeLookup.containsKey(childToRemove.namedQName)) {
- val fastSeq = nameToChildNodeLookup.get(childToRemove.namedQName)
- if (fastSeq.length == 1)
- // last one, remove the whole key entry
- nameToChildNodeLookup.remove(childToRemove.namedQName)
- else
- // not the last one, just drop the end
- fastSeq.dropRight(1)
- } else { /* Does not exist, nothing to do? */ }
-
- i -= 1
}
-
- childNodes.dropRight(numChildrenToRemove)
- _numChildren = childNodes.length
}
override def addChild(e: InfosetElement): Unit = {
@@ -1400,16 +1365,20 @@ sealed class DIComplex(override val erd: ElementRuntimeData, val tunable: Daffod
// no children, or last child is not a DIArray for
// this element, create the DIArray
val ia = new DIArray(childERD, this)
- addChildToFastLookup(ia)
- childNodes.append(ia)
+ if (childERD.dpathElementCompileInfo.isReferencedByExpressions) {
+ addChildToFastLookup(ia)
+ }
+ childNodes += ia
_numChildren = childNodes.length
ia
}
// Array is now always last, add the new child to it
childNodes.last.asInstanceOf[DIArray].append(e)
} else {
- childNodes.append(e.asInstanceOf[DINode])
- addChildToFastLookup(e.asInstanceOf[DINode])
+ if (e.runtimeData.dpathElementCompileInfo.isReferencedByExpressions) {
+ addChildToFastLookup(e.asInstanceOf[DINode])
+ }
+ childNodes += e.asInstanceOf[DINode]
_numChildren = childNodes.length
}
e.setParent(this)
@@ -1417,11 +1386,39 @@ sealed class DIComplex(override val erd: ElementRuntimeData, val tunable: Daffod
def addChildToFastLookup(node: DINode): Unit = {
val name = node.namedQName
+ val fastSeq = nameToChildNodeLookup.get(name)
+ if (fastSeq != null) {
+ fastSeq += node
+ } else {
+ val ab = new ArrayBuffer[DINode]()
+ ab += node
+ nameToChildNodeLookup.put(name, ab)
+ }
+ }
- if (nameToChildNodeLookup.containsKey(name)) {
- nameToChildNodeLookup.get(name).append(node)
+ def findChild(qname: NamedQName): Maybe[DINode] = {
+ val fastSeq = nameToChildNodeLookup.get(qname)
+ if (fastSeq != null) {
+ // Daffodil does not support query expressions yet, so there should only
+ // be one item in the list
+ Assert.invariant(fastSeq.length == 1)
+ One(fastSeq(0))
+ } else if (tunable.allowExternalPathExpressions) {
+ // Only DINodes used in expressions defined in the schema are added to
+ // the nameToChildNodeLookup hashmap. If an expression defined outside of
+ // the schema (like via the debugger) attempts to access an element that
+ // was never used in a schema expression, it will never be found. If the
+ // appropriate tunable is set, we will do a linear search to find the
+ // element. Due to the slowness of this, this should only be enabled
+ // during debugging or testing.
+ val found = childNodes.filter(_.erd.namedQName == qname)
+
+ // Daffodil does not support query expressions yet, so there should be at
+ // most one item found
+ Assert.invariant(found.length <= 1)
+ Maybe.toMaybe(found.headOption)
} else {
- nameToChildNodeLookup.put(name, ArrayBuffer(node))
+ Nope
}
}
@@ -1444,15 +1441,17 @@ sealed class DIComplex(override val erd: ElementRuntimeData, val tunable: Daffod
// this should always be the last element in the hashmap seq
while (i >= cs._numChildren) {
val childToRemove = childNodes(i)
- if (nameToChildNodeLookup.containsKey(childToRemove.namedQName)) {
+ if (childToRemove.erd.dpathElementCompileInfo.isReferencedByExpressions) {
val fastSeq = nameToChildNodeLookup.get(childToRemove.namedQName)
- if (fastSeq.length == 1)
+ Assert.invariant(fastSeq != null)
+ if (fastSeq.length == 1) {
// last one, remove the whole key entry
nameToChildNodeLookup.remove(childToRemove.namedQName)
- else
+ } else {
// not the last one, just drop the end
- fastSeq.dropRight(1)
- } else { /* Nothing to do? Doesn't exist. */ }
+ fastSeq.reduceToSize(fastSeq.length - 1)
+ }
+ }
i -= 1
}
@@ -1507,8 +1506,12 @@ final class DIDocument(erd: ElementRuntimeData, tunable: DaffodilTunables)
//
Assert.invariant(childNodes.length == 0)
val node = child.asInstanceOf[DINode]
- childNodes.append(node)
- nameToChildNodeLookup.put(node.namedQName, ArrayBuffer(node))
+ childNodes += node
+ if (node.erd.dpathElementCompileInfo.isReferencedByExpressions) {
+ val ab = new ArrayBuffer[DINode]()
+ ab += node
+ nameToChildNodeLookup.put(node.namedQName, ab)
+ }
child.setParent(this)
root = child.asInstanceOf[DIElement]
}
diff --git a/daffodil-runtime1/src/main/scala/edu/illinois/ncsa/daffodil/processors/Runtime.scala b/daffodil-runtime1/src/main/scala/edu/illinois/ncsa/daffodil/processors/Runtime.scala
index b74e64970..1b1f8a15d 100644
--- a/daffodil-runtime1/src/main/scala/edu/illinois/ncsa/daffodil/processors/Runtime.scala
+++ b/daffodil-runtime1/src/main/scala/edu/illinois/ncsa/daffodil/processors/Runtime.scala
@@ -141,6 +141,7 @@ class DataProcessor(val ssrd: SchemaSetRuntimeData)
def setDebugging(flag: Boolean) {
areDebugging_ = flag
+ setTunable("allowExternalPathExpressions", flag.toString)
}
def setExternalVariables(extVars: Map[String, String]): Unit = {
diff --git a/daffodil-runtime1/src/main/scala/edu/illinois/ncsa/daffodil/processors/RuntimeData.scala b/daffodil-runtime1/src/main/scala/edu/illinois/ncsa/daffodil/processors/RuntimeData.scala
index 713a5b09c..827924d5d 100644
--- a/daffodil-runtime1/src/main/scala/edu/illinois/ncsa/daffodil/processors/RuntimeData.scala
+++ b/daffodil-runtime1/src/main/scala/edu/illinois/ncsa/daffodil/processors/RuntimeData.scala
@@ -662,12 +662,6 @@ final class ElementRuntimeData(
//
// Unparser-specific arguments
//
- /**
- * pass true for this if the corresponding infoset element is never
- * accessed by way of expressions. Enables the element to be dropped
- * from the infoset immediately after unparsing is complete.
- */
- @TransientParam notReferencedByExpressionsArg: => Boolean,
@TransientParam optTruncateSpecifiedLengthStringArg: => Option[Boolean],
@TransientParam outputValueCalcExprArg: => Option[CompiledExpression[AnyRef]],
@TransientParam maybeBinaryFloatRepEvArg: => Maybe[BinaryFloatRepEv],
@@ -710,14 +704,11 @@ final class ElementRuntimeData(
lazy val namedQName = namedQNameArg
lazy val impliedRepresentation = impliedRepresentationArg
lazy val optDefaultValue = optDefaultValueArg
- lazy val notReferencedByExpressions = notReferencedByExpressionsArg
lazy val optTruncateSpecifiedLengthString = optTruncateSpecifiedLengthStringArg
lazy val outputValueCalcExpr = outputValueCalcExprArg
lazy val maybeBinaryFloatRepEv = maybeBinaryFloatRepEvArg
lazy val maybeByteOrderEv = maybeByteOrderEvArg
- def isReferencedByExpressions = !notReferencedByExpressions
-
override def preSerialization: Unit = {
super.preSerialization
parent
@@ -750,7 +741,6 @@ final class ElementRuntimeData(
namedQName
impliedRepresentation
optDefaultValue
- notReferencedByExpressions
optTruncateSpecifiedLengthString
outputValueCalcExpr
maybeBinaryFloatRepEv
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
With regards,
Apache Git Services