You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@daffodil.apache.org by GitBox <gi...@apache.org> on 2018/01/06 16:25:36 UTC

[GitHub] stevedlawrence closed pull request #15: Optimizations with Infoset HashMap lookups/insertions

stevedlawrence closed pull request #15: Optimizations with Infoset HashMap lookups/insertions
URL: https://github.com/apache/incubator-daffodil/pull/15
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/daffodil-core/src/main/scala/edu/illinois/ncsa/daffodil/dpath/Expression.scala b/daffodil-core/src/main/scala/edu/illinois/ncsa/daffodil/dpath/Expression.scala
index a1398b6b3..ce0125bc7 100644
--- a/daffodil-core/src/main/scala/edu/illinois/ncsa/daffodil/dpath/Expression.scala
+++ b/daffodil-core/src/main/scala/edu/illinois/ncsa/daffodil/dpath/Expression.scala
@@ -1123,6 +1123,7 @@ case class NamedStep(s: String, predArg: Option[PredicateExpression])
       }.getOrElse(die)
       e
     }
+    stepElem.isReferencedByExpressions = true
     stepElem
   }
 }
diff --git a/daffodil-core/src/main/scala/edu/illinois/ncsa/daffodil/dsom/ElementBase.scala b/daffodil-core/src/main/scala/edu/illinois/ncsa/daffodil/dsom/ElementBase.scala
index b35a40cc3..160266208 100644
--- a/daffodil-core/src/main/scala/edu/illinois/ncsa/daffodil/dsom/ElementBase.scala
+++ b/daffodil-core/src/main/scala/edu/illinois/ncsa/daffodil/dsom/ElementBase.scala
@@ -557,7 +557,6 @@ trait ElementBase
       //
       // unparser specific items
       //
-      false, // !isReferencedByExpressions, // assume it is always to be referenced by expressions
       optTruncateSpecifiedLengthString,
       if (isOutputValueCalc) Some(ovcCompiledExpression) else None,
       maybeBinaryFloatRepEv,
diff --git a/daffodil-core/src/test/scala/edu/illinois/ncsa/daffodil/infoset/TestInfoset.scala b/daffodil-core/src/test/scala/edu/illinois/ncsa/daffodil/infoset/TestInfoset.scala
index a1344824e..6ab72be89 100644
--- a/daffodil-core/src/test/scala/edu/illinois/ncsa/daffodil/infoset/TestInfoset.scala
+++ b/daffodil-core/src/test/scala/edu/illinois/ncsa/daffodil/infoset/TestInfoset.scala
@@ -56,9 +56,11 @@ object TestInfoset {
    * classes.
    */
 
-  def elem2Infoset(erd: ElementRuntimeData, xmlElem: scala.xml.Node, tunable: DaffodilTunables = DaffodilTunables()): InfosetElement = {
+  private val tunableForTests = DaffodilTunables("allowExternalPathExpressions", "true")
+
+  def elem2Infoset(erd: ElementRuntimeData, xmlElem: scala.xml.Node): InfosetElement = {
     val ic = new ScalaXMLInfosetInputter(xmlElem)
-    ic.initialize(erd, tunable)
+    ic.initialize(erd, tunableForTests)
     val aacc = ic.advanceAccessor
     Assert.invariant(ic.advance == true)
     val infosetRootNode = aacc.node
diff --git a/daffodil-core/src/test/scala/edu/illinois/ncsa/daffodil/infoset/TestInfosetCursorFromReader2.scala b/daffodil-core/src/test/scala/edu/illinois/ncsa/daffodil/infoset/TestInfosetCursorFromReader2.scala
index 4932760f9..11412b0bc 100644
--- a/daffodil-core/src/test/scala/edu/illinois/ncsa/daffodil/infoset/TestInfosetCursorFromReader2.scala
+++ b/daffodil-core/src/test/scala/edu/illinois/ncsa/daffodil/infoset/TestInfosetCursorFromReader2.scala
@@ -138,7 +138,6 @@ class TestInfosetInputterFromReader2 {
       val arr = bar_s.getChildArray(foo_1_s.runtimeData)
       if (arr.length % 100L =#= 0L) {
         // println("array length is " + arr.length)
-        bar_s.resetChildArray(0)
         foo_arr_s.reduceToSize(0)
       }
       arr.asInstanceOf[DIArray].children
diff --git a/daffodil-lib/src/main/scala/edu/illinois/ncsa/daffodil/api/DaffodilTunables.scala b/daffodil-lib/src/main/scala/edu/illinois/ncsa/daffodil/api/DaffodilTunables.scala
index 2f8b9fa30..36f881c3a 100644
--- a/daffodil-lib/src/main/scala/edu/illinois/ncsa/daffodil/api/DaffodilTunables.scala
+++ b/daffodil-lib/src/main/scala/edu/illinois/ncsa/daffodil/api/DaffodilTunables.scala
@@ -111,7 +111,16 @@ case class DaffodilTunables(
   //
   val initialElementOccurrencesHint: Long = 10,
   val unqualifiedPathStepPolicy: UnqualifiedPathStepPolicy.Type = UnqualifiedPathStepPolicy.NoNamespace,
-  val suppressSchemaDefinitionWarnings: List[WarnID] = Nil)
+  val suppressSchemaDefinitionWarnings: List[WarnID] = Nil,
+
+  // By default, path expressions in Daffodil will only work correctly if path
+  // steps are used in an expression defined in the schema when compiled. To
+  // enable the use of other expressions (e.g. during debugging, where not all
+  // expressions are known at schema compile time), set this tunable to true.
+  // This may cause a degredation of performance in path expression evaluation,
+  // so this should be avoided when in production. This flag is automatically
+  // enabled when debugging is enabled.
+  val allowExternalPathExpressions: Boolean = false)
   extends Serializable
   with Logging
   with DataStreamLimits {
@@ -211,6 +220,7 @@ case class DaffodilTunables(
         }
         this.copy(suppressSchemaDefinitionWarnings = warningsList)
       }
+      case "allowexternalpathexpressions" => this.copy(allowExternalPathExpressions = java.lang.Boolean.valueOf(value))
       case _ => {
         log(LogLevel.Warning, "Ignoring unknown tunable: %s", tunable)
         this
diff --git a/daffodil-runtime1/src/main/scala/edu/illinois/ncsa/daffodil/dsom/CompiledExpression1.scala b/daffodil-runtime1/src/main/scala/edu/illinois/ncsa/daffodil/dsom/CompiledExpression1.scala
index cd24bcfbb..51f0ed4d5 100644
--- a/daffodil-runtime1/src/main/scala/edu/illinois/ncsa/daffodil/dsom/CompiledExpression1.scala
+++ b/daffodil-runtime1/src/main/scala/edu/illinois/ncsa/daffodil/dsom/CompiledExpression1.scala
@@ -333,6 +333,15 @@ class DPathElementCompileInfo(
     elementChildrenCompileInfo
   }
 
+  /**
+   * Stores whether or not this element is used in any path step expressions
+   * during schema compilation. Note that this needs to be a var since its
+   * value is determined during DPath compilation, which requires that the
+   * DPathElementCompileInfo already exists. So this must be a mutable value
+   * that can be flipped during schema compilation.
+   */
+  var isReferencedByExpressions = false
+
   override def toString = "DPathElementCompileInfo(%s)".format(name)
 
   @throws(classOf[java.io.IOException])
diff --git a/daffodil-runtime1/src/main/scala/edu/illinois/ncsa/daffodil/infoset/InfosetImpl.scala b/daffodil-runtime1/src/main/scala/edu/illinois/ncsa/daffodil/infoset/InfosetImpl.scala
index 1458114c0..f73adfe59 100644
--- a/daffodil-runtime1/src/main/scala/edu/illinois/ncsa/daffodil/infoset/InfosetImpl.scala
+++ b/daffodil-runtime1/src/main/scala/edu/illinois/ncsa/daffodil/infoset/InfosetImpl.scala
@@ -1323,7 +1323,7 @@ sealed class DIComplex(override val erd: ElementRuntimeData, val tunable: Daffod
     }
   }
 
-  lazy val childNodes = new ArrayBuffer[DINode]
+  val childNodes = new ArrayBuffer[DINode]
   lazy val nameToChildNodeLookup = new HashMap[NamedQName, ArrayBuffer[DINode]]
 
   override lazy val contents: IndexedSeq[DINode] = childNodes
@@ -1335,8 +1335,9 @@ sealed class DIComplex(override val erd: ElementRuntimeData, val tunable: Daffod
   }
 
   final def getChild(info: DPathElementCompileInfo): InfosetElement = {
-    if (nameToChildNodeLookup.containsKey(info.namedQName))
-      nameToChildNodeLookup.get(info.namedQName)(0).asInstanceOf[InfosetElement]
+    val maybeNode = findChild(info.namedQName)
+    if (maybeNode.isDefined)
+      maybeNode.get.asInstanceOf[InfosetElement]
     else
       throw new InfosetNoSuchChildElementException(this, info)
   }
@@ -1349,48 +1350,12 @@ sealed class DIComplex(override val erd: ElementRuntimeData, val tunable: Daffod
 
   final def getChildArray(info: DPathElementCompileInfo): InfosetArray = {
     Assert.usage(info.isArray)
-
-    val name = info.namedQName
-
-    val array = if (nameToChildNodeLookup.containsKey(name)) {
-      val seq = nameToChildNodeLookup.get(name)
-      // Don't support query expressions yet, so should only have
-      // one item in the list
-      //
-      seq(0).asInstanceOf[InfosetArray] //.find(node => node.isInstanceOf[DIArray]).getOrElse(Assert.usageError("not an array")).asInstanceOf[InfosetArray]
-    } else
+    val maybeNode = findChild(info.namedQName)
+    if (maybeNode.isDefined) {
+      maybeNode.get.asInstanceOf[InfosetArray]
+    } else {
       throw new InfosetNoSuchChildElementException(this, info)
-
-    array
-  }
-
-  /**
-   * Used to prune parts of the array away to allow streaming. It allows us
-   * to not keep all of the children of the array in the infoset tree for the
-   * duration of the parse/unparse, once they are no longer needed.
-   */
-  final def resetChildArray(slot: Int) {
-    val numChildrenToRemove = numChildren - slot
-
-    var i = numChildrenToRemove
-
-    while (i > 0) {
-      val childToRemove = childNodes(i)
-      if (nameToChildNodeLookup.containsKey(childToRemove.namedQName)) {
-        val fastSeq = nameToChildNodeLookup.get(childToRemove.namedQName)
-        if (fastSeq.length == 1)
-          // last one, remove the whole key entry
-          nameToChildNodeLookup.remove(childToRemove.namedQName)
-        else
-          // not the last one, just drop the end
-          fastSeq.dropRight(1)
-      } else { /* Does not exist, nothing to do? */ }
-
-      i -= 1
     }
-
-    childNodes.dropRight(numChildrenToRemove)
-    _numChildren = childNodes.length
   }
 
   override def addChild(e: InfosetElement): Unit = {
@@ -1400,16 +1365,20 @@ sealed class DIComplex(override val erd: ElementRuntimeData, val tunable: Daffod
         // no children, or last child is not a DIArray for
         // this element, create the DIArray
         val ia = new DIArray(childERD, this)
-        addChildToFastLookup(ia)
-        childNodes.append(ia)
+        if (childERD.dpathElementCompileInfo.isReferencedByExpressions) {
+          addChildToFastLookup(ia)
+        }
+        childNodes += ia
         _numChildren = childNodes.length
         ia
       }
       // Array is now always last, add the new child to it
       childNodes.last.asInstanceOf[DIArray].append(e)
     } else {
-      childNodes.append(e.asInstanceOf[DINode])
-      addChildToFastLookup(e.asInstanceOf[DINode])
+      if (e.runtimeData.dpathElementCompileInfo.isReferencedByExpressions) {
+        addChildToFastLookup(e.asInstanceOf[DINode])
+      }
+      childNodes += e.asInstanceOf[DINode]
       _numChildren = childNodes.length
     }
     e.setParent(this)
@@ -1417,11 +1386,39 @@ sealed class DIComplex(override val erd: ElementRuntimeData, val tunable: Daffod
 
   def addChildToFastLookup(node: DINode): Unit = {
     val name = node.namedQName
+    val fastSeq = nameToChildNodeLookup.get(name)
+    if (fastSeq != null) {
+      fastSeq += node
+    } else {
+      val ab = new ArrayBuffer[DINode]()
+      ab += node
+      nameToChildNodeLookup.put(name, ab)
+    }
+  }
 
-    if (nameToChildNodeLookup.containsKey(name)) {
-      nameToChildNodeLookup.get(name).append(node)
+  def findChild(qname: NamedQName): Maybe[DINode] = {
+    val fastSeq = nameToChildNodeLookup.get(qname)
+    if (fastSeq != null) {
+      // Daffodil does not support query expressions yet, so there should only
+      // be one item in the list
+      Assert.invariant(fastSeq.length == 1)
+      One(fastSeq(0))
+    } else if (tunable.allowExternalPathExpressions) {
+      // Only DINodes used in expressions defined in the schema are added to
+      // the nameToChildNodeLookup hashmap. If an expression defined outside of
+      // the schema (like via the debugger) attempts to access an element that
+      // was never used in a schema expression, it will never be found. If the
+      // appropriate tunable is set, we will do a linear search to find the
+      // element. Due to the slowness of this, this should only be enabled
+      // during debugging or testing.
+      val found = childNodes.filter(_.erd.namedQName == qname)
+
+      // Daffodil does not support query expressions yet, so there should be at
+      // most one item found
+      Assert.invariant(found.length <= 1)
+      Maybe.toMaybe(found.headOption)
     } else {
-      nameToChildNodeLookup.put(name, ArrayBuffer(node))
+      Nope
     }
   }
 
@@ -1444,15 +1441,17 @@ sealed class DIComplex(override val erd: ElementRuntimeData, val tunable: Daffod
     // this should always be the last element in the hashmap seq
     while (i >= cs._numChildren) {
       val childToRemove = childNodes(i)
-      if (nameToChildNodeLookup.containsKey(childToRemove.namedQName)) {
+      if (childToRemove.erd.dpathElementCompileInfo.isReferencedByExpressions) {
         val fastSeq = nameToChildNodeLookup.get(childToRemove.namedQName)
-        if (fastSeq.length == 1)
+        Assert.invariant(fastSeq != null)
+        if (fastSeq.length == 1) {
           // last one, remove the whole key entry
           nameToChildNodeLookup.remove(childToRemove.namedQName)
-        else
+        } else {
           // not the last one, just drop the end
-          fastSeq.dropRight(1)
-      } else { /* Nothing to do? Doesn't exist. */ }
+          fastSeq.reduceToSize(fastSeq.length - 1)
+        }
+      }
 
       i -= 1
     }
@@ -1507,8 +1506,12 @@ final class DIDocument(erd: ElementRuntimeData, tunable: DaffodilTunables)
     //
     Assert.invariant(childNodes.length == 0)
     val node = child.asInstanceOf[DINode]
-    childNodes.append(node)
-    nameToChildNodeLookup.put(node.namedQName, ArrayBuffer(node))
+    childNodes += node
+    if (node.erd.dpathElementCompileInfo.isReferencedByExpressions) {
+      val ab = new ArrayBuffer[DINode]()
+      ab += node
+      nameToChildNodeLookup.put(node.namedQName, ab)
+    }
     child.setParent(this)
     root = child.asInstanceOf[DIElement]
   }
diff --git a/daffodil-runtime1/src/main/scala/edu/illinois/ncsa/daffodil/processors/Runtime.scala b/daffodil-runtime1/src/main/scala/edu/illinois/ncsa/daffodil/processors/Runtime.scala
index b74e64970..1b1f8a15d 100644
--- a/daffodil-runtime1/src/main/scala/edu/illinois/ncsa/daffodil/processors/Runtime.scala
+++ b/daffodil-runtime1/src/main/scala/edu/illinois/ncsa/daffodil/processors/Runtime.scala
@@ -141,6 +141,7 @@ class DataProcessor(val ssrd: SchemaSetRuntimeData)
 
   def setDebugging(flag: Boolean) {
     areDebugging_ = flag
+    setTunable("allowExternalPathExpressions", flag.toString)
   }
 
   def setExternalVariables(extVars: Map[String, String]): Unit = {
diff --git a/daffodil-runtime1/src/main/scala/edu/illinois/ncsa/daffodil/processors/RuntimeData.scala b/daffodil-runtime1/src/main/scala/edu/illinois/ncsa/daffodil/processors/RuntimeData.scala
index 713a5b09c..827924d5d 100644
--- a/daffodil-runtime1/src/main/scala/edu/illinois/ncsa/daffodil/processors/RuntimeData.scala
+++ b/daffodil-runtime1/src/main/scala/edu/illinois/ncsa/daffodil/processors/RuntimeData.scala
@@ -662,12 +662,6 @@ final class ElementRuntimeData(
   //
   // Unparser-specific arguments
   //
-  /**
-   * pass true for this if the corresponding infoset element is never
-   * accessed by way of expressions. Enables the element to be dropped
-   * from the infoset immediately after unparsing is complete.
-   */
-  @TransientParam notReferencedByExpressionsArg: => Boolean,
   @TransientParam optTruncateSpecifiedLengthStringArg: => Option[Boolean],
   @TransientParam outputValueCalcExprArg: => Option[CompiledExpression[AnyRef]],
   @TransientParam maybeBinaryFloatRepEvArg: => Maybe[BinaryFloatRepEv],
@@ -710,14 +704,11 @@ final class ElementRuntimeData(
   lazy val namedQName = namedQNameArg
   lazy val impliedRepresentation = impliedRepresentationArg
   lazy val optDefaultValue = optDefaultValueArg
-  lazy val notReferencedByExpressions = notReferencedByExpressionsArg
   lazy val optTruncateSpecifiedLengthString = optTruncateSpecifiedLengthStringArg
   lazy val outputValueCalcExpr = outputValueCalcExprArg
   lazy val maybeBinaryFloatRepEv = maybeBinaryFloatRepEvArg
   lazy val maybeByteOrderEv = maybeByteOrderEvArg
 
-  def isReferencedByExpressions = !notReferencedByExpressions
-
   override def preSerialization: Unit = {
     super.preSerialization
     parent
@@ -750,7 +741,6 @@ final class ElementRuntimeData(
     namedQName
     impliedRepresentation
     optDefaultValue
-    notReferencedByExpressions
     optTruncateSpecifiedLengthString
     outputValueCalcExpr
     maybeBinaryFloatRepEv


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services