You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@daffodil.apache.org by GitBox <gi...@apache.org> on 2020/09/14 15:39:17 UTC

[GitHub] [incubator-daffodil] mbeckerle commented on a change in pull request #419: Reduce performance degradation caused by commit b0f59ef7c8

mbeckerle commented on a change in pull request #419:
URL: https://github.com/apache/incubator-daffodil/pull/419#discussion_r488001832



##########
File path: daffodil-runtime1-unparser/src/main/scala/org/apache/daffodil/processors/unparsers/ElementUnparser.scala
##########
@@ -477,7 +477,7 @@ sealed trait RegularElementUnparserStartEndStrategy
       }
       val cur = state.currentInfosetNode
       if (cur.isComplex)
-        cur.asComplex.setFinal()
+        cur.isFinal = true

Review comment:
       So the issue here is the finalizable mixin, resulting in virtual functions, when just a isFinal data member is not done by way of mixin? 
   
   So the guidance I guess is "fewer 'thin' mixins" in the runtime? You have to really need the mixin for it to be worth the overhead?
   
   I''m trying to figure out what lessons learned, and what guidance we put in the coding standards notes, and try to remember to suggest during code reviews. 

##########
File path: daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/InfosetImpl.scala
##########
@@ -1458,6 +1454,12 @@ sealed class DIComplex(override val erd: ElementRuntimeData)
 
   override def children = childNodes.toStream
 
+  override def maybeLastChild: Maybe[DINode] = {
+    val len = childNodes.length
+    if (len > 0) Maybe(childNodes(len - 1))

Review comment:
       Add comment here that if the childNode has been removed (set to null), then Maybe(childNodes(len - 1 )) will be Maybe(null) which is Nope, and that this is expected behavior. 

##########
File path: daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/InfosetWalker.scala
##########
@@ -321,132 +390,114 @@ class InfosetWalker private (
     }
   }
 
-  private trait InfosetWalkerStep {
-    /**
-     * Output events associated with this step kind, and mutate the
-     * InfosetWalker state to walk to the next node in the infoset
-     */
-    def step(): Unit
-
-    final def moveToFirstChild(newContainer: DINode): Unit = {
-      containerNode = newContainer
-      containerIndexStack.push(0)
-    }
+  @inline
+  private def moveToFirstChild(newContainer: DINode): Unit = {
+    containerNodeStack.push(newContainer)
+    containerIndexStack.push(0)
+  }
 
-    final def moveToContainer(): Unit = {
-      containerNode = containerNode.containerNode
-      containerIndexStack.pop
-    }
+  @inline
+  private def moveToContainer(): Unit = {
+    containerNodeStack.pop
+    containerIndexStack.pop
+  }
 
-    final def moveToNextSibling(): Unit = {
-      containerIndexStack.push(containerIndexStack.pop + 1)
-    }
+  @inline
+  private def moveToNextSibling(): Unit = {
+    val top = containerIndexStack.top
+    containerIndexStack.setTop(top + 1)
   }
 
-  private object InfosetWalkerStepStart extends InfosetWalkerStep {
-    /**
-     * Start the document. Note that because the top of container index is
-     * initialized to one less that the starting index, we also call
-     * moveToNextSibling to increment the starting index to the correct
-     * position.
-     */
-    override def step(): Unit = {
-      outputter.startDocument()
-      moveToNextSibling()
-    }
+  /**
+   * Start the document. Note that because the top of container index is
+   * initialized to one less that the starting index, we also call
+   * moveToNextSibling to increment the starting index to the correct
+   * position.
+   */
+  private def infosetWalkerStepStart(): Unit = {
+    outputter.startDocument()
+    moveToNextSibling()
   }
 
-  private object InfosetWalkerStepEnd extends InfosetWalkerStep {
-    /**
-     * End document and clean up state. Setting finished to true causes
-     * the next step to be None, walk() will return, and the caller
-     * should not call walk() again because it is finished.
-     */
-    override def step(): Unit = {
-      outputter.endDocument()
-      containerNode = null
-      containerIndexStack = null
-      finished = true
-    }
+  /**
+   * End document and clean up state. Setting finished to true causes
+   * the next step to be None, walk() will return, and the caller
+   * should not call walk() again because it is finished.
+   */
+  private def infosetWalkerStepEnd(): Unit = {
+    outputter.endDocument()
+    containerNodeStack = null
+    containerIndexStack = null
+    finished = true
   }
 
-  private object InfosetWalkerStepMove extends InfosetWalkerStep {
-    /**
-     * Output start/end events for DIComplex/DIArray/DISimple, and mutate state
-     * so we are looking at the next node in the infoset.
-     */
-    def step(): Unit = {
-      val children = containerNode.contents
-      val childIndex = containerIndexStack.top
-
-      if (childIndex < children.size) {
-        // This block means we need to create a start event for the element in
-        // the children array at childIndex. We then need to mutate the walker
-        // state so the next call to step() is for either the first child of
-        // this element or the next sibling.
-
-        children(childIndex) match {
-          case s: DISimple => {
-            if (!s.isHidden || walkHidden) {
-              outputter.startSimple(s)
-              outputter.endSimple(s)
-            }
-            if (removeUnneeded) {
-              // now we can remove this simple element to free up memory
-              containerNode.freeChildIfNoLongerNeeded(containerIndexStack.top)
-            }
-            moveToNextSibling()
-          }
-          case c: DIComplex => {
-            if (!c.isHidden || walkHidden) {
-              outputter.startComplex(c)
-              moveToFirstChild(c)
-            } else {
-              moveToNextSibling()
-            }
-          }
-          case a: DIArray => {
-            if (!a.isHidden || walkHidden) {
-              outputter.startArray(a)
-              moveToFirstChild(a)
-            } else {
-              moveToNextSibling()
-            }
-          }
-        }
+  /**
+   * Output start/end events for DIComplex/DIArray/DISimple, and mutate state
+   * so we are looking at the next node in the infoset.
+   */
+  private def infosetWalkerStepMove(containerNode: DINode, containerIndex: Int): Unit = {
+    val children = containerNode.contents
 
-      } else {
-        // This else block means that we incremented the index stack past the
-        // number of children in this container (must be a DIComplex/DIDocument
-        // or DIArray), which means we have created events for all if its
-        // children. So we must now create the appropriate end event for the
-        // container and then mutate the state so that we are looking at its
-        // next sibling. Note that if there is no next sibling, that will be
-        // found the next time step() is called (because we incremented past
-        // this element) and we will fall into this block again, call the end
-        // function again, and repeat the process.
-
-        // create appropriate end event
-        containerNode match {
-          case a: DIArray => {
-            outputter.endArray(a)
-          }
-          case c: DIComplex => {
-            outputter.endComplex(c)
-          }
-          case _ => Assert.impossible()
-        }
+    if (containerIndex < children.size) {
+      // This block means we need to create a start event for the element in
+      // the children array at containerIndex. We then need to mutate the walker
+      // state so the next call to step() is for either the first child of
+      // this element or the next sibling.

Review comment:
       Don't understand "or the next sibling". What does that mean, or rather, why would that be the case?

##########
File path: daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/SequenceParserBases.scala
##########
@@ -203,31 +201,27 @@ abstract class SequenceParserBase(
               pstate.mpstate.moveOverOneGroupIndexOnly()
             }
 
-            val newLastChildNode = pstate.infoset.contents.lastOption
-            if (newLastChildNode != savedLastChildNode) {
-              // We have added at least one child to to this complex during
+            val newLastChildNode = pstate.infoset.maybeLastChild
+            if (newLastChildNode.isDefined) {
+              // We have potentially added a child to to this complex during
               // this array loop.
               //
               // If the new child is a DIArray, we know this DIArray has at
               // least one element, but we don't know if we actually added a
               // new one in this loop or not. So just get the last array
-              // element and set it as final anyways. Note that we need a null
-              // check because if we didn't add an array element this go
-              // around, the last element may have already been walked and
-              // freed and so could now be null.
+              // element and set it as final anyways.
               //
               // If it's not a DIArray, that means it's just an optional
               // simple/complex and that will get set final below where all
               // other non-array elements get set as final.
-              newLastChildNode.get match {
-                case a: DIArray => {
-                  val lastArrayElem = a.contents.last
-                  if (lastArrayElem != null) {
-                    lastArrayElem.setFinal()
-                    pstate.walker.walk()
-                  }
+              val lastChild = newLastChildNode.get
+              if (!lastChild.isSimple && !lastChild.isComplex) {

Review comment:
       Two calls to isSimple and isComplex? Let's just add an isArray method to go with the others. It's again disappointing that x.isInstanceOf[DIArray] isn't every bit as fast as having an isArray boolean. If isSimple and isComplex are virtual functions defined on DINode and DISimple and DIComplex, then I fail to understand why they wouldn't be pretty much identical in preformance to x.isInstanceOf, why would this be? 
   
   This really suggests we should have an Int member which we initialize to an integer enum value that identifies simple/complex/array, and then we can have inline final def isSimple = nodeKindEnum eq SIMPLE and similar for isComplex and isArray. These would then not be virtual functions at all. But I don't like having to play that sort of tricks. We're just reinventing virtual dispatch, doing so in a less general way that we know will just turn into an integer equality test. 

##########
File path: daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/InfosetWalker.scala
##########
@@ -201,26 +229,60 @@ class InfosetWalker private (
    * possible.
    *
    * It is an error to call walk() if isFinished returns true
+   *
+   * For performance reasons, sometimes a call to walk will intentionally not
+   * try to take any steps even if it is possible. This lets us essentially
+   * batch infoset events and improve performance. If this is the last time
+   * walk() will be called, the lastWalk parameter should be set to true, which
+   * will cause walk() to not skip any steps.
    */
-  def walk(): Unit = {
+  def walk(lastWalk: Boolean = false): Unit = {
     Assert.usage(!finished)
 
-    var maybeNextStep: Maybe[InfosetWalkerStep] = Nope
-    while ({
-      maybeNextStep = getNextStep()
-      maybeNextStep.isDefined
-    }) {
-      maybeNextStep.get.step()
+    if (walkSkipRemaining > 0 && !lastWalk) {

Review comment:
       I don't understand why this improves performance. 
   
   Every node has to eventually be walked. Are we cutting down the number of times walk is called only to return without being able to make progress?  So this design is effectively polling and what we're doing is just to cut down on the polling frequency?
   
   If so, then it suggests to me we should only be calling walk() when a there is no outstanding PoU and only after N elements are added to the infoset, for some fairly large N (like 100).  But perhaps this algorithm achieves that same thing?

##########
File path: daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/InfosetWalker.scala
##########
@@ -178,7 +179,11 @@ class InfosetWalker private (
    * we increment this value on the top of the stack so that the starting index
    * is correct.
    */
-  private var containerNode: DINode = startingContainerNode
+  private var containerNodeStack: MStackOf[DINode] = {

Review comment:
       Really? Maintaining this whole separate stack is faster than just doing a test to see if enclosing parent is a DIArray and indirecting past that to the enclosing DIComplex if so? That's kind of disappointing.   Maybe we should just have a DINode.nodeKind : Int, where 0 is DISimple, 1 is DIComplex, 2 is DIArray, etc. and then containerNode would just be { if (nodeKind == 2) parent.parent else parent }

##########
File path: daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/InfosetWalker.scala
##########
@@ -321,132 +390,114 @@ class InfosetWalker private (
     }
   }
 
-  private trait InfosetWalkerStep {
-    /**
-     * Output events associated with this step kind, and mutate the
-     * InfosetWalker state to walk to the next node in the infoset
-     */
-    def step(): Unit
-
-    final def moveToFirstChild(newContainer: DINode): Unit = {
-      containerNode = newContainer
-      containerIndexStack.push(0)
-    }
+  @inline
+  private def moveToFirstChild(newContainer: DINode): Unit = {
+    containerNodeStack.push(newContainer)
+    containerIndexStack.push(0)
+  }
 
-    final def moveToContainer(): Unit = {
-      containerNode = containerNode.containerNode
-      containerIndexStack.pop
-    }
+  @inline
+  private def moveToContainer(): Unit = {
+    containerNodeStack.pop
+    containerIndexStack.pop
+  }
 
-    final def moveToNextSibling(): Unit = {
-      containerIndexStack.push(containerIndexStack.pop + 1)
-    }
+  @inline
+  private def moveToNextSibling(): Unit = {
+    val top = containerIndexStack.top
+    containerIndexStack.setTop(top + 1)
   }
 
-  private object InfosetWalkerStepStart extends InfosetWalkerStep {
-    /**
-     * Start the document. Note that because the top of container index is
-     * initialized to one less that the starting index, we also call
-     * moveToNextSibling to increment the starting index to the correct
-     * position.
-     */
-    override def step(): Unit = {
-      outputter.startDocument()
-      moveToNextSibling()
-    }
+  /**
+   * Start the document. Note that because the top of container index is
+   * initialized to one less that the starting index, we also call
+   * moveToNextSibling to increment the starting index to the correct
+   * position.
+   */
+  private def infosetWalkerStepStart(): Unit = {
+    outputter.startDocument()
+    moveToNextSibling()
   }
 
-  private object InfosetWalkerStepEnd extends InfosetWalkerStep {
-    /**
-     * End document and clean up state. Setting finished to true causes
-     * the next step to be None, walk() will return, and the caller
-     * should not call walk() again because it is finished.
-     */
-    override def step(): Unit = {
-      outputter.endDocument()
-      containerNode = null
-      containerIndexStack = null
-      finished = true
-    }
+  /**
+   * End document and clean up state. Setting finished to true causes
+   * the next step to be None, walk() will return, and the caller
+   * should not call walk() again because it is finished.
+   */
+  private def infosetWalkerStepEnd(): Unit = {
+    outputter.endDocument()
+    containerNodeStack = null
+    containerIndexStack = null
+    finished = true
   }
 
-  private object InfosetWalkerStepMove extends InfosetWalkerStep {
-    /**
-     * Output start/end events for DIComplex/DIArray/DISimple, and mutate state
-     * so we are looking at the next node in the infoset.
-     */
-    def step(): Unit = {
-      val children = containerNode.contents
-      val childIndex = containerIndexStack.top
-
-      if (childIndex < children.size) {
-        // This block means we need to create a start event for the element in
-        // the children array at childIndex. We then need to mutate the walker
-        // state so the next call to step() is for either the first child of
-        // this element or the next sibling.
-
-        children(childIndex) match {
-          case s: DISimple => {
-            if (!s.isHidden || walkHidden) {
-              outputter.startSimple(s)
-              outputter.endSimple(s)
-            }
-            if (removeUnneeded) {
-              // now we can remove this simple element to free up memory
-              containerNode.freeChildIfNoLongerNeeded(containerIndexStack.top)
-            }
-            moveToNextSibling()
-          }
-          case c: DIComplex => {
-            if (!c.isHidden || walkHidden) {
-              outputter.startComplex(c)
-              moveToFirstChild(c)
-            } else {
-              moveToNextSibling()
-            }
-          }
-          case a: DIArray => {
-            if (!a.isHidden || walkHidden) {
-              outputter.startArray(a)
-              moveToFirstChild(a)
-            } else {
-              moveToNextSibling()
-            }
-          }
-        }
+  /**
+   * Output start/end events for DIComplex/DIArray/DISimple, and mutate state
+   * so we are looking at the next node in the infoset.
+   */
+  private def infosetWalkerStepMove(containerNode: DINode, containerIndex: Int): Unit = {
+    val children = containerNode.contents
 
-      } else {
-        // This else block means that we incremented the index stack past the
-        // number of children in this container (must be a DIComplex/DIDocument
-        // or DIArray), which means we have created events for all if its
-        // children. So we must now create the appropriate end event for the
-        // container and then mutate the state so that we are looking at its
-        // next sibling. Note that if there is no next sibling, that will be
-        // found the next time step() is called (because we incremented past
-        // this element) and we will fall into this block again, call the end
-        // function again, and repeat the process.
-
-        // create appropriate end event
-        containerNode match {
-          case a: DIArray => {
-            outputter.endArray(a)
-          }
-          case c: DIComplex => {
-            outputter.endComplex(c)
-          }
-          case _ => Assert.impossible()
-        }
+    if (containerIndex < children.size) {
+      // This block means we need to create a start event for the element in
+      // the children array at containerIndex. We then need to mutate the walker
+      // state so the next call to step() is for either the first child of
+      // this element or the next sibling.
 
-        // we've ended this array/complex associated with the container, so we
-        // now want to move to the parent container, potentially free up the
-        // memory associated with this container, and then move to the next
-        // sibling of this container
-        moveToContainer()
+      val child = children(containerIndex)
+
+      if (child.isSimple) {
+        if (!child.isHidden || walkHidden) {
+          val simple = child.asInstanceOf[DISimple]
+          outputter.startSimple(simple)

Review comment:
       We could combine these two virtual functions into one, or provide a combined variant startEndSimple(simple). That would cut two virtual function calls down to one. 




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org