You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@daffodil.apache.org by sl...@apache.org on 2021/08/23 15:20:23 UTC

[daffodil] branch master updated: Fix parser serialization stack overflow regression

This is an automated email from the ASF dual-hosted git repository.

slawrence pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/daffodil.git


The following commit(s) were added to refs/heads/master by this push:
     new 5db7959  Fix parser serialization stack overflow regression
5db7959 is described below

commit 5db795978b585643907e0617942c0dcfd1d69f96
Author: Steve Lawrence <sl...@apache.org>
AuthorDate: Thu Aug 19 09:31:08 2021 -0400

    Fix parser serialization stack overflow regression
    
    Commit 126789891ed5e834037198e5aa59e1f6b34dcb04 added a fix to prevent
    stack overflows during serialization by making parent backpointers
    transient and manually serializing them, which avoids serialization of a
    highly connected/cyclic graph that requires a large stack depth.
    
    Commit eb603e7f4a342e1a63d1e07f59c714cf531724bc removed the need for
    most transient parameters, but accidentally removed the transient
    annotations for these parents needed to fix the stack overflow issue.
    
    This adds back the @transient annotations needed to prevent stack
    overflows, as well as additional comments to hopefully ensure we don't
    accidentally remove these transients in the future.
    
    DAFFODIL-2551
---
 .../apache/daffodil/dsom/CompiledExpression1.scala | 45 +++++++++++++---------
 1 file changed, 26 insertions(+), 19 deletions(-)

diff --git a/daffodil-runtime1/src/main/scala/org/apache/daffodil/dsom/CompiledExpression1.scala b/daffodil-runtime1/src/main/scala/org/apache/daffodil/dsom/CompiledExpression1.scala
index 3eefc1c..5868773 100644
--- a/daffodil-runtime1/src/main/scala/org/apache/daffodil/dsom/CompiledExpression1.scala
+++ b/daffodil-runtime1/src/main/scala/org/apache/daffodil/dsom/CompiledExpression1.scala
@@ -34,6 +34,7 @@ import org.apache.daffodil.util.Delay
 import org.apache.daffodil.util.Maybe
 import org.apache.daffodil.util.MaybeULong
 import org.apache.daffodil.util.PreSerialization
+import org.apache.daffodil.util.TransientParam
 import org.apache.daffodil.xml.NS
 import org.apache.daffodil.xml.NamedQName
 import org.apache.daffodil.xml.NoNamespace
@@ -208,7 +209,10 @@ final case class ConstantExpression[+T <: AnyRef](
  */
 class DPathCompileInfo(
 
-  parentsDelay: Delay[Seq[DPathCompileInfo]],
+  // parentsDelay is a transient due to serialization order issues causing
+  // stack overflows. There is no delay/lazy/by-name involvement here. See the
+  // lazy val parents scaladoc for a detailed explanation.
+  @TransientParam parentsDelay: Delay[Seq[DPathCompileInfo]],
   val variableMap: VariableMap,
   val namespaces: scala.xml.NamespaceBinding,
   val path: String,
@@ -224,22 +228,24 @@ class DPathCompileInfo(
   }
 
   /**
-   * This "parents" val is a backpointer to all DPathCompileInfo's that
-   * reference this DPathCompileInfo. The problem with this is that when
-   * elements are shared, these backpointers create a highly connected graph
-   * that requires a large stack to serialize using the default java
-   * serialization as it jumps around parents and children. To avoid this large
-   * stack requirement, we make the parents backpointer transient. This
-   * prevents jumping back up to parents during serialization and results in
-   * only needing a stack depth relative to the schema depth. Once all that
-   * serialization is completed and all the DPathCompileInfo's are serialized,
-   * we then manually traverse all the DPathCompileInfo's again and serialize
-   * the parent sequences (via the serailizeParents method). Because all the
-   * DPathCompileInfo's are already serialized, this just serializes the
-   * Sequence objects and the stack depth is again relative to the schema
-   * depth.
+   * This "parents" and "parentsDelay" variables are backpointers to all
+   * DPathCompileInfo's that reference this DPathCompileInfo. The problem with
+   * this is that when elements are shared, these backpointers create a highly
+   * connected and cyclic graph that requires a large stack to serialize using
+   * default Java serialization as it jumps around parents and children. To
+   * avoid this large stack requirement, we mark the parents and parentsDelay
+   * backpointers as transient. This prevents Java from serializing these
+   * backpointers at all, turning this into serialization of a directed
+   * acycling graph, which only requires a stack depth relative to the schema
+   * depth. Once all that default serialization is completed and all the
+   * DPathCompileInfo's are serialized, we then manually traverse all the
+   * DPathCompileInfo's again and serialize the parent sequences (via the
+   * serailizeParents method). Because all the DPathCompileInfo's are already
+   * serialized, the seralizeParents is really just serializing a Seq with
+   * pointers to objects that have already been serialized, which avoids the
+   * serialization cycles.
    */
-  lazy val parents = parentsDelay.value
+  @transient lazy val parents = parentsDelay.value
 
   def serializeParents(oos: java.io.ObjectOutputStream): Unit = {
     oos.writeObject(parents)
@@ -335,9 +341,10 @@ class DPathCompileInfo(
  */
 class DPathElementCompileInfo
 (
-  parentsDelay: Delay[Seq[DPathElementCompileInfo]],
-  // parentsArg is a transient due to serialization order issues,
-  // there is no delay/lazy/by-name involvement here.
+  // parentsDelay is a transient due to serialization order issues causing
+  // stack overflows. There is no delay/lazy/by-name involvement here. See the
+  // lazy val parents scaladoc for a detailed explanation.
+  @TransientParam parentsDelay: Delay[Seq[DPathElementCompileInfo]],
 
   variableMap: VariableMap,
   // This next arg must be a Delay as we're creating a circular