You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@daffodil.apache.org by sl...@apache.org on 2022/12/05 16:53:44 UTC

[daffodil] branch main updated: Add infosetWalkerSkipMin/Max tunables

This is an automated email from the ASF dual-hosted git repository.

slawrence pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/daffodil.git


The following commit(s) were added to refs/heads/main by this push:
     new 3e63abc2e Add infosetWalkerSkipMin/Max tunables
3e63abc2e is described below

commit 3e63abc2e735ba06367f6f4106ac5a64ddbf5997
Author: Steve Lawrence <sl...@apache.org>
AuthorDate: Fri Dec 2 11:49:33 2022 -0500

    Add infosetWalkerSkipMin/Max tunables
    
    Allows configuration of how often the InfosetWalker skips walk() calls.
    Setting to a value of zero disables skipping walk() calls. A value of
    zero is useful during testing to ensure the InfosetWalker isn't
    incorrectly removing infoset elements, which can lead to seemingly
    random null pointer exceptions.
    
    Add multiple tests to show that the InfosetWalker currently removes
    infoset elements that are still needed by separator logic, that this
    does not arise in a particular case with the default skip min/max
    values, and that disabling infoset removal works around the issue.
    
    DAFFODIL-2755
---
 .../resources/org/apache/daffodil/xsd/dafext.xsd   |  34 ++++++
 .../apache/daffodil/infoset/InfosetWalker.scala    |  38 ++++++-
 .../daffodil/processors/parsers/PState.scala       |   4 +-
 .../daffodil/section00/general/infosetWalker.tdml  | 117 +++++++++++++++++++++
 .../section00/general/TestInfosetWalker.scala      |  40 +++++++
 5 files changed, 227 insertions(+), 6 deletions(-)

diff --git a/daffodil-propgen/src/main/resources/org/apache/daffodil/xsd/dafext.xsd b/daffodil-propgen/src/main/resources/org/apache/daffodil/xsd/dafext.xsd
index 806f6a8f5..47439f44c 100644
--- a/daffodil-propgen/src/main/resources/org/apache/daffodil/xsd/dafext.xsd
+++ b/daffodil-propgen/src/main/resources/org/apache/daffodil/xsd/dafext.xsd
@@ -207,6 +207,40 @@
             </xs:restriction>
           </xs:simpleType>
         </xs:element>
+        <xs:element name="infosetWalkerSkipMin" default="32" minOccurs="0">
+          <xs:annotation>
+            <xs:documentation>
+              Daffodil periodically walks the internal infoset to send events to the configured
+              InfosetOutputter, skipping at least this number of walk attempts. Larger values
+              mean delayed InfosetOutputter events and more memory usage; smaller values mean
+              more CPU usage. Set this value to zero to never skip any walk attempts. This is
+              specifically for advanced testing behavior and should not need to be changed by users.
+            </xs:documentation>
+          </xs:annotation>
+          <xs:simpleType>
+            <xs:restriction base="xs:int">
+              <xs:minInclusive value="0" />
+            </xs:restriction>
+          </xs:simpleType>
+        </xs:element>
+        <xs:element name="infosetWalkerSkipMax" default="2048" minOccurs="0">
+          <xs:annotation>
+            <xs:documentation>
+              Daffodil periodically walks the internal infoset to send events to the configured
+              InfosetOutputter. On walks where no progress is made, the number of walks to skip
+              is increased with the assumption that something is blocking it (like an
+              unresolved point of uncertainty), up to this maximum value. Higher values mean
+              fewer attempts are made when blocked for a long time, but with potentially more
+              delays and memory usage before InfosetOutputter events are created. This is
+              specifically for advanced testing behavior and should not need to be changed by users.
+            </xs:documentation>
+          </xs:annotation>
+          <xs:simpleType>
+            <xs:restriction base="xs:int">
+              <xs:minInclusive value="0" />
+            </xs:restriction>
+          </xs:simpleType>
+        </xs:element>
         <xs:element name="inputFileMemoryMapLowThreshold" type="xs:int" default="33554432" minOccurs="0">
           <xs:annotation>
             <xs:documentation>
diff --git a/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/InfosetWalker.scala b/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/InfosetWalker.scala
index 7a90d2c5e..a03eaff91 100644
--- a/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/InfosetWalker.scala
+++ b/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/InfosetWalker.scala
@@ -57,13 +57,27 @@ object InfosetWalker {
    *   Whether or not to release infoset nodes once it is determined that they
    *   will no longer be used by Daffodil. This should usually be set to true
    *   except while debugging
+   *
+   * @param walkSkipMin
+   *
+   *   The minimum number of walk() calls to skip before actually trying to
+   *   walk the infoset. A value of zero disables skipping.
+   *
+   * @param walkSkipMax
+   *
+   *   When a walk() call fails to make any progress, it assumes we are blocked
+   *   (e.g. due to an unresolved point of uncertainty) and increases the
+   *   number of walk() calls to skip before trying again. This defines the
+   *   maximum number of skipped calls, even as that number increases.
    */
   def apply(
     root: DIElement,
     outputter: InfosetOutputter,
     walkHidden: Boolean,
     ignoreBlocks: Boolean,
-    releaseUnneededInfoset: Boolean): InfosetWalker = {
+    releaseUnneededInfoset: Boolean,
+    walkSkipMin: Int = 32,
+    walkSkipMax: Int = 2048): InfosetWalker = {
 
     // Determine the container of the root node and the index in which it
     // appears in that node
@@ -89,7 +103,9 @@ object InfosetWalker {
       outputter,
       walkHidden,
       ignoreBlocks,
-      releaseUnneededInfoset)
+      releaseUnneededInfoset,
+      walkSkipMin,
+      walkSkipMax)
   }
 
 }
@@ -141,6 +157,18 @@ object InfosetWalker {
  *   Whether or not to remove infoset nodes once it is determined that they
  *   will no longer be used by Daffodil. This should usually be set to true
  *   except while debugging
+ *
+ * @param walkSkipMin
+ *
+ *   The minimum number of walk() calls to skip before actually trying to
+ *   remove unneeded infoset elements.
+ *
+ * @param walkSkipMax
+ *
+ *   When a walk() call fails to remove any infoset elements, it assumes we
+ *   are blocked from removal (e.g. due to an unresolved point of uncertainty)
+ *   and increases the number of walk() calls to skip before trying again. This
+ *   defines the maximum number of skipped calls, even as this number increases.
  */
 class InfosetWalker private (
   startingContainerNode: DINode,
@@ -148,7 +176,9 @@ class InfosetWalker private (
   val outputter: InfosetOutputter,
   walkHidden: Boolean,
   ignoreBlocks: Boolean,
-  releaseUnneededInfoset: Boolean) {
+  releaseUnneededInfoset: Boolean,
+  walkSkipMin: Int,
+  walkSkipMax: Int) {
 
   /**
    * These two pieces of mutable state are all that is needed to keep track of
@@ -216,8 +246,6 @@ class InfosetWalker private (
    * steps back down to the min value so that we try taking steps and stream
    * events more frequently.
    */
-  private val walkSkipMin = 32
-  private val walkSkipMax = 2048
   private var walkSkipSize = walkSkipMin
   private var walkSkipRemaining = walkSkipSize
 
diff --git a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/PState.scala b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/PState.scala
index 47de2179d..05e7468e4 100644
--- a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/PState.scala
+++ b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/PState.scala
@@ -694,7 +694,9 @@ object PState {
       output,
       walkHidden = false,
       ignoreBlocks = false,
-      releaseUnneededInfoset = !areDebugging && tunables.releaseUnneededInfoset)
+      releaseUnneededInfoset = !areDebugging && tunables.releaseUnneededInfoset,
+      walkSkipMin = tunables.infosetWalkerSkipMin,
+      walkSkipMax = tunables.infosetWalkerSkipMax)
 
     dis.cst.setPriorBitOrder(root.defaultBitOrder)
     val newState = new PState(
diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/section00/general/infosetWalker.tdml b/daffodil-test/src/test/resources/org/apache/daffodil/section00/general/infosetWalker.tdml
new file mode 100644
index 000000000..26cab6eab
--- /dev/null
+++ b/daffodil-test/src/test/resources/org/apache/daffodil/section00/general/infosetWalker.tdml
@@ -0,0 +1,117 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<tdml:testSuite suiteName="InfosetWalker" description="Section 00 - InfosetWalker tests"
+  xmlns:tdml="http://www.ibm.com/xmlns/dfdl/testData"
+  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xmlns:dfdl="http://www.ogf.org/dfdl/dfdl-1.0/"
+  xmlns:xs="http://www.w3.org/2001/XMLSchema"
+  xmlns:ex="http://example.com"
+  xmlns:daf="urn:ogf:dfdl:2013:imp:daffodil.apache.org:2018:ext">
+
+  <!--
+    Disable skipping of walk() calls. If we are incorrectly releasing infoset
+    elements, this will likely cause a null pointer exception
+  -->
+  <tdml:defineConfig name="cfg_infosetWalker_01">
+    <daf:tunables>
+      <daf:infosetWalkerSkipMin>0</daf:infosetWalkerSkipMin>
+    </daf:tunables>
+  </tdml:defineConfig>
+
+  <!--
+    Disable skipping of walk() calls, but do not release unneeded infosets.
+    Shows that setting releaseUnneededInfoset to false can avoid issues related
+    to the InfosetWalker incorrectly releasing elements.
+  -->
+  <tdml:defineConfig name="cfg_infosetWalker_02">
+    <daf:tunables>
+      <daf:infosetWalkerSkipMin>0</daf:infosetWalkerSkipMin>
+      <daf:releaseUnneededInfoset>false</daf:releaseUnneededInfoset>
+    </daf:tunables>
+  </tdml:defineConfig>
+
+
+  <tdml:defineSchema name="schema_01" elementFormDefault="unqualified">
+    <xs:include schemaLocation="org/apache/daffodil/xsd/DFDLGeneralFormat.dfdl.xsd" />
+
+    <dfdl:format ref="ex:GeneralFormat" lengthKind="delimited" />
+
+    <xs:element name="root_01">
+      <xs:complexType>
+        <xs:sequence dfdl:initiator="|" dfdl:terminator="|" dfdl:separator=";">
+          <xs:element name="first" type="xs:string" />
+          <xs:sequence dfdl:separator=";" dfdl:separatorPosition="postfix">
+            <xs:element name="field" type="xs:string" minOccurs="0" maxOccurs="unbounded" />
+          </xs:sequence>
+        </xs:sequence>
+      </xs:complexType>
+    </xs:element>
+
+  </tdml:defineSchema>
+
+  <tdml:parserTestCase name="infosetWalker_01" model="schema_01">
+    <tdml:document>
+      <tdml:documentPart type="text">|header;body1;body2;body3;|</tdml:documentPart>
+    </tdml:document>
+    <tdml:infoset>
+      <tdml:dfdlInfoset>
+        <ex:root_01 xmlns:ex="http://example.com">
+          <first>header</first>
+          <field>body1</field>
+          <field>body2</field>
+          <field>body3</field>
+        </ex:root_01>
+      </tdml:dfdlInfoset>
+    </tdml:infoset>
+  </tdml:parserTestCase>
+
+  <tdml:parserTestCase name="infosetWalker_02" model="schema_01" config="cfg_infosetWalker_01">
+    <tdml:document>
+      <tdml:documentPart type="text">|header;body1;body2;body3;|</tdml:documentPart>
+    </tdml:document>
+    <tdml:infoset>
+      <tdml:dfdlInfoset>
+        <ex:root_01 xmlns:ex="http://example.com">
+          <first>header</first>
+          <field>body1</field>
+          <field>body2</field>
+          <field>body3</field>
+        </ex:root_01>
+      </tdml:dfdlInfoset>
+    </tdml:infoset>
+  </tdml:parserTestCase>
+
+  <tdml:parserTestCase name="infosetWalker_03" model="schema_01" config="cfg_infosetWalker_02">
+    <tdml:document>
+      <tdml:documentPart type="text">|header;body1;body2;body3;|</tdml:documentPart>
+    </tdml:document>
+    <tdml:infoset>
+      <tdml:dfdlInfoset>
+        <ex:root_01 xmlns:ex="http://example.com">
+          <first>header</first>
+          <field>body1</field>
+          <field>body2</field>
+          <field>body3</field>
+        </ex:root_01>
+      </tdml:dfdlInfoset>
+    </tdml:infoset>
+  </tdml:parserTestCase>
+
+
+</tdml:testSuite>
diff --git a/daffodil-test/src/test/scala/org/apache/daffodil/section00/general/TestInfosetWalker.scala b/daffodil-test/src/test/scala/org/apache/daffodil/section00/general/TestInfosetWalker.scala
new file mode 100644
index 000000000..d4d1dd4a5
--- /dev/null
+++ b/daffodil-test/src/test/scala/org/apache/daffodil/section00/general/TestInfosetWalker.scala
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.section00.general
+
+import org.apache.daffodil.tdml.Runner
+import org.junit.AfterClass
+import org.junit.Test
+
+object TestInfosetWalker {
+  val testDir = "/org/apache/daffodil/section00/general/"
+  val runner2 = Runner(testDir, "infosetWalker.tdml")
+
+  @AfterClass def shutDown(): Unit = {
+    runner2.reset
+  }
+}
+
+class TestInfosetWalker {
+  import TestInfosetWalker._
+
+  @Test def test_infosetWalker_01() = { runner2.runOneTest("infosetWalker_01") }
+  // DAFFODIL-2755: disabled; the walker releases elements still needed by separator logic
+  /*@Test*/ def test_infosetWalker_02() = { runner2.runOneTest("infosetWalker_02") }
+  @Test def test_infosetWalker_03() = { runner2.runOneTest("infosetWalker_03") }
+}