You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@daffodil.apache.org by ja...@apache.org on 2018/07/30 13:28:24 UTC

[incubator-daffodil] branch master updated: Allow same character for escapeBlockEnd and escapeEscapeCharacter

This is an automated email from the ASF dual-hosted git repository.

jadams pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-daffodil.git


The following commit(s) were added to refs/heads/master by this push:
     new a83b377  Allow same character for escapeBlockEnd and escapeEscapeCharacter
a83b377 is described below

commit a83b3772bc5d6eccb127babc7278ef797db29c75
Author: Josh Adams <ja...@tresys.com>
AuthorDate: Thu Jul 26 15:52:42 2018 -0400

    Allow same character for escapeBlockEnd and escapeEscapeCharacter
    
    This commit fixes the issue where the escapeBlockEnd and
    escapeEscapeCharacter are the same character. This is apparently a
    common thing in how Excel escapes things in CSV.
    
    This is not a complete fix as there are still issues in cases like
    escapeBlockEnd="++" and escapeEscapeCharacter='+', but there does not
    seem to be an easy way to deal with these extreme edge cases cleanly in
    the existing code. This issue has been documented in DAFFODIL-1972
    
    DAFFODIL-1923
---
 .../org/apache/daffodil/processors/dfa/Rules.scala | 17 ++++++---
 .../section07/escapeScheme/escapeScheme.tdml       | 44 +++++++++++++++++++++-
 .../escapeScheme/TestEscapeSchemeDebug.scala       |  6 +--
 .../section07/escapeScheme/TestEscapeScheme.scala  |  3 ++
 4 files changed, 59 insertions(+), 11 deletions(-)

diff --git a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/dfa/Rules.scala b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/dfa/Rules.scala
index 967be5e..c159d2c 100644
--- a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/dfa/Rules.scala
+++ b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/dfa/Rules.scala
@@ -214,9 +214,17 @@ class StartStateEscapeBlock(states: => ArrayBuffer[State], val blockEnd: DFADeli
   val stateName: String = "StartState"
 
   val rules = ArrayBuffer(
-    Rule { (r: Registers) => couldBeFirstChar(r.data0, blockEnd) } { (r: Registers) => r.status = StateKind.Paused },
-    Rule { (r: Registers) => { EEC.isDefined && (r.data0 == EEC.get) } } { (r: Registers) => r.nextState = EECState },
-    Rule { (r: Registers) => { r.data0 == DFA.EndOfDataChar } } { (r: Registers) => r.nextState = DFA.EndOfData },
+    Rule { (r: Registers) => couldBeFirstChar(r.data0, blockEnd)} { (r: Registers) =>
+      {
+        if (EEC.isDefined && (blockEnd.lookingFor.equals(EEC.get.toString))) { // EEC == escapeBlockEnd
+          r.nextState = EECState
+        } else {
+          r.status = StateKind.Paused
+        }
+      }
+    },
+    Rule { (r: Registers) => { EEC.isDefined && (r.data0 == EEC.get) } } { (r: Registers) => { r.nextState = EECState }},
+    Rule { (r: Registers) => { r.data0 == DFA.EndOfDataChar } } { (r: Registers) => { r.nextState = DFA.EndOfData }},
     Rule { (r: Registers) => true } { (r: Registers) =>
       {
         r.appendToField(r.data0)
@@ -224,7 +232,6 @@ class StartStateEscapeBlock(states: => ArrayBuffer[State], val blockEnd: DFADeli
         r.nextState = StartState
       }
     })
-
 }
 
 class StartStateEscapeChar(states: => ArrayBuffer[State], val EEC: MaybeChar, val EC: Char, val stateNum: Int)
@@ -488,7 +495,7 @@ class EECStateBlock(states: => ArrayBuffer[State], blockEnd: DFADelimiter, val E
     //
     // We've already encountered EEC as data0 here
     //
-    Rule { (r: Registers) => couldBeFirstChar(r.data0, blockEnd) } {
+    Rule { (r: Registers) => couldBeFirstChar(r.data0, blockEnd) && !couldBeFirstChar(r.data1, blockEnd) } {
       (r: Registers) => r.status = StateKind.Paused //PTERMState
     },
     Rule { (r: Registers) => couldBeFirstChar(r.data1, blockEnd) } { (r: Registers) =>
diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/section07/escapeScheme/escapeScheme.tdml b/daffodil-test/src/test/resources/org/apache/daffodil/section07/escapeScheme/escapeScheme.tdml
index 613b7ea..7609628 100644
--- a/daffodil-test/src/test/resources/org/apache/daffodil/section07/escapeScheme/escapeScheme.tdml
+++ b/daffodil-test/src/test/resources/org/apache/daffodil/section07/escapeScheme/escapeScheme.tdml
@@ -603,5 +603,47 @@
       </dfdlInfoset>
     </infoset>
   </parserTestCase>
- 
+
+  <!--
+       Test Name: escBlkMultipleEEC
+          Schema: eBlkMultipleEEC
+            Root: record
+         Purpose: This test demonstrates an escapeScheme with escapeKind="escapeBlock" where the escapeBlockEnd is the same as two consecutive escapeEscapeCharacters (DAFFODIL-1923).
+  -->
+  <defineSchema name="eBlkMultipleEEC">
+    <dfdl:format ref="tns:GeneralFormat" lengthKind="delimited" />
+
+    <dfdl:defineEscapeScheme name="eBlkMultipleEEC">
+      <dfdl:escapeScheme escapeBlockStart="&quot;"
+        escapeBlockEnd="&quot;&quot;" escapeKind="escapeBlock"
+        escapeEscapeCharacter="&quot;"  extraEscapedCharacters="" generateEscapeBlock="whenNeeded"/>
+    </dfdl:defineEscapeScheme>
+
+    <xs:element name="record">
+      <xs:complexType>
+        <xs:sequence dfdl:separator="," >
+          <xs:element name="item" type="xs:string" maxOccurs="unbounded"
+	    dfdl:escapeSchemeRef="tns:eBlkMultipleEEC" />
+        </xs:sequence>
+      </xs:complexType>
+    </xs:element>
+  </defineSchema>
+
+  <parserTestCase name="escBlkMultipleEEC" model="eBlkMultipleEEC"
+    description="Section 7 defineEscapeScheme - DFDL-7-079R" root="record" roundTrip="true">
+    <document>1,"Column """Number""" Two"",3,4,5</document>
+    <infoset>
+      <dfdlInfoset>
+    	<tns:record>
+    	  <tns:item>1</tns:item>
+    	  <tns:item>Column &quot;Number&quot; Two</tns:item>
+    	  <tns:item>3</tns:item>
+    	  <tns:item>4</tns:item>
+    	  <tns:item>5</tns:item>
+  	</tns:record>
+      </dfdlInfoset>
+    </infoset>
+  </parserTestCase>
+
+
 </testSuite>
diff --git a/daffodil-test/src/test/scala-debug/org/apache/daffodil/section07/escapeScheme/TestEscapeSchemeDebug.scala b/daffodil-test/src/test/scala-debug/org/apache/daffodil/section07/escapeScheme/TestEscapeSchemeDebug.scala
index 9eb8117..7cf4b27 100644
--- a/daffodil-test/src/test/scala-debug/org/apache/daffodil/section07/escapeScheme/TestEscapeSchemeDebug.scala
+++ b/daffodil-test/src/test/scala-debug/org/apache/daffodil/section07/escapeScheme/TestEscapeSchemeDebug.scala
@@ -49,9 +49,5 @@ class TestEscapeSchemeDebug {
   //DFDL-961
   @Test def test_scenario3_11_postfix() { runner2.runOneTest("scenario3_11_postfix") }
 
-
-  //DAFFODIL-1923
-  @Test def test_escBlkAllQuotes() { runner.runOneTest("escBlkAllQuotes") }
-  @Test def test_escBlkEndSame() { runner.runOneTest("escBlkEndSame") }
-
+  @Test def test_escBlkMultipleEEC() { runner.runOneTest("escBlkMultipleEEC") } // DAFFODIL-1972
 }
diff --git a/daffodil-test/src/test/scala/org/apache/daffodil/section07/escapeScheme/TestEscapeScheme.scala b/daffodil-test/src/test/scala/org/apache/daffodil/section07/escapeScheme/TestEscapeScheme.scala
index 745eced..0e978f9 100644
--- a/daffodil-test/src/test/scala/org/apache/daffodil/section07/escapeScheme/TestEscapeScheme.scala
+++ b/daffodil-test/src/test/scala/org/apache/daffodil/section07/escapeScheme/TestEscapeScheme.scala
@@ -140,4 +140,7 @@ class TestEscapeScheme {
   @Test def test_scenario4_12_req_term() { runner2.runOneTest("scenario4_12_req_term") }
 
   @Test def test_scenario5_1() { runner2.runOneTest("scenario5_1") }
+
+  @Test def test_escBlkAllQuotes() { runner.runOneTest("escBlkAllQuotes") }
+  @Test def test_escBlkEndSame() { runner.runOneTest("escBlkEndSame") }
 }