You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@daffodil.apache.org by ja...@apache.org on 2018/07/30 13:28:24 UTC
[incubator-daffodil] branch master updated: Allow same character
for escapeBlockEnd and escapeEscapeCharacter
This is an automated email from the ASF dual-hosted git repository.
jadams pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-daffodil.git
The following commit(s) were added to refs/heads/master by this push:
new a83b377 Allow same character for escapeBlockEnd and escapeEscapeCharacter
a83b377 is described below
commit a83b3772bc5d6eccb127babc7278ef797db29c75
Author: Josh Adams <ja...@tresys.com>
AuthorDate: Thu Jul 26 15:52:42 2018 -0400
Allow same character for escapeBlockEnd and escapeEscapeCharacter
This commit fixes the issue where the escapeBlockEnd and
escapeEscapeCharacter are the same character. This is apparently a
common thing in how Excel escapes things in CSV.
This is not a complete fix as there are still issues in cases like
escapeBlockEnd="++" and escapeEscapeCharacter='+', but there does not
seem to be an easy way to deal with these extreme edge cases cleanly in
the existing code. This issue has been documented in DAFFODIL-1972
DAFFODIL-1923
---
.../org/apache/daffodil/processors/dfa/Rules.scala | 17 ++++++---
.../section07/escapeScheme/escapeScheme.tdml | 44 +++++++++++++++++++++-
.../escapeScheme/TestEscapeSchemeDebug.scala | 6 +--
.../section07/escapeScheme/TestEscapeScheme.scala | 3 ++
4 files changed, 59 insertions(+), 11 deletions(-)
diff --git a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/dfa/Rules.scala b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/dfa/Rules.scala
index 967be5e..c159d2c 100644
--- a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/dfa/Rules.scala
+++ b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/dfa/Rules.scala
@@ -214,9 +214,17 @@ class StartStateEscapeBlock(states: => ArrayBuffer[State], val blockEnd: DFADeli
val stateName: String = "StartState"
val rules = ArrayBuffer(
- Rule { (r: Registers) => couldBeFirstChar(r.data0, blockEnd) } { (r: Registers) => r.status = StateKind.Paused },
- Rule { (r: Registers) => { EEC.isDefined && (r.data0 == EEC.get) } } { (r: Registers) => r.nextState = EECState },
- Rule { (r: Registers) => { r.data0 == DFA.EndOfDataChar } } { (r: Registers) => r.nextState = DFA.EndOfData },
+ Rule { (r: Registers) => couldBeFirstChar(r.data0, blockEnd)} { (r: Registers) =>
+ {
+ if (EEC.isDefined && (blockEnd.lookingFor.equals(EEC.get.toString))) { // EEC == escapeBlockEnd
+ r.nextState = EECState
+ } else {
+ r.status = StateKind.Paused
+ }
+ }
+ },
+ Rule { (r: Registers) => { EEC.isDefined && (r.data0 == EEC.get) } } { (r: Registers) => { r.nextState = EECState }},
+ Rule { (r: Registers) => { r.data0 == DFA.EndOfDataChar } } { (r: Registers) => { r.nextState = DFA.EndOfData }},
Rule { (r: Registers) => true } { (r: Registers) =>
{
r.appendToField(r.data0)
@@ -224,7 +232,6 @@ class StartStateEscapeBlock(states: => ArrayBuffer[State], val blockEnd: DFADeli
r.nextState = StartState
}
})
-
}
class StartStateEscapeChar(states: => ArrayBuffer[State], val EEC: MaybeChar, val EC: Char, val stateNum: Int)
@@ -488,7 +495,7 @@ class EECStateBlock(states: => ArrayBuffer[State], blockEnd: DFADelimiter, val E
//
// We've already encountered EEC as data0 here
//
- Rule { (r: Registers) => couldBeFirstChar(r.data0, blockEnd) } {
+ Rule { (r: Registers) => couldBeFirstChar(r.data0, blockEnd) && !couldBeFirstChar(r.data1, blockEnd) } {
(r: Registers) => r.status = StateKind.Paused //PTERMState
},
Rule { (r: Registers) => couldBeFirstChar(r.data1, blockEnd) } { (r: Registers) =>
diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/section07/escapeScheme/escapeScheme.tdml b/daffodil-test/src/test/resources/org/apache/daffodil/section07/escapeScheme/escapeScheme.tdml
index 613b7ea..7609628 100644
--- a/daffodil-test/src/test/resources/org/apache/daffodil/section07/escapeScheme/escapeScheme.tdml
+++ b/daffodil-test/src/test/resources/org/apache/daffodil/section07/escapeScheme/escapeScheme.tdml
@@ -603,5 +603,47 @@
</dfdlInfoset>
</infoset>
</parserTestCase>
-
+
+ <!--
+ Test Name: escBlkMultipleEEC
+ Schema: eBlkMultipleEEC
+ Root: record
+ Purpose: This test demonstrates an escapeScheme with escapeKind escapeBlock where the escapeBlockEnd is the same as 2 escapeEscapeCharacters (DAFFODIL-1923).
+ -->
+ <defineSchema name="eBlkMultipleEEC">
+ <dfdl:format ref="tns:GeneralFormat" lengthKind="delimited" />
+
+ <dfdl:defineEscapeScheme name="eBlkMultipleEEC">
+ <dfdl:escapeScheme escapeBlockStart="&quot;"
+ escapeBlockEnd="&quot;&quot;" escapeKind="escapeBlock"
+ escapeEscapeCharacter="&quot;" extraEscapedCharacters="" generateEscapeBlock="whenNeeded"/>
+ </dfdl:defineEscapeScheme>
+
+ <xs:element name="record">
+ <xs:complexType>
+ <xs:sequence dfdl:separator="," >
+ <xs:element name="item" type="xs:string" maxOccurs="unbounded"
+ dfdl:escapeSchemeRef="tns:eBlkMultipleEEC" />
+ </xs:sequence>
+ </xs:complexType>
+ </xs:element>
+ </defineSchema>
+
+ <parserTestCase name="escBlkMultipleEEC" model="eBlkMultipleEEC"
+ description="Section 7 defineEscapeScheme - DFDL-7-079R" root="record" roundTrip="true">
+ <document>1,"Column """Number""" Two"",3,4,5</document>
+ <infoset>
+ <dfdlInfoset>
+ <tns:record>
+ <tns:item>1</tns:item>
+ <tns:item>Column "Number" Two</tns:item>
+ <tns:item>3</tns:item>
+ <tns:item>4</tns:item>
+ <tns:item>5</tns:item>
+ </tns:record>
+ </dfdlInfoset>
+ </infoset>
+ </parserTestCase>
+
+
</testSuite>
diff --git a/daffodil-test/src/test/scala-debug/org/apache/daffodil/section07/escapeScheme/TestEscapeSchemeDebug.scala b/daffodil-test/src/test/scala-debug/org/apache/daffodil/section07/escapeScheme/TestEscapeSchemeDebug.scala
index 9eb8117..7cf4b27 100644
--- a/daffodil-test/src/test/scala-debug/org/apache/daffodil/section07/escapeScheme/TestEscapeSchemeDebug.scala
+++ b/daffodil-test/src/test/scala-debug/org/apache/daffodil/section07/escapeScheme/TestEscapeSchemeDebug.scala
@@ -49,9 +49,5 @@ class TestEscapeSchemeDebug {
//DFDL-961
@Test def test_scenario3_11_postfix() { runner2.runOneTest("scenario3_11_postfix") }
-
- //DAFFODIL-1923
- @Test def test_escBlkAllQuotes() { runner.runOneTest("escBlkAllQuotes") }
- @Test def test_escBlkEndSame() { runner.runOneTest("escBlkEndSame") }
-
+ @Test def test_escBlkMultipleEEC() { runner.runOneTest("escBlkMultipleEEC") } // DAFFODIL-1972
}
diff --git a/daffodil-test/src/test/scala/org/apache/daffodil/section07/escapeScheme/TestEscapeScheme.scala b/daffodil-test/src/test/scala/org/apache/daffodil/section07/escapeScheme/TestEscapeScheme.scala
index 745eced..0e978f9 100644
--- a/daffodil-test/src/test/scala/org/apache/daffodil/section07/escapeScheme/TestEscapeScheme.scala
+++ b/daffodil-test/src/test/scala/org/apache/daffodil/section07/escapeScheme/TestEscapeScheme.scala
@@ -140,4 +140,7 @@ class TestEscapeScheme {
@Test def test_scenario4_12_req_term() { runner2.runOneTest("scenario4_12_req_term") }
@Test def test_scenario5_1() { runner2.runOneTest("scenario5_1") }
+
+ @Test def test_escBlkAllQuotes() { runner.runOneTest("escBlkAllQuotes") }
+ @Test def test_escBlkEndSame() { runner.runOneTest("escBlkEndSame") }
}