You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@daffodil.apache.org by GitBox <gi...@apache.org> on 2021/03/19 20:48:10 UTC

[GitHub] [daffodil] mbeckerle commented on a change in pull request #516: Added test for DAFFODIL-2486 discriminator bug.

mbeckerle commented on a change in pull request #516:
URL: https://github.com/apache/daffodil/pull/516#discussion_r597966157



##########
File path: daffodil-test/src/test/resources/org/apache/daffodil/section07/discriminators/discriminator2.tdml
##########
@@ -0,0 +1,150 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<testSuite suiteName="NameDOB"
+           xmlns:xs="http://www.w3.org/2001/XMLSchema"
+           xmlns:fn="http://www.w3.org/2005/xpath-functions"
+           xmlns:dfdl="http://www.ogf.org/dfdl/dfdl-1.0/"
+           xmlns:tdml="http://www.ibm.com/xmlns/dfdl/testData"
+           xmlns:ex="http://example.com"
+           xmlns:tns="http://example.com"
+           defaultRoundTrip="onePass">
+
+  <tdml:defineSchema name="s1">
+
+    <xs:include schemaLocation="org/apache/daffodil/xsd/DFDLGeneralFormat.dfdl.xsd" />
+
+    <dfdl:format
+      ref="tns:GeneralFormat"
+      representation="text"
+      encoding="ASCII"
+      lengthKind="delimited"
+      separator=""
+      separatorPosition="infix"
+    />
+
+    <!--
+    Schema for a simple CSV-like file containing 4 columns, the last of which is a date.
+
+    The header line is consumed as the initiator of the "file" element, and
+    each following line is one "record", ended by a postfix newline separator.
+
+    What makes this non-trivial is the use of a discriminator after the first column.
+    If we can parse a first column, then the remaining columns MUST be present, and an error
+    in parsing them should be fatal.
+
+    -->
+    <xs:element name="file" dfdl:initiator="last,first,middle,DOB%NL;%WSP*;">
+      <xs:complexType>
+        <xs:sequence dfdl:separator="%NL;" dfdl:separatorPosition="postfix">
+          <xs:element name="record" maxOccurs="unbounded">
+            <xs:complexType>
+              <xs:sequence>
+                <xs:element name="lastName" type="xs:string"
+                            dfdl:terminator=","/>
+                <xs:sequence>
+                  <xs:annotation>
+                    <xs:appinfo source="http://www.ogf.org/dfdl/">
+                      <!--
+                      This discriminator should discriminate the closest
+                      point of uncertainty, which should be the record array element.
+
+                      Once we have parsed the record up to this discriminator,
+                      any subsequent error parsing the remaining fields (e.g., the
+                      date being in an incorrect format) should fail the whole parse,
+                      not just terminate the array.
+                      -->
+                      <dfdl:discriminator test="{ fn:true() }"/>
+                    </xs:appinfo>
+                  </xs:annotation>
+                </xs:sequence>
+                <xs:sequence dfdl:separator=",">
+                  <xs:element name="firstName" type="xs:string"/>
+                  <xs:element name="middleName" type="xs:string"/>
+                  <xs:element name="DOB" type="xs:date"
+                              dfdl:calendarPattern="MM/dd/yyyy"
+                              dfdl:calendarPatternKind="explicit"/>
+                </xs:sequence>
+              </xs:sequence>
+            </xs:complexType>
+          </xs:element>
+        </xs:sequence>
+      </xs:complexType>
+    </xs:element>
+
+  </tdml:defineSchema>
+
+  <!--
+  Positive test: shows that the s1 schema parses well-formed CSV-like data,
+  including the final date element of each record.
+
+  Each DOB appears in the data as MM/dd/yyyy and in the expected infoset
+  in xs:date form (yyyy-MM-dd).
+
+  The root is given explicitly, matching nameDOB_test_bad_1.
+  -->
+  <tdml:parserTestCase name="nameDOB_test1" root="file"
+                       model="s1">
+    <tdml:document><![CDATA[last,first,middle,DOB
+smith,robert,brandon,03/24/1988
+johnson,john,henry,01/23/1986
+jones,arya,cat,02/19/1986
+]]></tdml:document>
+    <tdml:infoset>
+      <tdml:dfdlInfoset>
+        <ex:file xmlns:ex="http://example.com">
+          <record>
+            <lastName>smith</lastName>
+            <firstName>robert</firstName>
+            <middleName>brandon</middleName>
+            <DOB>1988-03-24</DOB>
+          </record>
+          <record>
+            <lastName>johnson</lastName>
+            <firstName>john</firstName>
+            <middleName>henry</middleName>
+            <DOB>1986-01-23</DOB>
+          </record>
+          <record>
+            <lastName>jones</lastName>
+            <firstName>arya</firstName>
+            <middleName>cat</middleName>
+            <DOB>1986-02-19</DOB>
+          </record>
+        </ex:file>
+      </tdml:dfdlInfoset>
+    </tdml:infoset>
+  </tdml:parserTestCase>
+
+  <!--
+    This test illustrates that because of the malformed date in the
+    final row of data, the schema will deem the whole file malformed.
+
+    Bug DAFFODIL-2486 reports that the discriminator in the schema
+    for this data does not seem to work.
+
+    The test should end with a complaint about the DOB element, which is a date.
+    Until that bug is fixed, this ends with "left over data", because
+    the parser backtracks and terminates the record array based on the
+    failure to parse the date.
+    That shouldn't happen because of the discriminator.
+    -->
+  <tdml:parserTestCase name="nameDOB_test_bad_1" root="file"
+                       model="s1">
+    <tdml:document><![CDATA[last,first,middle,DOB
+smith,robert,brandon,03/24/1988
+johnson,john,henry,01/23/1986
+jones,arya,cat,1986-02-19
+]]></tdml:document>

Review comment:
       I can't believe I hit this snag, while actually writing up a TL;DR essay in a comment on another bug about exactly this problem!




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org