You are viewing a plain text version of this content. The canonical link for it is here.
Posted to users@daffodil.apache.org by Roger L Costello <co...@mitre.org> on 2022/12/23 19:13:49 UTC

How to drop a bunch of lines, then process some lines, then drop the remaining lines?

Hi Folks,

My input consists of a bunch of lines, then some lines that contain xxxxPxxxxxxxDxxxxx... (where x = any char), and then a bunch more lines.

....
....
xxxxPxxxxxxxDxxxxxxxx
xxxxPxxxxxxxDxxxxxxxx
xxxxPxxxxxxxDxxxxxxxx
.....
.....

I want the output to show only the middle lines. Each of those lines represents an aeronautical sid, Standard Instrument Departure. A sid is identified by P in position 5 and D in position 13. That is, I want to output only the sid lines. I want to dump (hide) the other, non-sid lines.

Below is my DFDL schema.  When I run it, I get this error message: 

[error] Parse Error: Failed to populate sid[1]. Cause: Parse Error: Assertion failed: { dfdl:checkConstraints(.) } failed

What am I doing wrong please?  /Roger

<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
    xmlns:fn="http://www.w3.org/2005/xpath-functions"
    xmlns:dfdl="http://www.ogf.org/dfdl/dfdl-1.0/">
    
    <xs:annotation>
        <xs:appinfo source="http://www.ogf.org/dfdl/">
            <dfdl:format
                alignment="1" 
                alignmentUnits="bytes" 
                emptyValueDelimiterPolicy="none" 
                encoding="ASCII" 
                encodingErrorPolicy="replace" 
                escapeSchemeRef="" 
                fillByte="%SP;" 
                floating="no" 
                ignoreCase="yes" 
                initiatedContent="no" 
                initiator="" 
                leadingSkip="0"
                lengthKind="delimited" 
                lengthUnits="characters" 
                nilValueDelimiterPolicy="none" 
                occursCountKind="implicit" 
                outputNewLine="%CR;%LF;" 
                representation="text" 
                separator="" 
                separatorSuppressionPolicy="anyEmpty" 
                sequenceKind="ordered" 
                textBidi="no" 
                textPadKind="none"
                textTrimKind="none" 
                trailingSkip="0" 
                truncateSpecifiedLengthString="no" 
                terminator="" 
                textNumberRep="standard" 
                textStandardBase="10" 
                textStandardZeroRep="0" 
                textNumberRounding="pattern" 
                textStandardExponentRep="E" 
                textNumberCheckPolicy="strict"
            />
        </xs:appinfo>
    </xs:annotation>
    
    <xs:element name="sids">
        <xs:complexType>
            <xs:sequence>
                <xs:sequence dfdl:hiddenGroupRef="Hide-lines-prior-to-sids" />
                <xs:sequence dfdl:separator="%NL;" dfdl:separatorPosition="infix">
                    <xs:element name="sid" maxOccurs="unbounded" dfdl:lengthKind="delimited">
                        <xs:complexType>
                            <xs:sequence>
                                <xs:element name="Record_Type" type="validString" dfdl:lengthKind="explicit" dfdl:length="1" />
                                <xs:element name="Area_Code" type="validString" dfdl:lengthKind="explicit" dfdl:length="3" />
                                <xs:element name="Section_Code" dfdl:lengthKind="explicit" dfdl:length="1">
                                    <xs:simpleType>
                                        <xs:restriction base="validString">
                                            <xs:enumeration value="P" />
                                        </xs:restriction>
                                    </xs:simpleType>
                                </xs:element>
                                <xs:element name="Blank" dfdl:lengthKind="explicit" dfdl:length="1">
                                    <xs:simpleType>
                                        <xs:restriction base="validString">
                                            <xs:enumeration value=" " />
                                        </xs:restriction>
                                    </xs:simpleType>
                                </xs:element>
                                <xs:element name="Airport_ICAO_Code_Identifier" type="validString" dfdl:lengthKind="explicit" dfdl:length="4" />
                                <xs:element name="ICAO_Code" type="validString" dfdl:lengthKind="explicit" dfdl:length="2" />
                                <xs:element name="Subsection_Code" dfdl:lengthKind="explicit" dfdl:length="1">
                                    <xs:simpleType>
                                        <xs:restriction base="validString">
                                            <xs:enumeration value="D" />
                                        </xs:restriction>
                                    </xs:simpleType>
                                </xs:element>
                                <xs:element name="remainder" type="validString" />
                            </xs:sequence>
                        </xs:complexType>
                    </xs:element>
                </xs:sequence>
                <xs:sequence dfdl:hiddenGroupRef="Hide-lines-after-the-sids" />
            </xs:sequence>
        </xs:complexType>
    </xs:element>

    <xs:group name="Hide-lines-prior-to-sids">
        <xs:sequence>
            <xs:element name="junk" type="xs:string" 
                dfdl:lengthKind="pattern"
                dfdl:lengthPattern=".*?(?=(^.{4}P.{7}[^D])|(^.{4}[^P].{7}.))"
                dfdl:outputValueCalc="{.}"/>
        </xs:sequence>
    </xs:group>
    
    <xs:group name="Hide-lines-after-the-sids">
        <xs:sequence>
            <xs:element name="junk" type="xs:string" 
                dfdl:lengthKind="pattern"
                dfdl:lengthPattern=".*?(?=((^.{4}P.{7}[^D])|(^.{4}[^P].{7}.)))"
                dfdl:outputValueCalc="{.}"/>
        </xs:sequence>
    </xs:group>
    
    
    <xs:simpleType name="validString">
        <xs:annotation>
            <xs:appinfo source="http://www.ogf.org/dfdl/">
                <dfdl:assert>{ dfdl:checkConstraints(.) }</dfdl:assert>
            </xs:appinfo>
        </xs:annotation>
        <xs:restriction base="xs:string"/>
    </xs:simpleType>
    
</xs:schema>


RE: How to drop a bunch of lines, then process some lines, then drop the remaining lines?

Posted by Roger L Costello <co...@mitre.org>.
Please ignore the below message. I figured out what the problem was.  /Roger

-----Original Message-----
From: Roger L Costello <co...@mitre.org> 
Sent: Friday, December 23, 2022 2:14 PM
To: users@daffodil.apache.org
Subject: How to drop a bunch of lines, then process some lines, then drop the remaining lines?

Hi Folks,

My input consists of a bunch of lines, then some lines that contain xxxxPxxxxxxxDxxxxx... (where x = any char), and then a bunch more lines.

....
....
xxxxPxxxxxxxDxxxxxxxx
xxxxPxxxxxxxDxxxxxxxx
xxxxPxxxxxxxDxxxxxxxx
.....
.....

I want the output to show only the middle lines. Each of those lines represents an aeronautical sid, Standard Instrument Departure. A sid is identified by P in position 5 and D in position 13. That is, I want to output only the sid lines. I want to dump (hide) the other, non-sid lines.

Below is my DFDL schema.  When I run it, I get this error message: 

[error] Parse Error: Failed to populate sid[1]. Cause: Parse Error: Assertion failed: { dfdl:checkConstraints(.) } failed

What am I doing wrong please?  /Roger

<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
    xmlns:fn="http://www.w3.org/2005/xpath-functions"
    xmlns:dfdl="http://www.ogf.org/dfdl/dfdl-1.0/">
    
    <xs:annotation>
        <xs:appinfo source="http://www.ogf.org/dfdl/">
            <dfdl:format
                alignment="1" 
                alignmentUnits="bytes" 
                emptyValueDelimiterPolicy="none" 
                encoding="ASCII" 
                encodingErrorPolicy="replace" 
                escapeSchemeRef="" 
                fillByte="%SP;" 
                floating="no" 
                ignoreCase="yes" 
                initiatedContent="no" 
                initiator="" 
                leadingSkip="0"
                lengthKind="delimited" 
                lengthUnits="characters" 
                nilValueDelimiterPolicy="none" 
                occursCountKind="implicit" 
                outputNewLine="%CR;%LF;" 
                representation="text" 
                separator="" 
                separatorSuppressionPolicy="anyEmpty" 
                sequenceKind="ordered" 
                textBidi="no" 
                textPadKind="none"
                textTrimKind="none" 
                trailingSkip="0" 
                truncateSpecifiedLengthString="no" 
                terminator="" 
                textNumberRep="standard" 
                textStandardBase="10" 
                textStandardZeroRep="0" 
                textNumberRounding="pattern" 
                textStandardExponentRep="E" 
                textNumberCheckPolicy="strict"
            />
        </xs:appinfo>
    </xs:annotation>
    
    <xs:element name="sids">
        <xs:complexType>
            <xs:sequence>
                <xs:sequence dfdl:hiddenGroupRef="Hide-lines-prior-to-sids" />
                <xs:sequence dfdl:separator="%NL;" dfdl:separatorPosition="infix">
                    <xs:element name="sid" maxOccurs="unbounded" dfdl:lengthKind="delimited">
                        <xs:complexType>
                            <xs:sequence>
                                <xs:element name="Record_Type" type="validString" dfdl:lengthKind="explicit" dfdl:length="1" />
                                <xs:element name="Area_Code" type="validString" dfdl:lengthKind="explicit" dfdl:length="3" />
                                <xs:element name="Section_Code" dfdl:lengthKind="explicit" dfdl:length="1">
                                    <xs:simpleType>
                                        <xs:restriction base="validString">
                                            <xs:enumeration value="P" />
                                        </xs:restriction>
                                    </xs:simpleType>
                                </xs:element>
                                <xs:element name="Blank" dfdl:lengthKind="explicit" dfdl:length="1">
                                    <xs:simpleType>
                                        <xs:restriction base="validString">
                                            <xs:enumeration value=" " />
                                        </xs:restriction>
                                    </xs:simpleType>
                                </xs:element>
                                <xs:element name="Airport_ICAO_Code_Identifier" type="validString" dfdl:lengthKind="explicit" dfdl:length="4" />
                                <xs:element name="ICAO_Code" type="validString" dfdl:lengthKind="explicit" dfdl:length="2" />
                                <xs:element name="Subsection_Code" dfdl:lengthKind="explicit" dfdl:length="1">
                                    <xs:simpleType>
                                        <xs:restriction base="validString">
                                            <xs:enumeration value="D" />
                                        </xs:restriction>
                                    </xs:simpleType>
                                </xs:element>
                                <xs:element name="remainder" type="validString" />
                            </xs:sequence>
                        </xs:complexType>
                    </xs:element>
                </xs:sequence>
                <xs:sequence dfdl:hiddenGroupRef="Hide-lines-after-the-sids" />
            </xs:sequence>
        </xs:complexType>
    </xs:element>

    <xs:group name="Hide-lines-prior-to-sids">
        <xs:sequence>
            <xs:element name="junk" type="xs:string" 
                dfdl:lengthKind="pattern"
                dfdl:lengthPattern=".*?(?=(^.{4}P.{7}[^D])|(^.{4}[^P].{7}.))"
                dfdl:outputValueCalc="{.}"/>
        </xs:sequence>
    </xs:group>
    
    <xs:group name="Hide-lines-after-the-sids">
        <xs:sequence>
            <xs:element name="junk" type="xs:string" 
                dfdl:lengthKind="pattern"
                dfdl:lengthPattern=".*?(?=((^.{4}P.{7}[^D])|(^.{4}[^P].{7}.)))"
                dfdl:outputValueCalc="{.}"/>
        </xs:sequence>
    </xs:group>
    
    
    <xs:simpleType name="validString">
        <xs:annotation>
            <xs:appinfo source="http://www.ogf.org/dfdl/">
                <dfdl:assert>{ dfdl:checkConstraints(.) }</dfdl:assert>
            </xs:appinfo>
        </xs:annotation>
        <xs:restriction base="xs:string"/>
    </xs:simpleType>
    
</xs:schema>