You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@daffodil.apache.org by mb...@apache.org on 2022/03/14 16:36:23 UTC
[daffodil] branch main updated: Added dfdlx:alignmentKind automatic/manual property
This is an automated email from the ASF dual-hosted git repository.
mbeckerle pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/daffodil.git
The following commit(s) were added to refs/heads/main by this push:
new 14f6c99 Added dfdlx:alignmentKind automatic/manual property
14f6c99 is described below
commit 14f6c99ce67f6fc58e77330b8169e66e9719607b
Author: Michael Beckerle <mb...@apache.org>
AuthorDate: Thu Mar 10 16:31:04 2022 -0500
Added dfdlx:alignmentKind automatic/manual property
ParseError on charset not aligned.
This is a significant change in behavior, though because the compiler
puts down mandatory text alignment fill regions, the automatic
alignment by the I/O layer was redundant.
Without the mandatory text alignment regions, the I/O layer really
must not do auto alignment.
Removed dead code from TextParser and unused I/O layer getString
method.
DAFFODIL-2652
---
.../org/apache/daffodil/dsom/ElementBase.scala | 113 +++++-----
.../apache/daffodil/dsom/TermEncodingMixin.scala | 4 +-
.../org/apache/daffodil/grammar/AlignedMixin.scala | 59 +++---
.../grammar/primitives/PrimitivesFraming.scala | 5 +-
.../org/apache/daffodil/io/DataInputStream.scala | 8 -
.../daffodil/io/InputSourceDataInputStream.scala | 47 ++---
.../io/StringDataInputStreamForUnparse.scala | 1 -
.../daffodil/processors/charset/BitsCharset.scala | 2 +-
.../processors/charset/BitsCharsetDecoder.scala | 17 +-
.../io/TestInputSourceDataInputStream6.scala | 33 +--
.../apache/daffodil/xsd/DFDL_part2_attributes.xsd | 1 +
.../resources/org/apache/daffodil/xsd/dfdlx.xsd | 29 +++
.../daffodil/propGen/PropertyGenerator.scala | 2 +-
.../daffodil/processors/dfa/TextParser.scala | 9 +-
.../processors/parsers/DelimiterParsers.scala | 2 +-
.../daffodil/processors/parsers/ParseErrors.scala | 9 +
.../processors/parsers/StringLengthParsers.scala | 9 +-
.../representation_properties/encodings.tdml | 118 ++++++++++-
.../computedLengthFields.tdml | 227 ++++++++++++++++++++-
.../representation_properties/TestRepProps2.scala | 8 +
.../TestComputedLengthFields.scala | 15 +-
21 files changed, 550 insertions(+), 168 deletions(-)
diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/dsom/ElementBase.scala b/daffodil-core/src/main/scala/org/apache/daffodil/dsom/ElementBase.scala
index 0f82ccc..5d1f09f 100644
--- a/daffodil-core/src/main/scala/org/apache/daffodil/dsom/ElementBase.scala
+++ b/daffodil-core/src/main/scala/org/apache/daffodil/dsom/ElementBase.scala
@@ -545,10 +545,12 @@ trait ElementBase
private lazy val implicitAlignmentInBits: Int = getImplicitAlignmentInBits(primType, impliedRepresentation)
final lazy val alignmentValueInBits: JInt = {
- //
- // get the alignment, measured in bits based on the alignment property, units, and type (when applicable)
- //
- val alignInBits: JInt =
+ if (alignmentKindDefaulted == AlignmentKind.Manual) 1 // disable automatic alignment.
+ else {
+ //
+ // get the alignment, measured in bits based on the alignment property, units, and type (when applicable)
+ //
+ val alignInBits: JInt =
alignment match {
case AlignmentType.Implicit => {
if (this.isComplexType) this.complexType.modelGroup.alignmentValueInBits
@@ -562,65 +564,66 @@ trait ElementBase
alignInBits
}
}
- //
- // Do checking of interactions of alignment with the rest of the representation
- //
- if ((alignment ne AlignmentType.Implicit) && this.isSimpleType) {
//
- // For explicitly aligned simple types there are specific checks having to do with
- // how explicit alignment interacts with text characters, or with binary packed decimal - as text chars
- // and packed decimal digits come with alignment constraints of their own.
+ // Do checking of interactions of alignment with the rest of the representation
//
- impliedRepresentation match {
- case Representation.Text => {
- //
- // If they have text representation, alignment and the text encoding alignment must be compared.
- //
- if (isRepresented && (alignInBits % implicitAlignmentInBits) != 0)
- SDE(
- "The given alignment (%s bits) must be a multiple of the encoding specified alignment (%s bits) for %s when representation='text'. Encoding: %s",
- alignInBits, implicitAlignmentInBits, primType.name, this.knownEncodingName)
- }
- case Representation.Binary => {
- //
- // if they have binary representation we must worry about packed digits, which require 4-bit alignment.
- //
- primType match {
- case PrimType.Float | PrimType.Double | PrimType.Boolean | PrimType.HexBinary => /* Non textual data, no need to compare alignment to encoding's expected alignment */
- case _ => binaryNumberRep match {
- case BinaryNumberRep.Packed | BinaryNumberRep.Bcd | BinaryNumberRep.Ibm4690Packed => {
- if ((alignInBits % 4) != 0)
- SDE(
- "The given alignment (%s bits) must be a multiple of 4 for %s when using packed binary formats",
- alignInBits, primType.name)
+ if ((alignment ne AlignmentType.Implicit) && this.isSimpleType) {
+ //
+ // For explicitly aligned simple types there are specific checks having to do with
+ // how explicit alignment interacts with text characters, or with binary packed decimal - as text chars
+ // and packed decimal digits come with alignment constraints of their own.
+ //
+ impliedRepresentation match {
+ case Representation.Text => {
+ //
+ // If they have text representation, alignment and the text encoding alignment must be compared.
+ //
+ if (isRepresented && (alignInBits % implicitAlignmentInBits) != 0)
+ SDE(
+ "The given alignment (%s bits) must be a multiple of the encoding specified alignment (%s bits) for %s when representation='text'. Encoding: %s",
+ alignInBits, implicitAlignmentInBits, primType.name, this.knownEncodingName)
+ }
+ case Representation.Binary => {
+ //
+ // if they have binary representation we must worry about packed digits, which require 4-bit alignment.
+ //
+ primType match {
+ case PrimType.Float | PrimType.Double | PrimType.Boolean | PrimType.HexBinary => /* Non textual data, no need to compare alignment to encoding's expected alignment */
+ case _ => binaryNumberRep match {
+ case BinaryNumberRep.Packed | BinaryNumberRep.Bcd | BinaryNumberRep.Ibm4690Packed => {
+ if ((alignInBits % 4) != 0)
+ SDE(
+ "The given alignment (%s bits) must be a multiple of 4 for %s when using packed binary formats",
+ alignInBits, primType.name)
+ }
+ case _ => /* Since this is non-textual data, no need to compare alignment to encoding's expected alignment */
}
- case _ => /* Since this is non-textual data, no need to compare alignment to encoding's expected alignment */
}
}
}
+ } // end if explicit alignment and simple type
+ //
+ // Now regardless of type, check for whether the initiator interacts badly with
+ // the alignment.
+ //
+ if (hasInitiator) {
+ // Check for case where explicit alignment property and
+ // mandatory text alignment of initiator
+ // are not compatible.
+ val textAlign = knownEncodingAlignmentInBits
+ // the explicit alignment must be a multiple of the textAlign
+ if (textAlign < alignInBits || textAlign % alignInBits != 0)
+ SDW(
+ WarnID.AlignmentAndInitiatorTextAlignmentNotCompatible,
+ "Initiator text may leave the element incorrectly aligned. The text encoding of initiator characters is %s bits, " +
+ "but the element alignment requires %s bits. Suggest consider whether both dfdl:initiator and dfdl:alignment should be specified for this element.",
+ textAlign, alignInBits)
}
- } // end if explicit alignment and simple type
- //
- // Now regardless of type, check for whether the initiator interacts badly with
- // the alignment.
- //
- if (hasInitiator) {
- // Check for case where explicit alignment property and
- // mandatory text alignment of initiator
- // are not compatible.
- val textAlign = knownEncodingAlignmentInBits
- // the explicit alignment must be a multiple of the textAlign
- if (textAlign < alignInBits || textAlign % alignInBits != 0)
- SDW(
- WarnID.AlignmentAndInitiatorTextAlignmentNotCompatible,
- "Initiator text may leave the element incorrectly aligned. The text encoding of initiator characters is %s bits, " +
- "but the element alignment requires %s bits. Suggest consider whether both dfdl:initiator and dfdl:alignment should be specified for this element.",
- textAlign, alignInBits)
+ //
+ // Having done the checks, just return the answer
+ //
+ alignInBits
}
- //
- // Having done the checks, just return the answer
- //
- alignInBits
}
/**
diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/dsom/TermEncodingMixin.scala b/daffodil-core/src/main/scala/org/apache/daffodil/dsom/TermEncodingMixin.scala
index d51f145..582c822 100644
--- a/daffodil-core/src/main/scala/org/apache/daffodil/dsom/TermEncodingMixin.scala
+++ b/daffodil-core/src/main/scala/org/apache/daffodil/dsom/TermEncodingMixin.scala
@@ -22,6 +22,7 @@ import org.apache.daffodil.schema.annotation.props.gen.Representation
import org.apache.daffodil.schema.annotation.props.gen.EncodingErrorPolicy
import org.apache.daffodil.processors.KnownEncodingMixin
import org.apache.daffodil.api.WarnID
+import org.apache.daffodil.schema.annotation.props.gen.AlignmentKind
import org.apache.daffodil.schema.annotation.props.gen.YesNo
import org.apache.daffodil.util.Maybe
@@ -86,7 +87,8 @@ trait TermEncodingMixin extends KnownEncodingMixin { self: Term =>
* alignment required. This is always 1 or 8.
*/
override final lazy val knownEncodingAlignmentInBits = {
- if (isKnownEncoding) {
+ if (alignmentKindDefaulted == AlignmentKind.Manual) 1 // disables any encoding alignment
+ else if (isKnownEncoding) {
schemaDefinitionWarningWhen(
WarnID.DeprecatedEncodingNameUSASCII7BitPacked,
knownEncodingName == "US-ASCII-7-BIT-PACKED",
diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/grammar/AlignedMixin.scala b/daffodil-core/src/main/scala/org/apache/daffodil/grammar/AlignedMixin.scala
index 1d2e490..27414c3 100644
--- a/daffodil-core/src/main/scala/org/apache/daffodil/grammar/AlignedMixin.scala
+++ b/daffodil-core/src/main/scala/org/apache/daffodil/grammar/AlignedMixin.scala
@@ -27,6 +27,7 @@ import org.apache.daffodil.exceptions.Assert
import org.apache.daffodil.dsom.QuasiElementDeclBase
import org.apache.daffodil.dsom.Root
import org.apache.daffodil.dsom.Term
+import org.apache.daffodil.schema.annotation.props.gen.AlignmentKind
case class AlignmentMultipleOf(nBits: Long) {
def *(that: AlignmentMultipleOf) = AlignmentMultipleOf(Math.gcd(nBits, that.nBits))
@@ -49,14 +50,21 @@ trait AlignedMixin extends GrammarMixin { self: Term =>
requiredEvaluationsIfActivated(hasNoSkipRegions)
/**
+ * If "manual" this property disables all automatic alignment. The
+ * schema author must use leadingSkip, trailingSkip, or just ensure
+ * all the elements/terms are aligned based on their length.
+ */
+ lazy val alignmentKindDefaulted: AlignmentKind =
+ optionAlignmentKind.getOrElse(AlignmentKind.Automatic)
+
+ /**
* true if we can statically determine that the start of this
* will be properly aligned by where the prior thing left us positioned.
* Hence we are guaranteed to be properly aligned.
*/
final lazy val isKnownToBeAligned: Boolean = LV('isKnownToBeAligned) {
- if (!isRepresented) {
- true
- } else {
+ if (!isRepresented || (alignmentKindDefaulted == AlignmentKind.Manual)) true
+ else {
val pa = priorAlignmentWithLeadingSkipApprox
val aa = alignmentApprox
val res = (pa % aa) == 0
@@ -73,7 +81,8 @@ trait AlignedMixin extends GrammarMixin { self: Term =>
* considers the surrounding context meeting the alignment needs.
*/
final lazy val isKnownToBeTextAligned: Boolean = LV('isKnownToBeTextAligned) {
- if (isKnownEncoding) {
+ if (alignmentKindDefaulted == AlignmentKind.Manual) true // manual alignment
+ else if (isKnownEncoding) {
if (knownEncodingAlignmentInBits == 1)
true
else if (priorAlignmentWithLeadingSkipApprox.nBits % knownEncodingAlignmentInBits == 0)
@@ -87,7 +96,8 @@ trait AlignedMixin extends GrammarMixin { self: Term =>
}.value
final lazy val isDelimiterKnownToBeTextAligned: Boolean = {
- if (isKnownEncoding) {
+ if (alignmentKindDefaulted == AlignmentKind.Manual) true // manual alignment
+ else if (isKnownEncoding) {
if (knownEncodingAlignmentInBits == 1)
true
else if (endingAlignmentApprox.nBits % knownEncodingAlignmentInBits == 0)
@@ -106,30 +116,23 @@ trait AlignedMixin extends GrammarMixin { self: Term =>
AlignmentMultipleOf(alignmentValueInBits.toLong)
}
- lazy val leadingSkipInBits = {
- alignmentUnits match {
- case AlignmentUnits.Bits => leadingSkip
- case AlignmentUnits.Bytes => leadingSkip * 8
- }
+
+ private def alignmentSkipInBits(skipProp: Int) = alignmentUnits match {
+ case AlignmentUnits.Bits => skipProp
+ case AlignmentUnits.Bytes => skipProp * 8
}
+ lazy val leadingSkipInBits = alignmentSkipInBits(leadingSkip)
+ lazy val trailingSkipInBits = alignmentSkipInBits(trailingSkip)
+
private lazy val leadingSkipApprox: LengthApprox = {
LengthExact(leadingSkipInBits)
}
- lazy val trailingSkipInBits = {
- alignmentUnits match {
- case AlignmentUnits.Bits => trailingSkip
- case AlignmentUnits.Bytes => trailingSkip * 8
- }
- }
-
protected lazy val trailingSkipApprox: LengthApprox = {
LengthExact(trailingSkipInBits)
}
- private lazy val unaligned = AlignmentMultipleOf(1)
-
// FIXME: DAFFODIL-2295
// Does not take into account that in a sequence, what may be prior may be a separator.
// The separator is text in some encoding, might not be the same as this term's encoding, and
@@ -150,11 +153,11 @@ trait AlignedMixin extends GrammarMixin { self: Term =>
// come from a previous element of this same array. Since this
// array element is implicit length, knowing where it ends requires
// knowing where it starts and the approximate length of the
- // children. But we can't know whree it starts without knowing
+ // children. But we can't know where it starts without knowing
// where the previous one array element ends. And we end up in a
// loop.
//
- // So there isn't much we can do regarding alignement. What we can
+ // So there isn't much we can do regarding alignment. What we can
// do is determine if this array element is byte aligned AND all of
// its children are byte lengths/byte aligned, if that is the case
// then we at least know this array and its elements are byte
@@ -197,7 +200,7 @@ trait AlignedMixin extends GrammarMixin { self: Term =>
ps.endingAlignmentApprox
}
eaa
- }.toSeq
+ }
val parentAlignmentApprox =
if (priorSibs.isEmpty || isEverInUnorderedSequence) {
@@ -225,7 +228,7 @@ trait AlignedMixin extends GrammarMixin { self: Term =>
}
protected lazy val contentStartAlignment: AlignmentMultipleOf = {
- if ((priorAlignmentWithLeadingSkipApprox) % alignmentApprox == 0) {
+ if (priorAlignmentWithLeadingSkipApprox % alignmentApprox == 0) {
// alignment won't be needed, continue using prior alignment as start alignment
priorAlignmentWithLeadingSkipApprox
} else {
@@ -277,7 +280,7 @@ trait AlignedMixin extends GrammarMixin { self: Term =>
// can't be last, no possibilities to gather.
None
val lastApproxes = lastApproxesConsideringChildren ++ optApproxIfNoChildren
- Assert.invariant(!lastApproxes.isEmpty)
+ Assert.invariant(lastApproxes.nonEmpty)
val res = lastApproxes.reduce { _ * _ }
res
}
@@ -289,7 +292,7 @@ trait AlignedMixin extends GrammarMixin { self: Term =>
case eb: ElementBase => {
eb.lengthKind match {
case LengthKind.Implicit => {
- // asssert this is simple element base
+ // assert this is simple element base
LengthExact(eb.elementLengthInBitsEv.optConstant.get.get)
}
case LengthKind.Explicit => {
@@ -316,7 +319,7 @@ trait AlignedMixin extends GrammarMixin { self: Term =>
case LengthKind.Prefixed => LengthMultipleOf(1) // NYI
}
}
- case mg: ModelGroup => Assert.usageError("Only for elements")
+ case _: ModelGroup => Assert.usageError("Only for elements")
}
}
@@ -339,9 +342,7 @@ trait AlignedMixin extends GrammarMixin { self: Term =>
val isByteLength = this match {
case mg: ModelGroup => mg.groupMembers.forall { _.isKnownToBeByteAlignedAndByteLength }
case eb: ElementBase => {
- val isSelfByteSizeEncoding = eb.charsetEv.optConstant.map {
- _.bitWidthOfACodeUnit == 8
- }.getOrElse(false)
+ val isSelfByteSizeEncoding = eb.charsetEv.optConstant.exists(_.bitWidthOfACodeUnit == 8)
val isSelfByteLength =
if (eb.isComplexType && eb.lengthKind == LengthKind.Implicit) {
eb.complexType.group.isKnownToBeByteAlignedAndByteLength
diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/grammar/primitives/PrimitivesFraming.scala b/daffodil-core/src/main/scala/org/apache/daffodil/grammar/primitives/PrimitivesFraming.scala
index f4308c3..e81358a 100644
--- a/daffodil-core/src/main/scala/org/apache/daffodil/grammar/primitives/PrimitivesFraming.scala
+++ b/daffodil-core/src/main/scala/org/apache/daffodil/grammar/primitives/PrimitivesFraming.scala
@@ -31,6 +31,7 @@ import org.apache.daffodil.processors.unparsers.SkipRegionUnparser
import org.apache.daffodil.processors.unparsers.Unparser
import org.apache.daffodil.schema.annotation.props.gen.LengthKind
import org.apache.daffodil.dsom.TunableLimitExceededError
+import org.apache.daffodil.schema.annotation.props.gen.AlignmentKind
abstract class SkipRegion(e: Term, skipLengthInBits: Int, propName: String) extends Terminal(e, skipLengthInBits > 0) {
@@ -72,7 +73,9 @@ case class MandatoryTextAlignment(
forDelimiter: Boolean)
extends Terminal(
e,
- if (forDelimiter)
+ if (e.alignmentKindDefaulted == AlignmentKind.Manual)
+ false // no MTA if alignmentKind is 'manual'
+ else if (forDelimiter)
!e.isDelimiterKnownToBeTextAligned
else
!e.isKnownToBeTextAligned) {
diff --git a/daffodil-io/src/main/scala/org/apache/daffodil/io/DataInputStream.scala b/daffodil-io/src/main/scala/org/apache/daffodil/io/DataInputStream.scala
index 010e33d..35e6947 100644
--- a/daffodil-io/src/main/scala/org/apache/daffodil/io/DataInputStream.scala
+++ b/daffodil-io/src/main/scala/org/apache/daffodil/io/DataInputStream.scala
@@ -414,14 +414,6 @@ trait DataInputStream
def getBinaryDouble(finfo: FormatInfo): Double
/**
- * Returns One(string) if nChars are available, Nope otherwise.
- *
- * Throws a CharacterCodingException if the encoding error policy is 'error'
- * and a decode error is detected within nChars.
- */
- def getString(nChars: Long, finfo: FormatInfo): Maybe[String]
-
- /**
* Returns One(string) if any (up to nChars) are available, Nope otherwise.
*
* Throws a CharacterCodingException if the encoding error policy is 'error'
diff --git a/daffodil-io/src/main/scala/org/apache/daffodil/io/InputSourceDataInputStream.scala b/daffodil-io/src/main/scala/org/apache/daffodil/io/InputSourceDataInputStream.scala
index 8c07934..7b45729 100644
--- a/daffodil-io/src/main/scala/org/apache/daffodil/io/InputSourceDataInputStream.scala
+++ b/daffodil-io/src/main/scala/org/apache/daffodil/io/InputSourceDataInputStream.scala
@@ -614,40 +614,23 @@ final class InputSourceDataInputStream private(val inputSource: InputSource)
markPool.finalCheck
}
- final def getString(nChars: Long, finfo: FormatInfo): Maybe[String] = {
- val startingBitPos = bitPos0b
- val aligned = align(finfo.encodingMandatoryAlignmentInBits, finfo)
- if (!aligned) {
- Maybe.Nope
- } else {
- withLocalCharBuffer { lcb =>
- val cb = lcb.getBuf(nChars)
- val numDecoded = finfo.decoder.decode(this, finfo, cb)
- if (numDecoded == nChars) {
- Maybe(cb.flip.toString)
- } else {
- setBitPos0b(startingBitPos)
- Maybe.Nope
- }
- }
- }
- }
-
+ /**
+ * Returns some characters (up to nChars) if they are available.
+ *
+ * @param nChars From 1 up to this many characters are returned as the string, or Nope for 0.
+ * @param finfo
+ * @return
+ */
final def getSomeString(nChars: Long, finfo: FormatInfo): Maybe[String] = {
val startingBitPos = bitPos0b
- val aligned = align(finfo.encodingMandatoryAlignmentInBits, finfo)
- if (!aligned) {
- Maybe.Nope
- } else {
- withLocalCharBuffer { lcb =>
- val cb = lcb.getBuf(nChars)
- val numDecoded = finfo.decoder.decode(this, finfo, cb)
- if (numDecoded > 0) {
- Maybe(cb.flip.toString)
- } else {
- setBitPos0b(startingBitPos)
- Maybe.Nope
- }
+ withLocalCharBuffer { lcb =>
+ val cb = lcb.getBuf(nChars)
+ val numDecoded = finfo.decoder.decode(this, finfo, cb)
+ if (numDecoded > 0) {
+ Maybe(cb.flip.toString)
+ } else {
+ setBitPos0b(startingBitPos)
+ Maybe.Nope
}
}
}
diff --git a/daffodil-io/src/main/scala/org/apache/daffodil/io/StringDataInputStreamForUnparse.scala b/daffodil-io/src/main/scala/org/apache/daffodil/io/StringDataInputStreamForUnparse.scala
index 0180d2a..a6397fa 100644
--- a/daffodil-io/src/main/scala/org/apache/daffodil/io/StringDataInputStreamForUnparse.scala
+++ b/daffodil-io/src/main/scala/org/apache/daffodil/io/StringDataInputStreamForUnparse.scala
@@ -62,7 +62,6 @@ final class StringDataInputStreamForUnparse
override def resetPos(m: MarkPos) = dis.resetPos(m)
override def skipChars(nChars: Long, finfo: FormatInfo): Boolean = dis.skipChars(nChars, finfo)
override def getSomeString(nChars: Long,finfo: FormatInfo): Maybe[String] = dis.getSomeString(nChars, finfo)
- override def getString(nChars: Long,finfo: FormatInfo): Maybe[String] = dis.getString(nChars, finfo)
// $COVERAGE-OFF$ Nothing should be calling these.
private def doNotUse = Assert.usageError("Not to be called on " + Misc.getNameFromClass(this))
diff --git a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/BitsCharset.scala b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/BitsCharset.scala
index 575b9ba..9520b4a 100644
--- a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/BitsCharset.scala
+++ b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/BitsCharset.scala
@@ -55,7 +55,7 @@ trait BitsCharset extends Serializable {
def aliases: Seq[String] = Nil
def bitWidthOfACodeUnit: Int // in units of bits
def requiredBitOrder: BitOrder
- def mandatoryBitAlignment: Int
+ def mandatoryBitAlignment: Int // ignored when dfdlx:alignmentKind is 'manual'
def newDecoder(): BitsCharsetDecoder
def newEncoder(): BitsCharsetEncoder
diff --git a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/BitsCharsetDecoder.scala b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/BitsCharsetDecoder.scala
index 8ebb86a..1bf1c55 100644
--- a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/BitsCharsetDecoder.scala
+++ b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/BitsCharsetDecoder.scala
@@ -19,7 +19,6 @@ package org.apache.daffodil.processors.charset
import java.nio.CharBuffer
import java.nio.LongBuffer
-
import org.apache.daffodil.exceptions.Assert
import org.apache.daffodil.exceptions.ThinException
import org.apache.daffodil.io.InputSourceDataInputStream
@@ -32,6 +31,16 @@ import org.apache.daffodil.util.MaybeChar
class BitsCharsetDecoderMalformedException(val malformedBits: Int)
extends ThinException
+class BitsCharsetDecoderUnalignedCharDecodeException(val bitPos1b: Long)
+ extends ThinException {
+ def bitAlignment1b = bitPos1b % 8
+ def bytePos1b = ((bitPos1b - 1) / 8) + 1
+
+ override def getMessage(): String = {
+ s"Charset not byte aligned. bitAlignment1b=${bitAlignment1b}, bitPos1b=${bitPos1b}, bytePos1b=${bytePos1b}."
+ }
+}
+
trait BitsCharsetDecoderState
abstract class BitsCharsetDecoder {
@@ -130,7 +139,10 @@ abstract class BitsCharsetDecoderByteSize
@inline protected final def getByte(dis: InputSourceDataInputStream, bitsConsumedSoFar: Int): Int = {
if (!dis.isDefinedForLength(8)) {
throw new BitsCharsetDecoderMalformedException(bitsConsumedSoFar)
- } else {
+ }
+ if (!dis.isAligned(8)) {
+ throw new BitsCharsetDecoderUnalignedCharDecodeException(dis.bitPos1b)
+ }
// read directly from the input source. This should be faster, but makes
// assumptions that data is aligned. This should always succeed due to
// the above check
@@ -139,7 +151,6 @@ abstract class BitsCharsetDecoderByteSize
// position
dis.setBitPos0b(dis.bitPos0b + 8)
byte
- }
}
override def reset(): Unit = {
diff --git a/daffodil-io/src/test/scala/org/apache/daffodil/io/TestInputSourceDataInputStream6.scala b/daffodil-io/src/test/scala/org/apache/daffodil/io/TestInputSourceDataInputStream6.scala
index 4bcdcd1..5e783aa 100644
--- a/daffodil-io/src/test/scala/org/apache/daffodil/io/TestInputSourceDataInputStream6.scala
+++ b/daffodil-io/src/test/scala/org/apache/daffodil/io/TestInputSourceDataInputStream6.scala
@@ -17,8 +17,11 @@
package org.apache.daffodil.io
+import org.apache.daffodil.Implicits.intercept
+import org.apache.daffodil.processors.charset.BitsCharsetDecoderUnalignedCharDecodeException
import org.junit.Test
import org.junit.Assert._
+
import java.nio.ByteBuffer
import org.apache.daffodil.schema.annotation.props.gen.ByteOrder
import org.apache.daffodil.schema.annotation.props.gen.BitOrder
@@ -115,25 +118,25 @@ class TestInputSourceDataInputStream6 {
@Test def testGetSomeString1(): Unit = {
val dis = InputSourceDataInputStream("01".getBytes())
dis.getSignedLong(1, beFinfo)
- val ms = dis.getSomeString(1, beFinfo)
- assertTrue(ms.isDefined)
- val s = ms.get
- assertEquals(1, s.length)
- assertEquals(16, dis.bitPos0b)
- assertEquals('1', s(0))
+ val e = intercept[BitsCharsetDecoderUnalignedCharDecodeException] {
+ dis.getSomeString(1, beFinfo)
+ }
+ val msg = e.getMessage()
+ assertTrue(msg.toLowerCase.contains("not byte aligned"))
+ assertEquals(2, e.bitAlignment1b)
+ assertEquals(1, e.bytePos1b)
+ assertEquals(2, e.bitPos1b)
}
- @Test def testgetSomeString2(): Unit = {
+ @Test def testGetSomeString2(): Unit = {
val dis = InputSourceDataInputStream("0年月日".getBytes("utf-8"))
dis.getSignedLong(4, beFinfo)
- val ms = dis.getSomeString(3, beFinfo)
- assertTrue(ms.isDefined)
- val s = ms.get
- assertEquals(3, s.length)
- assertEquals('年', s(0))
- assertEquals('月', s(1))
- assertEquals('日', s(2))
- assertEquals(80, dis.bitPos0b)
+ val e = intercept[BitsCharsetDecoderUnalignedCharDecodeException] {
+ dis.getSomeString(3, beFinfo)
+ }
+ val msg = e.getMessage()
+ assertTrue(msg.toLowerCase.contains("not byte aligned"))
+ assertEquals(5, e.bitAlignment1b)
}
@Test def testGetSomeStringDataEndsMidByte(): Unit = {
diff --git a/daffodil-propgen/src/main/resources/org/apache/daffodil/xsd/DFDL_part2_attributes.xsd b/daffodil-propgen/src/main/resources/org/apache/daffodil/xsd/DFDL_part2_attributes.xsd
index 9eae832..9f0b2a2 100644
--- a/daffodil-propgen/src/main/resources/org/apache/daffodil/xsd/DFDL_part2_attributes.xsd
+++ b/daffodil-propgen/src/main/resources/org/apache/daffodil/xsd/DFDL_part2_attributes.xsd
@@ -123,6 +123,7 @@
<xsd:attribute name="fillByte" type="dfdl:DFDLStringLiteral" />
<xsd:attribute name="leadingSkip" type="dfdl:DFDLNonNegativeInteger" />
<xsd:attribute name="trailingSkip" type="dfdl:DFDLNonNegativeInteger" />
+ <xsd:attribute ref="dfdlx:alignmentKind" />
</xsd:attributeGroup>
<!-- 12.2 Delimiters and Text Mark up -->
diff --git a/daffodil-propgen/src/main/resources/org/apache/daffodil/xsd/dfdlx.xsd b/daffodil-propgen/src/main/resources/org/apache/daffodil/xsd/dfdlx.xsd
index 7cd9a80..f6037e7 100644
--- a/daffodil-propgen/src/main/resources/org/apache/daffodil/xsd/dfdlx.xsd
+++ b/daffodil-propgen/src/main/resources/org/apache/daffodil/xsd/dfdlx.xsd
@@ -34,6 +34,7 @@
<xs:simpleType name="PropertyNameType">
<xs:restriction base="xs:string">
+ <xs:enumeration value="dfdlx:alignmentKind" />
<xs:enumeration value="dfdlx:choiceBranchKeyRanges" />
<xs:enumeration value="dfdlx:emptyElementParsePolicy"/>
<xs:enumeration value="dfdlx:inputTypeCalc"/>
@@ -47,6 +48,34 @@
</xs:restriction>
</xs:simpleType>
+
+ <xs:attribute name="alignmentKind" type="dfdlx:AlignmentKindEnum" default="automatic">
+ <xs:annotation>
+ <xs:documentation><![CDATA[
+ If manual, then the dfdl:alignment property is ignored, and all alignment must be done
+ manually by way of dfdl:leadingSkip, dfdl:trailingSkip, or just arranging for elements
+ that have the right size such that alignment is correct.
+
+ The dfdl:alignmentUnits property still applies and is used with dfdl:leadingSkip and dfdl:trailingSkip.
+
+ This property helps overcome limitations of the Daffodil schema compiler where it attempts to optimize
+ out normal alignment regions, but is unable to, resulting in the inability to unparse some formats.
+
+ For all constructs in the scope of this property, there will be no alignment regions created,
+ and charset encodings will not have mandatory text alignment of their characters.
+
+ Since this is a new extension property that is experimental, it will have a default value of "automatic".
+ ]]></xs:documentation>
+ </xs:annotation>
+ </xs:attribute>
+
+ <xs:simpleType name="AlignmentKindEnum">
+ <xs:restriction base="xs:string">
+ <xs:enumeration value="automatic" />
+ <xs:enumeration value="manual" />
+ </xs:restriction>
+ </xs:simpleType>
+
<xs:attribute name="parseUnparsePolicy" type="dfdlx:ParseUnparsePolicyEnum"/>
<xs:simpleType name="ParseUnparsePolicyEnum">
<xs:restriction base="xs:string">
diff --git a/daffodil-propgen/src/main/scala/org/apache/daffodil/propGen/PropertyGenerator.scala b/daffodil-propgen/src/main/scala/org/apache/daffodil/propGen/PropertyGenerator.scala
index 79aa300..747e70e 100644
--- a/daffodil-propgen/src/main/scala/org/apache/daffodil/propGen/PropertyGenerator.scala
+++ b/daffodil-propgen/src/main/scala/org/apache/daffodil/propGen/PropertyGenerator.scala
@@ -67,7 +67,7 @@ class PropertyGenerator(arg: Node) {
}
def excludeAttribute(name: String) = {
- excludedAttributes.exists { _.toUpperCase.contains(name.toUpperCase()) }
+ excludedAttributes.exists { _.toUpperCase == name.toUpperCase() }
}
def generate() = {
diff --git a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/dfa/TextParser.scala b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/dfa/TextParser.scala
index 6902c19..3cea47e 100644
--- a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/dfa/TextParser.scala
+++ b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/dfa/TextParser.scala
@@ -30,7 +30,7 @@ class TextParser(override val context: TermRuntimeData)
override lazy val name: String = "TextParser"
override lazy val info: String = "" // Nothing additional to add here
- def parse(state: PState, input: DataInputStream, delimIter: DelimiterIterator, isDelimRequired: Boolean): Maybe[ParseResult] = {
+ def parse(state: PState, input: DataInputStream, delimIter: DelimiterIterator): Maybe[ParseResult] = {
val lmt = new LongestMatchTracker()
@@ -51,12 +51,7 @@ class TextParser(override val context: TermRuntimeData)
val result = {
if (lmt.longestMatches.isEmpty) {
- if (isDelimRequired) Nope
- else {
- val totalNumCharsRead = 0
- input.getString(totalNumCharsRead, state)
- One(new ParseResult(Nope, Nope, lmt.longestMatches))
- }
+ Nope
} else {
val delim: Maybe[String] = {
One(lmt.longestMatchedString.toString)
diff --git a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/DelimiterParsers.scala b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/DelimiterParsers.scala
index 5c55263..3622678 100644
--- a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/DelimiterParsers.scala
+++ b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/DelimiterParsers.scala
@@ -81,7 +81,7 @@ class DelimiterTextParser(
val foundDelimiter =
if (maybeDelimIter.isDefined) {
- textParser.parse(start, start.dataInputStream, maybeDelimIter.get, true)
+ textParser.parse(start, start.dataInputStream, maybeDelimIter.get)
} else {
start.delimitedParseResult
}
diff --git a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/ParseErrors.scala b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/ParseErrors.scala
index 83d225e..b9210b2 100644
--- a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/ParseErrors.scala
+++ b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/ParseErrors.scala
@@ -27,6 +27,7 @@ import org.apache.daffodil.exceptions.Assert
import org.apache.daffodil.exceptions.SchemaFileLocation
import org.apache.daffodil.processors.ParseOrUnparseState
import org.apache.daffodil.processors.ProcessingError
+import org.apache.daffodil.processors.charset.BitsCharsetDecoderUnalignedCharDecodeException
import org.apache.daffodil.util.Maybe
import org.apache.daffodil.util.Maybe.Nope
import org.apache.daffodil.util.Maybe.One
@@ -39,6 +40,14 @@ class ParseError(rd: Maybe[SchemaFileLocation], val loc: Maybe[DataLocation], ca
override def toParseError = this
}
+final class CharsetNotByteAlignedError(pstate: PState,
+ cause: BitsCharsetDecoderUnalignedCharDecodeException)
+ extends ParseError(
+ Maybe.toMaybe(pstate.maybeERD.toScalaOption.map{ _.schemaFileLocation}),
+ One(pstate.currentLocation),
+ One(cause),
+ Nope)
+
class AssertionFailed(rd: SchemaFileLocation, state: PState, msg: String, details: Maybe[String] = Nope)
extends ParseError(One(rd), One(state.currentLocation), "Assertion failed: %s", msg) {
override def componentText: String = {
diff --git a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/StringLengthParsers.scala b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/StringLengthParsers.scala
index bdaced4..bef7311 100644
--- a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/StringLengthParsers.scala
+++ b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/StringLengthParsers.scala
@@ -23,6 +23,7 @@ import org.apache.daffodil.util.MaybeChar
import org.apache.daffodil.util.Misc
import passera.unsigned.ULong
import org.apache.daffodil.processors.CharsetEv
+import org.apache.daffodil.processors.charset.BitsCharsetDecoderUnalignedCharDecodeException
/**
* Specifically designed to be used inside one of the SpecifiedLength parsers.
@@ -85,7 +86,13 @@ trait StringOfSpecifiedLengthMixin
val maxLen = start.tunable.maximumSimpleElementSizeInCharacters
val startBitPos0b = dis.bitPos0b
- val strOpt = dis.getSomeString(maxLen, start)
+ val strOpt =
+ try {
+ dis.getSomeString(maxLen, start)
+ } catch {
+ case e: BitsCharsetDecoderUnalignedCharDecodeException =>
+ throw new CharsetNotByteAlignedError(start, e)
+ }
val str = if (strOpt.isDefined) strOpt.get else ""
// TODO: Performance - trimByJustification wants to operate on a StringBuilder
// That means that dis.getSomeString wants to return a StringBuilder instead of
diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/section10/representation_properties/encodings.tdml b/daffodil-test/src/test/resources/org/apache/daffodil/section10/representation_properties/encodings.tdml
index f9db6ec..f44cadb 100644
--- a/daffodil-test/src/test/resources/org/apache/daffodil/section10/representation_properties/encodings.tdml
+++ b/daffodil-test/src/test/resources/org/apache/daffodil/section10/representation_properties/encodings.tdml
@@ -16,10 +16,15 @@
limitations under the License.
-->
-<tdml:testSuite suiteName="More Encoding Tests" description="Section 10 - Core Representation Properties"
- xmlns:tdml="http://www.ibm.com/xmlns/dfdl/testData" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xmlns:dfdl="http://www.ogf.org/dfdl/dfdl-1.0/" xmlns:xs="http://www.w3.org/2001/XMLSchema"
- xmlns:ct="http://w3.ibm.com/xmlns/dfdl/ctInfoset" xmlns:ex="http://example.com"
+<tdml:testSuite
+ suiteName="More Encoding Tests"
+ description="Section 10 - Core Representation Properties"
+ xmlns:tdml="http://www.ibm.com/xmlns/dfdl/testData"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xmlns:dfdl="http://www.ogf.org/dfdl/dfdl-1.0/"
+ xmlns:dfdlx="http://www.ogf.org/dfdl/dfdl-1.0/extensions"
+ xmlns:xs="http://www.w3.org/2001/XMLSchema"
+ xmlns:ex="http://example.com"
xmlns:tns="http://example.com"
defaultRoundTrip="true">
@@ -256,7 +261,106 @@
</tdml:dfdlInfoset>
</tdml:infoset>
</tdml:parserTestCase>
-
-
-
+
+
+ <tdml:defineSchema
+ name="unalignedText"
+ elementFormDefault="unqualified"
+ useDefaultNamespace="false">
+
+ <xs:include schemaLocation="org/apache/daffodil/xsd/DFDLGeneralFormat.dfdl.xsd"/>
+
+ <dfdl:format ref="tns:GeneralFormat"
+ lengthKind="explicit"
+ representation="binary"
+ lengthUnits="bits"
+ alignmentUnits="bits"
+ dfdlx:alignmentKind="manual"/>
+
+ <xs:element name="r" dfdl:lengthKind="implicit">
+ <xs:complexType>
+ <xs:sequence>
+ <xs:element name="fourBits" type="xs:unsignedInt" dfdl:lengthUnits="bits" dfdl:length="4"/>
+ <!--
+ This next element is a string, ASCII, which requires 8-bit mandatory alignment,
+ but the dfdlx:alignmentKind 'manual' turns off automatic alignment.
+ So the parse of this string will start at a bit position that is not on a byte boundary.
+
+ The decode will then fail with a parse error.
+ -->
+ <xs:element name="str" type="xs:string" dfdl:lengthUnits="bytes" dfdl:length="3"
+ dfdl:alignment="8"/><!-- alignment will be ignored -->
+ </xs:sequence>
+ </xs:complexType>
+ </xs:element>
+
+ </tdml:defineSchema>
+
+ <tdml:parserTestCase
+ name="unalignedCharsetWithMandatory8BitAlignment"
+ model="unalignedText"
+ root="r"
+ description="Not aligned when we start parsing a charset where the decoder requires 8-bit alignment">
+ <tdml:document>
+ <tdml:documentPart type="byte">
+ F3 34 35 36
+ </tdml:documentPart>
+ </tdml:document>
+ <tdml:errors>
+ <tdml:error>Parse Error</tdml:error>
+ <tdml:error>charset not byte aligned</tdml:error>
+ <tdml:error>5</tdml:error>
+ </tdml:errors>
+ </tdml:parserTestCase>
+
+ <tdml:defineSchema
+ name="alignedText"
+ elementFormDefault="unqualified"
+ useDefaultNamespace="false">
+
+ <xs:include schemaLocation="org/apache/daffodil/xsd/DFDLGeneralFormat.dfdl.xsd"/>
+
+ <dfdl:format ref="tns:GeneralFormat"
+ lengthKind="explicit"
+ representation="binary"
+ lengthUnits="bits"
+ alignmentUnits="bits"
+ dfdlx:alignmentKind="automatic"
+ fillByte="%#r33;"/>
+
+ <xs:element name="r" dfdl:lengthKind="implicit">
+ <xs:complexType>
+ <xs:sequence>
+ <xs:element name="fourBits" type="xs:unsignedInt" dfdl:lengthUnits="bits" dfdl:length="4"/>
+ <!--
+ This next element is a string, ASCII, which requires 8-bit mandatory alignment; with dfdlx:alignmentKind 'automatic' the 4 fill bits are skipped so it starts on a byte boundary.
+ -->
+ <xs:element name="str" type="xs:string" dfdl:lengthUnits="bytes" dfdl:length="3"
+ dfdl:alignment="8"/>
+ </xs:sequence>
+ </xs:complexType>
+ </xs:element>
+
+ </tdml:defineSchema>
+
+ <tdml:parserTestCase
+ name="automaticAlignedCharsetWithMandatory8BitAlignment"
+ model="alignedText"
+ root="r"
+ description="Automatically aligned when we start parsing a charset where the decoder requires 8-bit alignment">
+ <tdml:document>
+ <tdml:documentPart type="byte">
+ F3 34 35 36
+ </tdml:documentPart>
+ </tdml:document>
+ <tdml:infoset>
+ <tdml:dfdlInfoset>
+ <ex:r>
+ <fourBits>15</fourBits>
+ <str>456</str>
+ </ex:r>
+ </tdml:dfdlInfoset>
+ </tdml:infoset>
+ </tdml:parserTestCase>
+
</tdml:testSuite>
diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/section17/calc_value_properties/computedLengthFields.tdml b/daffodil-test/src/test/resources/org/apache/daffodil/section17/calc_value_properties/computedLengthFields.tdml
index 887e851..19a2f04 100644
--- a/daffodil-test/src/test/resources/org/apache/daffodil/section17/calc_value_properties/computedLengthFields.tdml
+++ b/daffodil-test/src/test/resources/org/apache/daffodil/section17/calc_value_properties/computedLengthFields.tdml
@@ -20,6 +20,7 @@
xmlns:tdml="http://www.ibm.com/xmlns/dfdl/testData"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:dfdl="http://www.ogf.org/dfdl/dfdl-1.0/"
+ xmlns:dfdlx="http://www.ogf.org/dfdl/dfdl-1.0/extensions"
xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:ex="http://example.com">
@@ -28,14 +29,19 @@
<include schemaLocation="org/apache/daffodil/xsd/DFDLGeneralFormat.dfdl.xsd"/>
- <dfdl:format ref="ex:GeneralFormat"
+ <dfdl:defineFormat name="base">
+ <dfdl:format ref="ex:GeneralFormat"
bitOrder='mostSignificantBitFirst'
byteOrder='bigEndian'
representation="binary"
lengthUnits="bytes"
lengthKind='implicit'
alignmentUnits='bytes' alignment='1'
- binaryNumberRep='binary'/>
+ binaryNumberRep='binary'
+ />
+ </dfdl:defineFormat>
+
+ <dfdl:format ref="ex:base"/>
<!-- The prefix length field is this 2-byte unsigned integer -->
<simpleType name="strLen">
@@ -184,6 +190,155 @@
</infoset>
</unparserTestCase>
+ <tdml:defineSchema name="sWithAlignmentKindManual" useDefaultNamespace="false" elementFormDefault="unqualified"
+ xmlns="http://www.w3.org/2001/XMLSchema">
+
+ <!--
+ This schema is identical to the one above named "s", except for exactly one line.
+ The default format has the property dfdlx:alignmentKind 'manual' in effect.
+ -->
+ <include schemaLocation="org/apache/daffodil/xsd/DFDLGeneralFormat.dfdl.xsd"/>
+
+ <dfdl:defineFormat name="base">
+ <dfdl:format ref="ex:GeneralFormat"
+ bitOrder='mostSignificantBitFirst'
+ byteOrder='bigEndian'
+ representation="binary"
+ lengthUnits="bytes"
+ lengthKind='implicit'
+ alignmentUnits='bytes' alignment='1'
+ binaryNumberRep='binary'
+ />
+ </dfdl:defineFormat>
+
+ <dfdl:defineFormat name="baseWithAlignmentKindManual">
+ <dfdl:format ref="ex:base"
+ dfdlx:alignmentKind="manual"
+ />
+ </dfdl:defineFormat>
+
+ <dfdl:format ref="ex:baseWithAlignmentKindManual"/> <!-- This is the only difference -->
+
+ <!-- The prefix length field is this 2-byte unsigned integer -->
+ <simpleType name="strLen">
+ <restriction base="xs:unsignedShort"/>
+ </simpleType>
+
+ <simpleType name="pString" dfdl:lengthKind="prefixed" dfdl:prefixLengthType="ex:strLen"
+ dfdl:prefixIncludesPrefixLength="no">
+ <restriction base="xs:string"/>
+ </simpleType>
+
+ <element name="message" type="ex:messageWithPrefixedStringsInPayloadType"/>
+
+ <complexType name="messageWithPrefixedStringsInPayloadType">
+ <sequence>
+ <!--
+ messageLength stores the total length of the message, including the length of the header.
+
+ The header is 8 bytes long.
+ -->
+ <element name="messageLength" type="unsignedInt"
+ dfdl:outputValueCalc="{
+ dfdl:valueLength( ../payload/payloadContents, 'bytes') + 8
+ }"/>
+ <element name="num" type="xs:unsignedInt"/>
+ <!--
+ The payload is a variable-length part of the message. The length is determined using
+ the messageLength.
+
+ In DFDL v1.0, the messageLength is used to determine the payload length both at parse time
+ and at unparse time.
+
+ This creates a problem, as when unparsing, the length is computed using the messageLength
+ element, but the messageLength element is computed based on the length of the payload.
+
+ To avoid this circular definition, we compute the messageLength (unparsing) from
+ the payloadContents element. This is an artificial tier of element that must exist simply
+ to eliminate this circularity.
+ -->
+ <element name="payload" dfdl:lengthKind="explicit" dfdl:length="{ ../messageLength - 8 }">
+ <complexType>
+ <sequence>
+ <element name="payloadContents">
+ <complexType>
+ <sequence>
+ <!--
+ The contents of the message payload go here, inside the payloadContents element.
+ -->
+ <element name="name" type="ex:pString"/>
+ <element name="address" type="ex:pString"/>
+ </sequence>
+ </complexType>
+ </element>
+ </sequence>
+ </complexType>
+ </element>
+ </sequence>
+ </complexType>
+
+ <!-- As a "control" for this experiment. Let's create a fixed-length string -->
+ <simpleType name="fString" dfdl:lengthKind="explicit">
+ <restriction base="xs:string"/>
+ </simpleType>
+
+ <element name="message2" type="ex:messageWithFixedLengthStringsInPayloadType"/>
+
+ <complexType name="messageWithFixedLengthStringsInPayloadType">
+ <sequence>
+ <element name="messageLength" type="unsignedInt"
+ dfdl:outputValueCalc="{
+ dfdl:valueLength( ../payload/payloadContents, 'bytes') + 8
+ }"/>
+ <element name="num" type="xs:unsignedInt"/>
+ <element name="payload" dfdl:lengthKind="explicit" dfdl:length="{ ../messageLength - 8 }">
+ <complexType>
+ <sequence>
+ <element name="payloadContents">
+ <complexType>
+ <sequence>
+ <element name="name" type="ex:fString" dfdl:length="5"/>
+ <element name="address" type="ex:fString" dfdl:length="6"/>
+ </sequence>
+ </complexType>
+ </element>
+ </sequence>
+ </complexType>
+ </element>
+ </sequence>
+ </complexType>
+
+
+ </tdml:defineSchema>
+
+ <unparserTestCase name="computedLengthAroundPrefixedLengths1uWithAlignmentKindManual"
+ model="sWithAlignmentKindManual"
+ roundTrip="none"
+ description="Same test as computedLengthAroundPrefixedLengths1u but doesn't deadlock">
+ <document>
+ <documentPart type="byte"><![CDATA[
+ 0000 0017
+ 0000 1ABF
+ 0005 46 6F 6F 62 79
+ 0006 4E 6F 6F 62 79 4E
+ ]]></documentPart>
+ </document>
+ <infoset>
+ <dfdlInfoset>
+ <ex:message xmlns="">
+ <messageLength>23</messageLength>
+ <num>6847</num>
+ <payload>
+ <payloadContents>
+ <name>Fooby</name>
+ <address>NoobyN</address>
+ </payloadContents>
+ </payload>
+ </ex:message>
+ </dfdlInfoset>
+ </infoset>
+ </unparserTestCase>
+
<unparserTestCase name="computedLengthAroundFixedLengths1" model="s" roundTrip="none"
description="Control test showing the same thing but without the prefixed-length strings, works fine.">
@@ -265,7 +420,7 @@
<unparserTestCase name="prefixedAroundDelimitedString1" model="modelPrefixed"
roundTrip="none">
<document>
- <documentPart type="byte">02 01</documentPart>
+ <documentPart type="byte">02</documentPart>
<documentPart type="text" encoding="iso-8859-1">A;</documentPart>
</document>
<infoset>
@@ -277,4 +432,70 @@
</infoset>
</unparserTestCase>
+ <tdml:defineSchema
+ name="modelPrefixedAlignmentKindManual"
+ elementFormDefault="unqualified"
+ useDefaultNamespace="false"
+ xmlns="http://www.w3.org/2001/XMLSchema"
+ >
+ <!--
+ This schema is identical to the above "modelPrefixed" schema, except it
+ has the dfdlx:alignmentKind property 'manual'.
+
+ We use this to verify that dfdlx:alignmentKind 'manual' eliminates the cyclic deadlocks.
+ -->
+ <include schemaLocation="org/apache/daffodil/xsd/DFDLGeneralFormat.dfdl.xsd"/>
+ <dfdl:format
+ ref="ex:GeneralFormat"
+ encoding="iso-8859-1"
+ lengthUnits="bytes"
+ alignmentUnits="bits"
+ fillByte="%#xFF;"
+ alignment="1"
+ representation="binary"
+ dfdlx:alignmentKind="manual" />
+
+ <element
+ name="prefixed"
+ dfdl:lengthKind="prefixed"
+ dfdl:prefixIncludesPrefixLength="no"
+ dfdl:prefixLengthType="ex:prefixLengthType">
+ <complexType>
+ <sequence>
+ <element name="s1" type="xs:string"
+ dfdl:lengthKind="delimited"
+ dfdl:terminator=";"
+ dfdl:alignment="8"/>
+ </sequence>
+ </complexType>
+ </element>
+
+ <simpleType name="prefixLengthType" dfdl:lengthKind="explicit" dfdl:length="1">
+ <restriction base="xs:byte" />
+ </simpleType>
+
+ </tdml:defineSchema>
+
+ <!--
+ Identical test to prefixedAroundDelimitedString1
+
+ Except we use a schema having dfdlx:alignmentKind 'manual', and this
+ test will therefore not deadlock due to cyclic/align interactions.
+ -->
+ <unparserTestCase
+ name="prefixedAroundDelimitedString1WithAlignmentKindManual"
+ model="modelPrefixedAlignmentKindManual"
+ roundTrip="none">
+ <document>
+ <documentPart type="byte">02</documentPart>
+ <documentPart type="text" encoding="iso-8859-1">A;</documentPart>
+ </document>
+ <infoset>
+ <tdml:dfdlInfoset xmlns="">
+ <ex:prefixed>
+ <s1>A</s1>
+ </ex:prefixed>
+ </tdml:dfdlInfoset>
+ </infoset>
+ </unparserTestCase>
</testSuite>
\ No newline at end of file
diff --git a/daffodil-test/src/test/scala/org/apache/daffodil/section10/representation_properties/TestRepProps2.scala b/daffodil-test/src/test/scala/org/apache/daffodil/section10/representation_properties/TestRepProps2.scala
index 134fd58..515133a 100644
--- a/daffodil-test/src/test/scala/org/apache/daffodil/section10/representation_properties/TestRepProps2.scala
+++ b/daffodil-test/src/test/scala/org/apache/daffodil/section10/representation_properties/TestRepProps2.scala
@@ -52,4 +52,12 @@ class TestRepProps2 {
@Test def test_iso88591msbbitsmisaligned() = { runner.runOneTest("iso88591msbbitsmisaligned") }
@Test def test_iso88591lsbbitsmisaligned() = { runner.runOneTest("iso88591lsbbitsmisaligned") }
+
+ @Test def test_unalignedCharsetWithMandatory8BitAlignment() = {
+ runner.runOneTest("unalignedCharsetWithMandatory8BitAlignment")
+ }
+
+ @Test def test_automaticAlignedCharsetWithMandatory8BitAlignment() = {
+ runner.runOneTest("automaticAlignedCharsetWithMandatory8BitAlignment")
+ }
}
diff --git a/daffodil-test/src/test/scala/org/apache/daffodil/section17/calc_value_properties/TestComputedLengthFields.scala b/daffodil-test/src/test/scala/org/apache/daffodil/section17/calc_value_properties/TestComputedLengthFields.scala
index b2e1820..bd59d34 100644
--- a/daffodil-test/src/test/scala/org/apache/daffodil/section17/calc_value_properties/TestComputedLengthFields.scala
+++ b/daffodil-test/src/test/scala/org/apache/daffodil/section17/calc_value_properties/TestComputedLengthFields.scala
@@ -39,13 +39,24 @@ class TestComputedLengthFields {
@Test def test_computedLengthAroundPrefixedLengths1p(): Unit = { runner.runOneTest("computedLengthAroundPrefixedLengths1p") }
// DAFFODIL-2626 - deadlock interaction between computed length and prefixed-length strings.
- // @Test def test_computedLengthAroundPrefixedLengths1u(): Unit = { runner.runOneTest("computedLengthAroundPrefixedLengths1u") }
+ // @Test
+ def test_computedLengthAroundPrefixedLengths1u(): Unit = { runner.runOneTest("computedLengthAroundPrefixedLengths1u") }
+
+ // This test shows you can work around DAFFODIL-2626 using the dfdlx:alignmentKind='manual' property.
+ @Test def test_computedLengthAroundPrefixedLengths1uWithAlignmentKindManual(): Unit = {
+ runner.runOneTest("computedLengthAroundPrefixedLengths1uWithAlignmentKindManual")
+ }
@Test def test_computedLengthAroundFixedLengths1(): Unit = { runner.runOneTest("computedLengthAroundFixedLengths1") }
// DAFFODIL-2626 circular deadlock
// Reproduces one of the circular issues - with prefixed length for the root element surrounding
// text, where the alignment region isn't optimized out.
- // @Test def test_prefixedAroundDelimitedString1(): Unit = { runner.runOneTest("prefixedAroundDelimitedString1") }
+ // @Test
+ def test_prefixedAroundDelimitedString1(): Unit = { runner.runOneTest("prefixedAroundDelimitedString1") }
+ // This test shows you can work around DAFFODIL-2626 using the dfdlx:alignmentKind='manual' property.
+ @Test def test_prefixedAroundDelimitedString1WithAlignmentKindManual(): Unit = {
+ runner.runOneTest("prefixedAroundDelimitedString1WithAlignmentKindManual")
+ }
}