You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@daffodil.apache.org by GitBox <gi...@apache.org> on 2021/01/22 16:36:39 UTC

[GitHub] [incubator-daffodil] stevedlawrence commented on a change in pull request #478: Feature Aware DaffodilParseXMLReader and Prefixes Fixes

stevedlawrence commented on a change in pull request #478:
URL: https://github.com/apache/incubator-daffodil/pull/478#discussion_r562705132



##########
File path: daffodil-lib/src/main/scala/org/apache/daffodil/xml/XMLUtils.scala
##########
@@ -826,11 +826,14 @@ object XMLUtils {
 Comparison failed.
 Expected (attributes stripped)
           %s
+Excepted (attributes ignored for diff)
+          %s

Review comment:
       I'm concerned this is going to make our already verbose TDML diff ouput even larger. Maybe we should only show this full expected output if we're finding the non-stripped attributes are useful, with the assumption that some parts of it will be ignored?

##########
File path: daffodil-core/src/test/scala/org/apache/daffodil/processor/TestSAXParseUnparseAPI.scala
##########
@@ -71,6 +82,15 @@ object TestSAXParseUnparseAPI {
     }
     dp
   }
+
+  private def getResource(resPath: String): File = {
+    val f = try {
+      new File(this.getClass().getResource(resPath).toURI())
+    } catch {
+      case _: Throwable => null
+    }
+    f
+  }

Review comment:
       We have a Misc.getResourceOption function that does something very similar, can we use that instead?

##########
File path: daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseOutputStreamContentHandler.scala
##########
@@ -67,12 +73,64 @@ class DaffodilParseOutputStreamContentHandler(out: OutputStream, pretty: Boolean
   override def startPrefixMapping(prefix: String, uri: String): Unit = {
     val _prefix = if (prefix == "") null else prefix
     prefixMapping = NamespaceBinding(_prefix, uri, prefixMapping)
+    currentElementPrefixMapping = NamespaceBinding(_prefix, uri, currentElementPrefixMapping)
   }
 
   override def endPrefixMapping(prefix: String): Unit = {
     // do nothing
   }
 
+  /**
+   * Uses Attributes, which is passed in to the startElement callback, to extract prefix mappings
+   * and other attributes and return a tuple of an updated prefixMapping and a Seq of attribute=val
+   * String pairings
+   */
+  def processAttributes(atts: Attributes, prefixMapping: NamespaceBinding): (NamespaceBinding, Seq[String]) = {
+    var pm = prefixMapping
+    var attrPairings: Seq[String] = Seq()
+    var i = 0
+    while (i < atts.getLength) {
+      val qName = atts.getQName(i)
+      try {
+        val pre = qName.replace("xmlns:", "")
+        pm.getURI(pre)
+        // we make this call to check if the prefix already exists. If it doesn't exist, we get the
+        // npe so we can add it, but if it does exist, nothing happens and it doesn't get readded
+        // and we instead proceed to the next item in Attributes
+      } catch {
+        // prefix does not exist, so lookup resulted in npe, therefore
+        // proceed with scraping Attributes
+        case _: NullPointerException => {
+          val attrVal =  atts.getValue(i)
+          if (qName == "xmlns") {
+            pm = NamespaceBinding(null, attrVal, pm)
+          } else if (qName.startsWith("xmlns:")) {
+            val prefix = qName.substring(6)
+            pm = NamespaceBinding(prefix, attrVal, pm)
+          } else {
+            // if not a namespace mapping, it is an attribute we're interested in
+            if (qName.nonEmpty) {
+              attrPairings :+= s" $qName=" + "\"" + s"${attrVal}" + "\""

Review comment:
       We shoudl avoid this seq append, perhaps do a prepend and then reverse at the end if order matters.
   
   Also, this might be cleaner using tripple quotes, e.g.
   ```scala
   s""" ${qName}="${attrVal}""""
   ```

##########
File path: daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseOutputStreamContentHandler.scala
##########
@@ -67,12 +73,64 @@ class DaffodilParseOutputStreamContentHandler(out: OutputStream, pretty: Boolean
   override def startPrefixMapping(prefix: String, uri: String): Unit = {
     val _prefix = if (prefix == "") null else prefix
     prefixMapping = NamespaceBinding(_prefix, uri, prefixMapping)
+    currentElementPrefixMapping = NamespaceBinding(_prefix, uri, currentElementPrefixMapping)
   }
 
   override def endPrefixMapping(prefix: String): Unit = {
     // do nothing
   }
 
+  /**
+   * Uses Attributes, which is passed in to the startElement callback, to extract prefix mappings
+   * and other attributes and return a tuple of an updated prefixMapping and a Seq of attribute=val
+   * String pairings
+   */
+  def processAttributes(atts: Attributes, prefixMapping: NamespaceBinding): (NamespaceBinding, Seq[String]) = {
+    var pm = prefixMapping
+    var attrPairings: Seq[String] = Seq()
+    var i = 0
+    while (i < atts.getLength) {
+      val qName = atts.getQName(i)
+      try {
+        val pre = qName.replace("xmlns:", "")
+        pm.getURI(pre)
+        // we make this call to check if the prefix already exists. If it doesn't exist, we get the
+        // npe so we can add it, but if it does exist, nothing happens and it doesn't get readded
+        // and we instead proceed to the next item in Attributes
+      } catch {
+        // prefix does not exist, so lookup resulted in npe, therefore
+        // proceed with scraping Attributes
+        case _: NullPointerException => {
+          val attrVal =  atts.getValue(i)
+          if (qName == "xmlns") {
+            pm = NamespaceBinding(null, attrVal, pm)
+          } else if (qName.startsWith("xmlns:")) {
+            val prefix = qName.substring(6)
+            pm = NamespaceBinding(prefix, attrVal, pm)
+          } else {
+            // if not a namespace mapping, it is an attribute we're interested in
+            if (qName.nonEmpty) {
+              attrPairings :+= s" $qName=" + "\"" + s"${attrVal}" + "\""
+            } else {
+              val uri = atts.getURI(i)
+              val ln = atts.getLocalName(i)
+              try {
+                val pre = pm.getPrefix(uri)
+                if (pre.nonEmpty) {
+                  attrPairings :+= s" $pre:$ln=" + "\"" + s"${attrVal}" + "\""

Review comment:
       Same comment as above.

##########
File path: daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseOutputStreamContentHandler.scala
##########
@@ -107,6 +183,36 @@ class DaffodilParseOutputStreamContentHandler(out: OutputStream, pretty: Boolean
     outputNewlineStack.push(false)
   }
 
+  private def outputTagName(uri: String, localName: String, qName: String, atts: Maybe[Attributes] = Nope): Unit = {
+    val tagName = {
+      if (qName.nonEmpty) {
+        qName
+      } else {
+        // use attributes
+        lazy val attrs = atts.get
+        lazy val in = attrs.getIndex(uri, localName)
+        if (atts.nonEmpty && attrs.getLength != 0 && in >= 0) {
+          val qn = attrs.getQName(in)
+          qn
+        } else {
+          // use prefixMapping
+          val sanitizedUri = if (uri.isEmpty) null else uri
+          try {
+            val pre = prefixMapping.getPrefix(sanitizedUri)
+            if (pre.nonEmpty) {
+              s"$pre:$localName"
+            } else {
+              localName
+            }
+          } catch {
+            case _: NullPointerException => localName

Review comment:
       Again, we should try to avoid this NPE.

##########
File path: daffodil-core/.settings/org.scala-ide.sdt.core.prefs
##########
@@ -0,0 +1,7 @@
+#Generated by sbteclipse
+#Fri Nov 01 00:37:11 EDT 2019
+scala.compiler.additionalParams=-feature -language\:experimental.macros -Xxml\:-coalescing
+deprecation=true
+Xfatal-warnings=true
+unchecked=true
+scala.compiler.useProjectSettings=true

Review comment:
       Since these .settings files are eclipse specific, they shouldn't  be added. Might want to add .settings to a global gitignore file on yoru system.

##########
File path: daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/SAXInfosetOutputter.scala
##########
@@ -175,26 +180,99 @@ class SAXInfosetOutputter(xmlReader: DFDL.DaffodilParseXMLReader)
     }
   }
 
-  private def doStartElement(diElem: DIElement, contentHandler: ContentHandler): Unit = {
-    val (ns: String, elemName: String, qName: String) = getNameSpaceElemNameAndQName(diElem)
-    doStartPrefixMapping(diElem, contentHandler)
+  private def doAttributesPrefixMapping(diElem: DIElement, attrs: AttributesImpl): AttributesImpl = {
+    val (nsbStart: NamespaceBinding, nsbEnd: NamespaceBinding) = getNsbStartAndEnd(diElem)
+    var n = nsbStart
+    while (n != nsbEnd && n != null && n != scala.xml.TopScope) {
+      val prefix = if (n.prefix == null) "xmlns" else s"xmlns:${n.prefix}"
+      val uri = if (n.uri == null) "" else n.uri
+      attrs.addAttribute("", "", prefix, "CDATA", uri)
+      n = n.parent
+    }
+    attrs
+  }
 
-    val attrs = if (isNilled(diElem)) {
-      createNilAttribute()
+  private def getNsbStartAndEnd(diElem: DIElement) = {
+    val nsbStart = diElem.erd.minimizedScope
+    val nsbEnd = if (diElem.isRoot) {
+      scala.xml.TopScope
     } else {
-      new AttributesImpl()
+      diElem.diParent.erd.minimizedScope
+    }
+    (nsbStart, nsbEnd)
+  }
+
+  private def doStartElement(diElem: DIElement, contentHandler: ContentHandler): Unit = {
+    val (ns: String, localName: String, qName: String) = getNamespaceLocalNameAndQName(diElem)
+    val attrs = new AttributesImpl()
+    var elemUri: String = ""
+    var elemLocalName: String = ""
+    var elemQname: String = ""
+
+    if (namespacesFeature) {

Review comment:
       I think a more scala-y way to do this and avoid the var's is something like:
   ```scala
   val (elemUri, elemLocalName) =
     if (namespacesFeature) {
       ...
       (ns, localName)
     } else {
       ("", "")
     }
   ```
   Same thing for elemQName below.

##########
File path: daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/SAXInfosetOutputter.scala
##########
@@ -144,29 +153,25 @@ class SAXInfosetOutputter(xmlReader: DFDL.DaffodilParseXMLReader)
   }
 
   private def doStartPrefixMapping(diElem: DIElement, contentHandler: ContentHandler): Unit = {
-    val nsbStart = diElem.erd.minimizedScope
-    val nsbEnd = if (diElem.isRoot) {
-      scala.xml.TopScope
-    } else {
-      diElem.diParent.erd.minimizedScope
-    }
+    val (nsbStart: NamespaceBinding, nsbEnd: NamespaceBinding) = getNsbStartAndEnd(diElem)
     var n = nsbStart
+    var mappingsList: Seq[(String, String)] = Seq()
     while (n != nsbEnd && n != null && n != scala.xml.TopScope) {
       val prefix = if (n.prefix == null) "" else n.prefix
       val uri = if (n.uri == null) "" else n.uri
-      contentHandler.startPrefixMapping(prefix, uri)
+      mappingsList :+= (prefix, uri)

Review comment:
       This appends to the list, which requires a copy and is going to be quite slow. Instead, I think we can prepend to the list (which doesn't require a copy), and then the list is already in the reversed order, so we don't have to reverse it below.

##########
File path: daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseOutputStreamContentHandler.scala
##########
@@ -33,6 +36,9 @@ class DaffodilParseOutputStreamContentHandler(out: OutputStream, pretty: Boolean
   extends ContentHandler with Indentable with XMLInfosetOutputter {
   private val writer = new OutputStreamWriter(out)
   private var prefixMapping: NamespaceBinding = null
+  private var currentElementPrefixMapping: NamespaceBinding = null
+  private var currentElementAttributes: Seq[String] = Seq()

Review comment:
       I think some documentation could be useful here to describe the different between prefixMapping, currentElemenetPrefixMapping, prefixMappingCOntextStack, etc.

##########
File path: daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseOutputStreamContentHandler.scala
##########
@@ -67,12 +73,64 @@ class DaffodilParseOutputStreamContentHandler(out: OutputStream, pretty: Boolean
   override def startPrefixMapping(prefix: String, uri: String): Unit = {
     val _prefix = if (prefix == "") null else prefix
     prefixMapping = NamespaceBinding(_prefix, uri, prefixMapping)
+    currentElementPrefixMapping = NamespaceBinding(_prefix, uri, currentElementPrefixMapping)
   }
 
   override def endPrefixMapping(prefix: String): Unit = {
     // do nothing
   }
 
+  /**
+   * Uses Attributes, which is passed in to the startElement callback, to extract prefix mappings
+   * and other attributes and return a tuple of an updated prefixMapping and a Seq of attribute=val
+   * String pairings
+   */
+  def processAttributes(atts: Attributes, prefixMapping: NamespaceBinding): (NamespaceBinding, Seq[String]) = {
+    var pm = prefixMapping
+    var attrPairings: Seq[String] = Seq()
+    var i = 0
+    while (i < atts.getLength) {
+      val qName = atts.getQName(i)
+      try {
+        val pre = qName.replace("xmlns:", "")
+        pm.getURI(pre)
+        // we make this call to check if the prefix already exists. If it doesn't exist, we get the
+        // npe so we can add it, but if it does exist, nothing happens and it doesn't get readded
+        // and we instead proceed to the next item in Attributes
+      } catch {

Review comment:
       Is there no way to tell if it exists without NPE? Exceptions are fairly heavyweight, would be much quicker if there's an alterantive. Might be worth implementing our own getUriOption to avoid this.

##########
File path: daffodil-core/src/test/scala/org/apache/daffodil/processor/TestSAXParseAPI.scala
##########
@@ -174,4 +190,46 @@ class TestSAXParseAPI {
     assertTrue(pr.isError)
     assertTrue(spe.getMessage.contains("Insufficient bits in data"))
   }
+
+  @Test def testDaffodilParseXMLReader_parse_features_prefixes_only(): Unit = {
+    val xmlReader = dpNamespaces.newXMLReaderInstance
+    val baos = new ByteArrayOutputStream()
+    val parseOutputStreamContentHandler = new DaffodilParseOutputStreamContentHandler(baos)
+    xmlReader.setContentHandler(parseOutputStreamContentHandler)
+    xmlReader.setFeature(XMLUtils.SAX_NAMESPACES_FEATURE, false)
+    xmlReader.setFeature(XMLUtils.SAX_NAMESPACE_PREFIXES_FEATURE, true)
+    val inArray = testNamespacesData.getBytes()
+    xmlReader.parse(inArray)
+    val pr = xmlReader.getProperty(XMLUtils.DAFFODIL_SAX_URN_PARSERESULT).asInstanceOf[ParseResult]
+    assertTrue(!pr.isError)
+    assertEquals(testNamespacesInfoset, scala.xml.XML.loadString(baos.toString))
+  }
+
+  @Test def testDaffodilParseXMLReader_parse_features_only(): Unit = {
+    val xmlReader = dpNamespaces.newXMLReaderInstance
+    val baos = new ByteArrayOutputStream()
+    val parseOutputStreamContentHandler = new DaffodilParseOutputStreamContentHandler(baos)
+    xmlReader.setContentHandler(parseOutputStreamContentHandler)
+    xmlReader.setFeature(XMLUtils.SAX_NAMESPACES_FEATURE, true)
+    xmlReader.setFeature(XMLUtils.SAX_NAMESPACE_PREFIXES_FEATURE, false)
+    val inArray = testNamespacesData.getBytes()
+    xmlReader.parse(inArray)
+    val pr = xmlReader.getProperty(XMLUtils.DAFFODIL_SAX_URN_PARSERESULT).asInstanceOf[ParseResult]
+    assertTrue(!pr.isError)
+    assertEquals(testNamespacesInfoset, scala.xml.XML.loadString(baos.toString))
+  }
+
+  @Test def testDaffodilParseXMLReader_parse_features_and_prefixes(): Unit = {
+    val xmlReader = dpNamespaces.newXMLReaderInstance
+    val baos = new ByteArrayOutputStream()
+    val parseOutputStreamContentHandler = new DaffodilParseOutputStreamContentHandler(baos)
+    xmlReader.setContentHandler(parseOutputStreamContentHandler)
+    xmlReader.setFeature(XMLUtils.SAX_NAMESPACES_FEATURE, true)
+    xmlReader.setFeature(XMLUtils.SAX_NAMESPACE_PREFIXES_FEATURE, true)
+    val inArray = testNamespacesData.getBytes()
+    xmlReader.parse(inArray)
+    val pr = xmlReader.getProperty(XMLUtils.DAFFODIL_SAX_URN_PARSERESULT).asInstanceOf[ParseResult]
+    assertTrue(!pr.isError)
+    assertEquals(testNamespacesInfoset, scala.xml.XML.loadString(baos.toString))
+  }

Review comment:
       One thing about these tests is that they don't really test that the our XMLReader is creating the the correct sax events with the correct parameters, and that they different based on the featuers. All these test really verify is that different features give the same result. For example, we could be completely ignoring the features, and that would give the same result and the tests would still pass.
   
   Ideally, we would have something like a special Test ContentHandler that would have a list of expected SAX events/parameters and ensure we got the right ones, and that they are different with different features set. Though, I'm not sure if the effort is worth that. We know that the new features are implemented, and they all result in the same infoset, so we can probably assume that the right SAX events are created and handled correctly.

##########
File path: daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseOutputStreamContentHandler.scala
##########
@@ -107,6 +183,36 @@ class DaffodilParseOutputStreamContentHandler(out: OutputStream, pretty: Boolean
     outputNewlineStack.push(false)
   }
 
+  private def outputTagName(uri: String, localName: String, qName: String, atts: Maybe[Attributes] = Nope): Unit = {
+    val tagName = {
+      if (qName.nonEmpty) {
+        qName
+      } else {
+        // use attributes
+        lazy val attrs = atts.get
+        lazy val in = attrs.getIndex(uri, localName)
+        if (atts.nonEmpty && attrs.getLength != 0 && in >= 0) {

Review comment:
       Wy are the ``atts.nonEmpty`` && ``attrs.getLength`` checks needed? If ``in >= 0`` that means we found the index and getQName should work, right? 

##########
File path: daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/SAXInfosetOutputter.scala
##########
@@ -27,6 +30,12 @@ import org.xml.sax.helpers.AttributesImpl
 class SAXInfosetOutputter(xmlReader: DFDL.DaffodilParseXMLReader)
   extends InfosetOutputter
   with XMLInfosetOutputter {
+
+  lazy val namespacesFeature: Boolean = xmlReader.getFeature(XMLUtils.SAX_NAMESPACES_FEATURE)
+  lazy val namespacePrefixesFeature: Boolean = xmlReader.getFeature(XMLUtils.SAX_NAMESPACE_PREFIXES_FEATURE)
+  Assert.invariant(namespacesFeature || namespacePrefixesFeature,
+    s"Features ${XMLUtils.SAX_NAMESPACES_FEATURE} and ${XMLUtils.SAX_NAMESPACE_PREFIXES_FEATURE} cannot both be false")

Review comment:
       I'm not sure an Assert is correct here. Asserts are usually for things that should be impossible, but it's very posible that a user will do the wrong thing and set these features so that they conflict. This should probably just be some sort of SAX exception.
   
   Another question is if this is the right place for this? This makes it so the features are validited when this SAXInfosetOutputter is created, but can't the user change those features after it's created? At want point to we consider these features frozen and cannot be changed?
   
   Looking at Xerces for guidance: https://xerces.apache.org/xerces2-j/features.html. This says about these two features:
   > Access: (parsing) read-only; (not parsing) read-write;
   
   This seems reasonable and probably something we should implement? So we maybe should only allow these to be changed during parsing, and should validate them everytime parse() is called. And this SAX InfosetOutputter needs to consider that these feautures may change and must somehow get the latest values when parse is called. So maybe it just always calls xmlReader.getFeature when it needs a feature, or maybe there's some var's that are changed when startDocument is called?
   
   

##########
File path: daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseOutputStreamContentHandler.scala
##########
@@ -67,12 +73,64 @@ class DaffodilParseOutputStreamContentHandler(out: OutputStream, pretty: Boolean
   override def startPrefixMapping(prefix: String, uri: String): Unit = {
     val _prefix = if (prefix == "") null else prefix
     prefixMapping = NamespaceBinding(_prefix, uri, prefixMapping)
+    currentElementPrefixMapping = NamespaceBinding(_prefix, uri, currentElementPrefixMapping)
   }
 
   override def endPrefixMapping(prefix: String): Unit = {
     // do nothing
   }
 
+  /**
+   * Uses Attributes, which is passed in to the startElement callback, to extract prefix mappings
+   * and other attributes and return a tuple of an updated prefixMapping and a Seq of attribute=val
+   * String pairings
+   */
+  def processAttributes(atts: Attributes, prefixMapping: NamespaceBinding): (NamespaceBinding, Seq[String]) = {
+    var pm = prefixMapping
+    var attrPairings: Seq[String] = Seq()
+    var i = 0
+    while (i < atts.getLength) {
+      val qName = atts.getQName(i)
+      try {
+        val pre = qName.replace("xmlns:", "")

Review comment:
       Can we do atts.getLocalName if we're just going to strip off the xmlns prefix? Also, what if this attribute isn't a xmlns attribute and is something like xsi:nil? Won't we try to find "xsi:nil" as if it were a prefix?

##########
File path: daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseXMLReader.scala
##########
@@ -43,35 +41,34 @@ class DaffodilParseXMLReader(dp: DataProcessor) extends DFDL.DaffodilParseXMLRea
   private var errorHandler: ErrorHandler = _
   private var dtdHandler: DTDHandler = _
   private var entityResolver: EntityResolver = _
-  var saxParseResultPropertyValue: ParseResult = _
-  var saxBlobDirectoryPropertyValue: Path = Paths.get(System.getProperty("java.io.tmpdir"))
-  var saxBlobPrefixPropertyValue: String = "daffodil-sax-"
-  var saxBlobSuffixPropertyValue: String = ".blob"
-
-  private val featureMap = mutable.Map[String, Boolean](
-    XMLUtils.SAX_NAMESPACES_FEATURE -> false,
-    XMLUtils.SAX_NAMESPACE_PREFIXES_FEATURE -> false
-  )
+  private var saxParseResultPropertyValue: ParseResult = _
+  private var saxBlobDirectoryPropertyValue: Path = Paths.get(System.getProperty("java.io.tmpdir"))
+  private var saxBlobPrefixPropertyValue: String = "daffodil-sax-"
+  private var saxBlobSuffixPropertyValue: String = ".blob"
+  private var saxNamespaceFeatureValue: Boolean = true
+  private var saxNamespacePrefixesFeatureValue: Boolean = false
 
   override def getFeature(name: String): Boolean = {
-    if (name == XMLUtils.SAX_NAMESPACES_FEATURE ||
-      name == XMLUtils.SAX_NAMESPACE_PREFIXES_FEATURE) {
-      featureMap(name)
-    } else {
-      throw new SAXNotRecognizedException("Feature unsupported: " + name + ".\n" +
-        "Supported features are: " + featureMap.keys.mkString(", ")
-      )
+    name match {
+      case XMLUtils.SAX_NAMESPACES_FEATURE => saxNamespaceFeatureValue
+      case XMLUtils.SAX_NAMESPACE_PREFIXES_FEATURE => saxNamespacePrefixesFeatureValue
+      case _ =>
+        throw new SAXNotRecognizedException("Feature not found: " + name + ".\n" +
+        "Supported features are: " +
+        Seq(XMLUtils.SAX_NAMESPACES_FEATURE,
+          XMLUtils.SAX_NAMESPACE_PREFIXES_FEATURE).mkString(", "))
     }
   }
 
   override def setFeature(name: String, value: Boolean): Unit = {
-    if (name == XMLUtils.SAX_NAMESPACES_FEATURE ||
-      name == XMLUtils.SAX_NAMESPACE_PREFIXES_FEATURE) {
-      featureMap(name) = value
-    } else {
-      throw new SAXNotRecognizedException("Feature unsupported: " + name + ".\n" +
-        "Supported features are: " + featureMap.keys.mkString(", ")
-      )
+    name match {
+      case XMLUtils.SAX_NAMESPACES_FEATURE => saxNamespaceFeatureValue = value
+      case XMLUtils.SAX_NAMESPACE_PREFIXES_FEATURE => saxNamespacePrefixesFeatureValue = value
+      case _ =>
+        throw new SAXNotRecognizedException("Feature unsupported: " + name + ".\n" +
+          "Supported features are: " +
+          Seq(XMLUtils.SAX_NAMESPACES_FEATURE,
+            XMLUtils.SAX_NAMESPACE_PREFIXES_FEATURE).mkString(", "))

Review comment:
       Suggest we have a function that throws this exception so we don't have to duplicate this logic.

##########
File path: daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseOutputStreamContentHandler.scala
##########
@@ -67,12 +73,64 @@ class DaffodilParseOutputStreamContentHandler(out: OutputStream, pretty: Boolean
   override def startPrefixMapping(prefix: String, uri: String): Unit = {
     val _prefix = if (prefix == "") null else prefix
     prefixMapping = NamespaceBinding(_prefix, uri, prefixMapping)
+    currentElementPrefixMapping = NamespaceBinding(_prefix, uri, currentElementPrefixMapping)
   }
 
   override def endPrefixMapping(prefix: String): Unit = {
     // do nothing
   }
 
+  /**
+   * Uses Attributes, which is passed in to the startElement callback, to extract prefix mappings
+   * and other attributes and return a tuple of an updated prefixMapping and a Seq of attribute=val
+   * String pairings
+   */
+  def processAttributes(atts: Attributes, prefixMapping: NamespaceBinding): (NamespaceBinding, Seq[String]) = {
+    var pm = prefixMapping
+    var attrPairings: Seq[String] = Seq()
+    var i = 0
+    while (i < atts.getLength) {
+      val qName = atts.getQName(i)
+      try {
+        val pre = qName.replace("xmlns:", "")
+        pm.getURI(pre)
+        // we make this call to check if the prefix already exists. If it doesn't exist, we get the
+        // npe so we can add it, but if it does exist, nothing happens and it doesn't get readded
+        // and we instead proceed to the next item in Attributes
+      } catch {
+        // prefix does not exist, so lookup resulted in npe, therefore
+        // proceed with scraping Attributes
+        case _: NullPointerException => {
+          val attrVal =  atts.getValue(i)
+          if (qName == "xmlns") {
+            pm = NamespaceBinding(null, attrVal, pm)
+          } else if (qName.startsWith("xmlns:")) {
+            val prefix = qName.substring(6)
+            pm = NamespaceBinding(prefix, attrVal, pm)
+          } else {
+            // if not a namespace mapping, it is an attribute we're interested in
+            if (qName.nonEmpty) {
+              attrPairings :+= s" $qName=" + "\"" + s"${attrVal}" + "\""
+            } else {
+              val uri = atts.getURI(i)
+              val ln = atts.getLocalName(i)
+              try {
+                val pre = pm.getPrefix(uri)
+                if (pre.nonEmpty) {
+                  attrPairings :+= s" $pre:$ln=" + "\"" + s"${attrVal}" + "\""
+                }
+              } catch {
+                case _: NullPointerException => // couldn't find prefix for attribute; do nothing

Review comment:
       Again, we should really avoid NPE's here. That's usuallya sign of a bug.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org