You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@nifi.apache.org by "crissaegrim (Jira)" <ji...@apache.org> on 2023/05/20 06:50:00 UTC

[jira] [Commented] (NIFI-11576) XMLReader not handling namespaced attributes correctly

    [ https://issues.apache.org/jira/browse/NIFI-11576?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17724490#comment-17724490 ] 

crissaegrim commented on NIFI-11576:
------------------------------------

h1. Schema
{code:java}
record RecordWithAttr2 {
    string? content_value;
    string attr2;
}

record RecordWithAttr1 {
    union { null, array<RecordWithAttr2>} record_with_attr2 = null;
    string? attr1 = null;
}

record EntryObjectType {
    array<RecordWithAttr1> something;
}

record ObjectDataType {
    array<EntryObjectType> entry;
}

{code}
h2. Input Data

This input is parsed correctly (the attributes have no namespace prefix):
{code:xml}
<?xml version='1.0' encoding='UTF-8'?>
<wd:data xmlns:wd="urn:com.wd.report/foo">
    <wd:entry>
        <wd:something attr1="attr1 content">
            <wd:record_with_attr2 attr2="attr2 content 1">record_with_attr2 content 1</wd:record_with_attr2>
            <wd:record_with_attr2 attr2="attr2 content 2">record_with_attr2 content 2</wd:record_with_attr2>
        </wd:something>
    </wd:entry>
</wd:data> {code}

This input is not parsed correctly (the attributes carry the wd: namespace prefix):
{code:xml}
<?xml version='1.0' encoding='UTF-8'?>
<wd:data xmlns:wd="urn:com.wd.report/foo">
	<wd:entry>
		<wd:something wd:attr1="attr1 content">
			<wd:record_with_attr2 wd:attr2="attr2 content 1">record_with_attr2 content 1</wd:record_with_attr2>
			<wd:record_with_attr2 wd:attr2="attr2 content 2">record_with_attr2 content 2</wd:record_with_attr2>
		</wd:something>
	</wd:entry>
</wd:data>
{code}

h2. Expected Output
{code:json}
[ {
  "entry" : [ {
    "something" : [ {
      "record_with_attr2" : [ {
        "content_value" : "record_with_attr2 content 1",
        "attr2" : "attr2 content 1"
      }, {
        "content_value" : "record_with_attr2 content 2",
        "attr2" : "attr2 content 2"
      } ],
      "attr1" : "attr1 content"
    } ]
  } ]
} ]
{code}

h2. Actual Output
{code:json}
[ {
  "entry" : [ {
    "something" : [ {
      "record_with_attr2" : [ {
        "content_value" : "record_with_attr2 content 1",
        "attr2" : null
      }, {
        "content_value" : "record_with_attr2 content 2",
        "attr2" : null
      } ],
      "attr1" : null
    } ]
  } ]
} ]
{code}

h1. Schema In AVSC
{code:json}
{
  "type" : "record",
  "name" : "ObjectDataType",
  "namespace" : "nifi",
  "fields" : [ {
    "name" : "entry",
    "type" : {
      "type" : "array",
      "items" : {
        "type" : "record",
        "name" : "EntryObjectType",
        "fields" : [ {
          "name" : "something",
          "type" : {
            "type" : "array",
            "items" : {
              "type" : "record",
              "name" : "RecordWithAttr1",
              "fields" : [ {
                "name" : "record_with_attr2",
                "type" : [ "null", {
                  "type" : "array",
                  "items" : {
                    "type" : "record",
                    "name" : "RecordWithAttr2",
                    "fields" : [ {
                      "name" : "content_value",
                      "type" : [ "null", "string" ]
                    }, {
                      "name" : "attr2",
                      "type" : "string"
                    } ]
                  }
                } ],
                "default" : null
              }, {
                "name" : "attr1",
                "type" : [ "null", "string" ],
                "default" : null
              } ]
            }
          }
        } ]
      }
    }
  } ]
}

{code}

> XMLReader not handling namespaced attributes correctly
> ------------------------------------------------------
>
>                 Key: NIFI-11576
>                 URL: https://issues.apache.org/jira/browse/NIFI-11576
>             Project: Apache NiFi
>          Issue Type: Bug
>            Reporter: crissaegrim
>            Priority: Major
>
> Supposing we have
> {code:xml}
> <wd:report_data xmlns:wd="">
>     <wd:record>
>         <wd:foo wd:bar="bar content">foo content</wd:foo>
>     </wd:record>
> </wd:report_data> {code}
> `XMLReader` recognizes `foo content` and extracts it correctly, but fails to pick up `bar content`.
> The workaround is to remove the namespace prefix from the attributes:  `<wd:foo bar="bar content">foo content</wd:foo>`
> More details on how to reproduce the problem are in the comments.



--
This message was sent by Atlassian Jira
(v8.20.10#820010)