You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@nifi.apache.org by "crissaegrim (Jira)" <ji...@apache.org> on 2023/05/20 06:50:00 UTC
[jira] [Commented] (NIFI-11576) XMLReader not handling namespaced attributes correctly
[ https://issues.apache.org/jira/browse/NIFI-11576?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17724490#comment-17724490 ]
crissaegrim commented on NIFI-11576:
------------------------------------
h1. Schema (Avro IDL)
{code:java}
record RecordWithAttr2 {
string? content_value;
string attr2;
}
record RecordWithAttr1 {
union { null, array<RecordWithAttr2>} record_with_attr2 = null;
string? attr1 = null;
}
record EntryObjectType {
array<RecordWithAttr1> something;
}
record ObjectDataType {
array<EntryObjectType> entry;
}
{code}
h2. Input Data
This input works (the attributes have no namespace prefix):
{code:xml}
<?xml version='1.0' encoding='UTF-8'?>
<wd:data xmlns:wd="urn:com.wd.report/foo">
<wd:entry>
<wd:something attr1="attr1 content">
<wd:record_with_attr2 attr2="attr2 content 1">record_with_attr2 content 1</wd:record_with_attr2>
<wd:record_with_attr2 attr2="attr2 content 2">record_with_attr2 content 2</wd:record_with_attr2>
</wd:something>
</wd:entry>
</wd:data> {code}
This input does not work (the attributes carry the wd: namespace prefix, and their values are read as null):
{code:xml}
<?xml version='1.0' encoding='UTF-8'?>
<wd:data xmlns:wd="urn:com.wd.report/foo">
<wd:entry>
<wd:something wd:attr1="attr1 content">
<wd:record_with_attr2 wd:attr2="attr2 content 1">record_with_attr2 content 1</wd:record_with_attr2>
<wd:record_with_attr2 wd:attr2="attr2 content 2">record_with_attr2 content 2</wd:record_with_attr2>
</wd:something>
</wd:entry>
</wd:data>
{code}
h2. Expected Output
{code:json}
[ {
"entry" : [ {
"something" : [ {
"record_with_attr2" : [ {
"content_value" : "record_with_attr2 content 1",
"attr2" : "attr2 content 1"
}, {
"content_value" : "record_with_attr2 content 2",
"attr2" : "attr2 content 2"
} ],
"attr1" : "attr1 content"
} ]
} ]
} ]
{code}
h2. Actual Output
{code:json}
[ {
"entry" : [ {
"something" : [ {
"record_with_attr2" : [ {
"content_value" : "record_with_attr2 content 1",
"attr2" : null
}, {
"content_value" : "record_with_attr2 content 2",
"attr2" : null
} ],
"attr1" : null
} ]
} ]
} ]
{code}
h1. Schema In AVSC
{code:json}
{
"type" : "record",
"name" : "ObjectDataType",
"namespace" : "nifi",
"fields" : [ {
"name" : "entry",
"type" : {
"type" : "array",
"items" : {
"type" : "record",
"name" : "EntryObjectType",
"fields" : [ {
"name" : "something",
"type" : {
"type" : "array",
"items" : {
"type" : "record",
"name" : "RecordWithAttr1",
"fields" : [ {
"name" : "record_with_attr2",
"type" : [ "null", {
"type" : "array",
"items" : {
"type" : "record",
"name" : "RecordWithAttr2",
"fields" : [ {
"name" : "content_value",
"type" : [ "null", "string" ]
}, {
"name" : "attr2",
"type" : "string"
} ]
}
} ],
"default" : null
}, {
"name" : "attr1",
"type" : [ "null", "string" ],
"default" : null
} ]
}
}
} ]
}
}
} ]
}
{code}
> XMLReader not handling namespaced attributes correctly
> ------------------------------------------------------
>
> Key: NIFI-11576
> URL: https://issues.apache.org/jira/browse/NIFI-11576
> Project: Apache NiFi
> Issue Type: Bug
> Reporter: crissaegrim
> Priority: Major
>
> Supposing we have
> {code:java}
> <wd:report_data xmlns:wd="">
> <wd:record>
> <wd:foo wd:bar="bar content">foo content</wd:foo>
> </wd:record>
> </wd:report_data> {code}
> `XMLReader` recognizes `foo content` and extracts it correctly but is failing to pick up `bar content`.
> Workaround is to remove the NS for attr's: `<wd:foo bar="bar content">foo content</wd:foo>`
> More details on how to repro in comments.
--
This message was sent by Atlassian Jira
(v8.20.10#820010)