You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by mk...@apache.org on 2023/03/14 11:00:30 UTC
[solr] branch main updated: SOLR-16183: XML Loader: support indexing single nested child document (#1448)
This is an automated email from the ASF dual-hosted git repository.
mkhl pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/main by this push:
new 6bd9e2a39f6 SOLR-16183: XML Loader: support indexing single nested child document (#1448)
6bd9e2a39f6 is described below
commit 6bd9e2a39f66b7ca2b8a6caee54419a8171c82dc
Author: vinayak hegde <vi...@gmail.com>
AuthorDate: Tue Mar 14 16:30:23 2023 +0530
SOLR-16183: XML Loader: support indexing single nested child document (#1448)
* support indexing single nested child document
---------
Co-authored-by: Mikhail Khludnev <mk...@apache.org>
---
solr/CHANGES.txt | 11 +++
.../org/apache/solr/handler/loader/XMLLoader.java | 55 +++++++----
.../org/apache/solr/update/AddBlockUpdateTest.java | 104 +++++++++++++++++++++
3 files changed, 150 insertions(+), 20 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index bd6da8daa73..c33f66c9a77 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -34,6 +34,17 @@ Other Changes
Previously, the modules would come transitively.
(David Smiley)
+================== 9.3.0 ==================
+
+Improvements
+---------------------
+
+* SOLR-16183: XML update allows <doc> <doc name="child"> ... </doc> </doc>.
+ (Vinayak Hegde via Mikhail Khludnev)
+
+Bug Fixes
+---------------------
+
================== 9.2.0 ==================
New Features
diff --git a/solr/core/src/java/org/apache/solr/handler/loader/XMLLoader.java b/solr/core/src/java/org/apache/solr/handler/loader/XMLLoader.java
index 08f0bf2f00b..05dbbc2b31d 100644
--- a/solr/core/src/java/org/apache/solr/handler/loader/XMLLoader.java
+++ b/solr/core/src/java/org/apache/solr/handler/loader/XMLLoader.java
@@ -315,11 +315,17 @@ public class XMLLoader extends ContentStreamLoader {
*
* @since solr 1.3
*/
- @SuppressWarnings({"unchecked"})
public SolrInputDocument readDoc(XMLStreamReader parser) throws XMLStreamException {
+ return readDoc(parser, false); // forgiveNameAttr=false: a "name" attribute on this <doc> is still logged as an invalid attr
+ }
+
+ @SuppressWarnings({"unchecked"})
+ protected SolrInputDocument readDoc(XMLStreamReader parser, boolean forgiveNameAttr)
+ throws XMLStreamException {
SolrInputDocument doc = new SolrInputDocument();
- String attrName = "";
+ String attrName;
+ String attrVal;
for (int i = 0; i < parser.getAttributeCount(); i++) {
attrName = parser.getAttributeLocalName(i);
if ("boost".equals(attrName)) {
@@ -333,12 +339,14 @@ public class XMLLoader extends ContentStreamLoader {
log.debug(message);
}
} else {
- log.warn("XML element <doc> has invalid XML attr: {}", attrName);
+ if (!(NAME.equals(attrName) && forgiveNameAttr)) {
+ log.warn("XML element <doc> has invalid XML attr: {}", attrName);
+ }
}
}
StringBuilder text = new StringBuilder();
- String name = null;
+ String currentFieldName = null;
boolean isNull = false;
boolean isLabeledChildDoc = false;
String update = null;
@@ -368,10 +376,10 @@ public class XMLLoader extends ContentStreamLoader {
Object v = isNull ? null : text.toString();
if (update != null) {
if (updateMap == null) updateMap = new HashMap<>();
- Map<String, Object> extendedValues = updateMap.get(name);
+ Map<String, Object> extendedValues = updateMap.get(currentFieldName);
if (extendedValues == null) {
extendedValues = new HashMap<>(1);
- updateMap.put(name, extendedValues);
+ updateMap.put(currentFieldName, extendedValues);
}
Object val = extendedValues.get(update);
if (val == null) {
@@ -393,13 +401,13 @@ public class XMLLoader extends ContentStreamLoader {
}
if (!isLabeledChildDoc) {
// only add data if this is not a childDoc, since it was added already
- doc.addField(name, v);
+ doc.addField(currentFieldName, v);
} else {
// reset so next field is not treated as child doc
isLabeledChildDoc = false;
}
// field is over
- name = null;
+ currentFieldName = null;
}
break;
@@ -407,17 +415,27 @@ public class XMLLoader extends ContentStreamLoader {
text.setLength(0);
String localName = parser.getLocalName();
if ("doc".equals(localName)) {
- if (name != null) {
+ if (currentFieldName != null) { // enclosed in <field>
// flag to prevent spaces after doc from being added
isLabeledChildDoc = true;
- if (!doc.containsKey(name)) {
- doc.setField(name, Lists.newArrayList());
+ SolrInputDocument child = readDoc(parser);
+ if (doc.containsKey(currentFieldName)) {
+ doc.getField(currentFieldName).addValue(child);
+ } else {
+ final List<Object> list = new ArrayList<>(List.of(child));
+ doc.addField(currentFieldName, list);
+ }
+ } else {
+ final String subdocName = parser.getAttributeValue(null, NAME);
+ if (subdocName != null) { // <doc name=""> enclosed in <doc>
+ doc.addField(subdocName, readDoc(parser, true));
+ } else { // unnamed <doc> enclosed in <doc>
+ if (subDocs == null) {
+ subDocs = Lists.newArrayList();
+ }
+ subDocs.add(readDoc(parser));
}
- doc.addField(name, readDoc(parser));
- break;
}
- if (subDocs == null) subDocs = Lists.newArrayList();
- subDocs.add(readDoc(parser));
} else {
if (!"field".equals(localName)) {
String msg = "XML element <doc> has invalid XML child element: " + localName;
@@ -426,12 +444,11 @@ public class XMLLoader extends ContentStreamLoader {
}
update = null;
isNull = false;
- String attrVal = "";
for (int i = 0; i < parser.getAttributeCount(); i++) {
attrName = parser.getAttributeLocalName(i);
attrVal = parser.getAttributeValue(i);
if (NAME.equals(attrName)) {
- name = attrVal;
+ currentFieldName = attrVal;
} else if ("boost".equals(attrName)) {
String message =
"Ignoring field boost: "
@@ -457,9 +474,7 @@ public class XMLLoader extends ContentStreamLoader {
if (updateMap != null) {
for (Map.Entry<String, Map<String, Object>> entry : updateMap.entrySet()) {
- name = entry.getKey();
- Map<String, Object> value = entry.getValue();
- doc.addField(name, value);
+ doc.addField(entry.getKey(), entry.getValue());
}
}
diff --git a/solr/core/src/test/org/apache/solr/update/AddBlockUpdateTest.java b/solr/core/src/test/org/apache/solr/update/AddBlockUpdateTest.java
index 056a7e70ad7..385cf901d5d 100644
--- a/solr/core/src/test/org/apache/solr/update/AddBlockUpdateTest.java
+++ b/solr/core/src/test/org/apache/solr/update/AddBlockUpdateTest.java
@@ -709,6 +709,110 @@ public class AddBlockUpdateTest extends SolrTestCaseJ4 {
assertSingleParentOf(searcher, one("bc"), "A");
}
+ @Test
+ public void testXMLSingleLabeledNestedChild() throws IOException, XMLStreamException { // parses <doc name="..."> children with XMLLoader.readDoc, then indexes the result
+ UpdateRequest req = new UpdateRequest();
+
+ List<SolrInputDocument> docs = new ArrayList<>();
+
+ String xml_doc1 = // parent "1": one named child <doc> plus a "children" field wrapping two <doc>s
+ "<doc >"
+ + " <field name=\"id\">1</field>"
+ + " <field name=\"parent_s\">A</field>"
+ + " <doc name=\"single_child\">" // labeled child: <doc name=...> placed directly inside the parent <doc>
+ + " <field name=\"id\">2</field>"
+ + " <field name=\"child_s\">b</field>"
+ + " </doc>"
+ + " <field name=\"children\">" // child docs enclosed in a <field> element
+ + " <doc>"
+ + " <field name=\"id\">3</field>"
+ + " <field name=\"child_s\">c</field>"
+ + " </doc>"
+ + " <doc>"
+ + " <field name=\"id\">4</field>"
+ + " <field name=\"child_s\">d</field>"
+ + " </doc>"
+ + " </field>"
+ + "</doc>";
+
+ String xml_doc2 = // parent "5": three single child <doc>s, each under a different name
+ "<doc >"
+ + " <field name=\"id\">5</field>"
+ + " <field name=\"parent_s\">E</field>"
+ + " <doc name=\"single_child_1\">"
+ + " <field name=\"id\">6</field>"
+ + " <field name=\"child_s\">f</field>"
+ + " </doc>"
+ + " <doc name=\"single_child_2\">"
+ + " <field name=\"id\">7</field>"
+ + " <field name=\"child_s\">g</field>"
+ + " </doc>"
+ + " <doc name=\"single_child_3\">"
+ + " <field name=\"id\">8</field>"
+ + " <field name=\"child_s\">h</field>"
+ + " </doc>"
+ + "</doc>";
+
+ XMLStreamReader parser = inputFactory.createXMLStreamReader(new StringReader(xml_doc1));
+ parser.next(); // read the START document...
+ // null for the processor is all right here
+ XMLLoader loader = new XMLLoader();
+ SolrInputDocument document1 = loader.readDoc(parser);
+
+ XMLStreamReader parser2 = inputFactory.createXMLStreamReader(new StringReader(xml_doc2));
+ parser2.next(); // read the START document...
+ // null for the processor is all right here
+ // the XMLLoader instance created above is reused for the second document
+ SolrInputDocument document2 = loader.readDoc(parser2);
+
+ assertFalse(document1.hasChildDocuments()); // the children are expected as field values, not as anonymous child documents
+ assertEquals( // NOTE(review): the parsed value is passed first; JUnit's convention is (expected, actual)
+ document1.toString(),
+ sdoc(
+ "id",
+ "1",
+ "parent_s",
+ "A",
+ "single_child",
+ sdoc("id", "2", "child_s", "b"),
+ "children",
+ sdocs(sdoc("id", "3", "child_s", "c"), sdoc("id", "4", "child_s", "d")))
+ .toString());
+
+ assertFalse(document2.hasChildDocuments());
+ assertEquals( // NOTE(review): same (actual, expected) argument order as the comparison for document1
+ document2.toString(),
+ sdoc(
+ "id",
+ "5",
+ "parent_s",
+ "E",
+ "single_child_1",
+ sdoc("id", "6", "child_s", "f"),
+ "single_child_2",
+ sdoc("id", "7", "child_s", "g"),
+ "single_child_3",
+ sdoc("id", "8", "child_s", "h"))
+ .toString());
+
+ docs.add(document1);
+ docs.add(document2);
+
+ Collections.shuffle(docs, random()); // randomize the order in which the two parents are added to the request
+ req.add(docs);
+
+ RequestWriter requestWriter = new RequestWriter();
+ OutputStream os = new ByteArrayOutputStream();
+ requestWriter.write(req, os); // serialize the update request into the in-memory stream
+ assertBlockU(os.toString()); // presumably submits the serialized request as an update; helper is defined outside this hunk
+ assertU(commit());
+
+ final SolrIndexSearcher searcher = getSearcher();
+ assertSingleParentOf(searcher, "b", "A"); // presumably: child_s "b" resolves to the single parent with parent_s "A" - confirm against the helper
+ assertSingleParentOf(searcher, one("bcd"), "A");
+ assertSingleParentOf(searcher, one("fgh"), "E");
+ }
+
@Test
public void testJavaBinCodecNestedRelation() throws IOException {
SolrInputDocument topDocument = new SolrInputDocument();