You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by mk...@apache.org on 2023/03/14 11:00:30 UTC

[solr] branch main updated: SOLR-16183: XML Loader: support indexing single nested child document (#1448)

This is an automated email from the ASF dual-hosted git repository.

mkhl pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/main by this push:
     new 6bd9e2a39f6 SOLR-16183: XML Loader: support indexing single nested child document (#1448)
6bd9e2a39f6 is described below

commit 6bd9e2a39f66b7ca2b8a6caee54419a8171c82dc
Author: vinayak hegde <vi...@gmail.com>
AuthorDate: Tue Mar 14 16:30:23 2023 +0530

    SOLR-16183: XML Loader: support indexing single nested child document (#1448)
    
    * support indexing single nested child document
    
    ---------
    
    Co-authored-by: Mikhail Khludnev <mk...@apache.org>
---
 solr/CHANGES.txt                                   |  11 +++
 .../org/apache/solr/handler/loader/XMLLoader.java  |  55 +++++++----
 .../org/apache/solr/update/AddBlockUpdateTest.java | 104 +++++++++++++++++++++
 3 files changed, 150 insertions(+), 20 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index bd6da8daa73..c33f66c9a77 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -34,6 +34,17 @@ Other Changes
   Previously, the modules would come transitively.
   (David Smiley)
 
+==================  9.3.0 ==================
+
+Improvements
+---------------------
+
+* SOLR-16183: XML update allows <doc> <doc name="child"> ... </doc> </doc>.
+  (Vinayak Hegde via Mikhail Khludnev)
+
+Bug Fixes
+---------------------
+
 ==================  9.2.0 ==================
 
 New Features
diff --git a/solr/core/src/java/org/apache/solr/handler/loader/XMLLoader.java b/solr/core/src/java/org/apache/solr/handler/loader/XMLLoader.java
index 08f0bf2f00b..05dbbc2b31d 100644
--- a/solr/core/src/java/org/apache/solr/handler/loader/XMLLoader.java
+++ b/solr/core/src/java/org/apache/solr/handler/loader/XMLLoader.java
@@ -315,11 +315,17 @@ public class XMLLoader extends ContentStreamLoader {
    *
    * @since solr 1.3
    */
-  @SuppressWarnings({"unchecked"})
   public SolrInputDocument readDoc(XMLStreamReader parser) throws XMLStreamException {
+    return readDoc(parser, false);
+  }
+
+  @SuppressWarnings({"unchecked"})
+  protected SolrInputDocument readDoc(XMLStreamReader parser, boolean forgiveNameAttr)
+      throws XMLStreamException {
     SolrInputDocument doc = new SolrInputDocument();
 
-    String attrName = "";
+    String attrName;
+    String attrVal;
     for (int i = 0; i < parser.getAttributeCount(); i++) {
       attrName = parser.getAttributeLocalName(i);
       if ("boost".equals(attrName)) {
@@ -333,12 +339,14 @@ public class XMLLoader extends ContentStreamLoader {
           log.debug(message);
         }
       } else {
-        log.warn("XML element <doc> has invalid XML attr: {}", attrName);
+        if (!(NAME.equals(attrName) && forgiveNameAttr)) {
+          log.warn("XML element <doc> has invalid XML attr: {}", attrName);
+        }
       }
     }
 
     StringBuilder text = new StringBuilder();
-    String name = null;
+    String currentFieldName = null;
     boolean isNull = false;
     boolean isLabeledChildDoc = false;
     String update = null;
@@ -368,10 +376,10 @@ public class XMLLoader extends ContentStreamLoader {
             Object v = isNull ? null : text.toString();
             if (update != null) {
               if (updateMap == null) updateMap = new HashMap<>();
-              Map<String, Object> extendedValues = updateMap.get(name);
+              Map<String, Object> extendedValues = updateMap.get(currentFieldName);
               if (extendedValues == null) {
                 extendedValues = new HashMap<>(1);
-                updateMap.put(name, extendedValues);
+                updateMap.put(currentFieldName, extendedValues);
               }
               Object val = extendedValues.get(update);
               if (val == null) {
@@ -393,13 +401,13 @@ public class XMLLoader extends ContentStreamLoader {
             }
             if (!isLabeledChildDoc) {
               // only add data if this is not a childDoc, since it was added already
-              doc.addField(name, v);
+              doc.addField(currentFieldName, v);
             } else {
               // reset so next field is not treated as child doc
               isLabeledChildDoc = false;
             }
             // field is over
-            name = null;
+            currentFieldName = null;
           }
           break;
 
@@ -407,17 +415,27 @@ public class XMLLoader extends ContentStreamLoader {
           text.setLength(0);
           String localName = parser.getLocalName();
           if ("doc".equals(localName)) {
-            if (name != null) {
+            if (currentFieldName != null) { // enclosed in <field>
               // flag to prevent spaces after doc from being added
               isLabeledChildDoc = true;
-              if (!doc.containsKey(name)) {
-                doc.setField(name, Lists.newArrayList());
+              SolrInputDocument child = readDoc(parser);
+              if (doc.containsKey(currentFieldName)) {
+                doc.getField(currentFieldName).addValue(child);
+              } else {
+                final List<Object> list = new ArrayList<>(List.of(child));
+                doc.addField(currentFieldName, list);
+              }
+            } else {
+              final String subdocName = parser.getAttributeValue(null, NAME);
+              if (subdocName != null) { // <doc name=""> enclosed in <doc>
+                doc.addField(subdocName, readDoc(parser, true));
+              } else { // unnamed <doc> enclosed in <doc>
+                if (subDocs == null) {
+                  subDocs = Lists.newArrayList();
+                }
+                subDocs.add(readDoc(parser));
               }
-              doc.addField(name, readDoc(parser));
-              break;
             }
-            if (subDocs == null) subDocs = Lists.newArrayList();
-            subDocs.add(readDoc(parser));
           } else {
             if (!"field".equals(localName)) {
               String msg = "XML element <doc> has invalid XML child element: " + localName;
@@ -426,12 +444,11 @@ public class XMLLoader extends ContentStreamLoader {
             }
             update = null;
             isNull = false;
-            String attrVal = "";
             for (int i = 0; i < parser.getAttributeCount(); i++) {
               attrName = parser.getAttributeLocalName(i);
               attrVal = parser.getAttributeValue(i);
               if (NAME.equals(attrName)) {
-                name = attrVal;
+                currentFieldName = attrVal;
               } else if ("boost".equals(attrName)) {
                 String message =
                     "Ignoring field boost: "
@@ -457,9 +474,7 @@ public class XMLLoader extends ContentStreamLoader {
 
     if (updateMap != null) {
       for (Map.Entry<String, Map<String, Object>> entry : updateMap.entrySet()) {
-        name = entry.getKey();
-        Map<String, Object> value = entry.getValue();
-        doc.addField(name, value);
+        doc.addField(entry.getKey(), entry.getValue());
       }
     }
 
diff --git a/solr/core/src/test/org/apache/solr/update/AddBlockUpdateTest.java b/solr/core/src/test/org/apache/solr/update/AddBlockUpdateTest.java
index 056a7e70ad7..385cf901d5d 100644
--- a/solr/core/src/test/org/apache/solr/update/AddBlockUpdateTest.java
+++ b/solr/core/src/test/org/apache/solr/update/AddBlockUpdateTest.java
@@ -709,6 +709,110 @@ public class AddBlockUpdateTest extends SolrTestCaseJ4 {
     assertSingleParentOf(searcher, one("bc"), "A");
   }
 
+  @Test
+  public void testXMLSingleLabeledNestedChild() throws IOException, XMLStreamException {
+    UpdateRequest req = new UpdateRequest();
+
+    List<SolrInputDocument> docs = new ArrayList<>();
+
+    String xml_doc1 =
+        "<doc >"
+            + "  <field name=\"id\">1</field>"
+            + "  <field name=\"parent_s\">A</field>"
+            + "  <doc name=\"single_child\">"
+            + "    <field name=\"id\">2</field>"
+            + "    <field name=\"child_s\">b</field>"
+            + "  </doc>"
+            + "  <field name=\"children\">"
+            + "    <doc>"
+            + "      <field name=\"id\">3</field>"
+            + "      <field name=\"child_s\">c</field>"
+            + "    </doc>"
+            + "    <doc>"
+            + "      <field name=\"id\">4</field>"
+            + "      <field name=\"child_s\">d</field>"
+            + "    </doc>"
+            + "  </field>"
+            + "</doc>";
+
+    String xml_doc2 =
+        "<doc >"
+            + "  <field name=\"id\">5</field>"
+            + "  <field name=\"parent_s\">E</field>"
+            + "  <doc name=\"single_child_1\">"
+            + "    <field name=\"id\">6</field>"
+            + "    <field name=\"child_s\">f</field>"
+            + "  </doc>"
+            + "  <doc name=\"single_child_2\">"
+            + "    <field name=\"id\">7</field>"
+            + "    <field name=\"child_s\">g</field>"
+            + "  </doc>"
+            + "  <doc name=\"single_child_3\">"
+            + "    <field name=\"id\">8</field>"
+            + "    <field name=\"child_s\">h</field>"
+            + "  </doc>"
+            + "</doc>";
+
+    XMLStreamReader parser = inputFactory.createXMLStreamReader(new StringReader(xml_doc1));
+    parser.next(); // advance from START_DOCUMENT to the root <doc> start tag
+    // the loader's processor stays null, which is fine because only readDoc is called on it
+    XMLLoader loader = new XMLLoader();
+    SolrInputDocument document1 = loader.readDoc(parser);
+
+    XMLStreamReader parser2 = inputFactory.createXMLStreamReader(new StringReader(xml_doc2));
+    parser2.next(); // advance from START_DOCUMENT to the root <doc> start tag
+    // reuse the loader created above rather than constructing a second XMLLoader;
+    // a null processor is still all right for readDoc
+    SolrInputDocument document2 = loader.readDoc(parser2);
+
+    assertFalse(document1.hasChildDocuments());
+    assertEquals(
+        document1.toString(),
+        sdoc(
+                "id",
+                "1",
+                "parent_s",
+                "A",
+                "single_child",
+                sdoc("id", "2", "child_s", "b"),
+                "children",
+                sdocs(sdoc("id", "3", "child_s", "c"), sdoc("id", "4", "child_s", "d")))
+            .toString());
+
+    assertFalse(document2.hasChildDocuments());
+    assertEquals(
+        document2.toString(),
+        sdoc(
+                "id",
+                "5",
+                "parent_s",
+                "E",
+                "single_child_1",
+                sdoc("id", "6", "child_s", "f"),
+                "single_child_2",
+                sdoc("id", "7", "child_s", "g"),
+                "single_child_3",
+                sdoc("id", "8", "child_s", "h"))
+            .toString());
+
+    docs.add(document1);
+    docs.add(document2);
+
+    Collections.shuffle(docs, random());
+    req.add(docs);
+
+    RequestWriter requestWriter = new RequestWriter();
+    OutputStream os = new ByteArrayOutputStream();
+    requestWriter.write(req, os);
+    assertBlockU(os.toString());
+    assertU(commit());
+
+    final SolrIndexSearcher searcher = getSearcher();
+    assertSingleParentOf(searcher, "b", "A");
+    assertSingleParentOf(searcher, one("bcd"), "A");
+    assertSingleParentOf(searcher, one("fgh"), "E");
+  }
+
   @Test
   public void testJavaBinCodecNestedRelation() throws IOException {
     SolrInputDocument topDocument = new SolrInputDocument();