You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by da...@apache.org on 2018/08/10 09:13:52 UTC

[12/31] lucene-solr:jira/http2: SOLR-12485: Solr-Update XML format now accepts child documents under a field thus providing a semantic relationship. (like allowed for JSON). Improved XML.java a bit Closes #430

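SOLR-12485: The Solr update XML format now accepts child documents nested under a field,
thus giving the parent-child relationship a semantic label (as is already allowed for JSON).
Also refactored XML.java so that the writeXML overloads and writeUnescapedXML delegate to a single Writable-based implementation.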
Closes #430


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/38bf976c
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/38bf976c
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/38bf976c

Branch: refs/heads/jira/http2
Commit: 38bf976cd4b9e324c21664bd7ae3d554df803705
Parents: c3887b3
Author: Moshe <mo...@mail.com>
Authored: Tue Aug 7 13:52:11 2018 -0400
Committer: David Smiley <ds...@apache.org>
Committed: Tue Aug 7 13:52:11 2018 -0400

----------------------------------------------------------------------
 solr/CHANGES.txt                                |   3 +
 .../apache/solr/handler/loader/XMLLoader.java   |  18 ++-
 .../apache/solr/update/AddBlockUpdateTest.java  | 148 +++++++++++++++++++
 .../solr/client/solrj/util/ClientUtils.java     |  20 ++-
 .../java/org/apache/solr/common/util/XML.java   |  98 +++---------
 .../org/apache/solr/util/BaseTestHarness.java   |  23 +--
 6 files changed, 215 insertions(+), 95 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/38bf976c/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 94960bb..3ede98b 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -150,6 +150,9 @@ New Features
 
 * SOLR-12592: support #EQUAL function, range operator, decimal and percentage in cores in autoscaling policies (noble)
 
+* SOLR-12485: Uploading docs in XML now supports child documents as field values, thus providing a label to the
+  relationship instead of the current "anonymous" relationship. (Moshe Bla, David Smiley)
+
 Bug Fixes
 ----------------------
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/38bf976c/solr/core/src/java/org/apache/solr/handler/loader/XMLLoader.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/loader/XMLLoader.java b/solr/core/src/java/org/apache/solr/handler/loader/XMLLoader.java
index a07aff2..724a40c 100644
--- a/solr/core/src/java/org/apache/solr/handler/loader/XMLLoader.java
+++ b/solr/core/src/java/org/apache/solr/handler/loader/XMLLoader.java
@@ -404,6 +404,7 @@ public class XMLLoader extends ContentStreamLoader {
     StringBuilder text = new StringBuilder();
     String name = null;
     boolean isNull = false;
+    boolean isLabeledChildDoc = false;
     String update = null;
     Collection<SolrInputDocument> subDocs = null;
     Map<String, Map<String, Object>> updateMap = null;
@@ -453,7 +454,13 @@ public class XMLLoader extends ContentStreamLoader {
               }
               break;
             }
-            doc.addField(name, v);
+            if(!isLabeledChildDoc){
+              // only add data if this is not a childDoc, since it was added already
+              doc.addField(name, v);
+            } else {
+              // reset so next field is not treated as child doc
+              isLabeledChildDoc = false;
+            }
             // field is over
             name = null;
           }
@@ -463,6 +470,15 @@ public class XMLLoader extends ContentStreamLoader {
           text.setLength(0);
           String localName = parser.getLocalName();
           if ("doc".equals(localName)) {
+            if(name != null) {
+              // flag to prevent spaces after doc from being added
+              isLabeledChildDoc = true;
+              if(!doc.containsKey(name)) {
+                doc.setField(name, Lists.newArrayList());
+              }
+              doc.addField(name, readDoc(parser));
+              break;
+            }
             if (subDocs == null)
               subDocs = Lists.newArrayList();
             subDocs.add(readDoc(parser));

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/38bf976c/solr/core/src/test/org/apache/solr/update/AddBlockUpdateTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/update/AddBlockUpdateTest.java b/solr/core/src/test/org/apache/solr/update/AddBlockUpdateTest.java
index a302d58..50c0c39 100644
--- a/solr/core/src/test/org/apache/solr/update/AddBlockUpdateTest.java
+++ b/solr/core/src/test/org/apache/solr/update/AddBlockUpdateTest.java
@@ -502,6 +502,154 @@ public class AddBlockUpdateTest extends SolrTestCaseJ4 {
   }
 
   @Test
+  public void testXMLMultiLevelLabeledChildren() throws IOException, XMLStreamException {
+    String xml_doc1 =
+        "<doc >" +
+            "  <field name=\"id\">1</field>" +
+            "  <field name=\"empty_s\"></field>" +
+            "  <field name=\"parent_s\">X</field>" +
+            "  <field name=\"test\">" +
+            "    <doc>  " +
+            "      <field name=\"id\" >2</field>" +
+            "      <field name=\"child_s\">y</field>" +
+            "    </doc>" +
+            "    <doc>  " +
+            "      <field name=\"id\" >3</field>" +
+            "      <field name=\"child_s\">z</field>" +
+            "    </doc>" +
+            "  </field> " +
+            "</doc>";
+
+    String xml_doc2 =
+        "<doc >" +
+            "  <field name=\"id\">4</field>" +
+            "  <field name=\"parent_s\">A</field>" +
+            "  <field name=\"test\">" +
+            "    <doc>  " +
+            "      <field name=\"id\" >5</field>" +
+            "      <field name=\"child_s\">b</field>" +
+            "      <field name=\"grandChild\">" +
+            "        <doc>  " +
+            "          <field name=\"id\" >7</field>" +
+            "          <field name=\"child_s\">d</field>" +
+            "        </doc>" +
+            "      </field>" +
+            "    </doc>" +
+            "  </field>" +
+            "  <field name=\"test\">" +
+            "    <doc>  " +
+            "      <field name=\"id\" >6</field>" +
+            "      <field name=\"child_s\">c</field>" +
+            "    </doc>" +
+            "  </field> " +
+            "</doc>";
+
+    XMLStreamReader parser =
+        inputFactory.createXMLStreamReader(new StringReader(xml_doc1));
+    parser.next(); // read the START document...
+    //null for the processor is all right here
+    XMLLoader loader = new XMLLoader();
+    SolrInputDocument document1 = loader.readDoc(parser);
+
+    XMLStreamReader parser2 =
+        inputFactory.createXMLStreamReader(new StringReader(xml_doc2));
+    parser2.next(); // read the START document...
+    //null for the processor is all right here
+    //XMLLoader loader = new XMLLoader();
+    SolrInputDocument document2 = loader.readDoc(parser2);
+
+    assertFalse(document1.hasChildDocuments());
+    assertEquals(document1.toString(), sdoc("id", "1", "empty_s", "", "parent_s", "X", "test",
+        sdocs(sdoc("id", "2", "child_s", "y"), sdoc("id", "3", "child_s", "z"))).toString());
+
+    assertFalse(document2.hasChildDocuments());
+    assertEquals(document2.toString(), sdoc("id", "4", "parent_s", "A", "test",
+        sdocs(sdoc("id", "5", "child_s", "b", "grandChild", Collections.singleton(sdoc("id", "7", "child_s", "d"))),
+            sdoc("id", "6", "child_s", "c"))).toString());
+  }
+
+  @Test
+  public void testXMLLabeledChildren() throws IOException, XMLStreamException {
+    UpdateRequest req = new UpdateRequest();
+
+    List<SolrInputDocument> docs = new ArrayList<>();
+
+    String xml_doc1 =
+        "<doc >" +
+            "  <field name=\"id\">1</field>" +
+            "  <field name=\"empty_s\"></field>" +
+            "  <field name=\"parent_s\">X</field>" +
+            "  <field name=\"test\">" +
+            "    <doc>  " +
+            "      <field name=\"id\" >2</field>" +
+            "      <field name=\"child_s\">y</field>" +
+            "    </doc>"+
+            "    <doc>  " +
+            "      <field name=\"id\" >3</field>" +
+            "      <field name=\"child_s\">z</field>" +
+            "    </doc>" +
+            "  </field> " +
+            "</doc>";
+
+    String xml_doc2 =
+        "<doc >" +
+            "  <field name=\"id\">4</field>" +
+            "  <field name=\"parent_s\">A</field>" +
+            "  <field name=\"test\">" +
+            "    <doc>  " +
+            "      <field name=\"id\" >5</field>" +
+            "      <field name=\"child_s\">b</field>" +
+            "    </doc>"+
+            "  </field>" +
+            "  <field name=\"test\">" +
+            "    <doc>  " +
+            "      <field name=\"id\" >6</field>" +
+            "      <field name=\"child_s\">c</field>" +
+            "    </doc>" +
+            "  </field> " +
+            "</doc>";
+
+    XMLStreamReader parser =
+        inputFactory.createXMLStreamReader( new StringReader( xml_doc1 ) );
+    parser.next(); // read the START document...
+    //null for the processor is all right here
+    XMLLoader loader = new XMLLoader();
+    SolrInputDocument document1 = loader.readDoc( parser );
+
+    XMLStreamReader parser2 =
+        inputFactory.createXMLStreamReader( new StringReader( xml_doc2 ) );
+    parser2.next(); // read the START document...
+    //null for the processor is all right here
+    //XMLLoader loader = new XMLLoader();
+    SolrInputDocument document2 = loader.readDoc( parser2 );
+
+    assertFalse(document1.hasChildDocuments());
+    assertEquals(document1.toString(), sdoc("id", "1", "empty_s", "", "parent_s", "X", "test",
+        sdocs(sdoc("id", "2", "child_s", "y"), sdoc("id", "3", "child_s", "z"))).toString());
+
+    assertFalse(document2.hasChildDocuments());
+    assertEquals(document2.toString(), sdoc("id", "4", "parent_s", "A", "test",
+        sdocs(sdoc("id", "5", "child_s", "b"), sdoc("id", "6", "child_s", "c"))).toString());
+
+    docs.add(document1);
+    docs.add(document2);
+
+    Collections.shuffle(docs, random());
+    req.add(docs);
+
+    RequestWriter requestWriter = new RequestWriter();
+    OutputStream os = new ByteArrayOutputStream();
+    requestWriter.write(req, os);
+    assertBlockU(os.toString());
+    assertU(commit());
+
+    final SolrIndexSearcher searcher = getSearcher();
+    assertSingleParentOf(searcher, one("yz"), "X");
+    assertSingleParentOf(searcher, one("bc"), "A");
+
+  }
+
+  @Test
   public void testJavaBinCodecNestedRelation() throws IOException {
     SolrInputDocument topDocument = new SolrInputDocument();
     topDocument.addField("parent_f1", "v1");

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/38bf976c/solr/solrj/src/java/org/apache/solr/client/solrj/util/ClientUtils.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/util/ClientUtils.java b/solr/solrj/src/java/org/apache/solr/client/solrj/util/ClientUtils.java
index 54986db..26a188d 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/util/ClientUtils.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/util/ClientUtils.java
@@ -72,7 +72,9 @@ public class ClientUtils
       for( Object v : field ) {
         String update = null;
 
-        if (v instanceof Map) {
+        if(v instanceof SolrInputDocument) {
+          writeVal(writer, name, v , null);
+        } else if (v instanceof Map) {
           // currently only supports a single value
           for (Entry<Object,Object> entry : ((Map<Object,Object>)v).entrySet()) {
             update = entry.getKey().toString();
@@ -112,20 +114,28 @@ public class ClientUtils
       v = Base64.byteArrayToBase64(bytes.array(), bytes.position(),bytes.limit() - bytes.position());
     }
 
+    XML.Writable valWriter = null;
+    if(v instanceof SolrInputDocument) {
+      final SolrInputDocument solrDoc = (SolrInputDocument) v;
+      valWriter = (writer1) -> writeXML(solrDoc, writer1);
+    } else if(v != null) {
+      final Object val = v;
+      valWriter = (writer1) -> XML.escapeCharData(val.toString(), writer1);
+    }
+
     if (update == null) {
       if (v != null) {
-        XML.writeXML(writer, "field", v.toString(), "name", name );
+        XML.writeXML(writer, "field", valWriter, "name", name);
       }
     } else {
       if (v == null)  {
-        XML.writeXML(writer, "field", null, "name", name, "update", update, "null", true);
+        XML.writeXML(writer, "field", (XML.Writable) null, "name", name, "update", update, "null", true);
       } else  {
-        XML.writeXML(writer, "field", v.toString(), "name", name, "update", update);
+        XML.writeXML(writer, "field", valWriter, "name", name, "update", update);
       }
     }
   }
 
-
   public static String toXML( SolrInputDocument doc )
   {
     StringWriter str = new StringWriter();

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/38bf976c/solr/solrj/src/java/org/apache/solr/common/util/XML.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/common/util/XML.java b/solr/solrj/src/java/org/apache/solr/common/util/XML.java
index c6e5205..9d1b8a8 100644
--- a/solr/solrj/src/java/org/apache/solr/common/util/XML.java
+++ b/solr/solrj/src/java/org/apache/solr/common/util/XML.java
@@ -16,9 +16,10 @@
  */
 package org.apache.solr.common.util;
 
-import java.io.Writer;
 import java.io.IOException;
+import java.io.Writer;
 import java.util.Map;
+import java.util.stream.Stream;
 
 /**
  *
@@ -37,9 +38,7 @@ public class XML {
   private static final String[] attribute_escapes=
   {"#0;","#1;","#2;","#3;","#4;","#5;","#6;","#7;","#8;",null,null,"#11;","#12;",null,"#14;","#15;","#16;","#17;","#18;","#19;","#20;","#21;","#22;","#23;","#24;","#25;","#26;","#27;","#28;","#29;","#30;","#31;",null,null,"&quot;",null,null,null,"&amp;",null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,"&lt;"};
 
-
-
-  /*****************************************
+  /*
    #Simple python script used to generate the escape table above.  -YCS
    #
    #use individual char arrays or one big char array for better efficiency
@@ -65,13 +64,9 @@ public class XML {
      result += val + ','
 
    print result
-   ****************************************/
+  */
 
 
-/*********
- *
- * @throws IOException If there is a low-level I/O error.
- */
   public static void escapeCharData(String str, Writer out) throws IOException {
     escape(str, out, chardata_escapes);
   }
@@ -84,53 +79,28 @@ public class XML {
     escape(chars, start, length, out, attribute_escapes);
   }
 
+  /** does NOT escape character data in val; it must already be valid XML.  Attributes are always escaped. */
+  public final static void writeUnescapedXML(Writer out, String tag, String val, Object... attrs) throws IOException {
+    writeXML(out, tag, (writer1) -> writer1.write(val), attrs);
+  }
 
-  public final static void writeXML(Writer out, String tag, String val) throws IOException {
-    out.write('<');
-    out.write(tag);
-    if (val == null) {
-      out.write('/');
-      out.write('>');
-    } else {
-      out.write('>');
-      escapeCharData(val,out);
-      out.write('<');
-      out.write('/');
-      out.write(tag);
-      out.write('>');
-    }
+  /** escapes character data in val and attributes */
+  public final static void writeXML(Writer out, String tag, String val, Object... attrs) throws IOException {
+    final Writable writable = val != null ? (writer1) -> XML.escapeCharData(val, writer1) : null;
+    writeXML(out, tag, writable, attrs);
   }
 
-  /** does NOT escape character data in val, must already be valid XML */
-  public final static void writeUnescapedXML(Writer out, String tag, String val, Object... attrs) throws IOException {
-    out.write('<');
-    out.write(tag);
-    for (int i=0; i<attrs.length; i++) {
-      out.write(' ');
-      out.write(attrs[i++].toString());
-      out.write('=');
-      out.write('"');
-      out.write(attrs[i].toString());
-      out.write('"');
-    }
-    if (val == null) {
-      out.write('/');
-      out.write('>');
-    } else {
-      out.write('>');
-      out.write(val);
-      out.write('<');
-      out.write('/');
-      out.write(tag);
-      out.write('>');
-    }
+  /** escapes character data in val and attributes */
+  public static void writeXML(Writer out, String tag, String val, Map<String, String> attrs) throws IOException {
+    writeXML(out, tag, val, attrs.entrySet().stream().flatMap((entry) -> Stream.of(entry.getKey(), entry.getValue())).toArray());
   }
 
-  /** escapes character data in val */
-  public final static void writeXML(Writer out, String tag, String val, Object... attrs) throws IOException {
+  /** @lucene.internal */
+  public final static void writeXML(Writer out, String tag, Writable valWritable, Object... attrs) throws IOException {
     out.write('<');
     out.write(tag);
-    for (int i=0; i<attrs.length; i++) {
+    final int attrsLen = attrs == null ? 0 : attrs.length;
+    for (int i = 0; i< attrsLen; i++) {
       out.write(' ');
       out.write(attrs[i++].toString());
       out.write('=');
@@ -138,12 +108,12 @@ public class XML {
       escapeAttributeValue(attrs[i].toString(), out);
       out.write('"');
     }
-    if (val == null) {
+    if (valWritable == null) {
       out.write('/');
       out.write('>');
     } else {
       out.write('>');
-      escapeCharData(val,out);
+      valWritable.write(out);
       out.write('<');
       out.write('/');
       out.write(tag);
@@ -151,29 +121,9 @@ public class XML {
     }
   }
 
-  /** escapes character data in val */
-  public static void writeXML(Writer out, String tag, String val, Map<String, String> attrs) throws IOException {
-    out.write('<');
-    out.write(tag);
-    for (Map.Entry<String, String> entry : attrs.entrySet()) {
-      out.write(' ');
-      out.write(entry.getKey());
-      out.write('=');
-      out.write('"');
-      escapeAttributeValue(entry.getValue(), out);
-      out.write('"');
-    }
-    if (val == null) {
-      out.write('/');
-      out.write('>');
-    } else {
-      out.write('>');
-      escapeCharData(val,out);
-      out.write('<');
-      out.write('/');
-      out.write(tag);
-      out.write('>');
-    }
+  @FunctionalInterface
+  public interface Writable {
+    void write(Writer w) throws IOException;
   }
 
   private static void escape(char [] chars, int offset, int length, Writer out, String [] escapes) throws IOException{

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/38bf976c/solr/test-framework/src/java/org/apache/solr/util/BaseTestHarness.java
----------------------------------------------------------------------
diff --git a/solr/test-framework/src/java/org/apache/solr/util/BaseTestHarness.java b/solr/test-framework/src/java/org/apache/solr/util/BaseTestHarness.java
index a84d6d1..ad1d38e 100644
--- a/solr/test-framework/src/java/org/apache/solr/util/BaseTestHarness.java
+++ b/solr/test-framework/src/java/org/apache/solr/util/BaseTestHarness.java
@@ -15,11 +15,6 @@
  * limitations under the License.
  */
 package org.apache.solr.util;
-import org.apache.solr.common.SolrException;
-import org.apache.solr.common.util.XML;
-import org.w3c.dom.Document;
-import org.xml.sax.SAXException;
-
 import javax.xml.namespace.QName;
 import javax.xml.parsers.DocumentBuilder;
 import javax.xml.parsers.DocumentBuilderFactory;
@@ -27,13 +22,17 @@ import javax.xml.xpath.XPath;
 import javax.xml.xpath.XPathConstants;
 import javax.xml.xpath.XPathExpressionException;
 import javax.xml.xpath.XPathFactory;
-
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.StringWriter;
 import java.io.UnsupportedEncodingException;
 import java.nio.charset.StandardCharsets;
 
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.util.XML;
+import org.w3c.dom.Document;
+import org.xml.sax.SAXException;
+
 abstract public class BaseTestHarness {
   private static final ThreadLocal<DocumentBuilder> builderTL = new ThreadLocal<>();
   private static final ThreadLocal<XPath> xpathTL = new ThreadLocal<>();
@@ -200,15 +199,9 @@ abstract public class BaseTestHarness {
 
   public static String simpleTag(String tag, String... args) {
     try {
-      StringWriter r = new StringWriter();
-
-      // this is annoying
-      if (null == args || 0 == args.length) {
-        XML.writeXML(r, tag, null);
-      } else {
-        XML.writeXML(r, tag, null, (Object[])args);
-      }
-      return r.getBuffer().toString();
+      StringWriter writer = new StringWriter();
+      XML.writeXML(writer, tag, (String) null, (Object[])args);
+      return writer.getBuffer().toString();
     } catch (IOException e) {
       throw new RuntimeException
           ("this should never happen with a StringWriter", e);