You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ab...@apache.org on 2018/08/09 19:27:05 UTC
[03/17] lucene-solr:jira/solr-12470: SOLR-12485: Solr-Update XML
format now accepts child documents under a field thus providing a semantic
relationship (like allowed for JSON). Improved XML.java a bit. Closes #430.
SOLR-12485: Solr-Update XML format now accepts child documents under a field
thus providing a semantic relationship. (like allowed for JSON).
Improved XML.java a bit
Closes #430
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/38bf976c
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/38bf976c
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/38bf976c
Branch: refs/heads/jira/solr-12470
Commit: 38bf976cd4b9e324c21664bd7ae3d554df803705
Parents: c3887b3
Author: Moshe <mo...@mail.com>
Authored: Tue Aug 7 13:52:11 2018 -0400
Committer: David Smiley <ds...@apache.org>
Committed: Tue Aug 7 13:52:11 2018 -0400
----------------------------------------------------------------------
solr/CHANGES.txt | 3 +
.../apache/solr/handler/loader/XMLLoader.java | 18 ++-
.../apache/solr/update/AddBlockUpdateTest.java | 148 +++++++++++++++++++
.../solr/client/solrj/util/ClientUtils.java | 20 ++-
.../java/org/apache/solr/common/util/XML.java | 98 +++---------
.../org/apache/solr/util/BaseTestHarness.java | 23 +--
6 files changed, 215 insertions(+), 95 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/38bf976c/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 94960bb..3ede98b 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -150,6 +150,9 @@ New Features
* SOLR-12592: support #EQUAL function, range operator, decimal and percentage in cores in autoscaling policies (noble)
+* SOLR-12485: Uploading docs in XML now supports child documents as field values, thus providing a label to the
+ relationship instead of the current "anonymous" relationship. (Moshe Bla, David Smiley)
+
Bug Fixes
----------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/38bf976c/solr/core/src/java/org/apache/solr/handler/loader/XMLLoader.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/loader/XMLLoader.java b/solr/core/src/java/org/apache/solr/handler/loader/XMLLoader.java
index a07aff2..724a40c 100644
--- a/solr/core/src/java/org/apache/solr/handler/loader/XMLLoader.java
+++ b/solr/core/src/java/org/apache/solr/handler/loader/XMLLoader.java
@@ -404,6 +404,7 @@ public class XMLLoader extends ContentStreamLoader {
StringBuilder text = new StringBuilder();
String name = null;
boolean isNull = false;
+ boolean isLabeledChildDoc = false;
String update = null;
Collection<SolrInputDocument> subDocs = null;
Map<String, Map<String, Object>> updateMap = null;
@@ -453,7 +454,13 @@ public class XMLLoader extends ContentStreamLoader {
}
break;
}
- doc.addField(name, v);
+ if(!isLabeledChildDoc){
+ // only add data if this is not a childDoc, since it was added already
+ doc.addField(name, v);
+ } else {
+ // reset so next field is not treated as child doc
+ isLabeledChildDoc = false;
+ }
// field is over
name = null;
}
@@ -463,6 +470,15 @@ public class XMLLoader extends ContentStreamLoader {
text.setLength(0);
String localName = parser.getLocalName();
if ("doc".equals(localName)) {
+ if(name != null) {
+ // flag to prevent spaces after doc from being added
+ isLabeledChildDoc = true;
+ if(!doc.containsKey(name)) {
+ doc.setField(name, Lists.newArrayList());
+ }
+ doc.addField(name, readDoc(parser));
+ break;
+ }
if (subDocs == null)
subDocs = Lists.newArrayList();
subDocs.add(readDoc(parser));
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/38bf976c/solr/core/src/test/org/apache/solr/update/AddBlockUpdateTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/update/AddBlockUpdateTest.java b/solr/core/src/test/org/apache/solr/update/AddBlockUpdateTest.java
index a302d58..50c0c39 100644
--- a/solr/core/src/test/org/apache/solr/update/AddBlockUpdateTest.java
+++ b/solr/core/src/test/org/apache/solr/update/AddBlockUpdateTest.java
@@ -502,6 +502,154 @@ public class AddBlockUpdateTest extends SolrTestCaseJ4 {
}
@Test
+ public void testXMLMultiLevelLabeledChildren() throws IOException, XMLStreamException {
+ String xml_doc1 =
+ "<doc >" +
+ " <field name=\"id\">1</field>" +
+ " <field name=\"empty_s\"></field>" +
+ " <field name=\"parent_s\">X</field>" +
+ " <field name=\"test\">" +
+ " <doc> " +
+ " <field name=\"id\" >2</field>" +
+ " <field name=\"child_s\">y</field>" +
+ " </doc>" +
+ " <doc> " +
+ " <field name=\"id\" >3</field>" +
+ " <field name=\"child_s\">z</field>" +
+ " </doc>" +
+ " </field> " +
+ "</doc>";
+
+ String xml_doc2 =
+ "<doc >" +
+ " <field name=\"id\">4</field>" +
+ " <field name=\"parent_s\">A</field>" +
+ " <field name=\"test\">" +
+ " <doc> " +
+ " <field name=\"id\" >5</field>" +
+ " <field name=\"child_s\">b</field>" +
+ " <field name=\"grandChild\">" +
+ " <doc> " +
+ " <field name=\"id\" >7</field>" +
+ " <field name=\"child_s\">d</field>" +
+ " </doc>" +
+ " </field>" +
+ " </doc>" +
+ " </field>" +
+ " <field name=\"test\">" +
+ " <doc> " +
+ " <field name=\"id\" >6</field>" +
+ " <field name=\"child_s\">c</field>" +
+ " </doc>" +
+ " </field> " +
+ "</doc>";
+
+ XMLStreamReader parser =
+ inputFactory.createXMLStreamReader(new StringReader(xml_doc1));
+ parser.next(); // read the START document...
+ //null for the processor is all right here
+ XMLLoader loader = new XMLLoader();
+ SolrInputDocument document1 = loader.readDoc(parser);
+
+ XMLStreamReader parser2 =
+ inputFactory.createXMLStreamReader(new StringReader(xml_doc2));
+ parser2.next(); // read the START document...
+ //null for the processor is all right here
+ //XMLLoader loader = new XMLLoader();
+ SolrInputDocument document2 = loader.readDoc(parser2);
+
+ assertFalse(document1.hasChildDocuments());
+ assertEquals(document1.toString(), sdoc("id", "1", "empty_s", "", "parent_s", "X", "test",
+ sdocs(sdoc("id", "2", "child_s", "y"), sdoc("id", "3", "child_s", "z"))).toString());
+
+ assertFalse(document2.hasChildDocuments());
+ assertEquals(document2.toString(), sdoc("id", "4", "parent_s", "A", "test",
+ sdocs(sdoc("id", "5", "child_s", "b", "grandChild", Collections.singleton(sdoc("id", "7", "child_s", "d"))),
+ sdoc("id", "6", "child_s", "c"))).toString());
+ }
+
+ @Test
+ public void testXMLLabeledChildren() throws IOException, XMLStreamException {
+ UpdateRequest req = new UpdateRequest();
+
+ List<SolrInputDocument> docs = new ArrayList<>();
+
+ String xml_doc1 =
+ "<doc >" +
+ " <field name=\"id\">1</field>" +
+ " <field name=\"empty_s\"></field>" +
+ " <field name=\"parent_s\">X</field>" +
+ " <field name=\"test\">" +
+ " <doc> " +
+ " <field name=\"id\" >2</field>" +
+ " <field name=\"child_s\">y</field>" +
+ " </doc>"+
+ " <doc> " +
+ " <field name=\"id\" >3</field>" +
+ " <field name=\"child_s\">z</field>" +
+ " </doc>" +
+ " </field> " +
+ "</doc>";
+
+ String xml_doc2 =
+ "<doc >" +
+ " <field name=\"id\">4</field>" +
+ " <field name=\"parent_s\">A</field>" +
+ " <field name=\"test\">" +
+ " <doc> " +
+ " <field name=\"id\" >5</field>" +
+ " <field name=\"child_s\">b</field>" +
+ " </doc>"+
+ " </field>" +
+ " <field name=\"test\">" +
+ " <doc> " +
+ " <field name=\"id\" >6</field>" +
+ " <field name=\"child_s\">c</field>" +
+ " </doc>" +
+ " </field> " +
+ "</doc>";
+
+ XMLStreamReader parser =
+ inputFactory.createXMLStreamReader( new StringReader( xml_doc1 ) );
+ parser.next(); // read the START document...
+ //null for the processor is all right here
+ XMLLoader loader = new XMLLoader();
+ SolrInputDocument document1 = loader.readDoc( parser );
+
+ XMLStreamReader parser2 =
+ inputFactory.createXMLStreamReader( new StringReader( xml_doc2 ) );
+ parser2.next(); // read the START document...
+ //null for the processor is all right here
+ //XMLLoader loader = new XMLLoader();
+ SolrInputDocument document2 = loader.readDoc( parser2 );
+
+ assertFalse(document1.hasChildDocuments());
+ assertEquals(document1.toString(), sdoc("id", "1", "empty_s", "", "parent_s", "X", "test",
+ sdocs(sdoc("id", "2", "child_s", "y"), sdoc("id", "3", "child_s", "z"))).toString());
+
+ assertFalse(document2.hasChildDocuments());
+ assertEquals(document2.toString(), sdoc("id", "4", "parent_s", "A", "test",
+ sdocs(sdoc("id", "5", "child_s", "b"), sdoc("id", "6", "child_s", "c"))).toString());
+
+ docs.add(document1);
+ docs.add(document2);
+
+ Collections.shuffle(docs, random());
+ req.add(docs);
+
+ RequestWriter requestWriter = new RequestWriter();
+ OutputStream os = new ByteArrayOutputStream();
+ requestWriter.write(req, os);
+ assertBlockU(os.toString());
+ assertU(commit());
+
+ final SolrIndexSearcher searcher = getSearcher();
+ assertSingleParentOf(searcher, one("yz"), "X");
+ assertSingleParentOf(searcher, one("bc"), "A");
+
+ }
+
+ @Test
public void testJavaBinCodecNestedRelation() throws IOException {
SolrInputDocument topDocument = new SolrInputDocument();
topDocument.addField("parent_f1", "v1");
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/38bf976c/solr/solrj/src/java/org/apache/solr/client/solrj/util/ClientUtils.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/util/ClientUtils.java b/solr/solrj/src/java/org/apache/solr/client/solrj/util/ClientUtils.java
index 54986db..26a188d 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/util/ClientUtils.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/util/ClientUtils.java
@@ -72,7 +72,9 @@ public class ClientUtils
for( Object v : field ) {
String update = null;
- if (v instanceof Map) {
+ if(v instanceof SolrInputDocument) {
+ writeVal(writer, name, v , null);
+ } else if (v instanceof Map) {
// currently only supports a single value
for (Entry<Object,Object> entry : ((Map<Object,Object>)v).entrySet()) {
update = entry.getKey().toString();
@@ -112,20 +114,28 @@ public class ClientUtils
v = Base64.byteArrayToBase64(bytes.array(), bytes.position(),bytes.limit() - bytes.position());
}
+ XML.Writable valWriter = null;
+ if(v instanceof SolrInputDocument) {
+ final SolrInputDocument solrDoc = (SolrInputDocument) v;
+ valWriter = (writer1) -> writeXML(solrDoc, writer1);
+ } else if(v != null) {
+ final Object val = v;
+ valWriter = (writer1) -> XML.escapeCharData(val.toString(), writer1);
+ }
+
if (update == null) {
if (v != null) {
- XML.writeXML(writer, "field", v.toString(), "name", name );
+ XML.writeXML(writer, "field", valWriter, "name", name);
}
} else {
if (v == null) {
- XML.writeXML(writer, "field", null, "name", name, "update", update, "null", true);
+ XML.writeXML(writer, "field", (XML.Writable) null, "name", name, "update", update, "null", true);
} else {
- XML.writeXML(writer, "field", v.toString(), "name", name, "update", update);
+ XML.writeXML(writer, "field", valWriter, "name", name, "update", update);
}
}
}
-
public static String toXML( SolrInputDocument doc )
{
StringWriter str = new StringWriter();
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/38bf976c/solr/solrj/src/java/org/apache/solr/common/util/XML.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/common/util/XML.java b/solr/solrj/src/java/org/apache/solr/common/util/XML.java
index c6e5205..9d1b8a8 100644
--- a/solr/solrj/src/java/org/apache/solr/common/util/XML.java
+++ b/solr/solrj/src/java/org/apache/solr/common/util/XML.java
@@ -16,9 +16,10 @@
*/
package org.apache.solr.common.util;
-import java.io.Writer;
import java.io.IOException;
+import java.io.Writer;
import java.util.Map;
+import java.util.stream.Stream;
/**
*
@@ -37,9 +38,7 @@ public class XML {
private static final String[] attribute_escapes=
{"#0;","#1;","#2;","#3;","#4;","#5;","#6;","#7;","#8;",null,null,"#11;","#12;",null,"#14;","#15;","#16;","#17;","#18;","#19;","#20;","#21;","#22;","#23;","#24;","#25;","#26;","#27;","#28;","#29;","#30;","#31;",null,null,"&quot;",null,null,null,"&amp;",null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,"&lt;"};
-
-
- /*****************************************
+ /*
#Simple python script used to generate the escape table above. -YCS
#
#use individual char arrays or one big char array for better efficiency
@@ -65,13 +64,9 @@ public class XML {
result += val + ','
print result
- ****************************************/
+ */
-/*********
- *
- * @throws IOException If there is a low-level I/O error.
- */
public static void escapeCharData(String str, Writer out) throws IOException {
escape(str, out, chardata_escapes);
}
@@ -84,53 +79,28 @@ public class XML {
escape(chars, start, length, out, attribute_escapes);
}
+ /** does NOT escape character data in val; it must already be valid XML. Attributes are always escaped. */
+ public final static void writeUnescapedXML(Writer out, String tag, String val, Object... attrs) throws IOException {
+ writeXML(out, tag, (writer1) -> writer1.write(val), attrs);
+ }
- public final static void writeXML(Writer out, String tag, String val) throws IOException {
- out.write('<');
- out.write(tag);
- if (val == null) {
- out.write('/');
- out.write('>');
- } else {
- out.write('>');
- escapeCharData(val,out);
- out.write('<');
- out.write('/');
- out.write(tag);
- out.write('>');
- }
+ /** escapes character data in val and attributes */
+ public final static void writeXML(Writer out, String tag, String val, Object... attrs) throws IOException {
+ final Writable writable = val != null ? (writer1) -> XML.escapeCharData(val, writer1) : null;
+ writeXML(out, tag, writable, attrs);
}
- /** does NOT escape character data in val, must already be valid XML */
- public final static void writeUnescapedXML(Writer out, String tag, String val, Object... attrs) throws IOException {
- out.write('<');
- out.write(tag);
- for (int i=0; i<attrs.length; i++) {
- out.write(' ');
- out.write(attrs[i++].toString());
- out.write('=');
- out.write('"');
- out.write(attrs[i].toString());
- out.write('"');
- }
- if (val == null) {
- out.write('/');
- out.write('>');
- } else {
- out.write('>');
- out.write(val);
- out.write('<');
- out.write('/');
- out.write(tag);
- out.write('>');
- }
+ /** escapes character data in val and attributes */
+ public static void writeXML(Writer out, String tag, String val, Map<String, String> attrs) throws IOException {
+ writeXML(out, tag, val, attrs.entrySet().stream().flatMap((entry) -> Stream.of(entry.getKey(), entry.getValue())).toArray());
}
- /** escapes character data in val */
- public final static void writeXML(Writer out, String tag, String val, Object... attrs) throws IOException {
+ /** @lucene.internal */
+ public final static void writeXML(Writer out, String tag, Writable valWritable, Object... attrs) throws IOException {
out.write('<');
out.write(tag);
- for (int i=0; i<attrs.length; i++) {
+ final int attrsLen = attrs == null ? 0 : attrs.length;
+ for (int i = 0; i< attrsLen; i++) {
out.write(' ');
out.write(attrs[i++].toString());
out.write('=');
@@ -138,12 +108,12 @@ public class XML {
escapeAttributeValue(attrs[i].toString(), out);
out.write('"');
}
- if (val == null) {
+ if (valWritable == null) {
out.write('/');
out.write('>');
} else {
out.write('>');
- escapeCharData(val,out);
+ valWritable.write(out);
out.write('<');
out.write('/');
out.write(tag);
@@ -151,29 +121,9 @@ public class XML {
}
}
- /** escapes character data in val */
- public static void writeXML(Writer out, String tag, String val, Map<String, String> attrs) throws IOException {
- out.write('<');
- out.write(tag);
- for (Map.Entry<String, String> entry : attrs.entrySet()) {
- out.write(' ');
- out.write(entry.getKey());
- out.write('=');
- out.write('"');
- escapeAttributeValue(entry.getValue(), out);
- out.write('"');
- }
- if (val == null) {
- out.write('/');
- out.write('>');
- } else {
- out.write('>');
- escapeCharData(val,out);
- out.write('<');
- out.write('/');
- out.write(tag);
- out.write('>');
- }
+ @FunctionalInterface
+ public interface Writable {
+ void write(Writer w) throws IOException;
}
private static void escape(char [] chars, int offset, int length, Writer out, String [] escapes) throws IOException{
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/38bf976c/solr/test-framework/src/java/org/apache/solr/util/BaseTestHarness.java
----------------------------------------------------------------------
diff --git a/solr/test-framework/src/java/org/apache/solr/util/BaseTestHarness.java b/solr/test-framework/src/java/org/apache/solr/util/BaseTestHarness.java
index a84d6d1..ad1d38e 100644
--- a/solr/test-framework/src/java/org/apache/solr/util/BaseTestHarness.java
+++ b/solr/test-framework/src/java/org/apache/solr/util/BaseTestHarness.java
@@ -15,11 +15,6 @@
* limitations under the License.
*/
package org.apache.solr.util;
-import org.apache.solr.common.SolrException;
-import org.apache.solr.common.util.XML;
-import org.w3c.dom.Document;
-import org.xml.sax.SAXException;
-
import javax.xml.namespace.QName;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
@@ -27,13 +22,17 @@ import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
-
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.util.XML;
+import org.w3c.dom.Document;
+import org.xml.sax.SAXException;
+
abstract public class BaseTestHarness {
private static final ThreadLocal<DocumentBuilder> builderTL = new ThreadLocal<>();
private static final ThreadLocal<XPath> xpathTL = new ThreadLocal<>();
@@ -200,15 +199,9 @@ abstract public class BaseTestHarness {
public static String simpleTag(String tag, String... args) {
try {
- StringWriter r = new StringWriter();
-
- // this is annoying
- if (null == args || 0 == args.length) {
- XML.writeXML(r, tag, null);
- } else {
- XML.writeXML(r, tag, null, (Object[])args);
- }
- return r.getBuffer().toString();
+ StringWriter writer = new StringWriter();
+ XML.writeXML(writer, tag, (String) null, (Object[])args);
+ return writer.getBuffer().toString();
} catch (IOException e) {
throw new RuntimeException
("this should never happen with a StringWriter", e);