You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jd...@apache.org on 2013/12/24 16:12:08 UTC
svn commit: r1553285 - in /lucene/dev/trunk/solr: CHANGES.txt
contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/XPathRecordReader.java
contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestXPathEntityProcessor.java
Author: jdyer
Date: Tue Dec 24 15:12:07 2013
New Revision: 1553285
URL: http://svn.apache.org/r1553285
Log:
SOLR-2960: XPathEntityProcessor was adding spurious nulls to multi-valued fields
Modified:
lucene/dev/trunk/solr/CHANGES.txt
lucene/dev/trunk/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/XPathRecordReader.java
lucene/dev/trunk/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestXPathEntityProcessor.java
Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1553285&r1=1553284&r2=1553285&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Tue Dec 24 15:12:07 2013
@@ -230,6 +230,9 @@ Optimizations
* SOLR-5576: Improve concurrency when registering and waiting for all
SolrCore's to register a DOWN state. (Christine Poerschke via Mark Miller)
+* SOLR-2960: fix DIH XPathEntityProcessor to add the correct number of "null"
+ placeholders for multi-valued fields (Michael Watts via James Dyer)
+
Other Changes
---------------------
Modified: lucene/dev/trunk/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/XPathRecordReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/XPathRecordReader.java?rev=1553285&r1=1553284&r2=1553285&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/XPathRecordReader.java (original)
+++ lucene/dev/trunk/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/XPathRecordReader.java Tue Dec 24 15:12:07 2013
@@ -296,7 +296,7 @@ public class XPathRecordReader {
for (Node n : childNodes) {
// For the multivalue child nodes where we could have, but
// didnt, collect text. Push a null string into values.
- if (!childrenFound.contains(n)) n.putNulls(values);
+ if (!childrenFound.contains(n)) n.putNulls(values, valuesAddedinThisFrame);
}
}
return;
@@ -429,18 +429,28 @@ public class XPathRecordReader {
* pushing a null string onto every multiValued fieldName's List of values
* where a value has not been provided from the stream.
*/
- private void putNulls(Map<String, Object> values) {
+ private void putNulls(Map<String, Object> values, Set<String> valuesAddedinThisFrame) {
if (attributes != null) {
for (Node n : attributes) {
- if (n.multiValued)
- putText(values, null, n.fieldName, true);
+ if (n.multiValued) {
+ putANull(n.fieldName, values, valuesAddedinThisFrame);
+ }
}
}
- if (hasText && multiValued)
- putText(values, null, fieldName, true);
+ if (hasText && multiValued) {
+ putANull(fieldName, values, valuesAddedinThisFrame);
+ }
if (childNodes != null) {
- for (Node childNode : childNodes)
- childNode.putNulls(values);
+ for (Node childNode : childNodes) {
+ childNode.putNulls(values, valuesAddedinThisFrame);
+ }
+ }
+ }
+
+ private void putANull(String thisFieldName, Map<String, Object> values, Set<String> valuesAddedinThisFrame) {
+ putText(values, null, thisFieldName, true);
+ if( valuesAddedinThisFrame != null) {
+ valuesAddedinThisFrame.add(thisFieldName);
}
}
Modified: lucene/dev/trunk/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestXPathEntityProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestXPathEntityProcessor.java?rev=1553285&r1=1553284&r2=1553285&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestXPathEntityProcessor.java (original)
+++ lucene/dev/trunk/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestXPathEntityProcessor.java Tue Dec 24 15:12:07 2013
@@ -93,6 +93,128 @@ public class TestXPathEntityProcessor ex
assertEquals("2", l.get(1));
assertEquals("ü", l.get(2));
}
+
+ @SuppressWarnings({"rawtypes", "unchecked"})
+ @Test
+ public void testMultiValuedWithMultipleDocuments() throws Exception {
+ Map entityAttrs = createMap("name", "e", "url", "testdata.xml", XPathEntityProcessor.FOR_EACH, "/documents/doc");
+ List fields = new ArrayList();
+ fields.add(createMap("column", "id", "xpath", "/documents/doc/id", DataImporter.MULTI_VALUED, "false"));
+ fields.add(createMap("column", "a", "xpath", "/documents/doc/a", DataImporter.MULTI_VALUED, "true"));
+ fields.add(createMap("column", "s1dataA", "xpath", "/documents/doc/sec1/s1dataA", DataImporter.MULTI_VALUED, "true"));
+ fields.add(createMap("column", "s1dataB", "xpath", "/documents/doc/sec1/s1dataB", DataImporter.MULTI_VALUED, "true"));
+ fields.add(createMap("column", "s1dataC", "xpath", "/documents/doc/sec1/s1dataC", DataImporter.MULTI_VALUED, "true"));
+
+ Context c = getContext(null,
+ new VariableResolver(), getDataSource(textMultipleDocuments), Context.FULL_DUMP, fields, entityAttrs);
+ XPathEntityProcessor xPathEntityProcessor = new XPathEntityProcessor();
+ xPathEntityProcessor.init(c);
+ List<Map<String, Object>> result = new ArrayList<Map<String, Object>>();
+ while (true) {
+ Map<String, Object> row = xPathEntityProcessor.nextRow();
+ if (row == null)
+ break;
+ result.add(row);
+ }
+ {
+ assertEquals("1", result.get(0).get("id"));
+ List a = (List)result.get(0).get("a");
+ List s1dataA = (List)result.get(0).get("s1dataA");
+ List s1dataB = (List)result.get(0).get("s1dataB");
+ List s1dataC = (List)result.get(0).get("s1dataC");
+ assertEquals(2, a.size());
+ assertEquals("id1-a1", a.get(0));
+ assertEquals("id1-a2", a.get(1));
+ assertEquals(3, s1dataA.size());
+ assertEquals("id1-s1dataA-1", s1dataA.get(0));
+ assertNull(s1dataA.get(1));
+ assertEquals("id1-s1dataA-3", s1dataA.get(2));
+ assertEquals(3, s1dataB.size());
+ assertEquals("id1-s1dataB-1", s1dataB.get(0));
+ assertEquals("id1-s1dataB-2", s1dataB.get(1));
+ assertEquals("id1-s1dataB-3", s1dataB.get(2));
+ assertEquals(3, s1dataC.size());
+ assertNull(s1dataC.get(0));
+ assertNull(s1dataC.get(1));
+ assertNull(s1dataC.get(2));
+ }
+ {
+ assertEquals("2", result.get(1).get("id"));
+ List a = (List)result.get(1).get("a");
+ List s1dataA = (List)result.get(1).get("s1dataA");
+ List s1dataB = (List)result.get(1).get("s1dataB");
+ List s1dataC = (List)result.get(1).get("s1dataC");
+ assertTrue(a==null || a.size()==0);
+ assertEquals(1, s1dataA.size());
+ assertNull(s1dataA.get(0));
+ assertEquals(1, s1dataB.size());
+ assertEquals("id2-s1dataB-1", s1dataB.get(0));
+ assertEquals(1, s1dataC.size());
+ assertNull(s1dataC.get(0));
+ }
+ {
+ assertEquals("3", result.get(2).get("id"));
+ List a = (List)result.get(2).get("a");
+ List s1dataA = (List)result.get(2).get("s1dataA");
+ List s1dataB = (List)result.get(2).get("s1dataB");
+ List s1dataC = (List)result.get(2).get("s1dataC");
+ assertTrue(a==null || a.size()==0);
+ assertEquals(1, s1dataA.size());
+ assertEquals("id3-s1dataA-1", s1dataA.get(0));
+ assertEquals(1, s1dataB.size());
+ assertNull(s1dataB.get(0));
+ assertEquals(1, s1dataC.size());
+ assertNull(s1dataC.get(0));
+ }
+ {
+ assertEquals("4", result.get(3).get("id"));
+ List a = (List)result.get(3).get("a");
+ List s1dataA = (List)result.get(3).get("s1dataA");
+ List s1dataB = (List)result.get(3).get("s1dataB");
+ List s1dataC = (List)result.get(3).get("s1dataC");
+ assertTrue(a==null || a.size()==0);
+ assertEquals(1, s1dataA.size());
+ assertEquals("id4-s1dataA-1", s1dataA.get(0));
+ assertEquals(1, s1dataB.size());
+ assertEquals("id4-s1dataB-1", s1dataB.get(0));
+ assertEquals(1, s1dataC.size());
+ assertEquals("id4-s1dataC-1", s1dataC.get(0));
+ }
+ {
+ assertEquals("5", result.get(4).get("id"));
+ List a = (List)result.get(4).get("a");
+ List s1dataA = (List)result.get(4).get("s1dataA");
+ List s1dataB = (List)result.get(4).get("s1dataB");
+ List s1dataC = (List)result.get(4).get("s1dataC");
+ assertTrue(a==null || a.size()==0);
+ assertEquals(1, s1dataA.size());
+ assertNull(s1dataA.get(0));
+ assertEquals(1, s1dataB.size());
+ assertNull(s1dataB.get(0));
+ assertEquals(1, s1dataC.size());
+ assertEquals("id5-s1dataC-1", s1dataC.get(0));
+ }
+ {
+ assertEquals("6", result.get(5).get("id"));
+ List a = (List)result.get(5).get("a");
+ List s1dataA = (List)result.get(5).get("s1dataA");
+ List s1dataB = (List)result.get(5).get("s1dataB");
+ List s1dataC = (List)result.get(5).get("s1dataC");
+ assertTrue(a==null || a.size()==0);
+ assertEquals(3, s1dataA.size());
+ assertEquals("id6-s1dataA-1", s1dataA.get(0));
+ assertEquals("id6-s1dataA-2", s1dataA.get(1));
+ assertNull(s1dataA.get(2));
+ assertEquals(3, s1dataB.size());
+ assertEquals("id6-s1dataB-1", s1dataB.get(0));
+ assertEquals("id6-s1dataB-2", s1dataB.get(1));
+ assertEquals("id6-s1dataB-3", s1dataB.get(2));
+ assertEquals(3, s1dataC.size());
+ assertEquals("id6-s1dataC-1", s1dataC.get(0));
+ assertNull(s1dataC.get(1));
+ assertEquals("id6-s1dataC-3", s1dataC.get(2));
+ }
+ }
@Test
public void testMultiValuedFlatten() throws Exception {
@@ -305,4 +427,68 @@ public class TestXPathEntityProcessor ex
private static final String testXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE root [\n<!ENTITY uuml \"ü\" >\n]>\n<root><a>1</a><a>2</a><a>ü</a></root>";
private static final String testXmlFlatten = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><root><a>1<b>B</b>2</a></root>";
+
+ private static final String textMultipleDocuments =
+ "<?xml version=\"1.0\" ?>" +
+ "<documents>" +
+ " <doc>" +
+ " <id>1</id>" +
+ " <a>id1-a1</a>" +
+ " <a>id1-a2</a>" +
+ " <sec1>" +
+ " <s1dataA>id1-s1dataA-1</s1dataA>" +
+ " <s1dataB>id1-s1dataB-1</s1dataB>" +
+ " </sec1>" +
+ " <sec1>" +
+ " <s1dataB>id1-s1dataB-2</s1dataB>" +
+ " </sec1>" +
+ " <sec1>" +
+ " <s1dataA>id1-s1dataA-3</s1dataA>" +
+ " <s1dataB>id1-s1dataB-3</s1dataB>" +
+ " </sec1>" +
+ " </doc>" +
+ " <doc>" +
+ " <id>2</id>" +
+ " <sec1>" +
+ " <s1dataB>id2-s1dataB-1</s1dataB>" +
+ " </sec1>" +
+ " </doc>" +
+ " <doc>" +
+ " <id>3</id>" +
+ " <sec1>" +
+ " <s1dataA>id3-s1dataA-1</s1dataA>" +
+ " </sec1>" +
+ " </doc>" +
+ " <doc>" +
+ " <id>4</id>" +
+ " <sec1>" +
+ " <s1dataA>id4-s1dataA-1</s1dataA>" +
+ " <s1dataB>id4-s1dataB-1</s1dataB>" +
+ " <s1dataC>id4-s1dataC-1</s1dataC>" +
+ " </sec1>" +
+ " </doc>" +
+ " <doc>" +
+ " <id>5</id>" +
+ " <sec1>" +
+ " <s1dataC>id5-s1dataC-1</s1dataC>" +
+ " </sec1>" +
+ " </doc>" +
+ " <doc>" +
+ " <id>6</id>" +
+ " <sec1>" +
+ " <s1dataA>id6-s1dataA-1</s1dataA>" +
+ " <s1dataB>id6-s1dataB-1</s1dataB>" +
+ " <s1dataC>id6-s1dataC-1</s1dataC>" +
+ " </sec1>" +
+ " <sec1>" +
+ " <s1dataA>id6-s1dataA-2</s1dataA>" +
+ " <s1dataB>id6-s1dataB-2</s1dataB>" +
+ " </sec1>" +
+ " <sec1>" +
+ " <s1dataB>id6-s1dataB-3</s1dataB>" +
+ " <s1dataC>id6-s1dataC-3</s1dataC>" +
+ " </sec1>" +
+ " </doc>" +
+ "</documents>"
+ ;
}