You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jd...@apache.org on 2013/12/24 18:07:15 UTC
svn commit: r1553305 - in /lucene/dev/branches/branch_4x: ./ dev-tools/
lucene/ lucene/analysis/
lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std40/
lucene/analysis/icu/src/java/org/apache/lucene/collation/ lucene/backwards/
luce...
Author: jdyer
Date: Tue Dec 24 17:07:13 2013
New Revision: 1553305
URL: http://svn.apache.org/r1553305
Log:
SOLR-2960: XPathEntityProcessor was adding spurious nulls to multi-valued fields
Modified:
lucene/dev/branches/branch_4x/ (props changed)
lucene/dev/branches/branch_4x/dev-tools/ (props changed)
lucene/dev/branches/branch_4x/lucene/ (props changed)
lucene/dev/branches/branch_4x/lucene/BUILD.txt (props changed)
lucene/dev/branches/branch_4x/lucene/CHANGES.txt (props changed)
lucene/dev/branches/branch_4x/lucene/JRE_VERSION_MIGRATION.txt (props changed)
lucene/dev/branches/branch_4x/lucene/LICENSE.txt (props changed)
lucene/dev/branches/branch_4x/lucene/MIGRATE.txt (props changed)
lucene/dev/branches/branch_4x/lucene/NOTICE.txt (props changed)
lucene/dev/branches/branch_4x/lucene/README.txt (props changed)
lucene/dev/branches/branch_4x/lucene/SYSTEM_REQUIREMENTS.txt (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/ (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std40/ASCIITLD.jflex-macro (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std40/SUPPLEMENTARY.jflex-macro (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std40/StandardTokenizerImpl40.java (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std40/StandardTokenizerImpl40.jflex (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std40/UAX29URLEmailTokenizerImpl40.java (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std40/UAX29URLEmailTokenizerImpl40.jflex (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std40/package.html (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilterFactory.java (props changed)
lucene/dev/branches/branch_4x/lucene/backwards/ (props changed)
lucene/dev/branches/branch_4x/lucene/benchmark/ (props changed)
lucene/dev/branches/branch_4x/lucene/build.xml (props changed)
lucene/dev/branches/branch_4x/lucene/classification/ (props changed)
lucene/dev/branches/branch_4x/lucene/classification/build.xml (props changed)
lucene/dev/branches/branch_4x/lucene/classification/ivy.xml (props changed)
lucene/dev/branches/branch_4x/lucene/classification/src/ (props changed)
lucene/dev/branches/branch_4x/lucene/codecs/ (props changed)
lucene/dev/branches/branch_4x/lucene/common-build.xml (props changed)
lucene/dev/branches/branch_4x/lucene/core/ (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/index.40.cfs.zip (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/index.40.nocfs.zip (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/index.40.optimized.cfs.zip (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/index.40.optimized.nocfs.zip (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestSort.java (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestSortDocValues.java (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestSortRandom.java (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestTotalHitCountCollector.java (props changed)
lucene/dev/branches/branch_4x/lucene/demo/ (props changed)
lucene/dev/branches/branch_4x/lucene/expressions/ (props changed)
lucene/dev/branches/branch_4x/lucene/facet/ (props changed)
lucene/dev/branches/branch_4x/lucene/grouping/ (props changed)
lucene/dev/branches/branch_4x/lucene/highlighter/ (props changed)
lucene/dev/branches/branch_4x/lucene/ivy-settings.xml (props changed)
lucene/dev/branches/branch_4x/lucene/ivy-versions.properties (props changed)
lucene/dev/branches/branch_4x/lucene/join/ (props changed)
lucene/dev/branches/branch_4x/lucene/licenses/ (props changed)
lucene/dev/branches/branch_4x/lucene/memory/ (props changed)
lucene/dev/branches/branch_4x/lucene/misc/ (props changed)
lucene/dev/branches/branch_4x/lucene/module-build.xml (props changed)
lucene/dev/branches/branch_4x/lucene/queries/ (props changed)
lucene/dev/branches/branch_4x/lucene/queries/src/test/org/apache/lucene/queries/function/TestFunctionQuerySort.java (props changed)
lucene/dev/branches/branch_4x/lucene/queryparser/ (props changed)
lucene/dev/branches/branch_4x/lucene/replicator/ (props changed)
lucene/dev/branches/branch_4x/lucene/sandbox/ (props changed)
lucene/dev/branches/branch_4x/lucene/site/ (props changed)
lucene/dev/branches/branch_4x/lucene/spatial/ (props changed)
lucene/dev/branches/branch_4x/lucene/suggest/ (props changed)
lucene/dev/branches/branch_4x/lucene/test-framework/ (props changed)
lucene/dev/branches/branch_4x/lucene/tools/ (props changed)
lucene/dev/branches/branch_4x/solr/ (props changed)
lucene/dev/branches/branch_4x/solr/CHANGES.txt (contents, props changed)
lucene/dev/branches/branch_4x/solr/LICENSE.txt (props changed)
lucene/dev/branches/branch_4x/solr/NOTICE.txt (props changed)
lucene/dev/branches/branch_4x/solr/README.txt (props changed)
lucene/dev/branches/branch_4x/solr/SYSTEM_REQUIREMENTS.txt (props changed)
lucene/dev/branches/branch_4x/solr/build.xml (props changed)
lucene/dev/branches/branch_4x/solr/cloud-dev/ (props changed)
lucene/dev/branches/branch_4x/solr/common-build.xml (props changed)
lucene/dev/branches/branch_4x/solr/contrib/ (props changed)
lucene/dev/branches/branch_4x/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/XPathRecordReader.java
lucene/dev/branches/branch_4x/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestXPathEntityProcessor.java
lucene/dev/branches/branch_4x/solr/core/ (props changed)
lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/core/TestConfig.java (props changed)
lucene/dev/branches/branch_4x/solr/example/ (props changed)
lucene/dev/branches/branch_4x/solr/licenses/ (props changed)
lucene/dev/branches/branch_4x/solr/licenses/httpclient-LICENSE-ASL.txt (props changed)
lucene/dev/branches/branch_4x/solr/licenses/httpclient-NOTICE.txt (props changed)
lucene/dev/branches/branch_4x/solr/licenses/httpcore-LICENSE-ASL.txt (props changed)
lucene/dev/branches/branch_4x/solr/licenses/httpcore-NOTICE.txt (props changed)
lucene/dev/branches/branch_4x/solr/licenses/httpmime-LICENSE-ASL.txt (props changed)
lucene/dev/branches/branch_4x/solr/licenses/httpmime-NOTICE.txt (props changed)
lucene/dev/branches/branch_4x/solr/scripts/ (props changed)
lucene/dev/branches/branch_4x/solr/site/ (props changed)
lucene/dev/branches/branch_4x/solr/solrj/ (props changed)
lucene/dev/branches/branch_4x/solr/test-framework/ (props changed)
lucene/dev/branches/branch_4x/solr/webapp/ (props changed)
Modified: lucene/dev/branches/branch_4x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/CHANGES.txt?rev=1553305&r1=1553304&r2=1553305&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/solr/CHANGES.txt Tue Dec 24 17:07:13 2013
@@ -194,6 +194,9 @@ Optimizations
* SOLR-5576: Improve concurrency when registering and waiting for all
SolrCore's to register a DOWN state. (Christine Poerschke via Mark Miller)
+* SOLR-2960: fix DIH XPathEntityProcessor to add the correct number of "null"
+ placeholders for multi-valued fields (Michael Watts via James Dyer)
+
Other Changes
---------------------
Modified: lucene/dev/branches/branch_4x/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/XPathRecordReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/XPathRecordReader.java?rev=1553305&r1=1553304&r2=1553305&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/XPathRecordReader.java (original)
+++ lucene/dev/branches/branch_4x/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/XPathRecordReader.java Tue Dec 24 17:07:13 2013
@@ -296,7 +296,7 @@ public class XPathRecordReader {
for (Node n : childNodes) {
// For the multivalue child nodes where we could have, but
// didnt, collect text. Push a null string into values.
- if (!childrenFound.contains(n)) n.putNulls(values);
+ if (!childrenFound.contains(n)) n.putNulls(values, valuesAddedinThisFrame);
}
}
return;
@@ -429,18 +429,28 @@ public class XPathRecordReader {
* pushing a null string onto every multiValued fieldName's List of values
* where a value has not been provided from the stream.
*/
- private void putNulls(Map<String, Object> values) {
+ private void putNulls(Map<String, Object> values, Set<String> valuesAddedinThisFrame) {
if (attributes != null) {
for (Node n : attributes) {
- if (n.multiValued)
- putText(values, null, n.fieldName, true);
+ if (n.multiValued) {
+ putANull(n.fieldName, values, valuesAddedinThisFrame);
+ }
}
}
- if (hasText && multiValued)
- putText(values, null, fieldName, true);
+ if (hasText && multiValued) {
+ putANull(fieldName, values, valuesAddedinThisFrame);
+ }
if (childNodes != null) {
- for (Node childNode : childNodes)
- childNode.putNulls(values);
+ for (Node childNode : childNodes) {
+ childNode.putNulls(values, valuesAddedinThisFrame);
+ }
+ }
+ }
+
+ private void putANull(String thisFieldName, Map<String, Object> values, Set<String> valuesAddedinThisFrame) {
+ putText(values, null, thisFieldName, true);
+ if( valuesAddedinThisFrame != null) {
+ valuesAddedinThisFrame.add(thisFieldName);
}
}
Modified: lucene/dev/branches/branch_4x/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestXPathEntityProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestXPathEntityProcessor.java?rev=1553305&r1=1553304&r2=1553305&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestXPathEntityProcessor.java (original)
+++ lucene/dev/branches/branch_4x/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestXPathEntityProcessor.java Tue Dec 24 17:07:13 2013
@@ -93,6 +93,128 @@ public class TestXPathEntityProcessor ex
assertEquals("2", l.get(1));
assertEquals("ü", l.get(2));
}
+
+ @SuppressWarnings({"rawtypes", "unchecked"})
+ @Test
+ public void testMultiValuedWithMultipleDocuments() throws Exception {
+ Map entityAttrs = createMap("name", "e", "url", "testdata.xml", XPathEntityProcessor.FOR_EACH, "/documents/doc");
+ List fields = new ArrayList();
+ fields.add(createMap("column", "id", "xpath", "/documents/doc/id", DataImporter.MULTI_VALUED, "false"));
+ fields.add(createMap("column", "a", "xpath", "/documents/doc/a", DataImporter.MULTI_VALUED, "true"));
+ fields.add(createMap("column", "s1dataA", "xpath", "/documents/doc/sec1/s1dataA", DataImporter.MULTI_VALUED, "true"));
+ fields.add(createMap("column", "s1dataB", "xpath", "/documents/doc/sec1/s1dataB", DataImporter.MULTI_VALUED, "true"));
+ fields.add(createMap("column", "s1dataC", "xpath", "/documents/doc/sec1/s1dataC", DataImporter.MULTI_VALUED, "true"));
+
+ Context c = getContext(null,
+ new VariableResolver(), getDataSource(textMultipleDocuments), Context.FULL_DUMP, fields, entityAttrs);
+ XPathEntityProcessor xPathEntityProcessor = new XPathEntityProcessor();
+ xPathEntityProcessor.init(c);
+ List<Map<String, Object>> result = new ArrayList<Map<String, Object>>();
+ while (true) {
+ Map<String, Object> row = xPathEntityProcessor.nextRow();
+ if (row == null)
+ break;
+ result.add(row);
+ }
+ {
+ assertEquals("1", result.get(0).get("id"));
+ List a = (List)result.get(0).get("a");
+ List s1dataA = (List)result.get(0).get("s1dataA");
+ List s1dataB = (List)result.get(0).get("s1dataB");
+ List s1dataC = (List)result.get(0).get("s1dataC");
+ assertEquals(2, a.size());
+ assertEquals("id1-a1", a.get(0));
+ assertEquals("id1-a2", a.get(1));
+ assertEquals(3, s1dataA.size());
+ assertEquals("id1-s1dataA-1", s1dataA.get(0));
+ assertNull(s1dataA.get(1));
+ assertEquals("id1-s1dataA-3", s1dataA.get(2));
+ assertEquals(3, s1dataB.size());
+ assertEquals("id1-s1dataB-1", s1dataB.get(0));
+ assertEquals("id1-s1dataB-2", s1dataB.get(1));
+ assertEquals("id1-s1dataB-3", s1dataB.get(2));
+ assertEquals(3, s1dataC.size());
+ assertNull(s1dataC.get(0));
+ assertNull(s1dataC.get(1));
+ assertNull(s1dataC.get(2));
+ }
+ {
+ assertEquals("2", result.get(1).get("id"));
+ List a = (List)result.get(1).get("a");
+ List s1dataA = (List)result.get(1).get("s1dataA");
+ List s1dataB = (List)result.get(1).get("s1dataB");
+ List s1dataC = (List)result.get(1).get("s1dataC");
+ assertTrue(a==null || a.size()==0);
+ assertEquals(1, s1dataA.size());
+ assertNull(s1dataA.get(0));
+ assertEquals(1, s1dataB.size());
+ assertEquals("id2-s1dataB-1", s1dataB.get(0));
+ assertEquals(1, s1dataC.size());
+ assertNull(s1dataC.get(0));
+ }
+ {
+ assertEquals("3", result.get(2).get("id"));
+ List a = (List)result.get(2).get("a");
+ List s1dataA = (List)result.get(2).get("s1dataA");
+ List s1dataB = (List)result.get(2).get("s1dataB");
+ List s1dataC = (List)result.get(2).get("s1dataC");
+ assertTrue(a==null || a.size()==0);
+ assertEquals(1, s1dataA.size());
+ assertEquals("id3-s1dataA-1", s1dataA.get(0));
+ assertEquals(1, s1dataB.size());
+ assertNull(s1dataB.get(0));
+ assertEquals(1, s1dataC.size());
+ assertNull(s1dataC.get(0));
+ }
+ {
+ assertEquals("4", result.get(3).get("id"));
+ List a = (List)result.get(3).get("a");
+ List s1dataA = (List)result.get(3).get("s1dataA");
+ List s1dataB = (List)result.get(3).get("s1dataB");
+ List s1dataC = (List)result.get(3).get("s1dataC");
+ assertTrue(a==null || a.size()==0);
+ assertEquals(1, s1dataA.size());
+ assertEquals("id4-s1dataA-1", s1dataA.get(0));
+ assertEquals(1, s1dataB.size());
+ assertEquals("id4-s1dataB-1", s1dataB.get(0));
+ assertEquals(1, s1dataC.size());
+ assertEquals("id4-s1dataC-1", s1dataC.get(0));
+ }
+ {
+ assertEquals("5", result.get(4).get("id"));
+ List a = (List)result.get(4).get("a");
+ List s1dataA = (List)result.get(4).get("s1dataA");
+ List s1dataB = (List)result.get(4).get("s1dataB");
+ List s1dataC = (List)result.get(4).get("s1dataC");
+ assertTrue(a==null || a.size()==0);
+ assertEquals(1, s1dataA.size());
+ assertNull(s1dataA.get(0));
+ assertEquals(1, s1dataB.size());
+ assertNull(s1dataB.get(0));
+ assertEquals(1, s1dataC.size());
+ assertEquals("id5-s1dataC-1", s1dataC.get(0));
+ }
+ {
+ assertEquals("6", result.get(5).get("id"));
+ List a = (List)result.get(5).get("a");
+ List s1dataA = (List)result.get(5).get("s1dataA");
+ List s1dataB = (List)result.get(5).get("s1dataB");
+ List s1dataC = (List)result.get(5).get("s1dataC");
+ assertTrue(a==null || a.size()==0);
+ assertEquals(3, s1dataA.size());
+ assertEquals("id6-s1dataA-1", s1dataA.get(0));
+ assertEquals("id6-s1dataA-2", s1dataA.get(1));
+ assertNull(s1dataA.get(2));
+ assertEquals(3, s1dataB.size());
+ assertEquals("id6-s1dataB-1", s1dataB.get(0));
+ assertEquals("id6-s1dataB-2", s1dataB.get(1));
+ assertEquals("id6-s1dataB-3", s1dataB.get(2));
+ assertEquals(3, s1dataC.size());
+ assertEquals("id6-s1dataC-1", s1dataC.get(0));
+ assertNull(s1dataC.get(1));
+ assertEquals("id6-s1dataC-3", s1dataC.get(2));
+ }
+ }
@Test
public void testMultiValuedFlatten() throws Exception {
@@ -305,4 +427,68 @@ public class TestXPathEntityProcessor ex
private static final String testXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE root [\n<!ENTITY uuml \"ü\" >\n]>\n<root><a>1</a><a>2</a><a>ü</a></root>";
private static final String testXmlFlatten = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><root><a>1<b>B</b>2</a></root>";
+
+ private static final String textMultipleDocuments =
+ "<?xml version=\"1.0\" ?>" +
+ "<documents>" +
+ " <doc>" +
+ " <id>1</id>" +
+ " <a>id1-a1</a>" +
+ " <a>id1-a2</a>" +
+ " <sec1>" +
+ " <s1dataA>id1-s1dataA-1</s1dataA>" +
+ " <s1dataB>id1-s1dataB-1</s1dataB>" +
+ " </sec1>" +
+ " <sec1>" +
+ " <s1dataB>id1-s1dataB-2</s1dataB>" +
+ " </sec1>" +
+ " <sec1>" +
+ " <s1dataA>id1-s1dataA-3</s1dataA>" +
+ " <s1dataB>id1-s1dataB-3</s1dataB>" +
+ " </sec1>" +
+ " </doc>" +
+ " <doc>" +
+ " <id>2</id>" +
+ " <sec1>" +
+ " <s1dataB>id2-s1dataB-1</s1dataB>" +
+ " </sec1>" +
+ " </doc>" +
+ " <doc>" +
+ " <id>3</id>" +
+ " <sec1>" +
+ " <s1dataA>id3-s1dataA-1</s1dataA>" +
+ " </sec1>" +
+ " </doc>" +
+ " <doc>" +
+ " <id>4</id>" +
+ " <sec1>" +
+ " <s1dataA>id4-s1dataA-1</s1dataA>" +
+ " <s1dataB>id4-s1dataB-1</s1dataB>" +
+ " <s1dataC>id4-s1dataC-1</s1dataC>" +
+ " </sec1>" +
+ " </doc>" +
+ " <doc>" +
+ " <id>5</id>" +
+ " <sec1>" +
+ " <s1dataC>id5-s1dataC-1</s1dataC>" +
+ " </sec1>" +
+ " </doc>" +
+ " <doc>" +
+ " <id>6</id>" +
+ " <sec1>" +
+ " <s1dataA>id6-s1dataA-1</s1dataA>" +
+ " <s1dataB>id6-s1dataB-1</s1dataB>" +
+ " <s1dataC>id6-s1dataC-1</s1dataC>" +
+ " </sec1>" +
+ " <sec1>" +
+ " <s1dataA>id6-s1dataA-2</s1dataA>" +
+ " <s1dataB>id6-s1dataB-2</s1dataB>" +
+ " </sec1>" +
+ " <sec1>" +
+ " <s1dataB>id6-s1dataB-3</s1dataB>" +
+ " <s1dataC>id6-s1dataC-3</s1dataC>" +
+ " </sec1>" +
+ " </doc>" +
+ "</documents>"
+ ;
}