You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jd...@apache.org on 2013/12/24 16:12:08 UTC

svn commit: r1553285 - in /lucene/dev/trunk/solr: CHANGES.txt contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/XPathRecordReader.java contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestXPathEntityProcessor.java

Author: jdyer
Date: Tue Dec 24 15:12:07 2013
New Revision: 1553285

URL: http://svn.apache.org/r1553285
Log:
SOLR-2960: XPathEntityProcessor was adding spurious nulls to multi-valued fields

Modified:
    lucene/dev/trunk/solr/CHANGES.txt
    lucene/dev/trunk/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/XPathRecordReader.java
    lucene/dev/trunk/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestXPathEntityProcessor.java

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1553285&r1=1553284&r2=1553285&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Tue Dec 24 15:12:07 2013
@@ -230,6 +230,9 @@ Optimizations
 * SOLR-5576: Improve concurrency when registering and waiting for all 
   SolrCore's to register a DOWN state. (Christine Poerschke via Mark Miller)
 
+* SOLR-2960: fix DIH XPathEntityProcessor to add the correct number of "null"
+  placeholders for multi-valued fields (Michael Watts via James Dyer)
+
 Other Changes
 ---------------------
 

Modified: lucene/dev/trunk/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/XPathRecordReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/XPathRecordReader.java?rev=1553285&r1=1553284&r2=1553285&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/XPathRecordReader.java (original)
+++ lucene/dev/trunk/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/XPathRecordReader.java Tue Dec 24 15:12:07 2013
@@ -296,7 +296,7 @@ public class XPathRecordReader {
                 for (Node n : childNodes) {
                   // For the multivalue child nodes where we could have, but
                   // didnt, collect text. Push a null string into values.
-                  if (!childrenFound.contains(n)) n.putNulls(values);
+                  if (!childrenFound.contains(n)) n.putNulls(values, valuesAddedinThisFrame);
                 }
               }
               return;
@@ -429,18 +429,28 @@ public class XPathRecordReader {
      * pushing a null string onto every multiValued fieldName's List of values
      * where a value has not been provided from the stream.
      */
-    private void putNulls(Map<String, Object> values) {
+    private void putNulls(Map<String, Object> values, Set<String> valuesAddedinThisFrame) {
       if (attributes != null) {
         for (Node n : attributes) {
-          if (n.multiValued)
-            putText(values, null, n.fieldName, true);
+          if (n.multiValued) {
+            putANull(n.fieldName, values, valuesAddedinThisFrame);
+          }
         }
       }
-      if (hasText && multiValued)
-        putText(values, null, fieldName, true);
+      if (hasText && multiValued) {
+        putANull(fieldName, values, valuesAddedinThisFrame);
+      }
       if (childNodes != null) {
-        for (Node childNode : childNodes)
-          childNode.putNulls(values);
+        for (Node childNode : childNodes) {
+          childNode.putNulls(values, valuesAddedinThisFrame);
+        }
+      }
+    }
+    
+    private void putANull(String thisFieldName, Map<String, Object> values, Set<String> valuesAddedinThisFrame) {
+      putText(values, null, thisFieldName, true);
+      if( valuesAddedinThisFrame != null) {
+        valuesAddedinThisFrame.add(thisFieldName);
       }
     }
 

Modified: lucene/dev/trunk/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestXPathEntityProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestXPathEntityProcessor.java?rev=1553285&r1=1553284&r2=1553285&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestXPathEntityProcessor.java (original)
+++ lucene/dev/trunk/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestXPathEntityProcessor.java Tue Dec 24 15:12:07 2013
@@ -93,6 +93,128 @@ public class TestXPathEntityProcessor ex
     assertEquals("2", l.get(1));
     assertEquals("ü", l.get(2));
   }
+  
+  @SuppressWarnings({"rawtypes", "unchecked"})
+  @Test
+  public void testMultiValuedWithMultipleDocuments() throws Exception {
+    Map entityAttrs = createMap("name", "e", "url", "testdata.xml", XPathEntityProcessor.FOR_EACH, "/documents/doc");
+    List fields = new ArrayList();
+    fields.add(createMap("column", "id", "xpath", "/documents/doc/id", DataImporter.MULTI_VALUED, "false"));
+    fields.add(createMap("column", "a", "xpath", "/documents/doc/a", DataImporter.MULTI_VALUED, "true"));
+    fields.add(createMap("column", "s1dataA", "xpath", "/documents/doc/sec1/s1dataA", DataImporter.MULTI_VALUED, "true"));
+    fields.add(createMap("column", "s1dataB", "xpath", "/documents/doc/sec1/s1dataB", DataImporter.MULTI_VALUED, "true")); 
+    fields.add(createMap("column", "s1dataC", "xpath", "/documents/doc/sec1/s1dataC", DataImporter.MULTI_VALUED, "true")); 
+    
+    Context c = getContext(null,
+            new VariableResolver(), getDataSource(textMultipleDocuments), Context.FULL_DUMP, fields, entityAttrs);
+    XPathEntityProcessor xPathEntityProcessor = new XPathEntityProcessor();
+    xPathEntityProcessor.init(c);
+    List<Map<String, Object>> result = new ArrayList<Map<String, Object>>();
+    while (true) {
+      Map<String, Object> row = xPathEntityProcessor.nextRow();
+      if (row == null)
+        break;
+      result.add(row);
+    }
+    {  
+      assertEquals("1", result.get(0).get("id"));
+      List a = (List)result.get(0).get("a");
+      List s1dataA = (List)result.get(0).get("s1dataA");
+      List s1dataB = (List)result.get(0).get("s1dataB");
+      List s1dataC = (List)result.get(0).get("s1dataC");      
+      assertEquals(2, a.size());
+      assertEquals("id1-a1", a.get(0));
+      assertEquals("id1-a2", a.get(1));
+      assertEquals(3, s1dataA.size());
+      assertEquals("id1-s1dataA-1", s1dataA.get(0));
+      assertNull(s1dataA.get(1));
+      assertEquals("id1-s1dataA-3", s1dataA.get(2));
+      assertEquals(3, s1dataB.size());
+      assertEquals("id1-s1dataB-1", s1dataB.get(0));
+      assertEquals("id1-s1dataB-2", s1dataB.get(1));
+      assertEquals("id1-s1dataB-3", s1dataB.get(2));
+      assertEquals(3, s1dataC.size());
+      assertNull(s1dataC.get(0));
+      assertNull(s1dataC.get(1));
+      assertNull(s1dataC.get(2));
+    }
+    { 
+      assertEquals("2", result.get(1).get("id"));
+      List a = (List)result.get(1).get("a");
+      List s1dataA = (List)result.get(1).get("s1dataA");
+      List s1dataB = (List)result.get(1).get("s1dataB");
+      List s1dataC = (List)result.get(1).get("s1dataC");  
+      assertTrue(a==null || a.size()==0);
+      assertEquals(1, s1dataA.size()); 
+      assertNull(s1dataA.get(0));
+      assertEquals(1, s1dataB.size());
+      assertEquals("id2-s1dataB-1", s1dataB.get(0));
+      assertEquals(1, s1dataC.size());
+      assertNull(s1dataC.get(0));
+    }  
+    {
+      assertEquals("3", result.get(2).get("id"));
+      List a = (List)result.get(2).get("a");
+      List s1dataA = (List)result.get(2).get("s1dataA");
+      List s1dataB = (List)result.get(2).get("s1dataB");
+      List s1dataC = (List)result.get(2).get("s1dataC");  
+      assertTrue(a==null || a.size()==0);
+      assertEquals(1, s1dataA.size());
+      assertEquals("id3-s1dataA-1", s1dataA.get(0));
+      assertEquals(1, s1dataB.size());
+      assertNull(s1dataB.get(0));
+      assertEquals(1, s1dataC.size());
+      assertNull(s1dataC.get(0)); 
+    }
+    {  
+      assertEquals("4", result.get(3).get("id"));
+      List a = (List)result.get(3).get("a");
+      List s1dataA = (List)result.get(3).get("s1dataA");
+      List s1dataB = (List)result.get(3).get("s1dataB");
+      List s1dataC = (List)result.get(3).get("s1dataC");  
+      assertTrue(a==null || a.size()==0);
+      assertEquals(1, s1dataA.size());
+      assertEquals("id4-s1dataA-1", s1dataA.get(0));
+      assertEquals(1, s1dataB.size());
+      assertEquals("id4-s1dataB-1", s1dataB.get(0));
+      assertEquals(1, s1dataC.size());
+      assertEquals("id4-s1dataC-1", s1dataC.get(0));
+    }
+    {
+      assertEquals("5", result.get(4).get("id"));
+      List a = (List)result.get(4).get("a");
+      List s1dataA = (List)result.get(4).get("s1dataA");
+      List s1dataB = (List)result.get(4).get("s1dataB");
+      List s1dataC = (List)result.get(4).get("s1dataC");  
+      assertTrue(a==null || a.size()==0);      
+      assertEquals(1, s1dataA.size());
+      assertNull(s1dataA.get(0)); 
+      assertEquals(1, s1dataB.size());
+      assertNull(s1dataB.get(0)); 
+      assertEquals(1, s1dataC.size());
+      assertEquals("id5-s1dataC-1", s1dataC.get(0));
+    }
+    {  
+      assertEquals("6", result.get(5).get("id"));
+      List a = (List)result.get(5).get("a");
+      List s1dataA = (List)result.get(5).get("s1dataA");
+      List s1dataB = (List)result.get(5).get("s1dataB");
+      List s1dataC = (List)result.get(5).get("s1dataC");     
+      assertTrue(a==null || a.size()==0); 
+      assertEquals(3, s1dataA.size());
+      assertEquals("id6-s1dataA-1", s1dataA.get(0));
+      assertEquals("id6-s1dataA-2", s1dataA.get(1));
+      assertNull(s1dataA.get(2));
+      assertEquals(3, s1dataB.size());
+      assertEquals("id6-s1dataB-1", s1dataB.get(0));
+      assertEquals("id6-s1dataB-2", s1dataB.get(1));
+      assertEquals("id6-s1dataB-3", s1dataB.get(2));
+      assertEquals(3, s1dataC.size());
+      assertEquals("id6-s1dataC-1", s1dataC.get(0));
+      assertNull(s1dataC.get(1));
+      assertEquals("id6-s1dataC-3", s1dataC.get(2));
+    }
+  }
 
   @Test
   public void testMultiValuedFlatten() throws Exception  {
@@ -305,4 +427,68 @@ public class TestXPathEntityProcessor ex
   private static final String testXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE root [\n<!ENTITY uuml \"&#252;\" >\n]>\n<root><a>1</a><a>2</a><a>&uuml;</a></root>";
 
   private static final String testXmlFlatten = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><root><a>1<b>B</b>2</a></root>";
+  
+  private static final String textMultipleDocuments = 
+      "<?xml version=\"1.0\" ?>" +
+          "<documents>" +          
+          " <doc>" +
+          "  <id>1</id>" +
+          "  <a>id1-a1</a>" +
+          "  <a>id1-a2</a>" +
+          "  <sec1>" +
+          "   <s1dataA>id1-s1dataA-1</s1dataA>" +
+          "   <s1dataB>id1-s1dataB-1</s1dataB>" +
+          "  </sec1>" +
+          "  <sec1>" +
+          "   <s1dataB>id1-s1dataB-2</s1dataB>" +
+          "  </sec1>" +
+          "  <sec1>" +
+          "   <s1dataA>id1-s1dataA-3</s1dataA>" +
+          "   <s1dataB>id1-s1dataB-3</s1dataB>" +
+          "  </sec1>" +
+          " </doc>" +
+          " <doc>" +
+          "  <id>2</id>" +          
+          "  <sec1>" +
+          "   <s1dataB>id2-s1dataB-1</s1dataB>" +
+          "  </sec1>" + 
+          " </doc>" +
+          " <doc>" +
+          "  <id>3</id>" +          
+          "  <sec1>" +
+          "   <s1dataA>id3-s1dataA-1</s1dataA>" +
+          "  </sec1>" + 
+          " </doc>" +
+          " <doc>" +
+          "  <id>4</id>" +          
+          "  <sec1>" +
+          "   <s1dataA>id4-s1dataA-1</s1dataA>" +
+          "   <s1dataB>id4-s1dataB-1</s1dataB>" +
+          "   <s1dataC>id4-s1dataC-1</s1dataC>" +
+          "  </sec1>" + 
+          " </doc>" +
+          " <doc>" +
+          "  <id>5</id>" +          
+          "  <sec1>" +
+          "   <s1dataC>id5-s1dataC-1</s1dataC>" +
+          "  </sec1>" + 
+          " </doc>" +
+          " <doc>" +
+          "  <id>6</id>" +
+          "  <sec1>" +
+          "   <s1dataA>id6-s1dataA-1</s1dataA>" +
+          "   <s1dataB>id6-s1dataB-1</s1dataB>" +
+          "   <s1dataC>id6-s1dataC-1</s1dataC>" +
+          "  </sec1>" +
+          "  <sec1>" +
+          "   <s1dataA>id6-s1dataA-2</s1dataA>" +
+          "   <s1dataB>id6-s1dataB-2</s1dataB>" +
+          "  </sec1>" +
+          "  <sec1>" +
+          "   <s1dataB>id6-s1dataB-3</s1dataB>" +
+          "   <s1dataC>id6-s1dataC-3</s1dataC>" +
+          "  </sec1>" +
+          " </doc>" +
+          "</documents>"
+         ;
 }