You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by cp...@apache.org on 2016/10/31 14:06:02 UTC

[09/37] lucene-solr:jira/solr-8542-v2: SOLR-2094: XPathEntityProcessor should reinitialize the XPathRecordReader instance if the 'forEach' or 'xpath' attributes are templates & it is not a root entity

SOLR-2094: XPathEntityProcessor should reinitialize the XPathRecordReader instance if the 'forEach' or 'xpath' attributes are templates & it is not a root entity


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/d6b6e747
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/d6b6e747
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/d6b6e747

Branch: refs/heads/jira/solr-8542-v2
Commit: d6b6e74703d5f2d29c110d3a7d9491306af9be2c
Parents: d9c4846
Author: Noble Paul <no...@apache.org>
Authored: Thu Oct 27 14:52:24 2016 +0530
Committer: Noble Paul <no...@apache.org>
Committed: Thu Oct 27 14:52:24 2016 +0530

----------------------------------------------------------------------
 solr/CHANGES.txt                                |   3 +
 .../dataimport/XPathEntityProcessor.java        |  12 +-
 .../dataimport/MockStringDataSource.java        |  54 ++++++++
 .../solr/handler/dataimport/TestDocBuilder.java | 129 ++++++++++++++++---
 4 files changed, 181 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d6b6e747/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 7d9a4fa..ae1d709 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -234,6 +234,9 @@ Bug Fixes
   (Cao Manh Dat, Lance Norskog, Webster Homer, hossman, yonik)
 
 * SOLR-9692: blockUnknown property makes inter-node communication impossible (noble)
+
+* SOLR-2094: XPathEntityProcessor should reinitialize the XPathRecordReader instance if
+  the 'forEach' or 'xpath' attributes are templates & it is not a root entity (Cao Manh Dat, noble)
  
 Optimizations
 ----------------------

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d6b6e747/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/XPathEntityProcessor.java
----------------------------------------------------------------------
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/XPathEntityProcessor.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/XPathEntityProcessor.java
index 637e1ef..cc28dc4 100644
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/XPathEntityProcessor.java
+++ b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/XPathEntityProcessor.java
@@ -85,12 +85,14 @@ public class XPathEntityProcessor extends EntityProcessorBase {
   protected int blockingQueueSize = 1000;
 
   protected Thread publisherThread;
+
+  protected boolean reinitXPathReader = true;
   
   @Override
   @SuppressWarnings("unchecked")
   public void init(Context context) {
     super.init(context);
-    if (xpathReader == null)
+    if (reinitXPathReader)
       initXpathReader(context.getVariableResolver());
     pk = context.getEntityAttribute("pk");
     dataSource = context.getDataSource();
@@ -99,6 +101,7 @@ public class XPathEntityProcessor extends EntityProcessorBase {
   }
 
   private void initXpathReader(VariableResolver resolver) {
+    reinitXPathReader = false;
     useSolrAddXml = Boolean.parseBoolean(context
             .getEntityAttribute(USE_SOLR_ADD_SCHEMA));
     streamRows = Boolean.parseBoolean(context
@@ -147,11 +150,12 @@ public class XPathEntityProcessor extends EntityProcessorBase {
       xpathReader.addField("name", "/add/doc/field/@name", true);
       xpathReader.addField("value", "/add/doc/field", true);
     } else {
-      String forEachXpath = context.getEntityAttribute(FOR_EACH);
+      String forEachXpath = context.getResolvedEntityAttribute(FOR_EACH);
       if (forEachXpath == null)
         throw new DataImportHandlerException(SEVERE,
                 "Entity : " + context.getEntityAttribute("name")
                         + " must have a 'forEach' attribute");
+      if (forEachXpath.equals(context.getEntityAttribute(FOR_EACH))) reinitXPathReader = true;
 
       try {
         xpathReader = new XPathRecordReader(forEachXpath);
@@ -164,6 +168,10 @@ public class XPathEntityProcessor extends EntityProcessorBase {
           }
           String xpath = field.get(XPATH);
           xpath = context.replaceTokens(xpath);
+          //!xpath.equals(field.get(XPATH) means the field xpath has a template
+          //in that case ensure that the XPathRecordReader is reinitialized
+          //for each xml
+          if (!xpath.equals(field.get(XPATH)) && !context.isRootEntity()) reinitXPathReader = true;
           xpathReader.addField(field.get(DataImporter.COLUMN),
                   xpath,
                   Boolean.parseBoolean(field.get(DataImporter.MULTI_VALUED)),

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d6b6e747/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/MockStringDataSource.java
----------------------------------------------------------------------
diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/MockStringDataSource.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/MockStringDataSource.java
new file mode 100644
index 0000000..7c9a6d1
--- /dev/null
+++ b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/MockStringDataSource.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.handler.dataimport;
+
+
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Properties;
+
+public class MockStringDataSource extends DataSource<Reader> {
+
+  private static Map<String, String> cache = new HashMap<>();
+
+  public static void setData(String query,
+                                 String data) {
+    cache.put(query, data);
+  }
+
+  public static void clearCache() {
+    cache.clear();
+  }
+  @Override
+  public void init(Context context, Properties initProps) {
+
+  }
+
+  @Override
+  public Reader getData(String query) {
+    return new StringReader(cache.get(query));
+  }
+
+  @Override
+  public void close() {
+    cache.clear();
+
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d6b6e747/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestDocBuilder.java
----------------------------------------------------------------------
diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestDocBuilder.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestDocBuilder.java
index 527dad0..39dd891 100644
--- a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestDocBuilder.java
+++ b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestDocBuilder.java
@@ -39,9 +39,10 @@ public class TestDocBuilder extends AbstractDataImportHandlerTestCase {
   @After
   public void tearDown() throws Exception {
     MockDataSource.clearCache();
+    MockStringDataSource.clearCache();
     super.tearDown();
   }
-  
+
   @Test
   public void loadClass() throws Exception {
     @SuppressWarnings("unchecked")
@@ -180,6 +181,52 @@ public class TestDocBuilder extends AbstractDataImportHandlerTestCase {
     assertEquals(3, di.getDocBuilder().importStatistics.rowsCount.get());
   }
 
+  @Test
+  public void templateXPath() {
+    DataImporter di = new DataImporter();
+    di.loadAndInit(dc_variableXpath);
+    DIHConfiguration cfg = di.getConfig();
+
+    RequestInfo rp = new RequestInfo(null, createMap("command", "full-import"), null);
+    List<Map<String, Object>> l = new ArrayList<>();
+    l.add(createMap("id", 1, "name", "iphone", "manufacturer", "Apple"));
+    l.add(createMap("id", 2, "name", "ipad", "manufacturer", "Apple"));
+    l.add(createMap("id", 3, "name", "pixel", "manufacturer", "Google"));
+
+    MockDataSource.setIterator("select * from x", l.iterator());
+
+    List<Map<String,Object>> nestedData = new ArrayList<>();
+    nestedData.add(createMap("founded", "Cupertino, California, U.S", "year", "1976", "year2", "1976"));
+    nestedData.add(createMap("founded", "Cupertino, California, U.S", "year", "1976", "year2", "1976"));
+    nestedData.add(createMap("founded", "Menlo Park, California, U.S", "year", "1998", "year2", "1998"));
+
+    MockStringDataSource.setData("companies.xml", xml_attrVariableXpath);
+    MockStringDataSource.setData("companies2.xml", xml_variableXpath);
+    MockStringDataSource.setData("companies3.xml", xml_variableForEach);
+
+    SolrWriterImpl swi = new SolrWriterImpl();
+    di.runCmd(rp, swi);
+    assertEquals(Boolean.TRUE, swi.deleteAllCalled);
+    assertEquals(Boolean.TRUE, swi.commitCalled);
+    assertEquals(Boolean.TRUE, swi.finishCalled);
+    assertEquals(3, swi.docs.size());
+    for (int i = 0; i < l.size(); i++) {
+      SolrInputDocument doc = swi.docs.get(i);
+
+      Map<String, Object> map = l.get(i);
+      for (Map.Entry<String, Object> entry : map.entrySet()) {
+        assertEquals(entry.getValue(), doc.getFieldValue(entry.getKey()));
+      }
+
+      map = nestedData.get(i);
+      for (Map.Entry<String, Object> entry : map.entrySet()) {
+        assertEquals(entry.getValue(), doc.getFieldValue(entry.getKey()));
+      }
+    }
+    assertEquals(1, di.getDocBuilder().importStatistics.queryCount.get());
+    assertEquals(3, di.getDocBuilder().importStatistics.docCount.get());
+  }
+
   static class SolrWriterImpl extends SolrWriter {
     List<SolrInputDocument> docs = new ArrayList<>();
 
@@ -215,21 +262,73 @@ public class TestDocBuilder extends AbstractDataImportHandlerTestCase {
   }
 
   public static final String dc_singleEntity = "<dataConfig>\n"
-          + "<dataSource  type=\"MockDataSource\"/>\n"
-          + "    <document name=\"X\" >\n"
-          + "        <entity name=\"x\" query=\"select * from x\">\n"
-          + "          <field column=\"id\"/>\n"
-          + "          <field column=\"desc\"/>\n"
-          + "          <field column=\"desc\" name=\"desc_s\" />" + "        </entity>\n"
-          + "    </document>\n" + "</dataConfig>";
+      + "<dataSource  type=\"MockDataSource\"/>\n"
+      + "    <document name=\"X\" >\n"
+      + "        <entity name=\"x\" query=\"select * from x\">\n"
+      + "          <field column=\"id\"/>\n"
+      + "          <field column=\"desc\"/>\n"
+      + "          <field column=\"desc\" name=\"desc_s\" />" + "        </entity>\n"
+      + "    </document>\n" + "</dataConfig>";
 
   public static final String dc_deltaConfig = "<dataConfig>\n"
-          + "<dataSource  type=\"MockDataSource\"/>\n"          
-          + "    <document name=\"X\" >\n"
-          + "        <entity name=\"x\" query=\"select * from x\" deltaQuery=\"select id from x\">\n"
-          + "          <field column=\"id\"/>\n"
-          + "          <field column=\"desc\"/>\n"
-          + "          <field column=\"desc\" name=\"desc_s\" />" + "        </entity>\n"
-          + "    </document>\n" + "</dataConfig>";
+      + "<dataSource  type=\"MockDataSource\"/>\n"
+      + "    <document name=\"X\" >\n"
+      + "        <entity name=\"x\" query=\"select * from x\" deltaQuery=\"select id from x\">\n"
+      + "          <field column=\"id\"/>\n"
+      + "          <field column=\"desc\"/>\n"
+      + "          <field column=\"desc\" name=\"desc_s\" />" + "        </entity>\n"
+      + "    </document>\n" + "</dataConfig>";
+
+  public static final String dc_variableXpath = "<dataConfig>\n"
+      + "<dataSource type=\"MockDataSource\"/>\n"
+      + "<dataSource name=\"xml\" type=\"MockStringDataSource\"/>\n"
+      + "    <document name=\"X\" >\n"
+      + "        <entity name=\"x\" query=\"select * from x\">\n"
+      + "          <field column=\"id\"/>\n"
+      + "          <field column=\"name\"/>\n"
+      + "          <field column=\"manufacturer\"/>"
+      + "          <entity name=\"c1\" url=\"companies.xml\" dataSource=\"xml\" forEach=\"/companies/company\" processor=\"XPathEntityProcessor\">"
+      + "            <field column=\"year\" xpath=\"/companies/company/year[@name='p_${x.manufacturer}_s']\" />"
+      + "          </entity>"
+      + "          <entity name=\"c2\" url=\"companies2.xml\" dataSource=\"xml\" forEach=\"/companies/company\" processor=\"XPathEntityProcessor\">"
+      + "            <field column=\"founded\" xpath=\"/companies/company/p_${x.manufacturer}_s/founded\" />"
+      + "          </entity>"
+      + "          <entity name=\"c3\" url=\"companies3.xml\" dataSource=\"xml\" forEach=\"/companies/${x.manufacturer}\" processor=\"XPathEntityProcessor\">"
+      + "            <field column=\"year2\" xpath=\"/companies/${x.manufacturer}/year\" />"
+      + "          </entity>"
+      + "        </entity>\n"
+      + "    </document>\n" + "</dataConfig>";
+
+
+  public static final String xml_variableForEach = "<companies>\n" +
+      "\t<Apple>\n" +
+      "\t\t<year>1976</year>\n" +
+      "\t</Apple>\n" +
+      "\t<Google>\n" +
+      "\t\t<year>1998</year>\n" +
+      "\t</Google>\n" +
+      "</companies>";
+
+  public static final String xml_variableXpath = "<companies>\n" +
+      "\t<company>\n" +
+      "\t\t<p_Apple_s>\n" +
+      "\t\t\t<founded>Cupertino, California, U.S</founded>\n" +
+      "\t\t</p_Apple_s>\t\t\n" +
+      "\t</company>\n" +
+      "\t<company>\n" +
+      "\t\t<p_Google_s>\n" +
+      "\t\t\t<founded>Menlo Park, California, U.S</founded>\n" +
+      "\t\t</p_Google_s>\n" +
+      "\t</company>\n" +
+      "</companies>";
+
+  public static final String xml_attrVariableXpath = "<companies>\n" +
+      "\t<company>\n" +
+      "\t\t<year name='p_Apple_s'>1976</year>\n" +
+      "\t</company>\n" +
+      "\t<company>\n" +
+      "\t\t<year name='p_Google_s'>1998</year>\t\t\n" +
+      "\t</company>\n" +
+      "</companies>";
 
 }