You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by sh...@apache.org on 2009/02/25 06:27:31 UTC

svn commit: r747664 - in /lucene/solr/trunk/contrib/dataimporthandler: ./ src/main/java/org/apache/solr/handler/dataimport/ src/test/java/org/apache/solr/handler/dataimport/

Author: shalin
Date: Wed Feb 25 05:27:31 2009
New Revision: 747664

URL: http://svn.apache.org/viewvc?rev=747664&view=rev
Log:
SOLR-1033 -- Current entity's namespace is made available to all Transformers. This allows one to use an output field of TemplateTransformer in other transformers, among other things.

Modified:
    lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt
    lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java
    lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/TemplateTransformer.java
    lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathEntityProcessor.java
    lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestRegexTransformer.java
    lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestTemplateTransformer.java

Modified: lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt?rev=747664&r1=747663&r2=747664&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt (original)
+++ lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt Wed Feb 25 05:27:31 2009
@@ -82,6 +82,10 @@
 18.SOLR-783:  Enhance delta-imports by maintaining separate last_index_time for each entity.
               (Jon Baer, Noble Paul via shalin)
 
+19.SOLR-1033: Current entity's namespace is made available to all Transformers. This allows one to use an output field
+              of TemplateTransformer in other transformers, among other things.
+              (Fergus McMenemie, Noble Paul via shalin)
+
 Optimizations
 ----------------------
 1. SOLR-846:  Reduce memory consumption during delta import by removing keys when used

Modified: lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java?rev=747664&r1=747663&r2=747664&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java (original)
+++ lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java Wed Feb 25 05:27:31 2009
@@ -51,9 +51,6 @@
 
   protected String query;
 
-  @SuppressWarnings("unchecked")
-  private Map session;
-
   protected String onError = ABORT;
 
   public void init(Context context) {
@@ -67,7 +64,6 @@
     }
     resolver = (VariableResolverImpl) context.getVariableResolver();
     query = null;
-    session = null;
     isFirstInit = false;
 
   }
@@ -169,6 +165,7 @@
         if (rows != null) {
           List<Map<String, Object>> tmpRows = new ArrayList<Map<String, Object>>();
           for (Map<String, Object> map : rows) {
+            resolver.addNamespace(entityName, map);
             Object o = t.transformRow(map, context);
             if (o == null)
               continue;
@@ -184,6 +181,7 @@
           }
           rows = tmpRows;
         } else {
+          resolver.addNamespace(entityName, transformedRow);
           Object o = t.transformRow(transformedRow, context);
           if (o == null)
             return null;
@@ -253,19 +251,6 @@
     return null;
   }
 
-  public void setSessionAttribute(Object key, Object val) {
-    if (session == null) {
-      session = new HashMap();
-    }
-    session.put(key, val);
-  }
-
-  public Object getSessionAttribute(Object key) {
-    if (session == null)
-      return null;
-    return session.get(key);
-  }
-
   /**
    * For a simple implementation, this is the only method that the sub-class should implement. This is intended to
    * stream rows one-by-one. Return null to signal end of rows
@@ -283,14 +268,6 @@
   }
 
   /**
-   * Clears the internal session maintained by this EntityProcessor
-   */
-  public void clearSession() {
-    if (session != null)
-      session.clear();
-  }
-
-  /**
    * Only used by cache implementations
    */
   protected String cachePk;

Modified: lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/TemplateTransformer.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/TemplateTransformer.java?rev=747664&r1=747663&r2=747664&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/TemplateTransformer.java (original)
+++ lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/TemplateTransformer.java Wed Feb 25 05:27:31 2009
@@ -53,26 +53,10 @@
   @SuppressWarnings("unchecked")
   public Object transformRow(Map<String, Object> row, Context context) {
 
-    String entityName = context.getEntityAttribute(DataImporter.NAME);
-
     VariableResolverImpl resolver = (VariableResolverImpl) context
             .getVariableResolver();
-    Map<String, Object> resolverMap = (Map<String, Object>) resolver
-            .resolve(entityName);
-
-    // Clone resolver map because the resolver map contains common fields or any
-    // others
-    // that the entity processor chooses to keep.
-    Map<String, Object> resolverMapCopy = new HashMap<String, Object>();
-    if (resolverMap != null) {
-      for (Map.Entry<String, Object> entry : resolverMap.entrySet())
-        resolverMapCopy.put(entry.getKey(), entry.getValue());
-    }
     // Add current row to the copy of resolver map
-    for (Map.Entry<String, Object> entry : row.entrySet())
-      resolverMapCopy.put(entry.getKey(), entry.getValue());
-    // Add this copy to the namespace of the current entity in the resolver
-    resolver.addNamespace(entityName, resolverMapCopy);
+//    for (Map.Entry<String, Object> entry : row.entrySet())
 
     for (Map<String, String> map : context.getAllEntityFields()) {
       String expr = map.get(TEMPLATE);
@@ -98,8 +82,6 @@
       row.put(column, resolver.replaceTokens(expr));
     }
 
-    // Restore the original resolver map
-    resolver.addNamespace(entityName, resolverMap);
 
     return row;
   }

Modified: lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathEntityProcessor.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathEntityProcessor.java?rev=747664&r1=747663&r2=747664&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathEntityProcessor.java (original)
+++ lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathEntityProcessor.java Wed Feb 25 05:27:31 2009
@@ -182,15 +182,12 @@
         initQuery(resolver.replaceTokens(context.getEntityAttribute(URL)));
       r = getNext();
       if (r == null) {
-        Object hasMore = getSessionAttribute(HAS_MORE);
+        Object hasMore = context.getSessionAttribute(HAS_MORE, Context.SCOPE_ENTITY);
         if ("true".equals(hasMore) || Boolean.TRUE.equals(hasMore)) {
-          String url = (String) getSessionAttribute(NEXT_URL);
+          String url = (String) context.getSessionAttribute(NEXT_URL, Context.SCOPE_ENTITY);
           if (url == null)
             url = context.getEntityAttribute(URL);
-          Map namespace = (Map) getSessionAttribute(entityName);
-          if (namespace != null)
-            resolver.addNamespace(entityName, namespace);
-          clearSession();
+          addNamespace();
           initQuery(resolver.replaceTokens(url));
           r = getNext();
           if (r == null)
@@ -199,12 +196,41 @@
           return null;
         }
       }
+      addCommonFields(r);
       r = applyTransformer(r);
       if (r != null)
         return readUsefulVars(r);
     }
   }
 
+  private void addNamespace() {
+    Map<String, Object> namespace = new HashMap<String, Object>();
+    Set<String> allNames = new HashSet<String>();
+    if (commonFields != null) allNames.addAll(commonFields);
+    if (placeHolderVariables != null) allNames.addAll(placeHolderVariables);
+    if(allNames.isEmpty()) return;
+
+    for (String name : allNames) {
+      Object val = context.getSessionAttribute(name, Context.SCOPE_ENTITY);
+      if (val != null) namespace.put(name, val);
+    }
+    resolver.addNamespace(entityName, namespace);
+
+  }
+
+  private void addCommonFields(Map<String, Object> r) {
+    if(commonFields != null){
+      for (String commonField : commonFields) {
+        if(r.get(commonField) == null) {
+          Object val = context.getSessionAttribute(commonField, Context.SCOPE_ENTITY);
+          if(val != null) r.put(commonField, val);
+        }
+
+      }
+    }
+
+  }
+
   private void initQuery(String s) {
     Reader data = null;
     try {
@@ -251,8 +277,8 @@
             }
           });
         } catch (Exception e) {
-          String msg = "Parsing failed for xml, url:" + s + "rows processed :" + rows.size();
-          if (rows.size() > 0) msg += "last row : " + rows.get(rows.size() - 1);
+          String msg = "Parsing failed for xml, url:" + s + " rows processed:" + rows.size();
+          if (rows.size() > 0) msg += " last row: " + rows.get(rows.size() - 1);
           if (ABORT.equals(onError)) {
             wrapAndThrow(SEVERE, e, msg);
           } else if (SKIP.equals(onError)) {
@@ -321,28 +347,21 @@
   private Map<String, Object> readUsefulVars(Map<String, Object> r) {
     Object val = r.get(HAS_MORE);
     if (val != null)
-      setSessionAttribute(HAS_MORE, val);
+      context.setSessionAttribute(HAS_MORE, val,Context.SCOPE_ENTITY);
     val = r.get(NEXT_URL);
     if (val != null)
-      setSessionAttribute(NEXT_URL, val);
+      context.setSessionAttribute(NEXT_URL, val,Context.SCOPE_ENTITY);
     if (placeHolderVariables != null) {
-      Map namespace = getNameSpace();
       for (String s : placeHolderVariables) {
         val = r.get(s);
-        if (val != null)
-          namespace.put(s, val);
+        context.setSessionAttribute(s, val,Context.SCOPE_ENTITY);
       }
     }
     if (commonFields != null) {
       for (String s : commonFields) {
         Object commonVal = r.get(s);
         if (commonVal != null) {
-          setSessionAttribute(s, commonVal);
-          getNameSpace().put(s, commonVal);
-        } else {
-          commonVal = getSessionAttribute(s);
-          if (commonVal != null)
-            r.put(s, commonVal);
+          context.setSessionAttribute(s, commonVal,Context.SCOPE_ENTITY);
         }
       }
     }
@@ -395,8 +414,8 @@
           if (row == null || row == Collections.EMPTY_MAP) {
             isEnd.set(true);
             if (exp.get() != null) {
-              String msg = "Parsing failed for xml, url:" + s + "rows processed in this xml:" + count;
-              if (lastRow != null) msg += "last row in this xml: " + lastRow;
+              String msg = "Parsing failed for xml, url:" + s + " rows processed in this xml:" + count;
+              if (lastRow != null) msg += " last row in this xml:" + lastRow;
               if (ABORT.equals(onError)) {
                 wrapAndThrow(SEVERE, exp.get(), msg);
               } else if (SKIP.equals(onError)) {
@@ -422,15 +441,6 @@
 
   }
 
-  @SuppressWarnings("unchecked")
-  private Map getNameSpace() {
-    Map namespace = (Map) getSessionAttribute(entityName);
-    if (namespace == null) {
-      namespace = new HashMap();
-      setSessionAttribute(entityName, namespace);
-    }
-    return namespace;
-  }
 
   public static final String URL = "url";
 

Modified: lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestRegexTransformer.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestRegexTransformer.java?rev=747664&r1=747663&r2=747664&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestRegexTransformer.java (original)
+++ lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestRegexTransformer.java Wed Feb 25 05:27:31 2009
@@ -25,9 +25,7 @@
 import java.util.Map;
 
 /**
- * <p>
- * Test for RegexTransformer
- * </p>
+ * <p> Test for RegexTransformer </p>
  *
  * @version $Id$
  * @since solr 1.3
@@ -37,14 +35,14 @@
   @Test
   public void commaSeparated() {
     List<Map<String, String>> fields = new ArrayList<Map<String, String>>();
+    // <field column="col1" sourceColName="a" splitBy="," />
     fields.add(getField("col1", "string", null, "a", ","));
-    Context context = AbstractDataImportHandlerTest.getContext(null, null,
-            null, 0, fields, null);
+    Context context = AbstractDataImportHandlerTest.getContext(null, null, null, 0, fields, null);
+
     Map<String, Object> src = new HashMap<String, Object>();
-    String s = "a,bb,cc,d";
-    src.put("a", s);
-    Map<String, Object> result = new RegexTransformer().transformRow(src,
-            context);
+    src.put("a", "a,bb,cc,d");
+
+    Map<String, Object> result = new RegexTransformer().transformRow(src, context);
     Assert.assertEquals(2, result.size());
     Assert.assertEquals(4, ((List) result.get("col1")).size());
   }
@@ -52,14 +50,17 @@
   @Test
   public void replaceWith() {
     List<Map<String, String>> fields = new ArrayList<Map<String, String>>();
+    // <field column="name" regexp="'" replaceWith="''" />
     Map<String, String> fld = getField("name", "string", "'", null, null);
     fld.put("replaceWith", "''");
     fields.add(fld);
     Context context = AbstractDataImportHandlerTest.getContext(null, null,
             null, 0, fields, null);
+
     Map<String, Object> src = new HashMap<String, Object>();
     String s = "D'souza";
     src.put("name", s);
+
     Map<String, Object> result = new RegexTransformer().transformRow(src,
             context);
     Assert.assertEquals("D''souza", result.get("name"));
@@ -67,36 +68,57 @@
 
   @Test
   public void mileage() {
-    Context context = AbstractDataImportHandlerTest.getContext(null, null,
-            null, 0, getFields(), null);
+    List<Map<String, String>> fields = getFields();
 
-    Map<String, Object> src = new HashMap<String, Object>();
+    // add another regex which reuses result from previous regex again!
+    // <field column="hltCityMPG" sourceColName="rowdata" regexp=".*(${e.city_mileage})" replaceWith="*** $1 ***" />
+    Map<String, String> fld = getField("hltCityMPG", "string",
+            ".*(${e.city_mileage})", "rowdata", null);
+    fld.put("replaceWith", "*** $1 ***");
+    fields.add(fld);
+
+    Map<String, Object> row = new HashMap<String, Object>();
     String s = "Fuel Economy Range: 26 mpg Hwy, 19 mpg City";
-    src.put("rowdata", s);
-    Map<String, Object> result = new RegexTransformer().transformRow(src,
-            context);
-    Assert.assertEquals(3, result.size());
+    row.put("rowdata", s);
+
+    VariableResolverImpl resolver = new VariableResolverImpl();
+    resolver.addNamespace("e", row);
+    Map<String, String> eAttrs = AbstractDataImportHandlerTest.createMap("name", "e");
+    Context context = AbstractDataImportHandlerTest.getContext(null, resolver, null, 0, fields, eAttrs);
+
+    Map<String, Object> result = new RegexTransformer().transformRow(row, context);
+    Assert.assertEquals(4, result.size());
     Assert.assertEquals(s, result.get("rowdata"));
     Assert.assertEquals("26", result.get("highway_mileage"));
     Assert.assertEquals("19", result.get("city_mileage"));
-
+    Assert.assertEquals("*** 19 *** mpg City", result.get("hltCityMPG"));
   }
 
   public static List<Map<String, String>> getFields() {
     List<Map<String, String>> fields = new ArrayList<Map<String, String>>();
+
+    // <field column="city_mileage" sourceColName="rowdata" regexp=
+    //    "Fuel Economy Range:\\s*?\\d*?\\s*?mpg Hwy,\\s*?(\\d*?)\\s*?mpg City" />
     fields.add(getField("city_mileage", "sint",
             "Fuel Economy Range:\\s*?\\d*?\\s*?mpg Hwy,\\s*?(\\d*?)\\s*?mpg City",
             "rowdata", null));
+
+    // <field column="highway_mileage" sourceColName="rowdata" regexp=
+    //    "Fuel Economy Range:\\s*?(\\d*?)\\s*?mpg Hwy,\\s*?\\d*?\\s*?mpg City" />
     fields.add(getField("highway_mileage", "sint",
             "Fuel Economy Range:\\s*?(\\d*?)\\s*?mpg Hwy,\\s*?\\d*?\\s*?mpg City",
             "rowdata", null));
+
+    // <field column="seating_capacity" sourceColName="rowdata" regexp="Seating capacity:(.*)" />
     fields.add(getField("seating_capacity", "sint", "Seating capacity:(.*)",
             "rowdata", null));
-    fields
-            .add(getField("warranty", "string", "Warranty:(.*)", "rowdata", null));
+
+    // <field column="warranty" sourceColName="rowdata" regexp="Warranty:(.*)" />
+    fields.add(getField("warranty", "string", "Warranty:(.*)", "rowdata", null));
+
+    // <field column="rowdata" sourceColName="rowdata" />
     fields.add(getField("rowdata", "string", null, "rowdata", null));
     return fields;
-
   }
 
   public static Map<String, String> getField(String col, String type,

Modified: lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestTemplateTransformer.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestTemplateTransformer.java?rev=747664&r1=747663&r2=747664&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestTemplateTransformer.java (original)
+++ lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestTemplateTransformer.java Wed Feb 25 05:27:31 2009
@@ -43,11 +43,17 @@
     fields.add(AbstractDataImportHandlerTest.createMap("column", "name",
             TemplateTransformer.TEMPLATE,
             "${e.lastName}, ${e.firstName} ${e.middleName}"));
-
-    Map row = AbstractDataImportHandlerTest.createMap("firstName", "Shalin",
-            "middleName", "Shekhar", "lastName", "Mangar");
+    // test reuse of template output in another template 
+    fields.add(AbstractDataImportHandlerTest.createMap("column", "mrname",
+            TemplateTransformer.TEMPLATE,"Mr ${e.name}"));
+            
+    Map row = AbstractDataImportHandlerTest.createMap(
+            "firstName", "Shalin",
+            "middleName", "Shekhar", 
+            "lastName", "Mangar");
 
     VariableResolverImpl resolver = new VariableResolverImpl();
+    resolver.addNamespace("e", row);
     Map<String, String> entityAttrs = AbstractDataImportHandlerTest.createMap(
             "name", "e");
 
@@ -55,6 +61,7 @@
             null, 0, fields, entityAttrs);
     new TemplateTransformer().transformRow(row, context);
     Assert.assertEquals("Mangar, Shalin Shekhar", row.get("name"));
+    Assert.assertEquals("Mr Mangar, Shalin Shekhar", row.get("mrname"));
   }
 
 }