You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by sh...@apache.org on 2009/02/25 06:27:31 UTC
svn commit: r747664 - in /lucene/solr/trunk/contrib/dataimporthandler: ./
src/main/java/org/apache/solr/handler/dataimport/
src/test/java/org/apache/solr/handler/dataimport/
Author: shalin
Date: Wed Feb 25 05:27:31 2009
New Revision: 747664
URL: http://svn.apache.org/viewvc?rev=747664&view=rev
Log:
SOLR-1033 -- Current entity's namespace is made available to all Transformers. This allows one to use an output field of TemplateTransformer in other transformers, among other things.
Modified:
lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt
lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java
lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/TemplateTransformer.java
lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathEntityProcessor.java
lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestRegexTransformer.java
lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestTemplateTransformer.java
Modified: lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt?rev=747664&r1=747663&r2=747664&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt (original)
+++ lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt Wed Feb 25 05:27:31 2009
@@ -82,6 +82,10 @@
18.SOLR-783: Enhance delta-imports by maintaining separate last_index_time for each entity.
(Jon Baer, Noble Paul via shalin)
+19.SOLR-1033: Current entity's namespace is made available to all Transformers. This allows one to use an output field
+ of TemplateTransformer in other transformers, among other things.
+ (Fergus McMenemie, Noble Paul via shalin)
+
Optimizations
----------------------
1. SOLR-846: Reduce memory consumption during delta import by removing keys when used
Modified: lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java?rev=747664&r1=747663&r2=747664&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java (original)
+++ lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java Wed Feb 25 05:27:31 2009
@@ -51,9 +51,6 @@
protected String query;
- @SuppressWarnings("unchecked")
- private Map session;
-
protected String onError = ABORT;
public void init(Context context) {
@@ -67,7 +64,6 @@
}
resolver = (VariableResolverImpl) context.getVariableResolver();
query = null;
- session = null;
isFirstInit = false;
}
@@ -169,6 +165,7 @@
if (rows != null) {
List<Map<String, Object>> tmpRows = new ArrayList<Map<String, Object>>();
for (Map<String, Object> map : rows) {
+ resolver.addNamespace(entityName, map);
Object o = t.transformRow(map, context);
if (o == null)
continue;
@@ -184,6 +181,7 @@
}
rows = tmpRows;
} else {
+ resolver.addNamespace(entityName, transformedRow);
Object o = t.transformRow(transformedRow, context);
if (o == null)
return null;
@@ -253,19 +251,6 @@
return null;
}
- public void setSessionAttribute(Object key, Object val) {
- if (session == null) {
- session = new HashMap();
- }
- session.put(key, val);
- }
-
- public Object getSessionAttribute(Object key) {
- if (session == null)
- return null;
- return session.get(key);
- }
-
/**
* For a simple implementation, this is the only method that the sub-class should implement. This is intended to
* stream rows one-by-one. Return null to signal end of rows
@@ -283,14 +268,6 @@
}
/**
- * Clears the internal session maintained by this EntityProcessor
- */
- public void clearSession() {
- if (session != null)
- session.clear();
- }
-
- /**
* Only used by cache implementations
*/
protected String cachePk;
Modified: lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/TemplateTransformer.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/TemplateTransformer.java?rev=747664&r1=747663&r2=747664&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/TemplateTransformer.java (original)
+++ lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/TemplateTransformer.java Wed Feb 25 05:27:31 2009
@@ -53,26 +53,10 @@
@SuppressWarnings("unchecked")
public Object transformRow(Map<String, Object> row, Context context) {
- String entityName = context.getEntityAttribute(DataImporter.NAME);
-
VariableResolverImpl resolver = (VariableResolverImpl) context
.getVariableResolver();
- Map<String, Object> resolverMap = (Map<String, Object>) resolver
- .resolve(entityName);
-
- // Clone resolver map because the resolver map contains common fields or any
- // others
- // that the entity processor chooses to keep.
- Map<String, Object> resolverMapCopy = new HashMap<String, Object>();
- if (resolverMap != null) {
- for (Map.Entry<String, Object> entry : resolverMap.entrySet())
- resolverMapCopy.put(entry.getKey(), entry.getValue());
- }
// Add current row to the copy of resolver map
- for (Map.Entry<String, Object> entry : row.entrySet())
- resolverMapCopy.put(entry.getKey(), entry.getValue());
- // Add this copy to the namespace of the current entity in the resolver
- resolver.addNamespace(entityName, resolverMapCopy);
+// for (Map.Entry<String, Object> entry : row.entrySet())
for (Map<String, String> map : context.getAllEntityFields()) {
String expr = map.get(TEMPLATE);
@@ -98,8 +82,6 @@
row.put(column, resolver.replaceTokens(expr));
}
- // Restore the original resolver map
- resolver.addNamespace(entityName, resolverMap);
return row;
}
Modified: lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathEntityProcessor.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathEntityProcessor.java?rev=747664&r1=747663&r2=747664&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathEntityProcessor.java (original)
+++ lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathEntityProcessor.java Wed Feb 25 05:27:31 2009
@@ -182,15 +182,12 @@
initQuery(resolver.replaceTokens(context.getEntityAttribute(URL)));
r = getNext();
if (r == null) {
- Object hasMore = getSessionAttribute(HAS_MORE);
+ Object hasMore = context.getSessionAttribute(HAS_MORE, Context.SCOPE_ENTITY);
if ("true".equals(hasMore) || Boolean.TRUE.equals(hasMore)) {
- String url = (String) getSessionAttribute(NEXT_URL);
+ String url = (String) context.getSessionAttribute(NEXT_URL, Context.SCOPE_ENTITY);
if (url == null)
url = context.getEntityAttribute(URL);
- Map namespace = (Map) getSessionAttribute(entityName);
- if (namespace != null)
- resolver.addNamespace(entityName, namespace);
- clearSession();
+ addNamespace();
initQuery(resolver.replaceTokens(url));
r = getNext();
if (r == null)
@@ -199,12 +196,41 @@
return null;
}
}
+ addCommonFields(r);
r = applyTransformer(r);
if (r != null)
return readUsefulVars(r);
}
}
+ private void addNamespace() {
+ Map<String, Object> namespace = new HashMap<String, Object>();
+ Set<String> allNames = new HashSet<String>();
+ if (commonFields != null) allNames.addAll(commonFields);
+ if (placeHolderVariables != null) allNames.addAll(placeHolderVariables);
+ if(allNames.isEmpty()) return;
+
+ for (String name : allNames) {
+ Object val = context.getSessionAttribute(name, Context.SCOPE_ENTITY);
+ if (val != null) namespace.put(name, val);
+ }
+ resolver.addNamespace(entityName, namespace);
+
+ }
+
+ private void addCommonFields(Map<String, Object> r) {
+ if(commonFields != null){
+ for (String commonField : commonFields) {
+ if(r.get(commonField) == null) {
+ Object val = context.getSessionAttribute(commonField, Context.SCOPE_ENTITY);
+ if(val != null) r.put(commonField, val);
+ }
+
+ }
+ }
+
+ }
+
private void initQuery(String s) {
Reader data = null;
try {
@@ -251,8 +277,8 @@
}
});
} catch (Exception e) {
- String msg = "Parsing failed for xml, url:" + s + "rows processed :" + rows.size();
- if (rows.size() > 0) msg += "last row : " + rows.get(rows.size() - 1);
+ String msg = "Parsing failed for xml, url:" + s + " rows processed:" + rows.size();
+ if (rows.size() > 0) msg += " last row: " + rows.get(rows.size() - 1);
if (ABORT.equals(onError)) {
wrapAndThrow(SEVERE, e, msg);
} else if (SKIP.equals(onError)) {
@@ -321,28 +347,21 @@
private Map<String, Object> readUsefulVars(Map<String, Object> r) {
Object val = r.get(HAS_MORE);
if (val != null)
- setSessionAttribute(HAS_MORE, val);
+ context.setSessionAttribute(HAS_MORE, val,Context.SCOPE_ENTITY);
val = r.get(NEXT_URL);
if (val != null)
- setSessionAttribute(NEXT_URL, val);
+ context.setSessionAttribute(NEXT_URL, val,Context.SCOPE_ENTITY);
if (placeHolderVariables != null) {
- Map namespace = getNameSpace();
for (String s : placeHolderVariables) {
val = r.get(s);
- if (val != null)
- namespace.put(s, val);
+ context.setSessionAttribute(s, val,Context.SCOPE_ENTITY);
}
}
if (commonFields != null) {
for (String s : commonFields) {
Object commonVal = r.get(s);
if (commonVal != null) {
- setSessionAttribute(s, commonVal);
- getNameSpace().put(s, commonVal);
- } else {
- commonVal = getSessionAttribute(s);
- if (commonVal != null)
- r.put(s, commonVal);
+ context.setSessionAttribute(s, commonVal,Context.SCOPE_ENTITY);
}
}
}
@@ -395,8 +414,8 @@
if (row == null || row == Collections.EMPTY_MAP) {
isEnd.set(true);
if (exp.get() != null) {
- String msg = "Parsing failed for xml, url:" + s + "rows processed in this xml:" + count;
- if (lastRow != null) msg += "last row in this xml: " + lastRow;
+ String msg = "Parsing failed for xml, url:" + s + " rows processed in this xml:" + count;
+ if (lastRow != null) msg += " last row in this xml:" + lastRow;
if (ABORT.equals(onError)) {
wrapAndThrow(SEVERE, exp.get(), msg);
} else if (SKIP.equals(onError)) {
@@ -422,15 +441,6 @@
}
- @SuppressWarnings("unchecked")
- private Map getNameSpace() {
- Map namespace = (Map) getSessionAttribute(entityName);
- if (namespace == null) {
- namespace = new HashMap();
- setSessionAttribute(entityName, namespace);
- }
- return namespace;
- }
public static final String URL = "url";
Modified: lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestRegexTransformer.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestRegexTransformer.java?rev=747664&r1=747663&r2=747664&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestRegexTransformer.java (original)
+++ lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestRegexTransformer.java Wed Feb 25 05:27:31 2009
@@ -25,9 +25,7 @@
import java.util.Map;
/**
- * <p>
- * Test for RegexTransformer
- * </p>
+ * <p> Test for RegexTransformer </p>
*
* @version $Id$
* @since solr 1.3
@@ -37,14 +35,14 @@
@Test
public void commaSeparated() {
List<Map<String, String>> fields = new ArrayList<Map<String, String>>();
+ // <field column="col1" sourceColName="a" splitBy="," />
fields.add(getField("col1", "string", null, "a", ","));
- Context context = AbstractDataImportHandlerTest.getContext(null, null,
- null, 0, fields, null);
+ Context context = AbstractDataImportHandlerTest.getContext(null, null, null, 0, fields, null);
+
Map<String, Object> src = new HashMap<String, Object>();
- String s = "a,bb,cc,d";
- src.put("a", s);
- Map<String, Object> result = new RegexTransformer().transformRow(src,
- context);
+ src.put("a", "a,bb,cc,d");
+
+ Map<String, Object> result = new RegexTransformer().transformRow(src, context);
Assert.assertEquals(2, result.size());
Assert.assertEquals(4, ((List) result.get("col1")).size());
}
@@ -52,14 +50,17 @@
@Test
public void replaceWith() {
List<Map<String, String>> fields = new ArrayList<Map<String, String>>();
+ // <field column="name" regexp="'" replaceWith="''" />
Map<String, String> fld = getField("name", "string", "'", null, null);
fld.put("replaceWith", "''");
fields.add(fld);
Context context = AbstractDataImportHandlerTest.getContext(null, null,
null, 0, fields, null);
+
Map<String, Object> src = new HashMap<String, Object>();
String s = "D'souza";
src.put("name", s);
+
Map<String, Object> result = new RegexTransformer().transformRow(src,
context);
Assert.assertEquals("D''souza", result.get("name"));
@@ -67,36 +68,57 @@
@Test
public void mileage() {
- Context context = AbstractDataImportHandlerTest.getContext(null, null,
- null, 0, getFields(), null);
+ List<Map<String, String>> fields = getFields();
- Map<String, Object> src = new HashMap<String, Object>();
+ // add another regex which reuses result from previous regex again!
+ // <field column="hltCityMPG" sourceColName="rowdata" regexp=".*(${e.city_mileage})" replaceWith="*** $1 ***" />
+ Map<String, String> fld = getField("hltCityMPG", "string",
+ ".*(${e.city_mileage})", "rowdata", null);
+ fld.put("replaceWith", "*** $1 ***");
+ fields.add(fld);
+
+ Map<String, Object> row = new HashMap<String, Object>();
String s = "Fuel Economy Range: 26 mpg Hwy, 19 mpg City";
- src.put("rowdata", s);
- Map<String, Object> result = new RegexTransformer().transformRow(src,
- context);
- Assert.assertEquals(3, result.size());
+ row.put("rowdata", s);
+
+ VariableResolverImpl resolver = new VariableResolverImpl();
+ resolver.addNamespace("e", row);
+ Map<String, String> eAttrs = AbstractDataImportHandlerTest.createMap("name", "e");
+ Context context = AbstractDataImportHandlerTest.getContext(null, resolver, null, 0, fields, eAttrs);
+
+ Map<String, Object> result = new RegexTransformer().transformRow(row, context);
+ Assert.assertEquals(4, result.size());
Assert.assertEquals(s, result.get("rowdata"));
Assert.assertEquals("26", result.get("highway_mileage"));
Assert.assertEquals("19", result.get("city_mileage"));
-
+ Assert.assertEquals("*** 19 *** mpg City", result.get("hltCityMPG"));
}
public static List<Map<String, String>> getFields() {
List<Map<String, String>> fields = new ArrayList<Map<String, String>>();
+
+ // <field column="city_mileage" sourceColName="rowdata" regexp=
+ // "Fuel Economy Range:\\s*?\\d*?\\s*?mpg Hwy,\\s*?(\\d*?)\\s*?mpg City" />
fields.add(getField("city_mileage", "sint",
"Fuel Economy Range:\\s*?\\d*?\\s*?mpg Hwy,\\s*?(\\d*?)\\s*?mpg City",
"rowdata", null));
+
+ // <field column="highway_mileage" sourceColName="rowdata" regexp=
+ // "Fuel Economy Range:\\s*?(\\d*?)\\s*?mpg Hwy,\\s*?\\d*?\\s*?mpg City" />
fields.add(getField("highway_mileage", "sint",
"Fuel Economy Range:\\s*?(\\d*?)\\s*?mpg Hwy,\\s*?\\d*?\\s*?mpg City",
"rowdata", null));
+
+ // <field column="seating_capacity" sourceColName="rowdata" regexp="Seating capacity:(.*)" />
fields.add(getField("seating_capacity", "sint", "Seating capacity:(.*)",
"rowdata", null));
- fields
- .add(getField("warranty", "string", "Warranty:(.*)", "rowdata", null));
+
+ // <field column="warranty" sourceColName="rowdata" regexp="Warranty:(.*)" />
+ fields.add(getField("warranty", "string", "Warranty:(.*)", "rowdata", null));
+
+ // <field column="rowdata" sourceColName="rowdata" />
fields.add(getField("rowdata", "string", null, "rowdata", null));
return fields;
-
}
public static Map<String, String> getField(String col, String type,
Modified: lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestTemplateTransformer.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestTemplateTransformer.java?rev=747664&r1=747663&r2=747664&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestTemplateTransformer.java (original)
+++ lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestTemplateTransformer.java Wed Feb 25 05:27:31 2009
@@ -43,11 +43,17 @@
fields.add(AbstractDataImportHandlerTest.createMap("column", "name",
TemplateTransformer.TEMPLATE,
"${e.lastName}, ${e.firstName} ${e.middleName}"));
-
- Map row = AbstractDataImportHandlerTest.createMap("firstName", "Shalin",
- "middleName", "Shekhar", "lastName", "Mangar");
+ // test reuse of template output in another template
+ fields.add(AbstractDataImportHandlerTest.createMap("column", "mrname",
+ TemplateTransformer.TEMPLATE,"Mr ${e.name}"));
+
+ Map row = AbstractDataImportHandlerTest.createMap(
+ "firstName", "Shalin",
+ "middleName", "Shekhar",
+ "lastName", "Mangar");
VariableResolverImpl resolver = new VariableResolverImpl();
+ resolver.addNamespace("e", row);
Map<String, String> entityAttrs = AbstractDataImportHandlerTest.createMap(
"name", "e");
@@ -55,6 +61,7 @@
null, 0, fields, entityAttrs);
new TemplateTransformer().transformRow(row, context);
Assert.assertEquals("Mangar, Shalin Shekhar", row.get("name"));
+ Assert.assertEquals("Mr Mangar, Shalin Shekhar", row.get("mrname"));
}
}