You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by gs...@apache.org on 2009/09/15 14:31:12 UTC
svn commit: r815293 - in /lucene/solr/trunk/contrib/extraction/src:
main/java/org/apache/solr/handler/extraction/
test/java/org/apache/solr/handler/ test/resources/solr/conf/
Author: gsingers
Date: Tue Sep 15 12:31:11 2009
New Revision: 815293
URL: http://svn.apache.org/viewvc?rev=815293&view=rev
Log:
SOLR-284: Add defaultField capability
Modified:
lucene/solr/trunk/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/ExtractingParams.java
lucene/solr/trunk/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/SolrContentHandler.java
lucene/solr/trunk/contrib/extraction/src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java
lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/schema.xml
Modified: lucene/solr/trunk/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/ExtractingParams.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/ExtractingParams.java?rev=815293&r1=815292&r2=815293&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/ExtractingParams.java (original)
+++ lucene/solr/trunk/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/ExtractingParams.java Tue Sep 15 12:31:11 2009
@@ -133,4 +133,10 @@
* to setup a dynamic field to automatically capture it
*/
public static final String UNKNOWN_FIELD_PREFIX = "uprefix";
+
+ /**
+ * Optional. If specified, a field name that is not defined in the schema (when no {@link #UNKNOWN_FIELD_PREFIX}
+ * is set, and other than the resource name) is replaced by this default field.
+ */
+ public static final String DEFAULT_FIELD = "defaultField";
}
Modified: lucene/solr/trunk/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/SolrContentHandler.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/SolrContentHandler.java?rev=815293&r1=815292&r2=815293&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/SolrContentHandler.java (original)
+++ lucene/solr/trunk/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/SolrContentHandler.java Tue Sep 15 12:31:11 2009
@@ -64,7 +64,7 @@
private String contentFieldName = "content";
private String unknownFieldPrefix = "";
-
+ private String defaultField = "";
public SolrContentHandler(Metadata metadata, SolrParams params, IndexSchema schema) {
this(metadata, params, schema, DateUtil.DEFAULT_DATE_FORMATS);
@@ -82,6 +82,7 @@
this.lowerNames = params.getBool(LOWERNAMES, false);
this.captureAttribs = params.getBool(CAPTURE_ATTRIBUTES, false);
this.unknownFieldPrefix = params.get(UNKNOWN_FIELD_PREFIX, "");
+ this.defaultField = params.get(DEFAULT_FIELD, "");
String[] captureFields = params.getParams(CAPTURE_ELEMENTS);
if (captureFields != null && captureFields.length > 0) {
fieldBuilders = new HashMap<String, StringBuilder>();
@@ -155,6 +156,9 @@
if (sf==null && unknownFieldPrefix.length() > 0) {
name = unknownFieldPrefix + name;
sf = schema.getFieldOrNull(name);
+ } else if (sf == null && defaultField.length() > 0 && name.equals(Metadata.RESOURCE_NAME_KEY) == false /*let the fall through below handle this*/){
+ name = defaultField;
+ sf = schema.getFieldOrNull(name);
}
// Arguably we should handle this as a special case. Why? Because unlike basically
Modified: lucene/solr/trunk/contrib/extraction/src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java?rev=815293&r1=815292&r2=815293&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/extraction/src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java (original)
+++ lucene/solr/trunk/contrib/extraction/src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java Tue Sep 15 12:31:11 2009
@@ -134,6 +134,49 @@
}
+ public void testDefaultField() throws Exception {
+ ExtractingRequestHandler handler = (ExtractingRequestHandler) h.getCore().getRequestHandler("/update/extract");
+ assertTrue("handler is null and it shouldn't be", handler != null);
+ try {
+ loadLocal("simple.html",
+ "literal.id","simple2",
+ "lowernames", "true",
+ "captureAttr", "true",
+ //"map.content_type", "abcxyz",
+ "commit", "true" // test immediate commit
+ );
+ assertTrue(false);
+
+ } catch (SolrException e) {
+ //expected: this load is supposed to fail because neither defaultField nor uprefix is specified
+ }
+
+
+ loadLocal("simple.html",
+ "literal.id","simple2",
+ ExtractingParams.DEFAULT_FIELD, "defaultExtr",//test that unmapped fields go to the default field when no uprefix is specified
+ "lowernames", "true",
+ "captureAttr", "true",
+ //"map.content_type", "abcxyz",
+ "commit", "true" // test immediate commit
+ );
+ assertQ(req("id:simple2"), "//*[@numFound='1']");
+ assertQ(req("defaultExtr:http\\://www.apache.org"), "//*[@numFound='1']");
+
+ //Test when both uprefix and default are specified.
+ loadLocal("simple.html",
+ "literal.id","simple2",
+ ExtractingParams.DEFAULT_FIELD, "defaultExtr",//uprefix is also specified below, so it takes precedence over the default field
+ ExtractingParams.UNKNOWN_FIELD_PREFIX, "t_",
+ "lowernames", "true",
+ "captureAttr", "true",
+ "map.a","t_href",
+ //"map.content_type", "abcxyz",
+ "commit", "true" // test immediate commit
+ );
+ assertQ(req("+id:simple2 +t_href:[* TO *]"), "//*[@numFound='1']");
+ }
+
public void testLiterals() throws Exception {
ExtractingRequestHandler handler = (ExtractingRequestHandler) h.getCore().getRequestHandler("/update/extract");
Modified: lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/schema.xml
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/schema.xml?rev=815293&r1=815292&r2=815293&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/schema.xml (original)
+++ lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/schema.xml Tue Sep 15 12:31:11 2009
@@ -405,6 +405,7 @@
<field name="extractionLiteralMV" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="extractionLiteral" type="string" indexed="true" stored="true" multiValued="false"/>
+ <field name="defaultExtr" type="string" indexed="true" stored="false" />
<!-- Dynamic field definitions. If a field name is not found, dynamicFields
will be used if the name matches any of the patterns.