You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by gs...@apache.org on 2009/09/15 14:31:12 UTC

svn commit: r815293 - in /lucene/solr/trunk/contrib/extraction/src: main/java/org/apache/solr/handler/extraction/ test/java/org/apache/solr/handler/ test/resources/solr/conf/

Author: gsingers
Date: Tue Sep 15 12:31:11 2009
New Revision: 815293

URL: http://svn.apache.org/viewvc?rev=815293&view=rev
Log:
SOLR-284: Add defaultField capability

Modified:
    lucene/solr/trunk/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/ExtractingParams.java
    lucene/solr/trunk/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/SolrContentHandler.java
    lucene/solr/trunk/contrib/extraction/src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java
    lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/schema.xml

Modified: lucene/solr/trunk/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/ExtractingParams.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/ExtractingParams.java?rev=815293&r1=815292&r2=815293&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/ExtractingParams.java (original)
+++ lucene/solr/trunk/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/ExtractingParams.java Tue Sep 15 12:31:11 2009
@@ -133,4 +133,10 @@
    * to setup a dynamic field to automatically capture it
    */
   public static final String UNKNOWN_FIELD_PREFIX = "uprefix";
+
+  /**
+   * Optional.  If specified and the name of a potential field does not resolve to a field in the schema,
+   * the default field named by this parameter will be used instead.
+   */
+  public static final String DEFAULT_FIELD = "defaultField";
 }

Modified: lucene/solr/trunk/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/SolrContentHandler.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/SolrContentHandler.java?rev=815293&r1=815292&r2=815293&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/SolrContentHandler.java (original)
+++ lucene/solr/trunk/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/SolrContentHandler.java Tue Sep 15 12:31:11 2009
@@ -64,7 +64,7 @@
   private String contentFieldName = "content";
 
   private String unknownFieldPrefix = "";
-
+  private String defaultField = "";
 
   public SolrContentHandler(Metadata metadata, SolrParams params, IndexSchema schema) {
     this(metadata, params, schema, DateUtil.DEFAULT_DATE_FORMATS);
@@ -82,6 +82,7 @@
     this.lowerNames = params.getBool(LOWERNAMES, false);
     this.captureAttribs = params.getBool(CAPTURE_ATTRIBUTES, false);
     this.unknownFieldPrefix = params.get(UNKNOWN_FIELD_PREFIX, "");
+    this.defaultField = params.get(DEFAULT_FIELD, "");
     String[] captureFields = params.getParams(CAPTURE_ELEMENTS);
     if (captureFields != null && captureFields.length > 0) {
       fieldBuilders = new HashMap<String, StringBuilder>();
@@ -155,6 +156,9 @@
     if (sf==null && unknownFieldPrefix.length() > 0) {
       name = unknownFieldPrefix + name;
       sf = schema.getFieldOrNull(name);
+    } else if (sf == null && defaultField.length() > 0 && name.equals(Metadata.RESOURCE_NAME_KEY) == false /*let the fall through below handle this*/){
+      name = defaultField;
+      sf = schema.getFieldOrNull(name);
     }
 
     // Arguably we should handle this as a special case. Why? Because unlike basically

Modified: lucene/solr/trunk/contrib/extraction/src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java?rev=815293&r1=815292&r2=815293&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/extraction/src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java (original)
+++ lucene/solr/trunk/contrib/extraction/src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java Tue Sep 15 12:31:11 2009
@@ -134,6 +134,49 @@
 
   }
 
+  public void testDefaultField() throws Exception {
+    ExtractingRequestHandler handler = (ExtractingRequestHandler) h.getCore().getRequestHandler("/update/extract");
+    assertTrue("handler is null and it shouldn't be", handler != null);
+    try {
+      loadLocal("simple.html",
+      "literal.id","simple2",
+      "lowernames", "true",
+        "captureAttr", "true",
+        //"map.content_type", "abcxyz",
+        "commit", "true"  // test immediate commit
+      );
+      assertTrue(false);
+
+    } catch (SolrException e) {
+      //do nothing
+    }
+    
+
+    loadLocal("simple.html",
+      "literal.id","simple2",
+      ExtractingParams.DEFAULT_FIELD, "defaultExtr",//test that unmapped fields go to the default field when no uprefix is specified
+      "lowernames", "true",
+      "captureAttr", "true",
+      //"map.content_type", "abcxyz",
+      "commit", "true"  // test immediate commit
+    );
+    assertQ(req("id:simple2"), "//*[@numFound='1']");
+    assertQ(req("defaultExtr:http\\://www.apache.org"), "//*[@numFound='1']");
+
+    //Test when both uprefix and default are specified.
+    loadLocal("simple.html",
+      "literal.id","simple2",
+      ExtractingParams.DEFAULT_FIELD, "defaultExtr",//default field is also specified here, but uprefix (next param) takes precedence for unmapped fields
+            ExtractingParams.UNKNOWN_FIELD_PREFIX, "t_",
+      "lowernames", "true",
+      "captureAttr", "true",
+      "map.a","t_href",
+      //"map.content_type", "abcxyz",
+      "commit", "true"  // test immediate commit
+    );
+    assertQ(req("+id:simple2 +t_href:[* TO *]"), "//*[@numFound='1']");
+  }
+
 
   public void testLiterals() throws Exception {
     ExtractingRequestHandler handler = (ExtractingRequestHandler) h.getCore().getRequestHandler("/update/extract");

Modified: lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/schema.xml
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/schema.xml?rev=815293&r1=815292&r2=815293&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/schema.xml (original)
+++ lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/schema.xml Tue Sep 15 12:31:11 2009
@@ -405,6 +405,7 @@
    <field name="extractionLiteralMV" type="string" indexed="true" stored="true" multiValued="true"/>
    <field name="extractionLiteral" type="string" indexed="true" stored="true" multiValued="false"/>
 
+   <field name="defaultExtr" type="string" indexed="true" stored="false" />
    
    <!-- Dynamic field definitions.  If a field name is not found, dynamicFields
         will be used if the name matches any of the patterns.